Imported Upstream version 1.22.0 (tag: upstream/1.22.0)
author    Hyeongseok Oh <hseok82.oh@samsung.com>
          Wed, 12 Apr 2023 06:42:02 +0000 (15:42 +0900)
committer Hyeongseok Oh <hseok82.oh@samsung.com>
          Wed, 12 Apr 2023 06:42:02 +0000 (15:42 +0900)
1700 files changed:
.ahub/tcchecker-tca/config.yaml
.gitattributes
.github/workflows/check-format.yml
.github/workflows/check-pr-commit.yml
.github/workflows/deploy-github-pages.yml [new file with mode: 0644]
Makefile.template
compiler/CMakeLists.txt
compiler/circle-eval-diff/src/InputDataLoader.cpp
compiler/circle-execution-plan/CMakeLists.txt
compiler/circle-execution-plan/pal/TargetPlatform.h
compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
compiler/circle-execution-plan/src/ExecutionPlanner.cpp
compiler/circle-execution-plan/src/ExecutionPlanner.h
compiler/circle-inspect/driver/Driver.cpp
compiler/circle-inspect/src/Dump.cpp
compiler/circle-inspect/src/Dump.h
compiler/circle-interpreter-test/CMakeLists.txt [new file with mode: 0644]
compiler/circle-interpreter-test/README.md [new file with mode: 0644]
compiler/circle-interpreter-test/requires.cmake [new file with mode: 0644]
compiler/circle-interpreter-test/src/circle-interpreter.test.cpp [new file with mode: 0644]
compiler/circle-interpreter/src/CircleInterpreter.cpp
compiler/circle-mpqsolver/CMakeLists.txt [new file with mode: 0644]
compiler/circle-mpqsolver/README.md [new file with mode: 0644]
compiler/circle-mpqsolver/requires.cmake [new file with mode: 0644]
compiler/circle-mpqsolver/src/CircleMPQSolver.cpp [new file with mode: 0644]
compiler/circle-mpqsolver/src/MPQSolver.cpp [new file with mode: 0644]
compiler/circle-mpqsolver/src/MPQSolver.h [new file with mode: 0644]
compiler/circle-mpqsolver/src/bisection/BisectionSolver.cpp [new file with mode: 0644]
compiler/circle-mpqsolver/src/bisection/BisectionSolver.h [new file with mode: 0644]
compiler/circle-mpqsolver/src/bisection/DepthParameterizer.cpp [new file with mode: 0644]
compiler/circle-mpqsolver/src/bisection/DepthParameterizer.h [new file with mode: 0644]
compiler/circle-mpqsolver/src/bisection/DepthParameterizer.test.cpp [new file with mode: 0644]
compiler/circle-mpqsolver/src/bisection/ErrorApproximator.cpp [new file with mode: 0644]
compiler/circle-mpqsolver/src/bisection/ErrorApproximator.h [new file with mode: 0644]
compiler/circle-mpqsolver/src/bisection/ErrorApproximator.test.cpp [new file with mode: 0644]
compiler/circle-mpqsolver/src/bisection/ErrorMetric.cpp [new file with mode: 0644]
compiler/circle-mpqsolver/src/bisection/ErrorMetric.h [new file with mode: 0644]
compiler/circle-mpqsolver/src/bisection/Evaluator.cpp [new file with mode: 0644]
compiler/circle-mpqsolver/src/bisection/Evaluator.h [new file with mode: 0644]
compiler/circle-mpqsolver/src/bisection/Quantizer.cpp [new file with mode: 0644]
compiler/circle-mpqsolver/src/bisection/Quantizer.h [new file with mode: 0644]
compiler/circle-mpqsolver/src/bisection/Quantizer.test.cpp [new file with mode: 0644]
compiler/circle-mpqsolver/src/bisection/TestHelper.h [new file with mode: 0644]
compiler/circle-mpqsolver/src/bisection/VISQErrorApproximator.cpp [new file with mode: 0644]
compiler/circle-mpqsolver/src/bisection/VISQErrorApproximator.h [new file with mode: 0644]
compiler/circle-operator-test/src/circle-operator.test.cpp
compiler/circle-opselector/CMakeLists.txt
compiler/circle-opselector/driver/Driver.cpp
compiler/circle-opselector/src/Driver.test.cpp [deleted file]
compiler/circle-opselector/src/Driver.test.h [deleted file]
compiler/circle-opselector/src/OpSelector.cpp [new file with mode: 0644]
compiler/circle-opselector/src/OpSelector.h [new file with mode: 0644]
compiler/circle-opselector/src/OpSelector.test.cpp [new file with mode: 0644]
compiler/circle-opselector/src/SelectType.h [new file with mode: 0644]
compiler/circle-part-value-test/CMakeLists.txt
compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt
compiler/circle-quantizer-dredd-recipe-test/README.md
compiler/circle-quantizer-dredd-recipe-test/test.lst
compiler/circle-quantizer/src/CircleQuantizer.cpp
compiler/circle2circle-dredd-recipe-test/README.md
compiler/circle2circle-dredd-recipe-test/test.lst
compiler/circle2circle-dredd-recipe-test/testall.sh
compiler/circle2circle/src/Circle2Circle.cpp
compiler/common-artifacts/CMakeLists.txt
compiler/common-artifacts/exclude.lst
compiler/dalgona-test/.gitignore [new file with mode: 0644]
compiler/dalgona-test/CMakeLists.txt [new file with mode: 0644]
compiler/dalgona-test/GenH5RandomInputs.py [new file with mode: 0644]
compiler/dalgona-test/RandomDataGenerator.py [new file with mode: 0644]
compiler/dalgona-test/SingleOperatorTest.py [new file with mode: 0644]
compiler/dalgona-test/TestSingleOp.sh [new file with mode: 0755]
compiler/dalgona-test/TestUtil.py [new file with mode: 0644]
compiler/dalgona-test/requires.cmake [new file with mode: 0644]
compiler/dalgona-test/test.lst [new file with mode: 0644]
compiler/dalgona/CMakeLists.txt [new file with mode: 0644]
compiler/dalgona/README.md [new file with mode: 0644]
compiler/dalgona/analysis/AnalysisTemplate.py [new file with mode: 0644]
compiler/dalgona/driver/Driver.cpp [new file with mode: 0644]
compiler/dalgona/include/Dalgona.h [new file with mode: 0644]
compiler/dalgona/include/PythonHooks.h [new file with mode: 0644]
compiler/dalgona/requires.cmake [new file with mode: 0644]
compiler/dalgona/src/Dalgona.cpp [new file with mode: 0644]
compiler/dalgona/src/PostOperatorHook.h [new file with mode: 0644]
compiler/dalgona/src/PreOperatorHook.h [new file with mode: 0644]
compiler/dalgona/src/PythonHooks.cpp [new file with mode: 0644]
compiler/dalgona/src/RandomUtils.cpp [new file with mode: 0644]
compiler/dalgona/src/RandomUtils.h [new file with mode: 0644]
compiler/dalgona/src/RandomUtils.test.cpp [new file with mode: 0644]
compiler/dalgona/src/StringUtils.cpp [new file with mode: 0644]
compiler/dalgona/src/StringUtils.h [new file with mode: 0644]
compiler/dalgona/src/StringUtils.test.cpp [new file with mode: 0644]
compiler/dalgona/src/Utils.cpp [new file with mode: 0644]
compiler/dalgona/src/Utils.h [new file with mode: 0644]
compiler/dredd-rule-lib/rule-lib.sh
compiler/loco/include/loco/IR/Graph.h
compiler/loco/src/IR/Graph.test.cpp
compiler/locomotiv/src/Node/TensorBroadcast.cpp
compiler/locomotiv/src/Node/TensorConstantPad.cpp
compiler/luci-compute/CMakeLists.txt [new file with mode: 0644]
compiler/luci-compute/README.md [new file with mode: 0644]
compiler/luci-eval-driver/CMakeLists.txt
compiler/luci-eval-driver/requires.cmake
compiler/luci-eval-driver/src/EvalDriver.cpp
compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
compiler/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h
compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst
compiler/luci-interpreter/pal/mcu/PALBatchToSpaceND.h
compiler/luci-interpreter/src/BuddyMemoryManager.cpp
compiler/luci-interpreter/src/core/KernelParams.h
compiler/luci-interpreter/src/kernels/Abs.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Abs.h [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Abs.test.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/ReduceProd.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/ReduceProd.h [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/ReduceProd.test.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/SVDF.cpp
compiler/luci-interpreter/src/kernels/Tanh.cpp
compiler/luci-interpreter/src/kernels/TestUtils.h
compiler/luci-interpreter/src/kernels/TransposeConv.cpp
compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.h [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.test.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Utils.cpp
compiler/luci-interpreter/src/kernels/Utils.h
compiler/luci-interpreter/src/loader/GraphLoader.cpp
compiler/luci-interpreter/src/loader/KernelBuilder.cpp
compiler/luci-interpreter/src/loader/nodes/Abs.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/ReduceProd.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/UnidirectionalSequenceLSTM.cpp [new file with mode: 0644]
compiler/luci-micro/CMakeLists.txt [deleted file]
compiler/luci-micro/luci-interpreter/CMakeLists.txt [deleted file]
compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h [deleted file]
compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h [deleted file]
compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h [deleted file]
compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h [deleted file]
compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h [deleted file]
compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h [deleted file]
compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h [deleted file]
compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/DataType.h [deleted file]
compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h [deleted file]
compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst [deleted file]
compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h [deleted file]
compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h [deleted file]
compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h [deleted file]
compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h [deleted file]
compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h [deleted file]
compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h [deleted file]
compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h [deleted file]
compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h [deleted file]
compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h [deleted file]
compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake [deleted file]
compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst [deleted file]
compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h [deleted file]
compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake [deleted file]
compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst [deleted file]
compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h [deleted file]
compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h [deleted file]
compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h [deleted file]
compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h [deleted file]
compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h [deleted file]
compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h [deleted file]
compiler/luci-micro/luci-interpreter/pal/mcu/pal.cmake [deleted file]
compiler/luci-micro/luci-interpreter/requires.cmake [deleted file]
compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/CMakeLists.txt [deleted file]
compiler/luci-micro/luci-interpreter/src/Interpreter.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt [deleted file]
compiler/luci-micro/luci-interpreter/src/core/EventNotifier.h [deleted file]
compiler/luci-micro/luci-interpreter/src/core/Kernel.h [deleted file]
compiler/luci-micro/luci-interpreter/src/core/KernelParams.h [deleted file]
compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h [deleted file]
compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h [deleted file]
compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt [deleted file]
compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Gather.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Transpose.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/kernels/Utils.h [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp [deleted file]
compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp [deleted file]
compiler/luci-micro/requires.cmake [deleted file]
compiler/luci-micro/standalone/CMakeLists.txt [deleted file]
compiler/luci-pass-value-test/CMakeLists.txt
compiler/luci-pass-value-test/test.lst
compiler/luci-value-test/CMakeLists.txt
compiler/luci-value-test/luci_eval_verifier.py
compiler/luci-value-test/luci_eval_verifier_ref.py
compiler/luci-value-test/test.lst
compiler/luci/import/src/GraphBuilder.cpp
compiler/luci/import/src/GraphBuilderMultiOutput.cpp
compiler/luci/import/src/Nodes/CircleBidirectionalSequenceLSTM.cpp
compiler/luci/import/src/Nodes/CircleSVDF.cpp
compiler/luci/import/src/Nodes/CircleTransposeConv.cpp
compiler/luci/import/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp
compiler/luci/lang/include/luci/IR/Nodes/CircleUnidirectionalSequenceLSTM.h
compiler/luci/lang/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp
compiler/luci/log/src/Log.cpp
compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp
compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp
compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp
compiler/luci/partition/src/PartitionPModules.cpp
compiler/luci/pass/CMakeLists.txt
compiler/luci/pass/include/luci/CircleOptimizer.h
compiler/luci/pass/include/luci/Pass/FoldFullyConnectedPass.h [new file with mode: 0644]
compiler/luci/pass/include/luci/Pass/ForwardTransposeOpPass.h [new file with mode: 0644]
compiler/luci/pass/include/luci/Pass/FusePReluPass.h [new file with mode: 0644]
compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h
compiler/luci/pass/include/luci/Pass/RemoveDuplicateConstPass.h [new file with mode: 0644]
compiler/luci/pass/include/luci/Pass/UnrollUnidirectionalSequenceLSTMPass.h [new file with mode: 0644]
compiler/luci/pass/src/CircleOptimizer.cpp
compiler/luci/pass/src/CircleQuantizer.cpp
compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp
compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp
compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp
compiler/luci/pass/src/ExpandBroadcastConstPass.test.cpp
compiler/luci/pass/src/FoldDepthwiseConv2DPass.cpp
compiler/luci/pass/src/FoldDepthwiseConv2DPass.test.cpp
compiler/luci/pass/src/FoldFullyConnectedPass.cpp [new file with mode: 0644]
compiler/luci/pass/src/FoldFullyConnectedPass.test.cpp [new file with mode: 0644]
compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp
compiler/luci/pass/src/ForwardTransposeOpPass.cpp [new file with mode: 0644]
compiler/luci/pass/src/ForwardTransposeOpPass.test.cpp [new file with mode: 0644]
compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp
compiler/luci/pass/src/FuseAddWithFullyConnectedPass.test.cpp
compiler/luci/pass/src/FuseBCQPass.cpp
compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp
compiler/luci/pass/src/FusePReluPass.cpp [new file with mode: 0644]
compiler/luci/pass/src/FusePReluPass.test.cpp [new file with mode: 0644]
compiler/luci/pass/src/QuantizationUtils.cpp
compiler/luci/pass/src/QuantizationUtils.h
compiler/luci/pass/src/QuantizeActivation.cpp
compiler/luci/pass/src/QuantizeActivation.h
compiler/luci/pass/src/QuantizeWeights.cpp
compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
compiler/luci/pass/src/QuantizeWithMinMaxPass.test.cpp
compiler/luci/pass/src/QuantizedModelVerifier.h
compiler/luci/pass/src/QuantizedModelVerifier.test.cpp
compiler/luci/pass/src/RemoveDuplicateConstPass.cpp [new file with mode: 0644]
compiler/luci/pass/src/RemoveDuplicateConstPass.test.cpp [new file with mode: 0644]
compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.cpp
compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.test.cpp
compiler/luci/pass/src/ResolveCustomOpMatMulPass.cpp
compiler/luci/pass/src/ResolveCustomOpMaxPoolWithArgmaxPass.cpp
compiler/luci/pass/src/ResolveCustomOpSplitVPass.cpp
compiler/luci/pass/src/UnrollUnidirectionalSequenceLSTMPass.cpp [new file with mode: 0644]
compiler/luci/pass/src/UnrollUnidirectionalSequenceLSTMPass.test.cpp [new file with mode: 0644]
compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h
compiler/luci/pass/src/VerifyQuantizedNodeType.cpp
compiler/luci/pass/src/helpers/NodeFiller.h
compiler/luci/pass/src/helpers/SparsityFormatConverter.h
compiler/luci/pass/src/helpers/Strings.cpp
compiler/luci/pass/src/helpers/Strings.h
compiler/luci/pass/src/helpers/Strings.test.cpp
compiler/luci/pass/src/test/TestIOGraph.h [deleted file]
compiler/luci/pass/src/test/TestIOGraph.test.cpp [deleted file]
compiler/luci/requires.cmake
compiler/luci/service/src/CircleShapeInferenceRule.cpp
compiler/luci/service/src/CircleTypeInferenceRule.cpp
compiler/luci/service/src/Nodes/CircleFullyConnected.cpp
compiler/luci/service/src/Validate.cpp
compiler/luci/tests/test.lst
compiler/mio-tflite280/CMakeLists.txt
compiler/nnc/passes/optimizations/ConstantFoldTranspose.cpp
compiler/nnc/tests/soft_backend/test_main.def
compiler/one-cmds/CMakeLists.txt
compiler/one-cmds/dummy-driver/CMakeLists.txt
compiler/one-cmds/dummy-driver/src/dummyEnv-compile.cpp [new file with mode: 0644]
compiler/one-cmds/how-to-prepare-virtualenv.txt
compiler/one-cmds/how-to-use-one-commands.txt
compiler/one-cmds/one-build
compiler/one-cmds/one-codegen
compiler/one-cmds/one-import
compiler/one-cmds/one-import-bcq
compiler/one-cmds/one-import-onnx
compiler/one-cmds/one-import-pytorch
compiler/one-cmds/one-import-tf
compiler/one-cmds/one-import-tflite
compiler/one-cmds/one-infer
compiler/one-cmds/one-init
compiler/one-cmds/one-optimize
compiler/one-cmds/one-pack
compiler/one-cmds/one-partition
compiler/one-cmds/one-prepare-venv
compiler/one-cmds/one-prepare-venv.u2204 [new file with mode: 0644]
compiler/one-cmds/one-profile
compiler/one-cmds/one-quantize
compiler/one-cmds/onecc
compiler/one-cmds/onecc.template.cfg
compiler/one-cmds/onelib/CfgRunner.py
compiler/one-cmds/onelib/Command.py [new file with mode: 0644]
compiler/one-cmds/onelib/WorkflowRunner.py
compiler/one-cmds/onelib/constant.py
compiler/one-cmds/onelib/export_constant.py [new file with mode: 0644]
compiler/one-cmds/onelib/make_cmd.py
compiler/one-cmds/onelib/utils.py [new file with mode: 0644]
compiler/one-cmds/tests/CMakeLists.txt
compiler/one-cmds/tests/one-build_001.test
compiler/one-cmds/tests/one-build_002.test
compiler/one-cmds/tests/one-build_003.test
compiler/one-cmds/tests/one-build_004.test
compiler/one-cmds/tests/one-build_005.test
compiler/one-cmds/tests/one-build_006.test
compiler/one-cmds/tests/one-build_007.test
compiler/one-cmds/tests/one-build_008.test
compiler/one-cmds/tests/one-build_009.test
compiler/one-cmds/tests/one-build_010.test
compiler/one-cmds/tests/one-build_011.test
compiler/one-cmds/tests/one-build_012.test
compiler/one-cmds/tests/one-build_013.test
compiler/one-cmds/tests/one-build_014.test
compiler/one-cmds/tests/one-build_neg_002.test
compiler/one-cmds/tests/one-build_neg_003.test
compiler/one-cmds/tests/one-build_neg_004.test
compiler/one-cmds/tests/one-build_neg_005.test
compiler/one-cmds/tests/one-build_neg_006.test
compiler/one-cmds/tests/one-build_neg_007.test
compiler/one-cmds/tests/one-build_neg_008.test
compiler/one-cmds/tests/one-build_neg_009.test
compiler/one-cmds/tests/one-codegen_001.test
compiler/one-cmds/tests/one-codegen_002.test
compiler/one-cmds/tests/one-codegen_003.test
compiler/one-cmds/tests/one-codegen_004.test
compiler/one-cmds/tests/one-codegen_neg_001.test
compiler/one-cmds/tests/one-import-bcq_001.test
compiler/one-cmds/tests/one-import-bcq_neg_001.test
compiler/one-cmds/tests/one-import-bcq_neg_002.test
compiler/one-cmds/tests/one-import-bcq_neg_003.test
compiler/one-cmds/tests/one-import-bcq_neg_004.test
compiler/one-cmds/tests/one-import-bcq_neg_005.test
compiler/one-cmds/tests/one-import-bcq_neg_006.test
compiler/one-cmds/tests/one-import-bcq_neg_007.test
compiler/one-cmds/tests/one-import-bcq_neg_008.test
compiler/one-cmds/tests/one-import-bcq_neg_009.test
compiler/one-cmds/tests/one-import-onnx_001.test
compiler/one-cmds/tests/one-import-onnx_002.test
compiler/one-cmds/tests/one-import_001.test
compiler/one-cmds/tests/one-import_002.cfg
compiler/one-cmds/tests/one-import_002.test
compiler/one-cmds/tests/one-import_003.test
compiler/one-cmds/tests/one-import_004.test
compiler/one-cmds/tests/one-import_005.test
compiler/one-cmds/tests/one-import_006.test
compiler/one-cmds/tests/one-import_neg_001.test
compiler/one-cmds/tests/one-import_neg_003.test
compiler/one-cmds/tests/one-import_neg_004.test
compiler/one-cmds/tests/one-import_neg_005.test
compiler/one-cmds/tests/one-import_neg_006.test
compiler/one-cmds/tests/one-import_neg_007.test
compiler/one-cmds/tests/one-import_neg_008.test
compiler/one-cmds/tests/one-import_neg_010.test
compiler/one-cmds/tests/one-infer_001.test
compiler/one-cmds/tests/one-infer_002.test
compiler/one-cmds/tests/one-infer_003.test
compiler/one-cmds/tests/one-infer_004.cfg [new file with mode: 0644]
compiler/one-cmds/tests/one-infer_004.test
compiler/one-cmds/tests/one-infer_005.cfg [deleted file]
compiler/one-cmds/tests/one-infer_005.test
compiler/one-cmds/tests/one-infer_006.test [deleted file]
compiler/one-cmds/tests/one-infer_neg_001.test
compiler/one-cmds/tests/one-infer_neg_002.test
compiler/one-cmds/tests/one-infer_neg_003.test
compiler/one-cmds/tests/one-infer_neg_004.test [deleted file]
compiler/one-cmds/tests/one-infer_neg_005.test [deleted file]
compiler/one-cmds/tests/one-optimize_001.test
compiler/one-cmds/tests/one-optimize_002.test
compiler/one-cmds/tests/one-optimize_003.test [new file with mode: 0644]
compiler/one-cmds/tests/one-optimize_neg_001.test
compiler/one-cmds/tests/one-optimize_neg_002.test
compiler/one-cmds/tests/one-optimize_neg_003.test
compiler/one-cmds/tests/one-pack_001.test
compiler/one-cmds/tests/one-partition_001.test
compiler/one-cmds/tests/one-partition_neg_001.test
compiler/one-cmds/tests/one-profile_001.test
compiler/one-cmds/tests/one-profile_002.test
compiler/one-cmds/tests/one-profile_003.test
compiler/one-cmds/tests/one-profile_004.test
compiler/one-cmds/tests/one-profile_neg_001.test
compiler/one-cmds/tests/one-quantize_001.test
compiler/one-cmds/tests/one-quantize_002.test
compiler/one-cmds/tests/one-quantize_003.test
compiler/one-cmds/tests/one-quantize_004.test
compiler/one-cmds/tests/one-quantize_005.test
compiler/one-cmds/tests/one-quantize_006.test
compiler/one-cmds/tests/one-quantize_007.test
compiler/one-cmds/tests/one-quantize_008.test
compiler/one-cmds/tests/one-quantize_009.test
compiler/one-cmds/tests/one-quantize_010.test
compiler/one-cmds/tests/one-quantize_011.test
compiler/one-cmds/tests/one-quantize_012.test
compiler/one-cmds/tests/one-quantize_013.test
compiler/one-cmds/tests/one-quantize_014.test
compiler/one-cmds/tests/one-quantize_015.test
compiler/one-cmds/tests/one-quantize_016.test [new file with mode: 0644]
compiler/one-cmds/tests/one-quantize_neg_001.test
compiler/one-cmds/tests/one-quantize_neg_002.test
compiler/one-cmds/tests/one-quantize_neg_003.test
compiler/one-cmds/tests/one-quantize_neg_004.test
compiler/one-cmds/tests/one-quantize_neg_005.test
compiler/one-cmds/tests/one-quantize_neg_006.test
compiler/one-cmds/tests/one-quantize_neg_007.test
compiler/one-cmds/tests/one-quantize_neg_008.test
compiler/one-cmds/tests/one-quantize_neg_009.test
compiler/one-cmds/tests/one-quantize_neg_010.test
compiler/one-cmds/tests/one-quantize_neg_011.test
compiler/one-cmds/tests/one-quantize_neg_012.test
compiler/one-cmds/tests/one-quantize_neg_013.test
compiler/one-cmds/tests/one-quantize_neg_014.test
compiler/one-cmds/tests/one-quantize_neg_015.test
compiler/one-cmds/tests/one-quantize_neg_016.test
compiler/one-cmds/tests/one-quantize_neg_017.test
compiler/one-cmds/tests/one-quantize_neg_018.test
compiler/one-cmds/tests/one-quantize_neg_019.test
compiler/one-cmds/tests/one-quantize_neg_020.test
compiler/one-cmds/tests/one-quantize_neg_021.test [new file with mode: 0644]
compiler/one-cmds/tests/onecc_001.cfg
compiler/one-cmds/tests/onecc_001.test
compiler/one-cmds/tests/onecc_002.cfg
compiler/one-cmds/tests/onecc_002.test
compiler/one-cmds/tests/onecc_003.cfg
compiler/one-cmds/tests/onecc_003.test
compiler/one-cmds/tests/onecc_004.cfg
compiler/one-cmds/tests/onecc_004.test
compiler/one-cmds/tests/onecc_005.cfg
compiler/one-cmds/tests/onecc_005.test
compiler/one-cmds/tests/onecc_006.cfg
compiler/one-cmds/tests/onecc_006.test
compiler/one-cmds/tests/onecc_007.cfg
compiler/one-cmds/tests/onecc_007.test
compiler/one-cmds/tests/onecc_008.test
compiler/one-cmds/tests/onecc_009.test
compiler/one-cmds/tests/onecc_010.test
compiler/one-cmds/tests/onecc_011.test
compiler/one-cmds/tests/onecc_012.cfg
compiler/one-cmds/tests/onecc_012.test
compiler/one-cmds/tests/onecc_013.cfg
compiler/one-cmds/tests/onecc_013.test
compiler/one-cmds/tests/onecc_014.cfg
compiler/one-cmds/tests/onecc_014.test
compiler/one-cmds/tests/onecc_015.test
compiler/one-cmds/tests/onecc_016.test
compiler/one-cmds/tests/onecc_017.test
compiler/one-cmds/tests/onecc_018.test
compiler/one-cmds/tests/onecc_019.test
compiler/one-cmds/tests/onecc_020.test
compiler/one-cmds/tests/onecc_021.test
compiler/one-cmds/tests/onecc_022.test
compiler/one-cmds/tests/onecc_023.test
compiler/one-cmds/tests/onecc_024.cfg
compiler/one-cmds/tests/onecc_024.test
compiler/one-cmds/tests/onecc_025.cfg
compiler/one-cmds/tests/onecc_025.test
compiler/one-cmds/tests/onecc_026.test
compiler/one-cmds/tests/onecc_027.cfg
compiler/one-cmds/tests/onecc_027.test
compiler/one-cmds/tests/onecc_028.test
compiler/one-cmds/tests/onecc_028.workflow.json
compiler/one-cmds/tests/onecc_029.test
compiler/one-cmds/tests/onecc_029.workflow.json
compiler/one-cmds/tests/onecc_030.test
compiler/one-cmds/tests/onecc_030.workflow.json
compiler/one-cmds/tests/onecc_031.test
compiler/one-cmds/tests/onecc_031.workflow.json
compiler/one-cmds/tests/onecc_032.test
compiler/one-cmds/tests/onecc_032.workflow.json
compiler/one-cmds/tests/onecc_033.test
compiler/one-cmds/tests/onecc_033.workflow.json
compiler/one-cmds/tests/onecc_034.test
compiler/one-cmds/tests/onecc_035.test
compiler/one-cmds/tests/onecc_036.test
compiler/one-cmds/tests/onecc_037.test
compiler/one-cmds/tests/onecc_037.workflow.json
compiler/one-cmds/tests/onecc_038.test
compiler/one-cmds/tests/onecc_038.workflow.json
compiler/one-cmds/tests/onecc_039.test
compiler/one-cmds/tests/onecc_040.cfg
compiler/one-cmds/tests/onecc_040.test
compiler/one-cmds/tests/onecc_041.test
compiler/one-cmds/tests/onecc_041.workflow.json
compiler/one-cmds/tests/onecc_042.cfg [new file with mode: 0644]
compiler/one-cmds/tests/onecc_042.test [new file with mode: 0644]
compiler/one-cmds/tests/onecc_043.cfg [new file with mode: 0644]
compiler/one-cmds/tests/onecc_043.test [new file with mode: 0644]
compiler/one-cmds/tests/onecc_044.cfg [new file with mode: 0644]
compiler/one-cmds/tests/onecc_044.test [new file with mode: 0644]
compiler/one-cmds/tests/onecc_045.cfg [new file with mode: 0644]
compiler/one-cmds/tests/onecc_045.test [new file with mode: 0644]
compiler/one-cmds/tests/onecc_neg_001.test
compiler/one-cmds/tests/onecc_neg_002.cfg
compiler/one-cmds/tests/onecc_neg_002.test
compiler/one-cmds/tests/onecc_neg_003.cfg
compiler/one-cmds/tests/onecc_neg_003.test
compiler/one-cmds/tests/onecc_neg_004.cfg
compiler/one-cmds/tests/onecc_neg_004.test
compiler/one-cmds/tests/onecc_neg_005.test
compiler/one-cmds/tests/onecc_neg_006.test
compiler/one-cmds/tests/onecc_neg_007.test
compiler/one-cmds/tests/onecc_neg_008.test
compiler/one-cmds/tests/onecc_neg_009.test
compiler/one-cmds/tests/onecc_neg_010.test
compiler/one-cmds/tests/onecc_neg_011.test
compiler/one-cmds/tests/onecc_neg_012.cfg
compiler/one-cmds/tests/onecc_neg_012.test
compiler/one-cmds/tests/onecc_neg_013.test
compiler/one-cmds/tests/onecc_neg_014.test
compiler/one-cmds/tests/onecc_neg_015.test
compiler/one-cmds/tests/onecc_neg_016.test
compiler/one-cmds/tests/onecc_neg_017.test
compiler/one-cmds/tests/onecc_neg_018.test
compiler/one-cmds/tests/onecc_neg_018.workflow.json
compiler/one-cmds/tests/onecc_neg_019.test
compiler/one-cmds/tests/onecc_neg_019.workflow.json
compiler/one-cmds/tests/onecc_neg_020.test
compiler/one-cmds/tests/onecc_neg_020.workflow.json
compiler/one-cmds/tests/onecc_neg_021.test
compiler/one-cmds/tests/onecc_neg_021.workflow.json
compiler/one-cmds/tests/onecc_neg_022.test
compiler/one-cmds/tests/onecc_neg_022.workflow.json
compiler/one-cmds/tests/onecc_neg_023.test
compiler/one-cmds/tests/onecc_neg_023.workflow.json
compiler/one-cmds/tests/onecc_neg_024.cfg [new file with mode: 0644]
compiler/one-cmds/tests/onecc_neg_024.test [new file with mode: 0644]
compiler/one-cmds/tests/onecc_neg_025.cfg [new file with mode: 0644]
compiler/one-cmds/tests/onecc_neg_025.test [new file with mode: 0644]
compiler/one-cmds/tests/onecc_neg_026.cfg [new file with mode: 0644]
compiler/one-cmds/tests/onecc_neg_026.test [new file with mode: 0644]
compiler/one-cmds/tests/prepare_test_materials.sh
compiler/one-cmds/tests/rawdata2hdf5_001.test
compiler/one-cmds/tests/rawdata2hdf5_neg_001.test
compiler/one-cmds/tests/rawdata2hdf5_neg_002.test
compiler/one-cmds/tests/rawdata2hdf5_neg_003.test
compiler/one-cmds/tests/rawdata2hdf5_neg_004.test
compiler/one-cmds/utils.py [deleted file]
compiler/onecc-docker/README.md [new file with mode: 0644]
compiler/onecc-docker/debian/changelog [new file with mode: 0644]
compiler/onecc-docker/debian/compat [new file with mode: 0644]
compiler/onecc-docker/debian/control [new file with mode: 0644]
compiler/onecc-docker/debian/copyright [new file with mode: 0644]
compiler/onecc-docker/debian/onecc-docker.install [new file with mode: 0644]
compiler/onecc-docker/debian/onecc-docker.links [new file with mode: 0644]
compiler/onecc-docker/debian/rules [new file with mode: 0644]
compiler/onecc-docker/docker/Dockerfile [new file with mode: 0644]
compiler/onecc-docker/onecc-docker [new file with mode: 0644]
compiler/pics/CMakeLists.txt [new file with mode: 0644]
compiler/pics/README.md [new file with mode: 0644]
compiler/pota-quantization-value-test/CMakeLists.txt
compiler/pota-quantization-value-test/requires.cmake
compiler/pota-quantization-value-test/test_parallel_record_minmax.sh [new file with mode: 0755]
compiler/record-minmax-conversion-test/CMakeLists.txt
compiler/record-minmax-thread-safety-test/CMakeLists.txt [new file with mode: 0644]
compiler/record-minmax-thread-safety-test/gen_h5_random_inputs.py [new file with mode: 0644]
compiler/record-minmax-thread-safety-test/requires.cmake [new file with mode: 0644]
compiler/record-minmax-thread-safety-test/test.lst [new file with mode: 0644]
compiler/record-minmax-thread-safety-test/testall.sh [new file with mode: 0755]
compiler/record-minmax/CMakeLists.txt
compiler/record-minmax/driver/Driver.cpp
compiler/record-minmax/include/MinMaxObserver.h
compiler/record-minmax/include/RecordFunction.h
compiler/record-minmax/include/RecordMinMax.h
compiler/record-minmax/src/RecordMinMax.cpp
compiler/souschef/src/Gaussian.cpp
compiler/tf2circle-value-pbtxt-remote-test/CMakeLists.txt
compiler/tf2circle-value-pbtxt-remote-test/testall.sh
compiler/tf2circle/src/CustomopConfLoader.cpp
compiler/tf2tflite/src/CustomopConfLoader.cpp
compiler/tf2tfliteV2-conversion-test/CMakeLists.txt
compiler/tflite2circle/src/DataLookup.cpp
compiler/tflite2circle/src/DataLookup.h
compiler/tflite2circle/src/TFLOperator.lst
compiler/vconone/CMakeLists.txt
compiler/visq-unittest/CMakeLists.txt [new file with mode: 0644]
compiler/visq-unittest/README.md [new file with mode: 0644]
compiler/visq-unittest/requires.cmake [new file with mode: 0644]
compiler/visq-unittest/test/__init__.py [new file with mode: 0644]
compiler/visq-unittest/test/testDotBuilder.py [new file with mode: 0644]
compiler/visq-unittest/test/testPalette.py [new file with mode: 0644]
compiler/visq-unittest/test/testQErrorComputer.py [new file with mode: 0644]
compiler/visq-unittest/test/testUtil.py [new file with mode: 0644]
compiler/visq/CMakeLists.txt [new file with mode: 0644]
compiler/visq/README.md [new file with mode: 0644]
compiler/visq/requires.cmake [new file with mode: 0644]
compiler/visq/visq [new file with mode: 0644]
compiler/visq/visqlib/DotBuilder.py [new file with mode: 0644]
compiler/visq/visqlib/DumpFP32FM.py [new file with mode: 0644]
compiler/visq/visqlib/DumpFakeQuantFM.py [new file with mode: 0644]
compiler/visq/visqlib/Palette.py [new file with mode: 0644]
compiler/visq/visqlib/QErrorComputer.py [new file with mode: 0644]
compiler/visq/visqlib/Util.py [new file with mode: 0644]
compute/cker/CMakeLists.txt
compute/cker/include/cker/operation/Dequantize.h
compute/cker/include/cker/operation/Logistic.h
compute/cker/include/cker/operation/Quantize.h
compute/cker/include/cker/operation/Reduce.h
compute/cker/include/cker/operation/Round.h
docs/conf.py
docs/howto/how-to-build-compiler.md
docs/howto/how-to-build-runtime-tizen-gbs-rpi4.md
docs/howto/how-to-build-runtime.md
docs/howto/how-to-contribute.md
docs/howto/how-to-cross-build-runtime-for-aarch64.md
docs/howto/how-to-cross-build-runtime-for-arm.md
docs/howto/how-to-use-specific-backend.md
docs/release/1.22/index.rst [new file with mode: 0644]
docs/release/1.22/release-note-1.22.0.md [new file with mode: 0644]
infra/cmake/modules/ExternalSourceTools.cmake
infra/cmake/modules/IdentifyPlatform.cmake
infra/cmake/packages/CMSIS-NN-4.0.0/CMSIS-NNConfig.cmake [new file with mode: 0644]
infra/cmake/packages/CMSIS-NN-4.0.0/CMSIS-NNConfigVersion.cmake [new file with mode: 0644]
infra/cmake/packages/GTestConfig.cmake
infra/cmake/packages/TensorFlowGpuConfig.cmake [deleted file]
infra/cmake/packages/TensorFlowGpuSource/patch_for_gpu_cl_build.patch [deleted file]
infra/cmake/packages/TensorFlowGpuSourceConfig.cmake [deleted file]
infra/cmake/packages/TensorFlowLiteGpu/CMakeLists.txt [deleted file]
infra/command/format
infra/command/gen-coverage-report
infra/debian/compiler/changelog
infra/debian/compiler/one-compiler-dev.install
infra/debian/compiler/one-compiler.install
infra/debian/compiler/rules
infra/debian/runtime/rules
infra/docker/bionic/Dockerfile
infra/docker/focal/Dockerfile
infra/docker/jammy/Dockerfile [new file with mode: 0644]
infra/doxygen/Doxyfile
infra/nncc/CMakeLists.txt
infra/nncc/Makefile.arm32
infra/nncc/cmake/buildtool/config/config_armv7l-linux.cmake
infra/nncc/config/docker.configuration
infra/nnfw/CMakeLists.txt
infra/nnfw/cmake/CfgOptionFlags.cmake
infra/nnfw/cmake/options/options_aarch64-android.cmake
infra/nnfw/cmake/options/options_aarch64-tizen.cmake
infra/nnfw/cmake/options/options_armv7hl-tizen.cmake
infra/nnfw/cmake/options/options_armv7l-linux.cmake
infra/nnfw/cmake/options/options_armv7l-tizen.cmake
infra/nnfw/cmake/options/options_x86_64-tizen.cmake
infra/nnfw/cmake/packages/ARMComputeConfig.cmake
infra/nnfw/cmake/packages/EigenConfig.cmake
infra/nnfw/cmake/packages/GObject2.0Config.cmake [new file with mode: 0644]
infra/nnfw/cmake/packages/GTestConfig.cmake
infra/nnfw/cmake/packages/Gio2.0Config.cmake [new file with mode: 0644]
infra/nnfw/cmake/packages/Giounix2.0Config.cmake [new file with mode: 0644]
infra/nnfw/cmake/packages/TensorFlowEigen-1.13.1/TensorFlowEigenConfig.cmake [deleted file]
infra/nnfw/cmake/packages/TensorFlowEigen-1.13.1/TensorFlowEigenConfigVersion.cmake [deleted file]
infra/nnfw/cmake/packages/TensorFlowGpuConfig.cmake [new file with mode: 0644]
infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt [deleted file]
infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfig.cmake [deleted file]
infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfigVersion.cmake [deleted file]
infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLite/CMakeLists.txt
infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfig.cmake
infra/nnfw/cmake/packages/TensorFlowLiteGpu/CMakeLists.txt [new file with mode: 0644]
infra/nnfw/command/prepare-model [new file with mode: 0644]
infra/nnfw/config/gbs.conf
infra/onert-micro/CMakeLists.txt [new file with mode: 0644]
infra/onert-micro/cmake/ApplyCompileFlags.cmake [new file with mode: 0644]
infra/onert-micro/cmake/CfgOptionFlags.cmake [new file with mode: 0644]
infra/onert-micro/cmake/buildtool/config/arm-none-eabi-gcc.cmake [new file with mode: 0644]
infra/onert-micro/cmake/buildtool/config/config_linux.cmake [new file with mode: 0644]
infra/onert-micro/cmake/buildtool/config/config_x86_64-linux.cmake [new file with mode: 0644]
infra/onert-micro/cmake/options/options_armv7-r-generic.cmake [new file with mode: 0644]
infra/onert-micro/cmake/options/options_armv7em-generic.cmake [new file with mode: 0644]
infra/onert-micro/cmake/options/options_armv8-m-generic.cmake [new file with mode: 0644]
infra/onert-micro/cmake/options/options_x86_64-linux.cmake [new file with mode: 0644]
infra/onert-micro/utils.cmake [new file with mode: 0644]
infra/packaging/build
infra/packaging/preset/20220323
infra/packaging/preset/20220323_windows
infra/packaging/preset/20221125 [new file with mode: 0644]
infra/packaging/preset/20221125_windows [new file with mode: 0644]
infra/packaging/res/tf2nnpkg.20221125 [new file with mode: 0644]
infra/scripts/build_android_runtime_release.sh [deleted file]
infra/scripts/common.sh
infra/scripts/compiler_modules.sh
infra/scripts/docker_build_cross_aarch64_runtime.sh [deleted file]
infra/scripts/docker_build_cross_arm_runtime.sh [deleted file]
infra/scripts/docker_build_cross_arm_runtime_release.sh [deleted file]
infra/scripts/docker_build_cross_coverage.sh [deleted file]
infra/scripts/docker_build_test_x64.sh [deleted file]
infra/scripts/docker_build_tizen_cross.sh [deleted file]
infra/scripts/docker_build_tizen_gbs.sh [deleted file]
infra/scripts/docker_collect_nnpkg_resources.sh
infra/scripts/docker_coverage_report.sh [deleted file]
infra/scripts/test_arm_nnpkg.sh
infra/scripts/test_coverage.sh
infra/scripts/test_ubuntu_npud.sh [new file with mode: 0755]
infra/scripts/test_ubuntu_runtime.sh
infra/scripts/test_ubuntu_runtime_mixed.sh
infra/scripts/tizen_xu4_test.sh
onert-micro/CMakeLists.txt [new file with mode: 0644]
onert-micro/README.md [moved from compiler/luci-micro/README.md with 100% similarity]
onert-micro/eval-driver/CMakeLists.txt [new file with mode: 0644]
onert-micro/eval-driver/Driver.cpp [new file with mode: 0644]
onert-micro/helpers/GenerateKernelsListHelper.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/CMakeLists.txt [new file with mode: 0644]
onert-micro/luci-interpreter/README.md [moved from compiler/luci-micro/luci-interpreter/README.md with 100% similarity]
onert-micro/luci-interpreter/include/luci_interpreter/Interpreter.h [new file with mode: 0644]
onert-micro/luci-interpreter/include/luci_interpreter/InterpreterConfigure.h [new file with mode: 0644]
onert-micro/luci-interpreter/include/luci_interpreter/core/DataType.h [new file with mode: 0644]
onert-micro/luci-interpreter/include/luci_interpreter/core/ParamsType.h [new file with mode: 0644]
onert-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h [new file with mode: 0644]
onert-micro/luci-interpreter/include/luci_interpreter/core/reader/CircleMicroReader.h [new file with mode: 0644]
onert-micro/luci-interpreter/include/luci_interpreter/core/reader/CircleMicroReaderHelper.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst [new file with mode: 0644]
onert-micro/luci-interpreter/pal/cmsisnn/PALArgMax.h [moved from compiler/luci-micro/luci-interpreter/pal/mcu/PALArgMax.h with 100% similarity]
onert-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h [moved from compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h with 100% similarity]
onert-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h [moved from compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthToSpace.h with 100% similarity]
onert-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/cmsisnn/PALElu.h [moved from compiler/luci-micro/luci-interpreter/pal/mcu/PALElu.h with 100% similarity]
onert-micro/luci-interpreter/pal/cmsisnn/PALFill.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h [moved from compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h with 100% similarity]
onert-micro/luci-interpreter/pal/cmsisnn/PALL2Normalize.h [moved from compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Normalize.h with 100% similarity]
onert-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h [moved from compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h with 100% similarity]
onert-micro/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h [moved from compiler/luci-micro/luci-interpreter/pal/mcu/PALLeakyRelu.h with 100% similarity]
onert-micro/luci-interpreter/pal/cmsisnn/PALMul.h [moved from compiler/luci-micro/luci-interpreter/pal/mcu/PALMul.h with 100% similarity]
onert-micro/luci-interpreter/pal/cmsisnn/PALNeg.h [moved from compiler/luci-micro/luci-interpreter/pal/mcu/PALNeg.h with 100% similarity]
onert-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h [moved from compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h with 100% similarity]
onert-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h [moved from compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h with 100% similarity]
onert-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/cmsisnn/PALSoftmax.h [moved from compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSoftmax.h with 100% similarity]
onert-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h [moved from compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h with 100% similarity]
onert-micro/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h [moved from compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToDepth.h with 100% similarity]
onert-micro/luci-interpreter/pal/cmsisnn/PALSub.h [moved from compiler/luci-micro/luci-interpreter/pal/mcu/PALSub.h with 100% similarity]
onert-micro/luci-interpreter/pal/cmsisnn/PALUnidirectionalSequenceLSTM.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/cmsisnn/PALreference_ops.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/cmsisnn/pal.cmake [new file with mode: 0644]
onert-micro/luci-interpreter/pal/linux/KernelsToBuild.lst [new file with mode: 0644]
onert-micro/luci-interpreter/pal/linux/PALArgMax.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALArgMax.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALAveragePool2d.h [moved from compiler/luci-micro/luci-interpreter/pal/mcu/PALAveragePool2d.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALBatchMatMul.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALBatchMatMul.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALBatchToSpaceND.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALBatchToSpaceND.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALConv2d.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALConv2d.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALDepthToSpace.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALDepthToSpace.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALDepthwiseConv2d.h [moved from compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALDequantize.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALDequantize.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALElu.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALElu.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALFill.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/linux/PALFullyConnected.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALFullyConnected.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALGather.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALGather.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALL2Normalize.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALL2Normalize.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALL2Pool2D.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALL2Pool2D.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALLeakyRelu.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALLeakyRelu.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALLocalResponseNormalization.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALLocalResponseNormalization.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALLogSoftmax.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALLogSoftmax.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALMul.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALMul.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALNeg.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALNeg.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALQuantize.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALQuantize.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALRelu.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALRelu.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALRelu6.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALRelu6.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALResizeBilinear.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALResizeBilinear.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALSVDF.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/linux/PALSlice.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALSlice.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALSoftmax.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALSoftmax.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALSpaceToBatchND.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToBatchND.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALSpaceToDepth.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToDepth.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALSplit.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALSplit.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/PALSub.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALSub.h with 100% similarity]
onert-micro/luci-interpreter/pal/linux/pal.cmake [new file with mode: 0644]
onert-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst [new file with mode: 0644]
onert-micro/luci-interpreter/pal/mcu/PALApplyActivationToVector.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/mcu/PALArgMax.h [moved from compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALArgMax.h with 100% similarity]
onert-micro/luci-interpreter/pal/mcu/PALAveragePool2d.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALAveragePool2d.h with 100% similarity]
onert-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/mcu/PALConv2d.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/mcu/PALDepthToSpace.h [moved from compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h with 100% similarity]
onert-micro/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h [moved from compiler/luci-micro/luci-interpreter/pal/linux/PALDepthwiseConv2d.h with 100% similarity]
onert-micro/luci-interpreter/pal/mcu/PALDequantize.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/mcu/PALElu.h [moved from compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALElu.h with 100% similarity]
onert-micro/luci-interpreter/pal/mcu/PALFill.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/mcu/PALFullyConnected.h [moved from compiler/luci-micro/luci-interpreter/pal/mcu/PALFullyConnected.h with 100% similarity]
onert-micro/luci-interpreter/pal/mcu/PALL2Normalize.h [moved from compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Normalize.h with 100% similarity]
onert-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h [moved from compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h with 100% similarity]
onert-micro/luci-interpreter/pal/mcu/PALLeakyRelu.h [moved from compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h with 100% similarity]
onert-micro/luci-interpreter/pal/mcu/PALMul.h [moved from compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALMul.h with 100% similarity]
onert-micro/luci-interpreter/pal/mcu/PALNeg.h [moved from compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALNeg.h with 100% similarity]
onert-micro/luci-interpreter/pal/mcu/PALQuantize.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/mcu/PALSVDF.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/mcu/PALSoftmax.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/mcu/PALSpaceToDepth.h [moved from compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h with 100% similarity]
onert-micro/luci-interpreter/pal/mcu/PALSub.h [moved from compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSub.h with 100% similarity]
onert-micro/luci-interpreter/pal/mcu/PALUnidirectionalSequenceLSTM.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/mcu/PALreference_ops.h [new file with mode: 0644]
onert-micro/luci-interpreter/pal/mcu/pal.cmake [new file with mode: 0644]
onert-micro/luci-interpreter/requires.cmake [moved from tests/tools/tflite_benchmark_model/.FORMATDENY with 100% similarity]
onert-micro/luci-interpreter/src/CMakeLists.txt [new file with mode: 0644]
onert-micro/luci-interpreter/src/Interpreter.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/core/CMakeLists.txt [new file with mode: 0644]
onert-micro/luci-interpreter/src/core/RuntimeGraph.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/core/RuntimeGraph.h [new file with mode: 0644]
onert-micro/luci-interpreter/src/core/RuntimeModule.h [new file with mode: 0644]
onert-micro/luci-interpreter/src/core/reader/CMakeLists.txt [new file with mode: 0644]
onert-micro/luci-interpreter/src/core/reader/CircleMicroReader.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/core/reader/CircleMicroReaderHelper.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Add.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Add.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Add.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Add.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Add.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/ArgMax.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/ArgMax.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/ArgMax.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/AveragePool2D.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/AveragePool2D.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/BatchMatMul.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/BatchMatMul.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/BatchToSpaceND.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/BinaryOpCommon.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/BinaryOpCommon.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Builders.h [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/CMakeLists.txt [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Cast.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Cast.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Cast.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Cast.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Cast.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Concatenation.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Concatenation.test.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Conv2D.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Conv2D.test.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/DepthToSpace.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/DepthToSpace.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Dequantize.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Dequantize.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Dequantize.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Div.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Div.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Div.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Div.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Div.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Elu.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Elu.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Elu.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Elu.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Elu.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Equal.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Equal.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Equal.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Equal.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Equal.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Exp.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Exp.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Exp.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Exp.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Exp.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/ExpandDims.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Fill.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Fill.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Fill.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Fill.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Fill.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Floor.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Floor.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Floor.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Floor.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Floor.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/FloorDiv.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/FloorDiv.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/FullyConnected.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Gather.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Gather.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Gather.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Gather.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Gather.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Greater.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Greater.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Greater.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Greater.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Greater.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/GreaterEqual.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/GreaterEqual.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/If.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/If.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/If.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/If.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/If.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/If.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/InstanceNorm.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/InstanceNorm.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/KernelBuilder.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/KernelBuilder.h [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/L2Normalize.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/L2Normalize.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/L2Pool2D.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/L2Pool2D.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/LeakyRelu.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/LeakyRelu.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Less.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Less.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Less.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Less.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Less.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/LessEqual.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/LessEqual.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/LessEqual.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/LogSoftmax.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/LogSoftmax.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/LogicalAnd.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/LogicalAnd.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/LogicalNot.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/LogicalNot.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/LogicalOr.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/LogicalOr.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Logistic.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Logistic.test.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/MaxPool2D.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Maximum.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Maximum.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Maximum.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Maximum.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Maximum.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Mean.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Mean.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Mean.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Mean.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Mean.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Minimum.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Minimum.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Minimum.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Minimum.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Minimum.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/MirrorPad.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/MirrorPad.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Mul.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Mul.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Mul.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Mul.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Mul.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Neg.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Neg.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Neg.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Neg.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Neg.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/NotEqual.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/NotEqual.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/NotEqual.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/OneHot.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/OneHot.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/OneHot.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/OneHot.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/OneHot.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/PRelu.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/PRelu.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/PRelu.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/PRelu.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/PRelu.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Pack.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Pack.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Pack.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Pack.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Pack.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Pad.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Pad.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Pad.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Pad.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Pad.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/PadV2.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/PadV2.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/PadV2.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/PadV2.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/PadV2.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Pow.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Pow.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Pow.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Pow.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Pow.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Quantize.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Quantize.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Quantize.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Quantize.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Quantize.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Relu.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Relu.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Relu.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Relu.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Relu.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Relu6.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Relu6.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Relu6.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Relu6.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Relu6.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Reshape.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Reshape.test.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/ResizeBilinear.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/ReverseV2.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/ReverseV2.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Rsqrt.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Rsqrt.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/SVDF.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/SVDF.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/SVDF.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/SVDF.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/SVDF.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Shape.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Shape.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Shape.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Shape.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Shape.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Slice.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Slice.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Slice.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Slice.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Slice.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Softmax.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Softmax.test.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/SpaceToBatchND.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/SpaceToDepth.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Split.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Split.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Split.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Split.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Split.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/SplitV.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/SplitV.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/SplitV.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/SplitV.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/SplitV.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Sqrt.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Sqrt.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Sqrt.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Square.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Square.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Square.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Square.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Square.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/SquaredDifference.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/SquaredDifference.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Squeeze.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Squeeze.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Squeeze.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/StridedSlice.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/StridedSlice.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Sub.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Sub.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Sub.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Sub.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Sub.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Tanh.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Tanh.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Tanh.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Tanh.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Tanh.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/TestUtils.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/TestUtils.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Transpose.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Transpose.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Transpose.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Transpose.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Transpose.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/TransposeConv.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/TransposeConv.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.h [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.test.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Unpack.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Unpack.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/Unpack.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Unpack.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/Unpack.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/Utils.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/Utils.h [new file with mode: 0644]
onert-micro/luci-interpreter/src/kernels/While.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/While.cpp with 100% similarity]
onert-micro/luci-interpreter/src/kernels/While.h [moved from compiler/luci-micro/luci-interpreter/src/kernels/While.h with 100% similarity]
onert-micro/luci-interpreter/src/kernels/While.test.cpp [moved from compiler/luci-micro/luci-interpreter/src/kernels/While.test.cpp with 100% similarity]
onert-micro/luci-interpreter/src/loader/CMakeLists.txt [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/GraphLoader.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/GraphLoader.h [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/ModuleLoader.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/ModuleLoader.h [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Add.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Cast.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Div.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Elu.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Equal.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Exp.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Fill.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Floor.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Gather.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Greater.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/If.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Less.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Logistic.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Maximum.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Mean.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Minimum.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Mul.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Neg.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/OneHot.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/PRelu.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Pack.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Pad.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/PadV2.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Pow.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Quantize.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Relu.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Relu6.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Reshape.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/SVDF.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Shape.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Slice.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Softmax.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Split.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/SplitV.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Square.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Sub.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Tanh.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Transpose.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/UnidirectionalSequenceLSTM.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/Unpack.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/loader/nodes/While.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.h [new file with mode: 0644]
onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.test.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/memory_managers/CMakeLists.txt [new file with mode: 0644]
onert-micro/luci-interpreter/src/memory_managers/SimpleMemoryManager.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/memory_managers/SimpleMemoryManager.h [new file with mode: 0644]
onert-micro/luci-interpreter/src/memory_managers/StaticMemoryManager.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/memory_managers/StaticMemoryManager.h [new file with mode: 0644]
onert-micro/luci-interpreter/src/memory_managers/TestMemoryManager.cpp [new file with mode: 0644]
onert-micro/luci-interpreter/src/memory_managers/TestMemoryManager.h [new file with mode: 0644]
onert-micro/requires.cmake [new file with mode: 0644]
onert-micro/standalone/CMakeLists.txt [new file with mode: 0644]
onert-micro/tests/mbed-os/CMakeLists.txt [new file with mode: 0644]
onert-micro/tests/mbed-os/main.cpp [new file with mode: 0644]
onert-micro/tests/mbed-os/mbed-sources.cmake [new file with mode: 0644]
onert-micro/tests/mbed-os/mbed_config.h [new file with mode: 0644]
onert-micro/tests/mbed-os/startup_stm32h743xx.S [new file with mode: 0644]
packaging/EGL_HEADERS.tar.gz [new file with mode: 0644]
packaging/FLATBUFFERS-1.12.tar.gz [deleted file]
packaging/FLATBUFFERS-2.0.tar.gz [new file with mode: 0644]
packaging/OPENGL_HEADERS.tar.gz [new file with mode: 0644]
packaging/TENSORFLOW-2.3.0-EIGEN.tar.gz [deleted file]
packaging/TENSORFLOW-2.8.0-EIGEN.tar.gz [new file with mode: 0644]
packaging/TENSORFLOW-2.8.0-GEMMLOWP.tar.gz [new file with mode: 0644]
packaging/TENSORFLOW-2.8.0.tar.gz [new file with mode: 0644]
packaging/TENSORFLOW_GPU.tar.gz [deleted file]
packaging/VULKAN.tar.gz [new file with mode: 0644]
packaging/XNNPACK.tar.gz
packaging/nnfw.spec
res/PyTorchExamples/examples/PReLUwConv1d/__init__.py [new file with mode: 0644]
res/PyTorchExamples/examples/PReLUwConv2d/__init__.py [new file with mode: 0644]
res/PyTorchExamples/ptem.py
res/TensorFlowLiteRecipes/FullyConnected_008/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/FullyConnected_008/test.reverse [new file with mode: 0644]
res/TensorFlowLiteRecipes/FullyConnected_008/test.rule [new file with mode: 0644]
res/TensorFlowLiteRecipes/FullyConnected_009/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/FullyConnected_009/test.reverse [new file with mode: 0644]
res/TensorFlowLiteRecipes/Net_Conv_PReluGraph_000/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Net_Conv_PReluGraph_000/test.rule [new file with mode: 0644]
res/TensorFlowLiteRecipes/Net_Duplicate_Weights_000/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Net_Duplicate_Weights_000/test.rule [new file with mode: 0644]
res/TensorFlowLiteRecipes/Net_FullyConnected_Add_000/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Net_FullyConnected_Add_000/test.rule [new file with mode: 0644]
res/TensorFlowLiteRecipes/Net_TConv_BN_005/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Net_TConv_BN_005/test.rule [new file with mode: 0644]
res/TensorFlowLiteRecipes/Net_Transpose_Abs_000/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Net_Transpose_Abs_000/test.reverse [new file with mode: 0644]
res/TensorFlowLiteRecipes/Net_Transpose_Add_000/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Net_Transpose_Add_000/test.reverse [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Conv_005/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Conv_005/test.rule [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Conv_006/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Conv_006/test.rule [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_DepthToSpace_000/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_DepthToSpace_000/test.rule [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_SpaceToDepth_000/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_SpaceToDepth_000/test.rule [new file with mode: 0644]
res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_002/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_002/test.reverse [new file with mode: 0644]
res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_003/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_003/test.rule [new file with mode: 0644]
res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_004/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_004/test.rule [new file with mode: 0644]
res/TensorFlowPythonExamples/.gitignore
res/TensorFlowPythonExamples/README.md
res/TensorFlowPythonExamples/examples/GRU_unroll/__init__.py [new file with mode: 0644]
res/TensorFlowPythonExamples/examples/LSTM_batsize/__init__.py [new file with mode: 0644]
res/TensorFlowPythonExamples/examples/LSTM_retseq/__init__.py [new file with mode: 0644]
res/TensorFlowPythonExamples/examples/LSTM_unroll/__init__.py [new file with mode: 0644]
res/TensorFlowPythonExamples/examples/RNN_GRUCell_unroll/__init__.py [new file with mode: 0644]
res/TensorFlowPythonExamples/examples/RNN_LSTMCell_unroll/__init__.py [new file with mode: 0644]
res/TensorFlowPythonExamples/examples/SimpleRNN_unroll/__init__.py [new file with mode: 0644]
res/TensorFlowPythonExamples/tfpem.py
runtime/contrib/android/api/build.gradle
runtime/contrib/android_tflite/CMakeLists.txt [deleted file]
runtime/contrib/android_tflite/builtin_ops_jni.cc [deleted file]
runtime/contrib/labs/opencl_test/src/opencl_test.cc
runtime/libs/benchmark/include/benchmark/Result.h
runtime/libs/benchmark/src/MemoryPoller.cpp
runtime/libs/benchmark/src/Phases.cpp
runtime/libs/misc/CMakeLists.txt
runtime/libs/ndarray/CMakeLists.txt
runtime/libs/profiling/src/profiling/time.cpp
runtime/libs/tflite/CMakeLists.txt
runtime/libs/tflite/include/tflite/CopyInputInitializer.h [deleted file]
runtime/libs/tflite/include/tflite/Diff.h
runtime/libs/tflite/include/tflite/FeatureView.h [deleted file]
runtime/libs/tflite/include/tflite/InterpreterSession.h
runtime/libs/tflite/include/tflite/NNAPISession.h [deleted file]
runtime/libs/tflite/include/tflite/OutputIndex.h [deleted file]
runtime/libs/tflite/include/tflite/OutputResetter.h [deleted file]
runtime/libs/tflite/include/tflite/Quantization.h [deleted file]
runtime/libs/tflite/include/tflite/RandomInputInitializer.h
runtime/libs/tflite/include/tflite/RandomTestRunner.h [deleted file]
runtime/libs/tflite/include/tflite/Session.h
runtime/libs/tflite/include/tflite/TensorLogger.h [deleted file]
runtime/libs/tflite/include/tflite/TensorUtils.h [deleted file]
runtime/libs/tflite/include/tflite/TensorView.h
runtime/libs/tflite/include/tflite/interp/Builder.h [deleted file]
runtime/libs/tflite/include/tflite/interp/FlatBufferBuilder.h [deleted file]
runtime/libs/tflite/include/tflite/interp/FunctionBuilder.h [deleted file]
runtime/libs/tflite/src/CopyInputInitializer.cpp [deleted file]
runtime/libs/tflite/src/Diff.cpp
runtime/libs/tflite/src/FeatureView.cpp [deleted file]
runtime/libs/tflite/src/OutputResetter.cpp [deleted file]
runtime/libs/tflite/src/Quantization.cpp [deleted file]
runtime/libs/tflite/src/RandomInputInitializer.cpp
runtime/libs/tflite/src/RandomTestRunner.cpp [deleted file]
runtime/libs/tflite/src/interp/FlatBufferBuilder.cpp [deleted file]
runtime/libs/tflite/src/interp/FunctionBuilder.cpp [deleted file]
runtime/onert/api/include/nnfw_version.h
runtime/onert/api/src/nnfw_api_internal.cc
runtime/onert/api/src/nnfw_api_internal.h
runtime/onert/backend/acl_cl/KernelGenerator.cc
runtime/onert/backend/acl_common/AclTensorBuilder.h
runtime/onert/backend/cl_common/include/cl_common/BackendContext.h
runtime/onert/backend/cpu/BackendContext.cc
runtime/onert/backend/cpu/KernelGenerator.cc
runtime/onert/backend/gpu_cl/Backend.h
runtime/onert/backend/gpu_cl/BackendContext.cc
runtime/onert/backend/gpu_cl/BackendContext.h
runtime/onert/backend/gpu_cl/CMakeLists.txt
runtime/onert/backend/gpu_cl/ClFunction.h
runtime/onert/backend/gpu_cl/Config.h
runtime/onert/backend/gpu_cl/KernelGenerator.cc
runtime/onert/backend/gpu_cl/KernelGenerator.h
runtime/onert/backend/gpu_cl/MemoryManager.h
runtime/onert/backend/gpu_cl/TensorBuilder.cc
runtime/onert/backend/gpu_cl/TensorBuilder.h
runtime/onert/backend/gpu_cl/TensorBuilderHelper.h [deleted file]
runtime/onert/backend/gpu_cl/TensorManager.cc
runtime/onert/backend/gpu_cl/TensorManager.h
runtime/onert/backend/gpu_cl/TensorRegistry.h
runtime/onert/backend/gpu_cl/Utils.h [new file with mode: 0644]
runtime/onert/backend/gpu_cl/ex/InferenceContextEx.h [deleted file]
runtime/onert/backend/gpu_cl/operand/CLTensor.cc
runtime/onert/backend/gpu_cl/operand/CLTensor.h
runtime/onert/backend/gpu_cl/operand/ICLTensor.cc
runtime/onert/backend/gpu_cl/operand/ICLTensor.h
runtime/onert/backend/ruy/BackendContext.cc
runtime/onert/backend/trix/BackendContext.cc
runtime/onert/backend/trix/BatchThreadPool.cc [new file with mode: 0644]
runtime/onert/backend/trix/BatchThreadPool.h [new file with mode: 0644]
runtime/onert/backend/trix/Convert.cc [new file with mode: 0644]
runtime/onert/backend/trix/Convert.h [new file with mode: 0644]
runtime/onert/backend/trix/DevContext.cc [new file with mode: 0644]
runtime/onert/backend/trix/DevContext.h
runtime/onert/backend/trix/KernelGenerator.cc
runtime/onert/backend/trix/ops/BulkLayer.cc
runtime/onert/backend/trix/ops/BulkLayer.h
runtime/onert/backend/xnnpack/BackendContext.cc
runtime/onert/core/CMakeLists.txt
runtime/onert/core/include/backend/basic/BackendContextHelpers.h
runtime/onert/core/include/compiler/Compiler.h
runtime/onert/core/include/compiler/CompilerFactory.h [new file with mode: 0644]
runtime/onert/core/include/compiler/CompilerOptions.h [new file with mode: 0644]
runtime/onert/core/include/compiler/ICompiler.h [new file with mode: 0644]
runtime/onert/core/include/compiler/LoweredGraph.h
runtime/onert/core/include/compiler/StaticShapeInferer.h
runtime/onert/core/include/exec/Execution.h
runtime/onert/core/include/exec/Executors.h [deleted file]
runtime/onert/core/include/exec/FunctionSequence.h
runtime/onert/core/include/exec/IExecutor.h
runtime/onert/core/include/exec/IExecutors.h [new file with mode: 0644]
runtime/onert/core/include/ir/Graph.h
runtime/onert/core/include/ir/Index.h
runtime/onert/core/include/ir/NNPkg.h
runtime/onert/core/include/ir/OperandIndexSequence.h
runtime/onert/core/include/ir/Shape.h
runtime/onert/core/include/util/Config.lst
runtime/onert/core/include/util/Index.h
runtime/onert/core/include/util/ObjectManager.h
runtime/onert/core/include/util/Utils.h
runtime/onert/core/src/backend/basic/MemoryManager.cc
runtime/onert/core/src/backend/basic/MemoryPlanner.cc
runtime/onert/core/src/backend/basic/StaticTensorManager.cc
runtime/onert/core/src/backend/builtin/BackendContext.cc
runtime/onert/core/src/backend/builtin/IOTensor.h
runtime/onert/core/src/backend/builtin/KernelGenerator.cc
runtime/onert/core/src/backend/builtin/KernelGenerator.h
runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc
runtime/onert/core/src/backend/builtin/kernel/IfLayer.h
runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc
runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h
runtime/onert/core/src/compiler/Compiler.cc
runtime/onert/core/src/compiler/CompilerFactory.cc [new file with mode: 0644]
runtime/onert/core/src/compiler/CompilerOptions.cc [new file with mode: 0644]
runtime/onert/core/src/compiler/ExecutorFactory.cc
runtime/onert/core/src/compiler/ExecutorFactory.h
runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
runtime/onert/core/src/compiler/HEScheduler.cc
runtime/onert/core/src/compiler/HEScheduler.test.cc
runtime/onert/core/src/compiler/LoweredGraph.cc
runtime/onert/core/src/compiler/ManualScheduler.cc
runtime/onert/core/src/compiler/MultiModelCompiler.cc [new file with mode: 0644]
runtime/onert/core/src/compiler/MultiModelCompiler.h [new file with mode: 0644]
runtime/onert/core/src/compiler/StaticShapeInferer.cc
runtime/onert/core/src/compiler/TensorRegistries.h
runtime/onert/core/src/compiler/pass/OddOutputPass.cc
runtime/onert/core/src/compiler/pass/PassRunner.cc
runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
runtime/onert/core/src/exec/Execution.cc
runtime/onert/core/src/exec/Execution.test.cc
runtime/onert/core/src/exec/ExecutionObservee.cc
runtime/onert/core/src/exec/ExecutionObservers.h
runtime/onert/core/src/exec/ExecutorBase.cc
runtime/onert/core/src/exec/ExecutorBase.h
runtime/onert/core/src/exec/Executors.cc
runtime/onert/core/src/exec/Executors.h [new file with mode: 0644]
runtime/onert/core/src/exec/IPermuteFunction.cc [new file with mode: 0644]
runtime/onert/core/src/exec/IPermuteFunction.h
runtime/onert/core/src/exec/IPermuteFunction.test.cc [new file with mode: 0644]
runtime/onert/core/src/exec/ParallelScheduler.cc
runtime/onert/core/src/exec/SingleModelExecutors.cc [new file with mode: 0644]
runtime/onert/core/src/exec/SingleModelExecutors.h [new file with mode: 0644]
runtime/onert/core/src/exec/ThreadPool.cc
runtime/onert/core/src/interp/Buffer.h [deleted file]
runtime/onert/core/src/interp/ExecEnv.h [deleted file]
runtime/onert/core/src/interp/InterpExecutor.cc [deleted file]
runtime/onert/core/src/interp/InterpExecutor.h [deleted file]
runtime/onert/core/src/interp/InterpExecutor.test.cc [deleted file]
runtime/onert/core/src/interp/InterpOps.lst [deleted file]
runtime/onert/core/src/interp/Interpreter.cc [deleted file]
runtime/onert/core/src/interp/Interpreter.h [deleted file]
runtime/onert/core/src/interp/Registration.h [deleted file]
runtime/onert/core/src/interp/Tensor.cc [deleted file]
runtime/onert/core/src/interp/Tensor.h [deleted file]
runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc [deleted file]
runtime/onert/core/src/interp/operations/Concat.cc [deleted file]
runtime/onert/core/src/interp/operations/Conv2D.cc [deleted file]
runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc [deleted file]
runtime/onert/core/src/interp/operations/ElementwiseActivations.cc [deleted file]
runtime/onert/core/src/interp/operations/FullyConnected.cc [deleted file]
runtime/onert/core/src/interp/operations/Gather.cc [deleted file]
runtime/onert/core/src/interp/operations/InstanceNorm.cc [deleted file]
runtime/onert/core/src/interp/operations/OperationUtil.h [deleted file]
runtime/onert/core/src/interp/operations/Pad.cc [deleted file]
runtime/onert/core/src/interp/operations/Pool2D.cc [deleted file]
runtime/onert/core/src/interp/operations/Reshape.cc [deleted file]
runtime/onert/core/src/interp/operations/Softmax.cc [deleted file]
runtime/onert/core/src/interp/operations/TransposeConv.cc [deleted file]
runtime/onert/core/src/ir/Shape.cc
runtime/onert/core/src/ir/Shape.test.cc
runtime/onert/core/src/util/ChromeTracingEventWriter.cc
runtime/onert/core/src/util/MDTableEventWriter.cc
runtime/onert/core/src/util/SNPEEventWriter.cc
runtime/onert/core/src/util/ShapeInference.cc
runtime/onert/frontend/base_loader/include/base_loader.h
runtime/onert/frontend/circle/src/circle_loader.cc
runtime/onert/frontend/nnapi/CMakeLists.txt
runtime/onert/frontend/nnapi/compilation.cc
runtime/onert/frontend/nnapi/execution.cc
runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
runtime/onert/frontend/tflite/src/tflite_loader.cc
runtime/service/npud/CMakeLists.txt
runtime/service/npud/backend/CMakeLists.txt [new file with mode: 0644]
runtime/service/npud/backend/trix/CMakeLists.txt [new file with mode: 0644]
runtime/service/npud/backend/trix/TrixBackend.cc [new file with mode: 0644]
runtime/service/npud/backend/trix/TrixBackend.h [new file with mode: 0644]
runtime/service/npud/core/Backend.h [new file with mode: 0644]
runtime/service/npud/core/CMakeLists.txt [new file with mode: 0644]
runtime/service/npud/core/ContextManager.cc [new file with mode: 0644]
runtime/service/npud/core/ContextManager.h [new file with mode: 0644]
runtime/service/npud/core/Core.cc [new file with mode: 0644]
runtime/service/npud/core/Core.h [new file with mode: 0644]
runtime/service/npud/core/DBus.cc [new file with mode: 0644]
runtime/service/npud/core/DBus.h [new file with mode: 0644]
runtime/service/npud/core/DevManager.cc [new file with mode: 0644]
runtime/service/npud/core/DevManager.h [new file with mode: 0644]
runtime/service/npud/core/DynamicLoader.cc [new file with mode: 0644]
runtime/service/npud/core/DynamicLoader.h [new file with mode: 0644]
runtime/service/npud/core/Server.cc
runtime/service/npud/core/Server.h
runtime/service/npud/core/Signal.cc
runtime/service/npud/core/ir/DataType.h [new file with mode: 0644]
runtime/service/npud/core/ir/Layout.h [new file with mode: 0644]
runtime/service/npud/core/main.cc
runtime/service/npud/core/util/Config.lst [new file with mode: 0644]
runtime/service/npud/core/util/ConfigSource.cc [moved from runtime/service/npud/util/ConfigSource.cc with 100% similarity]
runtime/service/npud/core/util/ConfigSource.h [moved from runtime/service/npud/util/ConfigSource.h with 100% similarity]
runtime/service/npud/core/util/Logging.h [moved from runtime/service/npud/util/Logging.h with 100% similarity]
runtime/service/npud/org.tizen.npud.conf [new file with mode: 0644]
runtime/service/npud/org.tizen.npud.xml [new file with mode: 0644]
runtime/service/npud/tests/CMakeLists.txt [new file with mode: 0644]
runtime/service/npud/tests/core/DBus.test.cc [new file with mode: 0644]
runtime/service/npud/tests/core/Server.test.cc [new file with mode: 0644]
runtime/service/npud/tests/core/Signal.test.cc [new file with mode: 0644]
runtime/service/npud/util/Config.lst [deleted file]
tests/nnapi/CMakeLists.txt
tests/nnapi/include/NeuralNetworksWrapper.h
tests/nnapi/nnapi_gtest.skip.noarch.interp [deleted file]
tests/nnapi/src/TestNeuralNetworksWrapper.h
tests/nnfw_api/CMakeLists.txt
tests/nnfw_api/src/ValidationTestAddModelLoaded.test.cc
tests/nnfw_api/src/ValidationTestPipelineSession.test.cc [deleted file]
tests/scripts/README.md
tests/scripts/benchmark.sh
tests/scripts/benchmark_nnapi.sh [deleted file]
tests/scripts/benchmark_nnpkg.sh
tests/scripts/benchmark_ops.sh [new file with mode: 0755]
tests/scripts/command/nnpkg-test
tests/scripts/command/prepare-model
tests/scripts/command/unittest
tests/scripts/command/verify-tflite
tests/scripts/common.sh
tests/scripts/list/nnapi_test.aarch64.list [deleted file]
tests/scripts/list/nnapi_test.armv7l.list [deleted file]
tests/scripts/list/nnpkg_test_list.noarch.interp [deleted file]
tests/scripts/list/tflite_comparator.aarch64.acl_cl.list
tests/scripts/list/tflite_comparator.aarch64.acl_neon.list
tests/scripts/list/tflite_comparator.armv7l.acl_cl.list
tests/scripts/list/tflite_comparator.armv7l.acl_neon.list
tests/scripts/list/tflite_comparator.armv7l.cpu.list
tests/scripts/list/tflite_comparator.noarch.interp.list [deleted file]
tests/scripts/list/tflite_comparator.x86_64.cpu.list
tests/scripts/merge_result_of_benchmark_nnpkg.py
tests/scripts/models/run_test.sh
tests/scripts/onert-test
tests/scripts/test-driver.sh [deleted file]
tests/scripts/test_framework.sh [deleted file]
tests/scripts/test_scheduler_with_profiling.sh
tests/tools/nnapi_test/CMakeLists.txt [deleted file]
tests/tools/nnapi_test/src/args.cc [deleted file]
tests/tools/nnapi_test/src/args.h [deleted file]
tests/tools/nnapi_test/src/nnapi_test.cc [deleted file]
tests/tools/nnpackage_run/CMakeLists.txt [deleted file]
tests/tools/nnpackage_run/README.md [deleted file]
tests/tools/nnpackage_run/src/allocation.h [deleted file]
tests/tools/nnpackage_run/src/args.cc [deleted file]
tests/tools/nnpackage_run/src/args.h [deleted file]
tests/tools/nnpackage_run/src/formatter.h [deleted file]
tests/tools/nnpackage_run/src/h5formatter.cc [deleted file]
tests/tools/nnpackage_run/src/h5formatter.h [deleted file]
tests/tools/nnpackage_run/src/nnfw_util.cc [deleted file]
tests/tools/nnpackage_run/src/nnfw_util.h [deleted file]
tests/tools/nnpackage_run/src/nnpackage_run.cc [deleted file]
tests/tools/nnpackage_run/src/randomgen.cc [deleted file]
tests/tools/nnpackage_run/src/randomgen.h [deleted file]
tests/tools/nnpackage_run/src/rawformatter.cc [deleted file]
tests/tools/nnpackage_run/src/rawformatter.h [deleted file]
tests/tools/nnpackage_run/src/types.h [deleted file]
tests/tools/onert_run/CMakeLists.txt [new file with mode: 0644]
tests/tools/onert_run/README.md [new file with mode: 0644]
tests/tools/onert_run/src/allocation.h [new file with mode: 0644]
tests/tools/onert_run/src/args.cc [new file with mode: 0644]
tests/tools/onert_run/src/args.h [new file with mode: 0644]
tests/tools/onert_run/src/formatter.h [new file with mode: 0644]
tests/tools/onert_run/src/h5formatter.cc [new file with mode: 0644]
tests/tools/onert_run/src/h5formatter.h [new file with mode: 0644]
tests/tools/onert_run/src/nnfw_util.cc [new file with mode: 0644]
tests/tools/onert_run/src/nnfw_util.h [new file with mode: 0644]
tests/tools/onert_run/src/onert_run.cc [new file with mode: 0644]
tests/tools/onert_run/src/randomgen.cc [new file with mode: 0644]
tests/tools/onert_run/src/randomgen.h [new file with mode: 0644]
tests/tools/onert_run/src/rawformatter.cc [new file with mode: 0644]
tests/tools/onert_run/src/rawformatter.h [new file with mode: 0644]
tests/tools/onert_run/src/types.h [new file with mode: 0644]
tests/tools/tflite_benchmark_model/CMakeLists.txt [deleted file]
tests/tools/tflite_benchmark_model/README.md [deleted file]
tests/tools/tflite_benchmark_model/benchmark_tflite_model.cc [deleted file]
tests/tools/tflite_benchmark_model/profile_summarizer.cc [deleted file]
tests/tools/tflite_benchmark_model/stats_calculator.cc [deleted file]
tests/tools/tflite_comparator/src/tflite_comparator.cc
tests/tools/tflite_run/CMakeLists.txt
tests/tools/tflite_run/src/tensor_dumper.cc
tests/tools/tflite_run/src/tensor_dumper.h
tests/tools/tflite_run/src/tensor_loader.cc
tests/tools/tflite_run/src/tensor_loader.h
tests/tools/tflite_run/src/tflite_run.cc
tests/tools/tflite_vanilla_run/CMakeLists.txt [deleted file]
tests/tools/tflite_vanilla_run/src/args.cc [deleted file]
tests/tools/tflite_vanilla_run/src/args.h [deleted file]
tests/tools/tflite_vanilla_run/src/tensor_view.h [deleted file]
tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc [deleted file]
tools/nnpackage_tool/model2nnpkg/README.md
tools/nnpackage_tool/model2nnpkg/model2nnpkg.py [new file with mode: 0755]
tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
tools/nnpackage_tool/nncc-tc-to-nnpkg-tc/README.md
tools/nnpackage_tool/nncc-tc-to-nnpkg-tc/nncc-tc-to-nnpkg-tc.sh
tools/nnpackage_tool/qnf/qnf.md [new file with mode: 0644]
tools/nnpackage_tool/qnf/qnf.py [new file with mode: 0644]
tools/nnpackage_tool/qnf/requirements.txt [new file with mode: 0644]
tools/nnpackage_tool/sth2nnpkgtc/pb2nnpkgtc.sh
tools/nnpackage_tool/sth2nnpkgtc/tflite2nnpkgtc.md
tools/nnpackage_tool/sth2nnpkgtc/tflite2nnpkgtc.sh
tools/pareto_profiler/README.md
tools/pareto_profiler/estimator/brute_force_profiler.py
tools/pareto_profiler/estimator/profile_args.py
tools/pareto_profiler/estimator/runner.py
tools/stab/remote.py
tools/tflitefile_tool/README.md
tools/tflitefile_tool/parser/tflite/tflite_tensor.py
tools/tflitefile_tool/select_operator.py

index 73ec548..154fbf9 100644 (file)
@@ -8,6 +8,7 @@ test:
       - /runtime/libs/misc
       - /runtime/libs/ndarray
       - /runtime/onert
+      - /runtime/service/npud/tests
       - /tests/nnfw_api
 
     testFile:
index 3ef12ef..d106893 100644 (file)
@@ -7,10 +7,11 @@ res/** linguist-detectable=false
 * text eol=lf
 
 # Binary - ignore text file setting
-*.caffemodel -text
-*.png -text
-*.pdf -text
-*.h5 -text
-*.tar.gz -text
-*.tflite -text
-*.bmp -text
+*.bmp binary
+*.caffemodel binary
+*.h5 binary
+*.jar binary
+*.pdf binary
+*.png binary
+*.tar.gz binary
+*.tflite binary
index bcbc3c5..dcfb8d5 100644 (file)
@@ -21,10 +21,10 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Setup python
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
         with:
           python-version: '3.x'
 
@@ -54,7 +54,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
         with:
           # Fetch all history and branch (default: 1)
           # Require all history to get file creation date
index a3f4c1c..7fa84b1 100644 (file)
@@ -24,7 +24,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
         with:
           # Checkout PR head commit
           # Checkout Action use merge commit as default
diff --git a/.github/workflows/deploy-github-pages.yml b/.github/workflows/deploy-github-pages.yml
new file mode 100644 (file)
index 0000000..d474a27
--- /dev/null
@@ -0,0 +1,40 @@
+name: Build and deploy github page
+
+on:
+  schedule:
+    # Every week
+    - cron: '30 19 * * SUN'
+  workflow_dispatch:
+    inputs:
+      publish:
+        description: 'Push to github page branch or not'
+        required: true
+        default: false
+        type: boolean
+
+jobs:
+  build_and_deploy:
+    name: 'Deploy doxygen page'
+    runs-on: 'ubuntu-latest'
+    if: github.repository_owner == 'Samsung'
+
+    steps:
+      - name: 'Checkout'
+        uses: actions/checkout@v3
+      - name: 'Generate HTML'
+        uses: mattnotmitt/doxygen-action@v1.9
+        with:
+          doxyfile-path: 'infra/doxygen/Doxyfile'
+      - name: 'Tar artifact'
+        run: tar -zcf doxygen.tar.gz -C doxygen/html ./
+      - name: 'Generate artifact'
+        uses: actions/upload-artifact@v3
+        with:
+          name: doxygen-html
+          path: doxygen.tar.gz
+      - name: 'Deploy'
+        if: ${{ github.event_name == 'schedule' || github.event.inputs.publish == 'true' }}
+        uses: JamesIves/github-pages-deploy-action@v4
+        with:
+          folder: doxygen/html
+          branch: gh-pages
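The new workflow above runs on a weekly schedule and can also be started by hand through `workflow_dispatch`. A minimal sketch of a manual trigger, assuming the GitHub CLI (`gh`) is available in the checked-out repository; the `publish` input name comes from the workflow definition above:

```
# trigger the Doxygen page build and push the result to gh-pages
gh workflow run deploy-github-pages.yml -f publish=true

# build and upload the doxygen-html artifact only, without publishing
gh workflow run deploy-github-pages.yml -f publish=false
```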
index 1f194fa..2e5d0bd 100644 (file)
@@ -1,3 +1,5 @@
+#!/usr/bin/make -f
+
 HOST_ARCH?=$(shell uname -m)
 TARGET_ARCH?=$(shell uname -m)
 BUILD_TYPE?=Debug
@@ -5,7 +7,6 @@ CROSS_BUILD?=0
 HOST_OS?=linux
 TARGET_OS?=linux
 COVERAGE_BUILD?=0
-BENCHMARK_ACL_BUILD?=0
 OPTIONS?=
 
 # make TARGET and TYPE to lowercase
@@ -22,9 +23,8 @@ else ifneq (,$(findstring aarch64,$(TARGET_ARCH_BASE)))
        TARGET_ARCH_LC=aarch64
 endif
 ifneq (,$(findstring android,$(TARGET_OS)))
-       # Anndroid only allow aarch64 target-arch
+       # Android only allow aarch64 target-arch
        TARGET_ARCH_LC=aarch64
-       TARGET_OS=android
 endif
 # Set CROSS_BUILD=1 when ROOTFS_DIR is given, and TARGET_ARCH is different to HOST_ARCH.
 ifneq ($(ROOTFS_DIR),)
@@ -38,19 +38,14 @@ ifeq ($(CROSS_BUILD),1)
        OPTIONS+= -DCMAKE_TOOLCHAIN_FILE=$(TOOLCHAIN_FILE)
 endif
 
-ifeq ($(COVERAGE_BUILD),1)
+ifneq ($(filter create-covsuite,$(MAKECMDGOALS)),)
        OPTIONS+= -DENABLE_COVERAGE=ON
 else
-       OPTIONS+= -DENABLE_COVERAGE=OFF
-endif
-
-ifeq ($(BENCHMARK_ACL_BUILD),1)
-       OPTIONS+= -DBUILD_BENCHMARK_ACL=1
-endif
-
-ifneq ($(EXT_HDF5_DIR),)
-  $(info Hello $(EXT_HDF5_DIR))
-       OPTIONS+= -DEXT_HDF5_DIR=$(EXT_HDF5_DIR)
+       ifeq ($(COVERAGE_BUILD),1)
+               OPTIONS+= -DENABLE_COVERAGE=ON
+       else
+               OPTIONS+= -DENABLE_COVERAGE=OFF
+       endif
 endif
 
 ifneq ($(EXTERNAL_VOLUME),)
@@ -87,17 +82,23 @@ WORKHOME=$(CURDIR)/Product
 WORKFOLDER=$(TARGET_ARCH_LC)-$(TARGET_OS).$(BUILD_TYPE_LC)
 WORKSPACE=$(WORKHOME)/$(WORKFOLDER)
 
-BUILD_FOLDER=$(WORKSPACE)/obj
 INSTALL_PATH?=$(WORKSPACE)/out
 OVERLAY_FOLDER?=$(WORKSPACE)/overlay
-BUILD_ALIAS=$(WORKHOME)/obj
 INSTALL_ALIAS=$(WORKHOME)/out
 
 TIMESTAMP_CONFIGURE=$(WORKSPACE)/CONFIGURE
 TIMESTAMP_BUILD=$(WORKSPACE)/BUILD
 TIMESTAMP_INSTALL=$(WORKSPACE)/INSTALL
 
-all: build
+###
+### Common environment variable
+###
+export NNFW_WORKSPACE=$(WORKSPACE)
+
+###
+### Default target
+###
+all: install
 
 ###
 ### Command (public)
@@ -106,78 +107,76 @@ configure: configure_internal
 
 build: build_internal
 
-install: $(TIMESTAMP_INSTALL)
+install: install_all_internal
 
-create_package: runtime_tar_internal
+create-package: runtime_tar_internal
 
-create_acl_tar: acl_tar_internal
+create-aclpack: acl_tar_internal
+
+create-testsuite: test_suite_internal
+
+create-covsuite: coverage_suite_internal
 
 clean:
        rm -rf $(WORKSPACE)
 
 distclean:
-       rm -rf $(WORKSPACE)
-       rm -rf externals/*.stamp
+       rm -rf Product
+       rm -rf externals
        rm -rf tests/nnapi/src/generated/
 
+# create_package, create_acl_tar: to be removed
+create_package: runtime_tar_internal
+create_acl_tar: acl_tar_internal
+
 ###
 ### Command (internal)
 ###
-configure_internal:
-# TODO Remove setting EXT_ACL_FOLDER
-#      Construct overlay folder directly outside (with headers?)
-ifneq ($(EXT_ACL_FOLDER),)
-       mkdir -p $(OVERLAY_FOLDER)/lib
-       cp $(EXT_ACL_FOLDER)/* $(OVERLAY_FOLDER)/lib
-# Make stamp file
-       printf "21.02" > $(OVERLAY_FOLDER)/ARMCOMPUTE.stamp
-endif
+$(WORKSPACE):
+       mkdir -p $@
 
+configure_internal: $(WORKSPACE)
 ifneq ($(DEBIAN_BUILD),)
        test -d externals || mkdir -p externals
        find packaging/ -type f -name "*.tar.gz" | xargs -i tar xf {} -C externals
 endif
-
-       NNFW_WORKSPACE="$(WORKSPACE)" NNFW_INSTALL_PREFIX=$(INSTALL_PATH) ./nnfw configure \
+       NNFW_INSTALL_PREFIX=$(INSTALL_PATH) ./nnfw configure \
                -DCMAKE_BUILD_TYPE=$(BUILD_TYPE_LC) \
                -DNNFW_OVERLAY_DIR=$(OVERLAY_FOLDER) \
                -DEXTERNALS_BUILD_THREADS=$(NPROCS) \
                $(OPTIONS)
-       touch $(TIMESTAMP_CONFIGURE)
 
-build_internal: $(BUILD_FOLDER)
-       NNFW_WORKSPACE="$(WORKSPACE)" ./nnfw build -j $(NPROCS)
-       rm -rf $(BUILD_ALIAS)
-       ln -s $(BUILD_FOLDER) $(BUILD_ALIAS)
-       touch $(TIMESTAMP_BUILD)
+build_internal: configure_internal
+       ./nnfw build -j $(NPROCS)
 
-install_internal:
-       NNFW_WORKSPACE="$(WORKSPACE)" ./nnfw install
+install_internal: build_internal
+       ./nnfw install
        rm -rf $(INSTALL_ALIAS)
        ln -s $(INSTALL_PATH) $(INSTALL_ALIAS)
-       touch $(TIMESTAMP_INSTALL)
 
-runtime_tar_internal: $(TIMESTAMP_BUILD) install_internal
+runtime_tar_internal: build_internal install_internal
        tar -zcf $(WORKSPACE)/onert-package.tar.gz -C $(INSTALL_PATH) lib
        tar -zcf $(WORKSPACE)/onert-devel-package.tar.gz -C $(INSTALL_PATH) include/nnfw
        tar -zcf $(WORKSPACE)/onert-plugin-devel-package.tar.gz -C $(INSTALL_PATH) include/onert
        tar -zcf $(WORKSPACE)/onert-test-package.tar.gz -C $(INSTALL_PATH) $(shell ls $(INSTALL_PATH) -I lib -I include)
 
-acl_tar_internal: $(BUILD_FOLDER)
+acl_tar_internal: configure_internal
        tar -zcf $(WORKSPACE)/onert-acl.tar.gz -C ${OVERLAY_FOLDER} lib/libarm_compute.so lib/libarm_compute_core.so lib/libarm_compute_graph.so
 
-install_internal_acl:
+install_acl_internal:
 # Workaround to install acl for test (ignore error when there is no file to copy)
        cp $(OVERLAY_FOLDER)/lib/libarm_compute*.so $(INSTALL_ALIAS)/lib || true
 
-build_test_suite: install_internal install_internal_acl
+install_all_internal: install_internal install_acl_internal
+
+test_suite_internal: install_all_internal
        @echo "packaging test suite"
        @rm -rf $(INSTALL_PATH)/test-suite.tar.gz
 # TODO Divide runtime package, external library package, and test suite
        @tar -zcf test-suite.tar.gz tests/scripts infra Product/out --dereference
        @mv test-suite.tar.gz $(INSTALL_PATH)/.
 
-build_coverage_suite: install_internal install_internal_acl
+coverage_suite_internal: install_all_internal
        @echo "packaging test-coverage suite"
        @rm -rf $(INSTALL_PATH)/coverage-suite.tar.gz
        @find Product -name "*.gcno" > include_lists.txt
@@ -185,17 +184,3 @@ build_coverage_suite: install_internal install_internal_acl
        @tar -zcf coverage-suite.tar.gz tests/scripts infra Product/out --dereference -T include_lists.txt
        @rm -rf include_lists.txt tests/scripts/build_path_depth.txt
        @mv coverage-suite.tar.gz $(INSTALL_PATH)/.
-
-###
-### Timestamps
-###
-$(WORKSPACE):
-       mkdir -p $@
-
-$(BUILD_FOLDER): $(WORKSPACE) configure_internal
-
-$(TIMESTAMP_CONFIGURE): configure_internal
-
-$(TIMESTAMP_BUILD): $(TIMESTAMP_CONFIGURE) build_internal
-
-$(TIMESTAMP_INSTALL): $(TIMESTAMP_BUILD) install_internal install_internal_acl
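The Makefile changes above make `install` the default goal, drop the configure/build timestamps, and rename the packaging targets to dashed names (the old `create_package`/`create_acl_tar` spellings are kept temporarily). A rough usage sketch, assuming the template is used as the top-level Makefile; the variable values are placeholders:

```
# default goal now runs configure, build and install
make TARGET_ARCH=aarch64 BUILD_TYPE=release

# dashed packaging targets replace build_test_suite / build_coverage_suite
make create-package      # onert runtime tar packages
make create-testsuite    # test-suite.tar.gz under the install path
make create-covsuite     # coverage suite; also turns on ENABLE_COVERAGE
```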
index 7cf12f1..2588cf0 100644 (file)
@@ -1,3 +1,21 @@
+# get CODENAME to perform per codename actions
+# set focal as default
+set(ONE_UBUNTU_CODENAME "focal")
+find_program(LSB_RELEASE_EXEC lsb_release)
+if(LSB_RELEASE_EXEC)
+  # output should be one of 'bionic', 'focal', 'jammy'
+  # others are not tested
+  execute_process(COMMAND "${LSB_RELEASE_EXEC}" --short --codename
+                  OUTPUT_VARIABLE ONE_UBUNTU_CODENAME
+                  OUTPUT_STRIP_TRAILING_WHITESPACE)
+else()
+  message(STATUS "WARNING: lsb_release not found")
+endif()
+
+if(${ONE_UBUNTU_CODENAME} STREQUAL "jammy")
+  set(ONE_UBUNTU_CODENAME_JAMMY TRUE)
+endif()
+
 # TODO Validate the argument of "requires"
 function(get_project_build_order VAR)
   # This file will describe the dependencies among projects
index 99276f3..f15b2ba 100644 (file)
@@ -55,6 +55,7 @@ std::vector<size_t> getEachByteSizeOf(const std::vector<loco::Node *> &nodes)
   for (const auto node : nodes)
   {
     const auto input_node = loco::must_cast<const luci::CircleInput *>(node);
+    const auto dtype_size = loco::size(input_node->dtype());
     size_t element_size = 1;
 
     for (uint32_t index = 0; index < input_node->rank(); index++)
@@ -62,7 +63,7 @@ std::vector<size_t> getEachByteSizeOf(const std::vector<loco::Node *> &nodes)
       element_size *= input_node->dim(index).value();
     }
 
-    vec.push_back(element_size);
+    vec.push_back(element_size * dtype_size);
   }
 
   return vec;
index da74e02..0320b52 100644 (file)
@@ -26,6 +26,7 @@ target_link_libraries(circle_execution_plan luci_import)
 target_link_libraries(circle_execution_plan luci_export)
 target_link_libraries(circle_execution_plan luci_plan)
 target_link_libraries(circle_execution_plan arser)
+target_link_libraries(circle_execution_plan luci_log)
 
 target_include_directories(circle_execution_plan PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/pal")
 install(TARGETS circle_execution_plan DESTINATION bin)
index 538a502..7b210d6 100644 (file)
@@ -27,6 +27,18 @@ enum SupportedPlatformType
   CMSISNN
 };
 
+enum RuntimeType
+{
+  ONERT_MICRO,
+  LUCI_INTERPRETER
+};
+
+enum AllocatingMode
+{
+  COMMON,
+  SPLIT
+};
+
 struct TargetPlatform
 {
   SupportedPlatformType platform_type;
index d5ddf0c..345bc05 100644 (file)
@@ -36,6 +36,29 @@ int entry(int argc, char **argv)
   arser.add_argument("input").help("Input circle model");
   arser.add_argument("output").help("Output circle model");
   arser.add_argument("--platform").default_value("linux").help("Platform name: linux mcu cmsisnn");
+  arser.add_argument("--allocating_mode")
+    .default_value("common")
+    .help("Buffer type name (only onert-micro option):"
+          "common - a single buffer is considered for all allocations"
+          "split - there are three buffers: for input,"
+          " for output and for intermediate tensors");
+  arser.add_argument("--runtime")
+    .default_value("onert_micro")
+    .help("Target runtime name: luci-interpreter onert-micro");
+  arser.add_argument("--allocate_const")
+    .nargs(1)
+    .type(arser::DataType::BOOL)
+    .required(false)
+    .default_value(false)
+    .help("Whether or not to take into account constants in memory allocation. "
+          "Default value - false, constants are not counted when allocating memory");
+  arser.add_argument("--allocate_input")
+    .nargs(1)
+    .type(arser::DataType::BOOL)
+    .required(false)
+    .default_value(true)
+    .help("Whether or not to take into account inputs in memory allocation. "
+          "Default value - true, inputs are counted when allocating memory");
   arser.add_argument("--use_dsp")
     .nargs(1)
     .type(arser::DataType::BOOL)
@@ -64,7 +87,11 @@ int entry(int argc, char **argv)
   const std::string input_path = arser.get<std::string>("input");
   const std::string output_path = arser.get<std::string>("output");
   const std::string platform_name = arser.get<std::string>("--platform");
+  const std::string allocating_mode_name = arser.get<std::string>("--allocating_mode");
+  const std::string runtime_name = arser.get<std::string>("--runtime");
   const bool use_dsp = arser.get<bool>("--use_dsp");
+  const bool is_allocate_const = arser.get<bool>("--allocate_const");
+  const bool is_allocate_input = arser.get<bool>("--allocate_input");
   const std::string json_path = arser.get<std::string>("--save_allocations");
 
   if (platform_name != "cmsisnn" && use_dsp)
@@ -92,6 +119,44 @@ int entry(int argc, char **argv)
     return EXIT_FAILURE;
   }
 
+  circle_planner::AllocatingMode allocating_mode;
+  if (allocating_mode_name == "split")
+  {
+    allocating_mode = circle_planner::AllocatingMode::SPLIT;
+  }
+  else if (allocating_mode_name == "common")
+  {
+    allocating_mode = circle_planner::AllocatingMode::COMMON;
+  }
+  else
+  {
+    std::cerr << "ERROR: Invalid allocation mode name '" << allocating_mode_name << "'"
+              << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  circle_planner::RuntimeType runtime_type;
+  if (runtime_name == "onert-micro")
+  {
+    runtime_type = circle_planner::RuntimeType::ONERT_MICRO;
+  }
+  else if (runtime_name == "luci-interpreter")
+  {
+    runtime_type = circle_planner::RuntimeType::LUCI_INTERPRETER;
+  }
+  else
+  {
+    std::cerr << "ERROR: Invalid runtime name '" << runtime_name << "'" << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  if (allocating_mode == circle_planner::AllocatingMode::SPLIT and
+      runtime_type == circle_planner::RuntimeType::LUCI_INTERPRETER)
+  {
+    std::cerr << "Split buffer type can only be used with onert-micro runtime" << std::endl;
+    return EXIT_FAILURE;
+  }
+
   bool is_save_allocations = false;
 
   if (!json_path.empty())
@@ -131,7 +196,9 @@ int entry(int argc, char **argv)
   auto module = importer.importModule(circle_model);
 
   // Do main job
-  circle_planner::ExecutionPlanner execution_planner(module->graph(), {platform_type, use_dsp});
+  circle_planner::ExecutionPlanner execution_planner(module->graph(), {platform_type, use_dsp},
+                                                     runtime_type, allocating_mode);
+  execution_planner.change_planning_mode(is_allocate_const, is_allocate_input, true);
   execution_planner.make_execution_plan();
 
   if (is_save_allocations)
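The driver above gains `--runtime` and `--allocating_mode` options plus boolean `--allocate_const`/`--allocate_input` switches that feed `change_planning_mode`. A hypothetical invocation, assuming the installed `circle_execution_plan` binary and placeholder model names; as checked above, the split mode is only accepted together with the onert-micro runtime:

```
# plan memory for onert-micro with separate input/output/intermediate buffers
circle_execution_plan model.circle model.planned.circle \
  --runtime onert-micro --allocating_mode split \
  --save_allocations allocations.json
```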
index a1e6f7e..fe028c0 100644 (file)
 #include "ExecutionPlanner.h"
 #include <loco/IR/Algorithm.h>
 #include <luci/UserSettings.h>
+#include <luci/Log.h>
 
 #include <json.h>
 #include <fstream>
 
+#include <limits> // std::numeric_limits
+
 namespace circle_planner
 {
 namespace
@@ -84,10 +87,164 @@ void create_allocation_node(Json::Value &allocations_node,
   allocations_node.append(allocation_node);
 }
 
+// TODO: Introduce inplace optimization
+bool can_be_inplace_optimization_node(luci::CircleNode *node)
+{
+  switch (node->opcode())
+  {
+    case luci::CircleOpcode::LOGISTIC:
+    case luci::CircleOpcode::RESHAPE:
+    case luci::CircleOpcode::EXPAND_DIMS:
+      return true;
+    default:
+      return false;
+  }
+}
+
 } // namespace
 
-void ExecutionPlanner::make_execution_plan()
+void ExecutionPlanner::make_execution_plan_onert_micro_base()
+{
+  switch (_allocating_mode)
+  {
+    case AllocatingMode::COMMON:
+      make_execution_plan_onert_micro_common_buffer();
+      break;
+    case AllocatingMode::SPLIT:
+      make_execution_plan_onert_micro_split_buffer();
+      break;
+    default:
+      throw std::runtime_error("Unsupported buffer type\n");
+  }
+}
+
+void ExecutionPlanner::write_execution_plan(uint32_t order_offset)
+{
+  _required_size = get_offsets_with_greedy_by_size();
+
+  int32_t counter_ops = 0;
+  for (uint32_t i = 0; i < _ordered_nodes.size(); i++)
+  {
+    const auto circle_node = dynamic_cast<luci::CircleNode *>(_ordered_nodes[i]);
+    if (circle_node->opcode() != luci::CircleOpcode::CIRCLECONST and
+        circle_node->opcode() != luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE)
+    {
+      luci::CircleNodeExecutionPlan execution_plan(counter_ops + order_offset, _offsets[i]);
+      luci::add_execution_plan(loco::must_cast<luci::CircleNode *>(_ordered_nodes[i]),
+                               execution_plan);
+      counter_ops++;
+    }
+  }
+}
+
+void ExecutionPlanner::make_execution_plan_onert_micro_split_buffer()
+{
+  LOGGER(l);
+
+  const auto input_size = _graph->inputs()->size();
+  const auto output_size = _graph->outputs()->size();
+
+  // Make execution plan for inputs
+  _ordered_nodes = loco::input_nodes(_graph);
+  write_execution_plan(0);
+  dump_inform();
+  VERBOSE(l, 0) << "Input graph buffer required memory = " << _required_size << std::endl;
+
+  // Clear structures for next buffer
+  _ordered_nodes.clear();
+  _alloc_node_inform_vector.clear();
+  _dealloc_node.clear();
+  _alloc_node.clear();
+  _offsets.clear();
+  _required_size = 0;
+
+  // Make execution plan for outputs
+  _ordered_nodes = loco::output_nodes(_graph);
+  write_execution_plan(input_size);
+  dump_inform();
+  VERBOSE(l, 0) << "Output graph buffer required memory = " << _required_size << std::endl;
+
+  // Clear structures for next buffer
+  _ordered_nodes.clear();
+  _alloc_node_inform_vector.clear();
+  _dealloc_node.clear();
+  _alloc_node.clear();
+  _offsets.clear();
+  _required_size = 0;
+
+  // Make execution plan for intermediates calculations
+  get_default_execution_order_plan_without_inputs_and_outputs();
+  write_execution_plan(input_size + output_size);
+  dump_inform();
+  VERBOSE(l, 0) << "Main graph buffer required memory = " << _required_size << std::endl;
+}
+
+void ExecutionPlanner::make_execution_plan_onert_micro_common_buffer()
+{
+  LOGGER(l);
+
+  get_default_execution_order_plan();
+  _required_size = get_offsets_with_greedy_by_size();
+
+  // Find prev nodes for output nodes (actual graph output node, not luci::CircleOutput)
+  const auto output_nodes = loco::output_nodes(const_cast<loco::Graph *>(_graph));
+  std::vector<loco::Node *> output_prev_nodes;
+  for (const auto output_node : output_nodes)
+  {
+    const auto prev_nodes = loco::preds(output_node);
+    std::copy(prev_nodes.begin(), prev_nodes.end(), std::back_inserter(output_prev_nodes));
+  }
+  const auto output_nodes_size = output_prev_nodes.size();
+
+  const auto inputs_nodes = loco::input_nodes(_graph);
+  const auto input_nodes_size = inputs_nodes.size();
+
+  int32_t counter_ops = 0;
+  for (uint32_t i = 0; i < _ordered_nodes.size(); i++)
+  {
+    const auto circle_node = dynamic_cast<luci::CircleNode *>(_ordered_nodes[i]);
+    // First write to input nodes
+    if (circle_node->opcode() == luci::CircleOpcode::CIRCLEINPUT)
+    {
+      // Find input_position for proper position in execution order
+      const auto input_position = std::distance(
+        inputs_nodes.begin(), std::find(inputs_nodes.begin(), inputs_nodes.end(), circle_node));
+      luci::CircleNodeExecutionPlan execution_plan(input_position, _offsets[i]);
+      luci::add_execution_plan(loco::must_cast<luci::CircleNode *>(_ordered_nodes[i]),
+                               execution_plan);
+    }
+    // Second write to actual output nodes (not luci::CircleOutput)
+    else if (std::find(output_prev_nodes.begin(), output_prev_nodes.end(), circle_node) !=
+             output_prev_nodes.end())
+    {
+      // Find output_position for proper position in execution order
+      const auto output_position =
+        std::distance(output_prev_nodes.begin(),
+                      std::find(output_prev_nodes.begin(), output_prev_nodes.end(), circle_node));
+      luci::CircleNodeExecutionPlan execution_plan(input_nodes_size + output_position, _offsets[i]);
+      luci::add_execution_plan(loco::must_cast<luci::CircleNode *>(_ordered_nodes[i]),
+                               execution_plan);
+    }
+    // Finally write to all intermediate nodes
+    else if (circle_node->opcode() != luci::CircleOpcode::CIRCLECONST and
+             circle_node->opcode() != luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE)
+    {
+      luci::CircleNodeExecutionPlan execution_plan(
+        counter_ops + input_nodes_size + output_nodes_size, _offsets[i]);
+      luci::add_execution_plan(loco::must_cast<luci::CircleNode *>(_ordered_nodes[i]),
+                               execution_plan);
+      counter_ops++;
+    }
+  }
+
+  dump_inform();
+  VERBOSE(l, 0) << "Buffer required memory = " << _required_size << std::endl;
+}
+
+void ExecutionPlanner::make_execution_plan_luci_interpreter()
 {
+  LOGGER(l);
+
   get_default_execution_order_plan();
   _required_size = get_offsets_with_greedy_by_size();
   for (uint32_t i = 0; i < _ordered_nodes.size(); i++)
@@ -96,6 +253,25 @@ void ExecutionPlanner::make_execution_plan()
     luci::add_execution_plan(loco::must_cast<luci::CircleNode *>(_ordered_nodes[i]),
                              execution_plan);
   }
+
+  VERBOSE(l, 0) << "Buffer required memory = " << _required_size << std::endl;
+  dump_inform();
+}
+
+void ExecutionPlanner::make_execution_plan()
+{
+  switch (_runtime_type)
+  {
+    case ONERT_MICRO:
+      make_execution_plan_onert_micro_base();
+      break;
+    case LUCI_INTERPRETER:
+      make_execution_plan_luci_interpreter();
+      break;
+    default:
+      throw std::runtime_error("Unsupported runtime platform\n");
+  }
+
   auto settings = luci::UserSettings::settings();
   settings->set(luci::UserSettings::Key::ExecutionPlanGen, true);
 }
@@ -150,6 +326,34 @@ void ExecutionPlanner::get_default_execution_order_plan()
   _ordered_nodes = loco::postorder_traversal(loco::output_nodes(const_cast<loco::Graph *>(_graph)));
 }
 
+void ExecutionPlanner::get_default_execution_order_plan_without_inputs_and_outputs()
+{
+  // Get all nodes
+  _ordered_nodes = loco::postorder_traversal(loco::output_nodes(const_cast<loco::Graph *>(_graph)));
+
+  // Get real output nodes (not luci::CircleOutput)
+  const auto output_nodes = loco::output_nodes(const_cast<loco::Graph *>(_graph));
+  std::vector<loco::Node *> output_prev_nodes;
+  for (const auto output_node : output_nodes)
+  {
+    const auto prev_nodes = loco::preds(output_node);
+    std::copy(prev_nodes.begin(), prev_nodes.end(), std::back_inserter(output_prev_nodes));
+  }
+
+  // Remove input and real output nodes from _ordered_nodes
+  _ordered_nodes.erase(
+    std::remove_if(_ordered_nodes.begin(), _ordered_nodes.end(),
+                   [&output_prev_nodes](auto node) {
+                     const auto circle_node = dynamic_cast<luci::CircleNode *>(node);
+
+                     return circle_node->opcode() == luci::CircleOpcode::CIRCLEINPUT or
+                            circle_node->opcode() == luci::CircleOpcode::CIRCLEOUTPUT or
+                            std::find(output_prev_nodes.begin(), output_prev_nodes.end(), node) !=
+                              output_prev_nodes.end();
+                   }),
+    _ordered_nodes.end());
+}
+
 void ExecutionPlanner::get_usage_interval()
 {
   // Initialize vectors of first and last nodes for usage interval
@@ -177,6 +381,8 @@ void ExecutionPlanner::get_usage_interval()
   for (auto &output_node : output_nodes(_graph))
   {
     auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), output_node);
+    if (it == _ordered_nodes.end())
+      continue;
     size_t index = std::distance(_ordered_nodes.begin(), it);
     usages_counts[index]++;
   }
@@ -184,6 +390,8 @@ void ExecutionPlanner::get_usage_interval()
   for (auto &input_node : input_nodes(_graph))
   {
     auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), input_node);
+    if (it == _ordered_nodes.end())
+      continue;
     size_t index = std::distance(_ordered_nodes.begin(), it);
     usages_counts[index]++;
     allocate(0, index);
@@ -261,7 +469,7 @@ uint32_t ExecutionPlanner::get_offsets_with_greedy_by_size()
 uint32_t ExecutionPlanner::greedy_by_size_approach()
 {
   size_t result_size = 0;
-  create_alloc_node_inform_vector(_is_null_consts, _is_null_inputs, _is_null_scratchpads);
+  create_alloc_node_inform_vector();
   std::vector<AllocationNodeInformation> ordered_alloc_inform;
   for (auto &current_node : _alloc_node_inform_vector)
   {
@@ -307,8 +515,7 @@ uint32_t ExecutionPlanner::greedy_by_size_approach()
   return result_size;
 }
 
-void ExecutionPlanner::create_alloc_node_inform_vector(bool null_consts, bool null_inputs,
-                                                       bool null_scratchpad)
+void ExecutionPlanner::create_alloc_node_inform_vector()
 {
   auto node_compare = [this](const AllocationNodeInformation &alloc_1,
                              const AllocationNodeInformation &alloc_2) {
@@ -355,11 +562,15 @@ void ExecutionPlanner::create_alloc_node_inform_vector(bool null_consts, bool nu
     _alloc_node_inform_vector[i].last_node = _dealloc_node[i];
 
     const auto *const_node = dynamic_cast<const luci::CircleConst *>(circle_node);
-    if (i == 0 && null_inputs)
+    if (circle_node->opcode() == luci::CircleOpcode::CIRCLEINPUT && not _is_allocate_inputs)
+    {
+      _alloc_node_inform_vector[i].size = 0;
+    }
+    else if (circle_node->opcode() == luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE)
     {
       _alloc_node_inform_vector[i].size = 0;
     }
-    else if (const_node && null_consts)
+    else if (const_node && not _is_allocate_consts)
     {
       _alloc_node_inform_vector[i].size = 0;
     }
@@ -374,7 +585,7 @@ void ExecutionPlanner::create_alloc_node_inform_vector(bool null_consts, bool nu
 
     // Scratchpad If needed
     std::vector<uint32_t> scratchpad_sizes;
-    if (!null_scratchpad)
+    if (_is_allocate_scratchpads)
     {
       switch (circle_node->opcode())
       {
@@ -440,6 +651,7 @@ void ExecutionPlanner::create_alloc_node_inform_vector(bool null_consts, bool nu
 
 void ExecutionPlanner::dump_inform()
 {
+  LOGGER(l);
   uint32_t max_breadth = 0;
 
   for (uint32_t i = 0; i < _ordered_nodes.size(); i++)
@@ -466,12 +678,14 @@ void ExecutionPlanner::dump_inform()
     }
 
     auto node = loco::must_cast<luci::CircleNode *>(_ordered_nodes.at(i));
-    printf("node_num = %d   node_name = %s    node_size = %d    node_offset = %d  node_breadth = "
-           "%u node_first_node = %d   node_last_node = %d\n",
-           i, node->name().c_str(), current_node_it->size, current_node_it->offset,
-           current_node_it->breadth, current_node_it->first_node, current_node_it->last_node);
+    VERBOSE(l, 0) << "node_num = " << i << " node_name = " << node->name().c_str()
+                  << " node_size = " << current_node_it->size
+                  << " node_offset = " << current_node_it->offset
+                  << " node_breadth = " << current_node_it->breadth
+                  << " node_first_node = " << current_node_it->first_node
+                  << " node_last_node =  " << current_node_it->last_node << std::endl;
   }
-  printf("Lower bound is = %u\n", max_breadth);
+  VERBOSE(l, 0) << "Lower bound = " << max_breadth << std::endl;
   std::sort(_alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(),
             [](const AllocationNodeInformation &first, const AllocationNodeInformation &second) {
               if (first.breadth != second.breadth)
index af3fba3..1a25518 100644 (file)
@@ -70,7 +70,9 @@ public:
     _scratchpad_helper = std::make_unique<ScratchpadHelperLinux>();
   }
 
-  explicit ExecutionPlanner(loco::Graph *graph, TargetPlatform target_platform) : _graph(graph)
+  explicit ExecutionPlanner(loco::Graph *graph, TargetPlatform target_platform,
+                            RuntimeType runtime_type, AllocatingMode allocating_mode)
+    : _graph(graph), _runtime_type(runtime_type), _allocating_mode(allocating_mode)
   {
     switch (target_platform.platform_type)
     {
@@ -94,29 +96,52 @@ public:
   void make_execution_plan();
 
   // Method change planning mode:
-  // is_null_consts = true - constants are no longer taken into account when planning
-  // is_null_inputs = true - input are no longer taken into account when planning
-  // is_null_scratchpads = true - scratchpads are no longer taken into account when planning
-  void change_planning_mode(bool is_null_consts, bool is_null_inputs, bool is_null_scratchpads)
+  // is_allocate_consts = false - constants are no longer taken into account when planning
+  // is_allocate_inputs = false - inputs are no longer taken into account when planning
+  // is_allocate_scratchpads = false - scratchpads are no longer taken into account when planning
+  void change_planning_mode(bool is_allocate_consts, bool is_allocate_inputs,
+                            bool is_allocate_scratchpads)
   {
-    _is_null_consts = is_null_consts;
-    _is_null_inputs = is_null_inputs;
-    _is_null_scratchpads = is_null_scratchpads;
+    _is_allocate_consts = is_allocate_consts;
+    _is_allocate_inputs = is_allocate_inputs;
+    _is_allocate_scratchpads = is_allocate_scratchpads;
   };
 
   void create_json_allocation_file(const std::string &json_path);
 
 private:
+  // Save execution plan for onert-micro runtime base function.
+  //
+  // NOTE: First, according to ordered_node, the input nodes are written,
+  // then all outputs, finally all nodes in execution order.
+  // Constants are not written.
+  void make_execution_plan_onert_micro_base();
+
+  // Save execution plan for luci-interpreter runtime base function.
+  void make_execution_plan_luci_interpreter();
+
+  // Save execution plan for onert-micro runtime for common buffer type.
+  void make_execution_plan_onert_micro_common_buffer();
+
+  // Save execution plan for onert-micro runtime for split buffer type.
+  void make_execution_plan_onert_micro_split_buffer();
+
   // Method gets default execution order plan and saves it in _ordered_nodes vector.
   // There can be different variants of execution order and this method provides main one.
   void get_default_execution_order_plan();
 
+  // Method gets default execution order plan,
+  // but without inputs and output nodes and saves it in _ordered_nodes vector
+  void get_default_execution_order_plan_without_inputs_and_outputs();
+
   // Method provides nodes with usage interval information.
   void get_usage_interval();
 
   // Method dumps execution plan information.
   void dump_inform();
 
+  void write_execution_plan(uint32_t order_offset);
+
   // Method finds required offsets for all nodes from _ordered_nodes, using greedy by size approach.
   // It saves offsets in _offsets vector.
   // Return: required size of buffer.
@@ -127,14 +152,13 @@ private:
   uint32_t greedy_by_size_approach();
 
   // Method creates and fills _alloc_node_inform_vector with usage interval inform and node's sizes.
-  // null_consts = true - size of const nodes will be equal 0;
-  // null_inputs = true - size of input nodes will be equal 0;
-  // null_scratchpad = true - size of scratchpad nodes will be equal 0;
+  // _is_allocate_consts = false - size of const nodes will be equal 0;
+  // _is_allocate_inputs = false - size of input nodes will be equal 0;
+  // _is_allocate_scratchpads = false - size of scratchpad nodes will be equal 0;
   // It using if we don't want to take input(const or scratchpads) nodes into account
   // when determining offsets and calculating the required buffer size. This is uses for
   // experiments.
-  void create_alloc_node_inform_vector(bool null_consts = false, bool null_inputs = false,
-                                       bool null_scratchpad = false);
+  void create_alloc_node_inform_vector();
 
   // Stores allocation additional information for the all nodes from _graph.
   std::vector<AllocationNodeInformation> _alloc_node_inform_vector;
@@ -164,16 +188,22 @@ private:
   // Calculate size of scratchpad tensors for current platform
   std::unique_ptr<IScratchpadHelper> _scratchpad_helper;
 
+  // Supported runtime type
+  RuntimeType _runtime_type;
+
+  // Supported buffers type
+  AllocatingMode _allocating_mode;
+
   // Required memory size.
   uint32_t _required_size = 0;
 
   // Flags for choosing different planning modes:
-  // _is_null_consts = true - constants are no longer taken into account when planning
-  // _is_null_inputs = true - input are no longer taken into account when planning
-  // _is_null_scratchpads = true - scratchpads are no longer taken into account when planning
-  bool _is_null_consts = false;
-  bool _is_null_inputs = false;
-  bool _is_null_scratchpads = false;
+  // _is_allocate_consts = false - constants are no longer taken into account when planning
+  // _is_allocate_inputs = false - inputs are no longer taken into account when planning
+  // _is_allocate_scratchpads = false - scratchpads are no longer taken into account when planning
+  bool _is_allocate_consts = true;
+  bool _is_allocate_inputs = true;
+  bool _is_allocate_scratchpads = true;
 };
 
 } // namespace circle_planner
index 318a582..4fa6069 100644 (file)
@@ -34,6 +34,7 @@ int entry(int argc, char **argv)
   arser.add_argument("--conv2d_weight")
     .nargs(0)
     .help("Dump Conv2D series weight operators in circle file");
+  arser.add_argument("--constants").nargs(0).help("Dump constant tensors name");
   arser.add_argument("--op_version").nargs(0).help("Dump versions of the operators in circle file");
   arser.add_argument("--tensor_dtype").nargs(0).help("Dump dtype of tensors");
   arser.add_argument("circle").help("Circle file to inspect");
@@ -50,7 +51,7 @@ int entry(int argc, char **argv)
   }
 
   if (!arser["--operators"] && !arser["--conv2d_weight"] && !arser["--op_version"] &&
-      !arser["--tensor_dtype"])
+      !arser["--tensor_dtype"] && !arser["--constants"])
   {
     std::cout << "At least one option must be specified" << std::endl;
     std::cout << arser;
@@ -67,6 +68,8 @@ int entry(int argc, char **argv)
     dumps.push_back(std::make_unique<circleinspect::DumpOperatorVersion>());
   if (arser["--tensor_dtype"])
     dumps.push_back(std::make_unique<circleinspect::DumpTensorDType>());
+  if (arser["--constants"])
+    dumps.push_back(std::make_unique<circleinspect::DumpConstants>());
 
   std::string model_file = arser.get<std::string>("circle");
 
index aa8fed2..868fc2b 100644 (file)
@@ -202,3 +202,36 @@ void DumpTensorDType::run(std::ostream &os, const circle::Model *model)
 }
 
 } // namespace circleinspect
+
+namespace circleinspect
+{
+
+void DumpConstants::run(std::ostream &os, const circle::Model *model)
+{
+  mio::circle::Reader reader(model);
+
+  const uint32_t subgraph_size = reader.num_subgraph();
+
+  for (uint32_t g = 0; g < subgraph_size; g++)
+  {
+    reader.select_subgraph(g);
+    auto tensors = reader.tensors();
+
+    for (uint32_t i = 0; i < tensors->Length(); ++i)
+    {
+      const auto tensor = tensors->Get(i);
+      if (tensor->is_variable())
+        continue;
+
+      auto const buffer_id = tensor->buffer();
+
+      auto const buffer_size = reader.buffer_info(buffer_id, nullptr);
+      if (buffer_size == 0)
+        continue;
+
+      os << reader.tensor_name(tensor) << std::endl;
+    }
+  }
+}
+
+} // namespace circleinspect
index 8ca6838..7ab1ebc 100644 (file)
@@ -69,6 +69,15 @@ public:
   void run(std::ostream &os, const circle::Model *model);
 };
 
+class DumpConstants final : public DumpInterface
+{
+public:
+  DumpConstants() = default;
+
+public:
+  void run(std::ostream &os, const circle::Model *model);
+};
+
 } // namespace circleinspect
 
 #endif // __DUMP_H__
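The `--constants` option registers the new `DumpConstants` pass above, which walks every subgraph and prints the name of each non-variable tensor backed by a non-empty buffer. A hypothetical invocation with a placeholder model name:

```
# list the names of constant tensors stored in the circle file
circle-inspect --constants model.circle
```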
diff --git a/compiler/circle-interpreter-test/CMakeLists.txt b/compiler/circle-interpreter-test/CMakeLists.txt
new file mode 100644 (file)
index 0000000..50cd0c6
--- /dev/null
@@ -0,0 +1,17 @@
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+get_target_property(ARTIFACTS_PATH testDataGenerator BINARY_DIR)
+get_target_property(CIRCLE_INTERPRETER_PATH circle-interpreter BINARY_DIR)
+set(CIRCLE_INTERPRETER_PATH "${CIRCLE_INTERPRETER_PATH}/circle-interpreter")
+
+nnas_find_package(GTest REQUIRED)
+
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+GTest_AddTest(circle-interpreter-test ${TESTS})
+
+set_tests_properties(circle-interpreter-test
+                     PROPERTIES
+                     ENVIRONMENT "ARTIFACTS_PATH=${ARTIFACTS_PATH};CIRCLE_INTERPRETER_PATH=${CIRCLE_INTERPRETER_PATH}"
+                     )
diff --git a/compiler/circle-interpreter-test/README.md b/compiler/circle-interpreter-test/README.md
new file mode 100644 (file)
index 0000000..66f80e7
--- /dev/null
@@ -0,0 +1,9 @@
+# circle-interpreter-test
+
+`circle-interpreter-test` checks if _circle-interpreter_ is working as expected.
+
+Current tests include:
+- input arguments test
+- output data test
+- printing help message test
+- validation of arguments and error message test
diff --git a/compiler/circle-interpreter-test/requires.cmake b/compiler/circle-interpreter-test/requires.cmake
new file mode 100644 (file)
index 0000000..8d8585b
--- /dev/null
@@ -0,0 +1,2 @@
+require("common-artifacts")
+require("circle-interpreter")
diff --git a/compiler/circle-interpreter-test/src/circle-interpreter.test.cpp b/compiler/circle-interpreter-test/src/circle-interpreter.test.cpp
new file mode 100644 (file)
index 0000000..4a5e81b
--- /dev/null
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <vector>
+
+#define READSIZE 4096
+
+class circle_interpreter_test : public ::testing::Test
+{
+protected:
+  bool initialize(void);
+  bool run(const std::string &command);
+
+protected:
+  bool compare(const std::string &file1, const std::string &file2);
+
+protected:
+  std::string _artifacts_path;
+  std::string _circle_interpreter_path;
+  std::string _result;
+};
+
+bool circle_interpreter_test::initialize(void)
+{
+  char *path = std::getenv("ARTIFACTS_PATH");
+  if (path == nullptr)
+  {
+    std::cerr << "ARTIFACTS_PATH not found" << std::endl;
+    return false;
+  }
+  _artifacts_path = path;
+
+  path = std::getenv("CIRCLE_INTERPRETER_PATH");
+  if (path == nullptr)
+  {
+    std::cerr << "CIRCLE_INTERPRETER_PATH  not found" << std::endl;
+    return false;
+  }
+  _circle_interpreter_path = path;
+
+  return true;
+}
+
+bool circle_interpreter_test::run(const std::string &command)
+{
+  std::vector<char> buffer(READSIZE);
+  std::string result = "";
+  std::string cmd_err = command + " 2>&1";
+  FILE *pipe = popen(cmd_err.c_str(), "r");
+  if (!pipe)
+  {
+    return false;
+  }
+  try
+  {
+    while (fgets(&buffer[0], buffer.size(), pipe) != NULL)
+    {
+      result += &buffer[0];
+    }
+  }
+  catch (...)
+  {
+    pclose(pipe);
+    return false;
+  }
+  pclose(pipe);
+  _result = result;
+
+  std::cout << _result << std::endl;
+
+  return true;
+}
+
+bool circle_interpreter_test::compare(const std::string &file1, const std::string &file2)
+{
+  std::ifstream f1(file1.c_str(), std::ifstream::in | std::ifstream::binary);
+  std::ifstream f2(file2.c_str(), std::ifstream::in | std::ifstream::binary);
+
+  if (!f1.is_open() || !f2.is_open())
+  {
+    return false;
+  }
+
+  typedef unsigned char BYTE;
+  std::vector<BYTE> vBuffer1(READSIZE);
+  std::vector<BYTE> vBuffer2(READSIZE);
+
+  do
+  {
+    f1.read((char *)&vBuffer1[0], READSIZE);
+    std::streamsize f1_bytes = f1.gcount();
+    f2.read((char *)&vBuffer2[0], READSIZE);
+    std::streamsize f2_bytes = f2.gcount();
+
+    if (f1_bytes != f2_bytes)
+    {
+      return false;
+    }
+
+    if (!std::equal(vBuffer1.begin(), vBuffer1.end(), vBuffer2.begin()))
+    {
+      return false;
+    }
+  } while (f1.good() || f2.good());
+  return true;
+}
+
+TEST_F(circle_interpreter_test, show_help_msg)
+{
+  if (!initialize())
+  {
+    FAIL();
+    return;
+  }
+
+  std::string command = _circle_interpreter_path + " -h";
+  if (!run(command))
+  {
+    FAIL();
+    return;
+  }
+
+  const auto pos = _result.find("Usage: ./circle-interpreter");
+  ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(circle_interpreter_test, valid_command)
+{
+  if (!initialize())
+  {
+    FAIL();
+    return;
+  }
+
+  std::string model = _artifacts_path + "/Conv2D_000.circle";
+  std::string input_prefix = _artifacts_path + "/Conv2D_000.circle.input";
+  std::string output_prefix = "/tmp/Conv2D_000.circle.output";
+  std::string generated_output = output_prefix + "0";
+  std::remove(generated_output.c_str());
+  std::string command =
+    _circle_interpreter_path + " " + model + " " + input_prefix + " " + output_prefix;
+  if (!run(command))
+  {
+    FAIL();
+    return;
+  }
+
+  std::string expected_output = _artifacts_path + "/Conv2D_000.circle.output0";
+
+  if (!compare(generated_output, expected_output))
+  {
+    FAIL();
+    return;
+  }
+}
+
+TEST_F(circle_interpreter_test, invalid_option_NEG)
+{
+  if (!initialize())
+  {
+    FAIL();
+    return;
+  }
+
+  std::string model = _artifacts_path + "/Conv2D_000.circle";
+  std::string command = _circle_interpreter_path + " " + model;
+  if (!run(command))
+  {
+    FAIL();
+    return;
+  }
+
+  const auto pos = _result.find("Invalid argument");
+  ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(circle_interpreter_test, not_existing_model_NEG)
+{
+  if (!initialize())
+  {
+    FAIL();
+    return;
+  }
+
+  std::string not_existing_model = _artifacts_path + "/non_exist_file.foo";
+  std::string input_prefix = _artifacts_path + "/Conv2D_000.circle.input";
+  std::string output_prefix = "/tmp/Conv2D_000.circle.output";
+  std::remove(output_prefix.c_str());
+  std::string command =
+    _circle_interpreter_path + " " + not_existing_model + " " + input_prefix + " " + output_prefix;
+  if (!run(command))
+  {
+    FAIL();
+    return;
+  }
+
+  const auto pos = _result.find("Failed to load");
+  ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(circle_interpreter_test, invalid_input_prefix_NEG)
+{
+  if (!initialize())
+  {
+    FAIL();
+    return;
+  }
+
+  std::string model = _artifacts_path + "/Conv2D_000.circle";
+  std::string input_prefix = _artifacts_path + "/non_exist_file.foo";
+  std::string output_prefix = "/tmp/Conv2D_000.circle.output";
+  std::remove(output_prefix.c_str());
+  std::string command =
+    _circle_interpreter_path + " " + model + " " + input_prefix + " " + output_prefix;
+  if (!run(command))
+  {
+    FAIL();
+    return;
+  }
+
+  const auto pos = _result.find("Cannot open file");
+  ASSERT_NE(std::string::npos, pos);
+}
index 1d24127..48c29a5 100644 (file)
@@ -33,7 +33,9 @@ void readDataFromFile(const std::string &filename, char *data, size_t data_size)
   if (fs.fail())
     throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
   if (fs.read(data, data_size).fail())
-    throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n");
+    throw std::runtime_error("Input tensor size mismatches with \"" + filename + "\".\n");
+  if (fs.peek() != EOF)
+    throw std::runtime_error("Input tensor size mismatches with \"" + filename + "\".\n");
 }
 
 void writeDataToFile(const std::string &filename, const char *data, size_t data_size)
diff --git a/compiler/circle-mpqsolver/CMakeLists.txt b/compiler/circle-mpqsolver/CMakeLists.txt
new file mode 100644 (file)
index 0000000..25e318e
--- /dev/null
@@ -0,0 +1,37 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_executable(circle-mpqsolver "${SOURCES}")
+target_include_directories(circle-mpqsolver PRIVATE src)
+target_link_libraries(circle-mpqsolver arser)
+target_link_libraries(circle-mpqsolver vconone)
+target_link_libraries(circle-mpqsolver safemain)
+target_link_libraries(circle-mpqsolver luci_lang)
+target_link_libraries(circle-mpqsolver luci_service)
+target_link_libraries(circle-mpqsolver luci_pass)
+target_link_libraries(circle-mpqsolver luci_interpreter)
+target_link_libraries(circle-mpqsolver dio_hdf5)
+target_link_libraries(circle-mpqsolver luci_import)
+target_link_libraries(circle-mpqsolver luci_export)
+target_link_libraries(circle-mpqsolver luci_log)
+target_link_libraries(circle-mpqsolver nncc_common)
+
+install(TARGETS circle-mpqsolver DESTINATION bin)
+
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+# circle-mpqsolver is executable, so we do not link it to the test.
+# Instead, we use TEST_SOURCES to specify the sources used for tests.
+set(TEST_SOURCES
+    "src/bisection/DepthParameterizer.cpp"
+    "src/bisection/Quantizer.cpp"
+    "src/bisection/ErrorApproximator.cpp")
+
+nnas_find_package(GTest REQUIRED)
+GTest_AddTest(circle_mpqsolver_test ${TESTS} ${TEST_SOURCES})
+target_link_libraries(circle_mpqsolver_test luci_lang)
+target_link_libraries(circle_mpqsolver_test luci_service)
+target_link_libraries(circle_mpqsolver_test luci_pass)
diff --git a/compiler/circle-mpqsolver/README.md b/compiler/circle-mpqsolver/README.md
new file mode 100644 (file)
index 0000000..9f4c2e7
--- /dev/null
@@ -0,0 +1,68 @@
+# circle-mpqsolver
+_circle-mpqsolver_ provides light-weight methods for finding a high-quality mixed-precision model 
+within a reasonable time.
+
+## Methods
+
+### Bisection
+A model is split into two parts: the front and the back. One part is quantized to uint8 and the
+other to int16. Which part gets which precision is determined by our proxy metric: the upper bound
+of the total layer errors. (See https://github.com/Samsung/ONE/pull/10170#discussion_r1042246598 for more details.)
+
+The boundary between the front and the back is decided by the depth of operators (depth: distance
+from the input to the operator), i.e., given a depth d, layers with a depth less than d are included
+in the front, and the rest are included in the back. Bisection performs a binary search to find a
+depth that achieves a qerror less than target_qerror.
+
+When the front is quantized to Q16, the pseudocode is the following:
+```
+    until |_depth_max_ - _depth_min_| <=1 do
+        _current_depth_ = 0.5 * (_depth_max_ + _depth_min_)
+        if Loss(_current_depth_) < _target_loss_
+            _depth_max_ = _current_depth_
+        else
+            _depth_min_ = _current_depth_
+```
+where Loss(_current_depth_) is the qerror of the mixed-precision model split at _current_depth_.
+As every iteration halves the remaining range (|depth_max - depth_min|), the search converges in
+_~log2(max_depth)_ iterations.
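+
+The actual solver builds and evaluates a real mixed-precision model at every step; purely as an
+illustration, a self-contained C++ sketch of the same search could look as follows, where
+`bisect_depth` and the caller-provided `loss` callback are placeholders and not part of the
+circle-mpqsolver API:
+```
+#include <cmath>
+#include <functional>
+
+// Minimal sketch of the bisection search, assuming the front part is quantized to Q16.
+// `loss` stands in for "split the model at this depth, quantize, and measure its qerror".
+float bisect_depth(float min_depth, float max_depth, float target_loss,
+                   const std::function<float(float)> &loss)
+{
+  while (std::fabs(max_depth - min_depth) > 1.f)
+  {
+    const float current_depth = 0.5f * (max_depth + min_depth);
+    if (loss(current_depth) < target_loss)
+      max_depth = current_depth; // target met: try a smaller Q16 front
+    else
+      min_depth = current_depth; // target missed: enlarge the Q16 front
+  }
+  return max_depth; // resulting split depth
+}
+```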
+
+## Usage 
+Run _circle-mpqsolver_ with the following arguments.  
+
+--data: .h5 file with test data
+
+--input_model: Input float model initialized with min-max (recorded model)
+
+--output_model: Output quantized model
+
+--qerror_ratio: Target quantization error ratio. It should be in [0, 1]; 0 corresponds to the qerror of the fully int16 model and 1 to the qerror of the fully uint8 model. A lower `qerror_ratio` yields a more accurate solution.
+
+--bisection _mode_: whether input nodes should be quantized to Q16 ('auto', 'true', 'false')
+
+```
+$ ./circle-mpqsolver
+  --data <.h5 data>
+  --input_model <input_recorded_model>
+  --output_model <output_model_path>
+  --qerror_ratio <optional target qerror ratio; default is 0.5>
+  --bisection <whether input nodes should be quantized to Q16; default is 'auto'>
+```
+
+For example:
+```
+$ ./circle-mpqsolver
+    --data dataset.h5
+    --input_model model.recorded.circle
+    --output_model model.q_opt.circle
+    --qerror_ratio 0.4
+    --bisection true
+```
+
+It will produce _model.q_opt.circle_, which is _model.recorded.circle_ quantized to mixed precision
+using _dataset.h5_, with input nodes set to _Q16_ precision. The quantization error (_qerror_) of
+_model.q_opt.circle_ will be less than
+```
+qerror(full_q16) + qerror_ratio * (qerror(full_q8) - qerror(full_q16))
+```
+where _full_q16_ is the model quantized entirely to Q16 and _full_q8_ is the model quantized entirely to Q8.
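+
+As a hypothetical numeric example: if qerror(full_q16) = 0.01 and qerror(full_q8) = 0.05, then
+`--qerror_ratio 0.4` sets the target qerror to 0.01 + 0.4 * (0.05 - 0.01) = 0.026.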
diff --git a/compiler/circle-mpqsolver/requires.cmake b/compiler/circle-mpqsolver/requires.cmake
new file mode 100644 (file)
index 0000000..73492a1
--- /dev/null
@@ -0,0 +1,6 @@
+require("safemain")
+require("arser")
+require("vconone")
+require("luci")
+require("luci-interpreter")
+require("dio-hdf5")
diff --git a/compiler/circle-mpqsolver/src/CircleMPQSolver.cpp b/compiler/circle-mpqsolver/src/CircleMPQSolver.cpp
new file mode 100644 (file)
index 0000000..23e8fd4
--- /dev/null
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <arser/arser.h>
+#include <vconone/vconone.h>
+#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
+#include <luci/Log.h>
+
+#include "bisection/BisectionSolver.h"
+
+#include <iostream>
+#include <iomanip>
+#include <chrono>
+
+void print_version(void)
+{
+  std::cout << "circle-mpqsolver version " << vconone::get_string() << std::endl;
+  std::cout << vconone::get_copyright() << std::endl;
+}
+
+int entry(int argc, char **argv)
+{
+  LOGGER(l);
+
+  const std::string bisection_str = "--bisection";
+
+  arser::Arser arser("circle-mpqsolver provides light-weight methods for finding a high-quality "
+                     "mixed-precision model within a reasonable time.");
+
+  arser::Helper::add_version(arser, print_version);
+  arser::Helper::add_verbose(arser);
+
+  arser.add_argument("--data").required(true).help("Path to the test data");
+  arser.add_argument("--data_format").required(false).help("Test data format (default: h5)");
+
+  arser.add_argument("--qerror_ratio")
+    .type(arser::DataType::FLOAT)
+    .default_value(0.5f)
+    .help("quantization error ratio ([0, 1])");
+
+  arser.add_argument(bisection_str)
+    .nargs(1)
+    .type(arser::DataType::STR)
+    .help("Single optional argument for bisection method. "
+          "Whether input node should be quantized to Q16: 'auto', 'true', 'false'.");
+
+  arser.add_argument("--input_model")
+    .required(true)
+    .help("Input float model with min max initialized");
+
+  arser.add_argument("--input_dtype")
+    .type(arser::DataType::STR)
+    .default_value("uint8")
+    .help("Data type of quantized model's inputs (default: uint8)");
+
+  arser.add_argument("--output_dtype")
+    .type(arser::DataType::STR)
+    .default_value("uint8")
+    .help("Data type of quantized model's outputs (default: uint8)");
+
+  arser.add_argument("--output_model").required(true).help("Output quantized model");
+
+  try
+  {
+    arser.parse(argc, argv);
+  }
+  catch (const std::runtime_error &err)
+  {
+    std::cerr << err.what() << std::endl;
+    std::cout << arser;
+    return EXIT_FAILURE;
+  }
+
+  if (arser.get<bool>("--verbose"))
+  {
+    // The third parameter of setenv means REPLACE.
+    // If REPLACE is zero, it does not overwrite an existing value.
+    setenv("LUCI_LOG", "100", 0);
+  }
+
+  auto data_path = arser.get<std::string>("--data");
+  auto input_model_path = arser.get<std::string>("--input_model");
+  auto output_model_path = arser.get<std::string>("--output_model");
+  auto input_dtype = arser.get<std::string>("--input_dtype");
+  auto output_dtype = arser.get<std::string>("--output_dtype");
+
+  float qerror_ratio = arser.get<float>("--qerror_ratio");
+  if (qerror_ratio < 0.0 || qerror_ratio > 1.f)
+  {
+    std::cerr << "ERROR: quantization ratio must be in [0, 1]" << std::endl;
+    return EXIT_FAILURE;
+  }
+  auto start = std::chrono::high_resolution_clock::now();
+
+  if (arser[bisection_str])
+  {
+    // optimize
+    using namespace mpqsolver::bisection;
+
+    BisectionSolver solver(data_path, qerror_ratio, input_dtype, output_dtype);
+    {
+      auto value = arser.get<std::string>(bisection_str);
+      if (value == "auto")
+      {
+        solver.algorithm(BisectionSolver::Algorithm::Auto);
+      }
+      else if (value == "true")
+      {
+        solver.algorithm(BisectionSolver::Algorithm::ForceQ16Front);
+      }
+      else if (value == "false")
+      {
+        solver.algorithm(BisectionSolver::Algorithm::ForceQ16Back);
+      }
+      else
+      {
+        std::cerr << "ERROR: Unrecognized option for bisection algortithm" << input_model_path
+                  << std::endl;
+        return EXIT_FAILURE;
+      }
+    }
+
+    auto optimized = solver.run(input_model_path);
+    if (optimized == nullptr)
+    {
+      std::cerr << "ERROR: Failed to build mixed precision model" << input_model_path << std::endl;
+      return EXIT_FAILURE;
+    }
+
+    // save optimized
+    {
+      luci::CircleExporter exporter;
+      luci::CircleFileExpContract contract(optimized.get(), output_model_path);
+      if (!exporter.invoke(&contract))
+      {
+        std::cerr << "ERROR: Failed to export mixed precision model" << input_model_path
+                  << std::endl;
+        return EXIT_FAILURE;
+      }
+    }
+  }
+  else
+  {
+    std::cerr << "ERROR: Unrecognized solver" << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  auto duration = std::chrono::duration_cast<std::chrono::seconds>(
+    std::chrono::high_resolution_clock::now() - start);
+  VERBOSE(l, 0) << "Elapsed Time: " << std::setprecision(5) << duration.count() / 60.f
+                << " minutes." << std::endl;
+
+  return EXIT_SUCCESS;
+}
diff --git a/compiler/circle-mpqsolver/src/MPQSolver.cpp b/compiler/circle-mpqsolver/src/MPQSolver.cpp
new file mode 100644 (file)
index 0000000..02732eb
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MPQSolver.h"
+
+using namespace mpqsolver;
+
+MPQSolver::MPQSolver(const std::string &input_data_path, float qerror_ratio,
+                     const std::string &input_quantization, const std::string &output_quantization)
+  : _input_data_path(input_data_path), _qerror_ratio(qerror_ratio),
+    _input_quantization(input_quantization), _output_quantization(output_quantization)
+{
+}
diff --git a/compiler/circle-mpqsolver/src/MPQSolver.h b/compiler/circle-mpqsolver/src/MPQSolver.h
new file mode 100644 (file)
index 0000000..7c018d0
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_MPQSOLVER_H__
+#define __MPQSOLVER_MPQSOLVER_H__
+
+#include <luci/IR/Module.h>
+
+#include <memory>
+#include <string>
+
+namespace mpqsolver
+{
+
+class MPQSolver
+{
+public:
+  /**
+   * @brief construct Solver using input_data_path for .h5 file,
+   * qerror_ratio to set target qerror, and input_quantization/output_quantization to set
+   * quantization type at input/output respectively
+   */
+  MPQSolver(const std::string &input_data_path, float qerror_ratio,
+            const std::string &input_quantization, const std::string &output_quantization);
+  virtual ~MPQSolver() = default;
+
+  /**
+   * @brief run solver for recorded float module at module_path
+   */
+  virtual std::unique_ptr<luci::Module> run(const std::string &module_path) = 0;
+
+protected:
+  std::string _input_data_path;
+  std::string _input_quantization;
+  std::string _output_quantization;
+  float _qerror_ratio = 0.f; // quantization error ratio
+};
+
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_MPQSOLVER_H__
diff --git a/compiler/circle-mpqsolver/src/bisection/BisectionSolver.cpp b/compiler/circle-mpqsolver/src/bisection/BisectionSolver.cpp
new file mode 100644 (file)
index 0000000..d506802
--- /dev/null
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BisectionSolver.h"
+#include "DepthParameterizer.h"
+#include "ErrorMetric.h"
+#include "ErrorApproximator.h"
+
+#include <luci/ImporterEx.h>
+#include <luci/Log.h>
+
+#include <cmath>
+#include <iostream>
+
+using namespace mpqsolver::bisection;
+
+namespace
+{
+
+bool error_at_input_is_larger_than_at_output(const NodeDepthType &nodes_depth, float cut_depth)
+{
+  LOGGER(l);
+
+  float error_at_input = 0;
+  float error_at_output = 0;
+  for (auto &iter : nodes_depth)
+  {
+    float cur_error = approximate(iter.first);
+    if (iter.second < cut_depth)
+    {
+      error_at_input += cur_error;
+    }
+    else
+    {
+      error_at_output += cur_error;
+    }
+  }
+
+  if (error_at_input > error_at_output)
+  {
+    VERBOSE(l, 0) << "Q16 will be set at input due to ";
+  }
+  else
+  {
+    VERBOSE(l, 0) << "Q8 will be set at input due to ";
+  }
+  VERBOSE(l, 0) << error_at_input << " error at input vs ";
+  VERBOSE(l, 0) << error_at_output << " error at output." << std::endl;
+
+  return error_at_input > error_at_output;
+}
+
+std::unique_ptr<luci::Module> read_module(const std::string &path)
+{
+  luci::ImporterEx importerex;
+  auto module = importerex.importVerifyModule(path);
+  if (module.get() == nullptr)
+  {
+    std::cerr << "ERROR: Failed to load " << path << std::endl;
+    return nullptr;
+  }
+
+  return module;
+}
+
+} // namespace
+
+BisectionSolver::BisectionSolver(const std::string &input_data_path, float qerror_ratio,
+                                 const std::string &input_quantization,
+                                 const std::string &output_quantization)
+  : MPQSolver(input_data_path, qerror_ratio, input_quantization, output_quantization)
+{
+  _quantizer = std::make_unique<Quantizer>(_input_quantization, _output_quantization);
+}
+
+float BisectionSolver::evaluate(const DatasetEvaluator &evaluator, const std::string &flt_path,
+                                const std::string &def_quant, LayerParams &layers)
+{
+  auto model = read_module(flt_path);
+  // get fake quantized model for evaluation
+  if (!_quantizer->fake_quantize(model.get(), def_quant, layers))
+  {
+    throw std::runtime_error("Failed to produce fake-quantized model.");
+  }
+
+  return evaluator.evaluate(model.get());
+}
+
+void BisectionSolver::algorithm(Algorithm algorithm) { _algorithm = algorithm; }
+
+std::unique_ptr<luci::Module> BisectionSolver::run(const std::string &module_path)
+{
+  LOGGER(l);
+
+  auto module = read_module(module_path);
+
+  float min_depth = 0.f;
+  float max_depth = 0.f;
+  NodeDepthType nodes_depth;
+  if (compute_depth(module.get(), nodes_depth, min_depth, max_depth) !=
+      ParameterizerResult::SUCCESS)
+  {
+    std::cerr << "ERROR: Invalid graph for bisectioning" << std::endl;
+    return nullptr;
+  }
+
+  std::unique_ptr<MAEMetric> metric = std::make_unique<MAEMetric>();
+  DatasetEvaluator evaluator(module.get(), _input_data_path, *metric.get());
+
+  LayerParams layer_params;
+  float int16_qerror =
+    evaluate(evaluator, module_path, "int16" /* default quant_dtype */, layer_params);
+  VERBOSE(l, 0) << "Full int16 model quantization error " << int16_qerror << std::endl;
+
+  float uint8_qerror =
+    evaluate(evaluator, module_path, "uint8" /* default quant_dtype */, layer_params);
+  VERBOSE(l, 0) << "Full uint8 model quantization error " << uint8_qerror << std::endl;
+
+  if (int16_qerror > uint8_qerror)
+  {
+    throw std::runtime_error("Q8 model's qerror is less than Q16 model's qerror.");
+  }
+
+  _qerror = int16_qerror + _qerror_ratio * std::fabs(uint8_qerror - int16_qerror);
+  VERBOSE(l, 0) << "Target quantization error " << _qerror << std::endl;
+
+  if (uint8_qerror <= _qerror)
+  {
+    // no need for bisection: the full Q8 model already meets the target, so quantize and return it
+    if (!_quantizer->quantize(module.get(), "uint8", layer_params))
+    {
+      std::cerr << "ERROR: Failed to quantize model" << std::endl;
+      return nullptr;
+    }
+    return module;
+  }
+
+  int last_depth = -1;
+  float best_depth = -1;
+  LayerParams best_params;
+  if (module->size() != 1)
+  {
+    throw std::runtime_error("Unsupported module");
+  }
+  auto graph = module->graph(0);
+  auto active_nodes = loco::active_nodes(loco::output_nodes(graph));
+  // input and output nodes are not valid for quantization, so let's remove them
+  for (auto node : loco::input_nodes(graph))
+  {
+    active_nodes.erase(node);
+  }
+  for (auto node : loco::output_nodes(graph))
+  {
+    active_nodes.erase(node);
+  }
+
+  // decide whether nodes at the input are more susceptible to quantization error (and thus should
+  // be quantized to Q16) than nodes at the output
+  bool int16_front = true;
+  switch (_algorithm)
+  {
+    case Algorithm::Auto:
+      int16_front =
+        error_at_input_is_larger_than_at_output(nodes_depth, 0.5f * (max_depth + min_depth));
+      break;
+    case Algorithm::ForceQ16Front:
+      int16_front = true;
+      break;
+    case Algorithm::ForceQ16Back:
+      int16_front = false; // the back part is forced to Q16
+      break;
+  }
+
+  while (true)
+  {
+    int cut_depth = static_cast<int>(std::floor(0.5f * (min_depth + max_depth)));
+
+    if (last_depth == cut_depth)
+    {
+      break;
+    }
+    last_depth = cut_depth;
+
+    LayerParams layer_params;
+    for (auto &node : active_nodes)
+    {
+      auto cur_node = loco::must_cast<luci::CircleNode *>(node);
+      auto iter = nodes_depth.find(cur_node);
+      if (iter == nodes_depth.end())
+      {
+        continue; // to filter out nodes like weights
+      }
+
+      float depth = iter->second;
+
+      if ((depth <= cut_depth && int16_front) || (depth >= cut_depth && !int16_front))
+      {
+        auto layer_param = std::make_shared<LayerParam>();
+        {
+          layer_param->name = cur_node->name();
+          layer_param->dtype = "int16";
+          layer_param->granularity = "channel";
+        }
+
+        layer_params.emplace_back(layer_param);
+      }
+    }
+
+    float cur_accuracy = evaluate(evaluator, module_path, "uint8", layer_params);
+    VERBOSE(l, 0) << cut_depth << " : " << cur_accuracy << std::endl;
+
+    if (cur_accuracy < _qerror)
+    {
+      int16_front ? (max_depth = cut_depth) : (min_depth = cut_depth);
+      best_params = layer_params;
+      best_depth = cut_depth;
+    }
+    else
+    {
+      int16_front ? (min_depth = cut_depth) : (max_depth = cut_depth);
+    }
+  }
+
+  VERBOSE(l, 0) << "Found the best configuration at " << best_depth << " depth." << std::endl;
+  if (!_quantizer->quantize(module.get(), "uint8", best_params))
+  {
+    std::cerr << "ERROR: Failed to quantize model" << std::endl;
+    return nullptr;
+  }
+
+  return module;
+}
diff --git a/compiler/circle-mpqsolver/src/bisection/BisectionSolver.h b/compiler/circle-mpqsolver/src/bisection/BisectionSolver.h
new file mode 100644 (file)
index 0000000..7ec2e69
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_BISECTION_SOLVER_H__
+#define __MPQSOLVER_BISECTION_SOLVER_H__
+
+#include "Quantizer.h"
+#include "Evaluator.h"
+#include <MPQSolver.h>
+
+#include <luci/IR/Module.h>
+
+#include <memory>
+#include <string>
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+class BisectionSolver final : public MPQSolver
+{
+public:
+  /**
+   * @brief Algorithm options for running bisection algorithm
+   */
+  enum Algorithm
+  {
+    Auto,
+    ForceQ16Front,
+    ForceQ16Back,
+  };
+
+public:
+  /**
+   * @brief construct Solver using input_data_path for .h5 file,
+   * qerror_ratio to set target qerror, and input_quantization/output_quantization to set
+   * quantization type at input/output respectively
+   */
+  BisectionSolver(const std::string &input_data_path, float qerror_ratio,
+                  const std::string &input_quantization, const std::string &output_quantization);
+  BisectionSolver() = delete;
+
+  /**
+   * @brief run bisection for recorded float module at module_path
+   */
+  std::unique_ptr<luci::Module> run(const std::string &module_path) override;
+
+  /**
+   * @brief set used algorithm
+   */
+  void algorithm(Algorithm algorithm);
+
+private:
+  float evaluate(const DatasetEvaluator &evaluator, const std::string &module_path,
+                 const std::string &def_quant, LayerParams &layers);
+
+private:
+  float _qerror = 0.f; // quantization error
+  Algorithm _algorithm = Algorithm::ForceQ16Front;
+  std::unique_ptr<Quantizer> _quantizer;
+};
+
+} // namespace bisection
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_BISECTION_SOLVER_H__
diff --git a/compiler/circle-mpqsolver/src/bisection/DepthParameterizer.cpp b/compiler/circle-mpqsolver/src/bisection/DepthParameterizer.cpp
new file mode 100644 (file)
index 0000000..cbf1b96
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthParameterizer.h"
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+/**
+ * @brief compute maximal distance from graph inputs to graph nodes along with min/max values of
+ * distance and return status of computation (Assumes graph has no cycles)
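+ * For example, for a graph Input -> Conv -> Output the computed depths are
+ * {Input: 0, Conv: 1, Output: 2}, so min_depth = 0 and max_depth = 2.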
+ */
+ParameterizerResult compute_depth(const luci::Module *module, NodeDepthType &nodes_depth,
+                                  float &min_depth, float &max_depth)
+{
+  if (module == nullptr)
+    return ParameterizerResult::FAILURE;
+
+  if (module->size() != 1)
+    return ParameterizerResult::FAILURE;
+
+  auto graph = module->graph(0);
+  if (!graph)
+    return ParameterizerResult::FAILURE;
+
+  // initializing
+  std::vector<luci::CircleNode *> to_process;
+  std::map<std::string, float> named_depth;
+  {
+    auto inputs = loco::input_nodes(graph);
+    for (auto &node : inputs)
+    {
+      auto cnode = loco::must_cast<luci::CircleNode *>(node);
+      to_process.emplace_back(cnode);
+      nodes_depth[cnode] = 0.f;
+      named_depth[cnode->name()] = 0.f;
+    }
+  }
+
+  // enumerating
+  while (!to_process.empty())
+  {
+    auto cur_node = to_process.back();
+    to_process.pop_back();
+    auto iter = nodes_depth.find(cur_node);
+    if (iter == nodes_depth.end())
+    {
+      return ParameterizerResult::FAILURE; // unexpected
+    }
+    float cur_depth = iter->second + 1;
+    // processing children
+    auto children = loco::succs(cur_node);
+    for (auto &child : children)
+    {
+      auto cichild = loco::must_cast<luci::CircleNode *>(child);
+      auto node_depth = nodes_depth.find(cichild);
+      if (node_depth == nodes_depth.end() || node_depth->second < cur_depth)
+      {
+        // initialize depth
+        nodes_depth[cichild] = cur_depth;
+        to_process.push_back(cichild);
+        named_depth[cichild->name()] = cur_depth;
+      }
+    }
+  }
+
+  // compute min/max of depth parameter
+  auto minmax = std::minmax_element(
+    nodes_depth.begin(), nodes_depth.end(),
+    [=](const std::pair<luci::CircleNode *, float> &el1,
+        const std::pair<luci::CircleNode *, float> &el2) { return el1.second < el2.second; });
+
+  min_depth = minmax.first->second;
+  max_depth = minmax.second->second;
+
+  return ParameterizerResult::SUCCESS;
+}
+
+} // namespace bisection
+} // namespace mpqsolver
diff --git a/compiler/circle-mpqsolver/src/bisection/DepthParameterizer.h b/compiler/circle-mpqsolver/src/bisection/DepthParameterizer.h
new file mode 100644 (file)
index 0000000..6a96aa1
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_DEPTH_PARAMETERIZER_H__
+#define __MPQSOLVER_DEPTH_PARAMETERIZER_H__
+
+#include <luci/IR/Module.h>
+#include <luci/IR/CircleNodeDecl.h>
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+using NodeDepthType = std::map<luci::CircleNode *, float>;
+
+/**
+ * @brief status of parameterization
+ */
+enum class ParameterizerResult : int32_t
+{
+  SUCCESS = 0,
+  FAILURE = 1
+};
+
+/**
+ * @brief compute maximal distance from graph inputs to graph nodes along with min/max values of
+ * distance and return status of computation (success/failure)
+ */
+ParameterizerResult compute_depth(const luci::Module *module, NodeDepthType &nodes_depth,
+                                  float &min_depth, float &max_depth);
+
+} // namespace bisection
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_DEPTH_PARAMETERIZER_H__
diff --git a/compiler/circle-mpqsolver/src/bisection/DepthParameterizer.test.cpp b/compiler/circle-mpqsolver/src/bisection/DepthParameterizer.test.cpp
new file mode 100644 (file)
index 0000000..d3865e4
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "DepthParameterizer.h"
+#include "TestHelper.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+class NConvGraph final : public SimpleGraph
+{
+protected:
+  loco::Node *insertGraphBody(loco::Node *input) override
+  {
+    _filter = _g->nodes()->create<luci::CircleConst>();
+    _filter->dtype(loco::DataType::FLOAT32);
+    _filter->shape({_channel_size, 1, 1, _channel_size});
+    _filter->name("conv_filter");
+
+    _bias = _g->nodes()->create<luci::CircleConst>();
+    _bias->dtype(loco::DataType::FLOAT32);
+    _bias->shape({_channel_size});
+    _bias->name("conv_bias");
+
+    _conv = _g->nodes()->create<luci::CircleConv2D>();
+    _conv->padding(luci::Padding::SAME);
+    _conv->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _conv->dtype(loco::DataType::FLOAT32);
+    _conv->shape({1, _width, _height, _channel_size});
+    _conv->name("conv");
+    _conv->filter(_filter);
+    _conv->bias(_bias);
+    _conv->input(input);
+
+    return _conv;
+  }
+
+public:
+  luci::CircleConv2D *_conv = nullptr;
+  luci::CircleConst *_filter = nullptr;
+  luci::CircleConst *_bias = nullptr;
+};
+
+} // namespace
+
+TEST(CircleMPQSolverDepthParameteriserTest, verifyResultsTest)
+{
+  auto m = luci::make_module();
+  NConvGraph g;
+  g.init();
+  auto conv = g._conv;
+  auto input = g._input;
+  auto output = g._output;
+
+  g.transfer_to(m.get());
+
+  mpqsolver::bisection::NodeDepthType nodes_depth;
+  float min_depth = std::numeric_limits<float>().max();
+  float max_depth = -std::numeric_limits<float>().max();
+  auto status = mpqsolver::bisection::compute_depth(m.get(), nodes_depth, min_depth, max_depth);
+
+  EXPECT_TRUE(status == mpqsolver::bisection::ParameterizerResult::SUCCESS);
+  EXPECT_TRUE(max_depth == 2 && min_depth == 0);
+  EXPECT_TRUE(nodes_depth[input] == min_depth);
+  EXPECT_TRUE(nodes_depth[conv] == 1);
+  EXPECT_TRUE(nodes_depth[output] == max_depth);
+}
+
+TEST(CircleMPQSolverDepthParameteriserTest, verifyResultsTest_NEG)
+{
+  auto m = luci::make_module();
+  mpqsolver::bisection::NodeDepthType nodes_depth;
+  float min_depth = std::numeric_limits<float>().max();
+  float max_depth = -std::numeric_limits<float>().max();
+  auto status = mpqsolver::bisection::compute_depth(m.get(), nodes_depth, min_depth, max_depth);
+
+  EXPECT_TRUE(status == mpqsolver::bisection::ParameterizerResult::FAILURE);
+}
diff --git a/compiler/circle-mpqsolver/src/bisection/ErrorApproximator.cpp b/compiler/circle-mpqsolver/src/bisection/ErrorApproximator.cpp
new file mode 100644 (file)
index 0000000..ebd7258
--- /dev/null
@@ -0,0 +1,387 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ErrorApproximator.h"
+
+#include <cmath>
+#include <limits>
+#include <vector>
+#include <functional>
+#include <luci/IR/CircleNode.h>
+
+namespace
+{
+
+using namespace luci;
+using IterFunc = std::function<void(uint32_t *, loco::TensorShape &, int32_t)>;
+
+inline bool has_min_max(const CircleNode *node)
+{
+  return node->quantparam() && !node->quantparam()->min.empty() && !node->quantparam()->max.empty();
+}
+
+inline uint32_t cal_offset(const loco::TensorShape &dimension, uint32_t *indices)
+{
+  return indices[0] * dimension.dim(1).value() * dimension.dim(2).value() *
+           dimension.dim(3).value() +
+         indices[1] * dimension.dim(2).value() * dimension.dim(3).value() +
+         indices[2] * dimension.dim(3).value() + indices[3];
+}
+
+uint32_t get_channel_dim_index(const CircleNode *node)
+{
+  uint32_t index = 0;
+  auto opcode = node->opcode();
+  switch (opcode)
+  {
+    case CircleOpcode::CONV_2D:
+    case CircleOpcode::TRANSPOSE_CONV:
+    case CircleOpcode::FULLY_CONNECTED:
+      index = 0;
+      break;
+    case CircleOpcode::DEPTHWISE_CONV_2D:
+      index = 3;
+      break;
+    default:
+      throw std::runtime_error("Failed to find channel index in " + node->name());
+  }
+
+  return index;
+}
+
+bool set_weight_dim(const CircleNode *node, const CircleConst *weights,
+                    loco::TensorShape &dimension)
+{
+  auto opcode = node->opcode();
+  switch (opcode)
+  {
+    case CircleOpcode::CONV_2D:
+    case CircleOpcode::TRANSPOSE_CONV:
+    case CircleOpcode::DEPTHWISE_CONV_2D:
+      assert(node->rank() == 4);
+      dimension.rank(node->rank());
+      dimension.dim(0).set(weights->dim(0).value());
+      dimension.dim(1).set(weights->dim(1).value());
+      dimension.dim(2).set(weights->dim(2).value());
+      dimension.dim(3).set(weights->dim(3).value());
+      break;
+    case CircleOpcode::FULLY_CONNECTED:
+      assert(node->rank() == 2);
+      dimension.rank(4);
+      dimension.dim(0).set(weights->dim(0).value());
+      dimension.dim(1).set(1); // Set FC layer like CONV
+      dimension.dim(2).set(1);
+      dimension.dim(3).set(weights->dim(1).value());
+      break;
+    default:
+      return false;
+  }
+
+  return true;
+}
+
+loco::Node *get_weight(const CircleNode *node)
+{
+  loco::Node *weight = nullptr;
+  auto opcode = node->opcode();
+  switch (opcode)
+  {
+    case CircleOpcode::CONV_2D:
+    {
+      auto conv = loco::must_cast<const CircleConv2D *>(node);
+      weight = conv->filter();
+    }
+    break;
+    case CircleOpcode::DEPTHWISE_CONV_2D:
+    {
+      auto dconv = loco::must_cast<const CircleDepthwiseConv2D *>(node);
+      weight = dconv->filter();
+    }
+    break;
+    case CircleOpcode::TRANSPOSE_CONV:
+    {
+      auto tconv = loco::must_cast<const CircleTransposeConv *>(node);
+      weight = tconv->filter();
+    }
+    break;
+    case CircleOpcode::FULLY_CONNECTED:
+    {
+      auto fc = loco::must_cast<const CircleFullyConnected *>(node);
+      weight = fc->weights();
+    }
+    break;
+    default:
+      break;
+  }
+
+  return weight;
+}
+
+inline CircleConst *get_constant_weight(const CircleNode *node)
+{
+  CircleConst *weight = dynamic_cast<CircleConst *>(get_weight(node));
+  if (weight == nullptr)
+  {
+    throw std::runtime_error("Unsupported non-constant weights in convolution node " +
+                             node->name());
+  }
+
+  return weight;
+}
+
+void iterate_per_channel(const CircleNode *node, IterFunc func)
+{
+  CircleConst *weight = get_constant_weight(node);
+
+  loco::TensorShape dimension;
+  set_weight_dim(node, weight, dimension);
+  uint32_t indices[4] = {
+    0,
+  };
+
+  auto channel_dim_index = get_channel_dim_index(node);
+
+  for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
+  {
+    for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
+    {
+      for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
+      {
+        for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
+        {
+          func(indices, dimension, channel_dim_index);
+        }
+      }
+    }
+  }
+}
+
+void cal_minmax_per_channel(const CircleNode *node, std::vector<float> &min,
+                            std::vector<float> &max)
+{
+  CircleConst *weight = get_constant_weight(node);
+
+  loco::TensorShape dimension;
+  set_weight_dim(node, weight, dimension);
+
+  auto channel_dim_index = get_channel_dim_index(node);
+  auto size = dimension.dim(channel_dim_index).value();
+
+  std::vector<bool> has_min_max_value(size, false);
+  min.resize(size);
+  max.resize(size);
+
+  auto cal_minmax = [&](uint32_t *indices, loco::TensorShape &dimension,
+                        uint32_t channel_dim_index) {
+    uint32_t channel_idx = indices[channel_dim_index];
+    auto data = weight->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+    if (has_min_max_value[channel_idx])
+    {
+      min[channel_idx] = data < min[channel_idx] ? data : min[channel_idx];
+      max[channel_idx] = data > max[channel_idx] ? data : max[channel_idx];
+    }
+    else
+    {
+      min[channel_idx] = data;
+      max[channel_idx] = data;
+      has_min_max_value[channel_idx] = true;
+    }
+  };
+
+  iterate_per_channel(node, cal_minmax);
+}
+
+bool get_shape(const CircleNode *circle_node, std::vector<uint32_t> &shape)
+{
+  if (circle_node->shape_status() == ShapeStatus::VALID)
+  {
+    auto rank = circle_node->rank();
+    if (rank != 4)
+      return false;
+
+    shape.resize(rank);
+    for (uint32_t i = 0; i < rank; i++)
+    {
+      shape[i] = circle_node->dim(i).value();
+    }
+    return true;
+  }
+
+  return false;
+}
+
+/**
+ * @brief get_additions_per_channel computes W * H * CIN * KW * KH.
+ *
+ * W, H - width/height of OFM; KW, KH - convolution kernel width/height;
+ * CIN - number of channels in IFM (for depthwise convolution it is 1)
+ * See
+ * https://github.com/Samsung/ONE/pull/10170#discussion_r1065371638
+ * for derivation.
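+ *
+ * Hypothetical example: an OFM of shape [1, 16, 16, 32], a 3x3 kernel, and CIN = 16 give
+ * 16 * 16 * 3 * 3 * 16 = 36864 additions per output channel.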
+ */
+uint32_t get_additions_per_channel(const CircleNode *node)
+{
+  uint32_t adds_per_channel = 1;
+  std::vector<uint32_t> ofm_shape;
+  if (!get_shape(node, ofm_shape)) // [BATCH, W, H, channels_out]
+  {
+    throw std::runtime_error("Failed to find correct shape " + node->name());
+  }
+
+  adds_per_channel *= ofm_shape[1] * ofm_shape[2]; // adds_per_channel *= W * H
+
+  auto weights = loco::must_cast<CircleNode *>(get_weight(node));
+  {
+    std::vector<uint32_t> w_shape;
+    if (get_shape(weights, w_shape)) // [channels_out, k_x, k_y, channels_in]
+    {
+      adds_per_channel *= (w_shape[1] * w_shape[2]); // adds_per_channel *= k_x * k_y
+      if (node->opcode() != CircleOpcode::DEPTHWISE_CONV_2D)
+      {
+        // for non-depthwise convolutions we also need to scale by CIN
+        adds_per_channel *= w_shape[3]; // adds_per_channel *= c_in
+      }
+    }
+  }
+
+  return adds_per_channel;
+}
+
+void get_min_max_ifm_values(const CircleNode *node, float &ci_min, float &ci_max)
+{
+  auto preds = loco::preds(node);
+  for (const auto &pred : preds)
+  {
+    auto parent_node = loco::must_cast<const luci::CircleNode *>(pred);
+    if (has_min_max(parent_node))
+    {
+      auto quantparam = parent_node->quantparam();
+      if (quantparam->min.size() > 0)
+      {
+        ci_min = quantparam->min[0];
+        ci_max = quantparam->max[0];
+      }
+    }
+  }
+}
+
+/**
+ * @brief Return upper bound of quantization error for CONV, DCONV, TCONV.
+ *
+ * See
+ * https://github.com/Samsung/ONE/pull/10170#discussion_r1065371638 for details.
+ */
+float approximate_conv(const CircleNode *node)
+{
+  float volume_W_A_err = 0.f;
+  {
+    // activation min-max values
+    float ci_min = 0.f;
+    float ci_max = 0.f;
+    get_min_max_ifm_values(node, ci_min, ci_max);
+
+    // channel-wise min, max
+    std::vector<float> min_values;
+    std::vector<float> max_values;
+    cal_minmax_per_channel(node, min_values, max_values);
+    assert(not min_values.empty());
+    assert(not max_values.empty());
+
+    // ranges  = (max_values - min_values)
+    std::vector<float> ranges;
+    std::transform(max_values.begin(), max_values.end(), min_values.begin(),
+                   std::back_inserter(ranges), std::minus<float>());
+
+    // maximal weight value across all channels
+    float w_max = 0;
+    {
+      assert(max_values.size() == min_values.size());
+      for (size_t i = 0; i < max_values.size(); ++i)
+      {
+        w_max = std::max(w_max, std::abs(max_values[i]));
+        w_max = std::max(w_max, std::abs(min_values[i]));
+      }
+    }
+
+    // total weight quantization error across all channels
+    // so maximal error of quantization is ~ (max_value - min_value) / 255
+    // omitting 255 term we get that maximal error of quantization is just its range
+    float sum_err = 0.f;
+    for (auto cur_err : ranges)
+    {
+      sum_err += cur_err;
+    }
+
+    uint32_t adds_per_channel = get_additions_per_channel(node);
+    uint32_t num_of_channels = ranges.size();
+
+    // maximal error introduced by weights quantization (for all channels)
+    volume_W_A_err = sum_err * std::max(::fabs(ci_max), ::fabs(ci_min));
+    // plus total error introduced by activation quantization (for all channels)
+    volume_W_A_err += w_max * num_of_channels * ::fabs(ci_max - ci_min);
+    // scale by volume of adds per channel
+    volume_W_A_err *= adds_per_channel;
+    // scale to get more readable output values
+    volume_W_A_err /= 1.e+6f;
+  }
+
+  return volume_W_A_err;
+}
+
+} // namespace
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+/**
+ * How does approximate() work?
+ *
+ * Currently it handles only convolution layers, but may be generalized to other types as well.
+ * See the discussion at https://github.com/Samsung/ONE/pull/10170#discussion_r1042246598
+ * Convolution can be expressed as a matrix multiplication.
+ * Quantization introduces error into the convolution operand (activations from the previous
+ * layer, IFM) as well as into the convolution weights:
+ *   A_q * W_q = (A + q_err(A)) * (W + q_err(W))
+ *             = A * W + A * q_err(W) + W * q_err(A) + q_err(A) * q_err(W)
+ * Assuming the quadratic term q_err(A) * q_err(W) is negligible, we get
+ *   A_q * W_q ~ A * W + A * q_err(W) + W * q_err(A)
+ * where q_err is the quantization error, W is the weight matrix, and A is the activation (IFM).
+ * So the quantization error of the matrix multiplication can be approximated as
+ * A * q_err(W) + W * q_err(A), whose upper bound is
+ *   number_of_additions * (A_max * (W_max - W_min) / 255 + W_max * (A_max - A_min) / 255)
+ * The code below uses this to estimate the total error of quantizing a convolution node into Q8.
+ * It is just a heuristic (metric sensitivity also depends strongly on derivatives).
+ */
+float approximate(const CircleNode *node)
+{
+  auto opcode = node->opcode();
+  float qerror = 0.f;
+  switch (opcode)
+  {
+    case CircleOpcode::DEPTHWISE_CONV_2D:
+    case CircleOpcode::CONV_2D:
+    case CircleOpcode::TRANSPOSE_CONV:
+      qerror = approximate_conv(node);
+      break;
+    default: // TODO (FULLY_CONNECTED e.g.)
+      qerror = 0.f;
+  }
+
+  return qerror;
+}
+
+} // namespace bisection
+} // namespace mpqsolver
diff --git a/compiler/circle-mpqsolver/src/bisection/ErrorApproximator.h b/compiler/circle-mpqsolver/src/bisection/ErrorApproximator.h
new file mode 100644 (file)
index 0000000..efbfa82
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_BISECTION_ERROR_APPROXIMATOR_H__
+#define __MPQSOLVER_BISECTION_ERROR_APPROXIMATOR_H__
+
+#include <loco.h>
+
+#include <luci/IR/CircleNodeDecl.h>
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+/**
+ * @brief approximate error introduced while quantizing node into Q8
+ */
+float approximate(const luci::CircleNode *node);
+
+} // namespace bisection
+} // namespace mpqsolver
+
+#endif // __MPQSOLVER_BISECTION_ERROR_APPROXIMATOR_H__
diff --git a/compiler/circle-mpqsolver/src/bisection/ErrorApproximator.test.cpp b/compiler/circle-mpqsolver/src/bisection/ErrorApproximator.test.cpp
new file mode 100644 (file)
index 0000000..015f83b
--- /dev/null
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ErrorApproximator.h"
+#include "TestHelper.h"
+
+#include <luci/IR/CircleNodeDecl.h>
+
+#include <cmath>
+
+namespace
+{
+
+inline uint32_t cal_offset(uint32_t shape[4], uint32_t *indices)
+{
+  return indices[0] * shape[1] * shape[2] * shape[3] + indices[1] * shape[2] * shape[3] +
+         indices[2] * shape[3] + indices[3];
+}
+
+class NConvGraph final : public SimpleGraph
+{
+protected:
+  void initInput(loco::Node *input) override
+  {
+    auto ci_input = loco::must_cast<luci::CircleNode *>(input);
+    ci_input->shape_status(luci::ShapeStatus::VALID);
+    auto qparam = std::make_unique<luci::CircleQuantParam>();
+    qparam->min.assign(_channel_size, _a_min);
+    qparam->max.assign(_channel_size, _a_max);
+    ci_input->quantparam(std::move(qparam));
+  }
+
+  loco::Node *insertGraphBody(loco::Node *input) override
+  {
+    _filter = _g->nodes()->create<luci::CircleConst>();
+    _filter->dtype(loco::DataType::FLOAT32);
+    _filter->shape({_channel_size, _f_w, _f_h, _channel_size});
+    _filter->shape_status(luci::ShapeStatus::VALID);
+    _filter->name("conv_filter");
+    uint32_t indices[4] = {
+      0,
+    };
+
+    uint32_t w_shape[4] = {_filter->dim(0).value(), _filter->dim(1).value(),
+                           _filter->dim(2).value(), _filter->dim(3).value()};
+
+    _filter->size<loco::DataType::FLOAT32>(w_shape[0] * w_shape[1] * w_shape[2] * w_shape[3]);
+
+    for (indices[0] = 0; indices[0] < w_shape[0]; ++indices[0])
+    {
+      for (indices[1] = 0; indices[1] < w_shape[1]; ++indices[1])
+      {
+        for (indices[2] = 0; indices[2] < w_shape[2]; ++indices[2])
+        {
+          for (indices[3] = 0; indices[3] < w_shape[3]; ++indices[3])
+          {
+            uint32_t offset = cal_offset(w_shape, indices);
+            _filter->at<loco::DataType::FLOAT32>(offset) = (offset % 2 == 0) ? _w_max : _w_min;
+          }
+        }
+      }
+    }
+
+    _bias = _g->nodes()->create<luci::CircleConst>();
+    _bias->dtype(loco::DataType::FLOAT32);
+    _bias->shape({_channel_size});
+    _bias->name("conv_bias");
+
+    _conv = _g->nodes()->create<luci::CircleConv2D>();
+    _conv->padding(luci::Padding::SAME);
+    _conv->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _conv->dtype(loco::DataType::FLOAT32);
+    _conv->shape({1, _width, _height, _channel_size});
+    _conv->shape_status(luci::ShapeStatus::VALID);
+    _conv->name("conv");
+    _conv->filter(_filter);
+    _conv->bias(_bias);
+    _conv->input(input);
+
+    return _conv;
+  }
+
+public:
+  luci::CircleConv2D *_conv = nullptr;
+  luci::CircleConst *_filter = nullptr;
+  luci::CircleConst *_bias = nullptr;
+  uint32_t _f_w = 1;
+  uint32_t _f_h = 1;
+  float _w_min = -1.f;
+  float _w_max = 1.f;
+  float _a_min = -1.f;
+  float _a_max = 1.f;
+};
+
+} // namespace
+
+TEST(CircleMPQSolverErrorApproximatorTest, verifyResultsTest)
+{
+  NConvGraph g;
+  g.init();
+
+  auto value = mpqsolver::bisection::approximate(g._conv);
+  float expected = ((g._w_max - g._w_min) * g._channel_size * std::max(g._a_max, g._a_min) +
+                    (g._a_max - g._a_min) * g._channel_size * std::max(g._w_max, g._w_min)) *
+                   g._f_h * g._f_w * g._height * g._width * g._channel_size / 1.e+6f;
+  EXPECT_FLOAT_EQ(expected, value);
+}
+
+TEST(CircleMPQSolverErrorApproximatorTest, verifyResultsTest_NEG)
+{
+  NConvGraph g;
+  g.init();
+
+  auto value = mpqsolver::bisection::approximate(g._input);
+  float expected = 0.f;
+  EXPECT_FLOAT_EQ(expected, value);
+
+  value = mpqsolver::bisection::approximate(g._output);
+  expected = 0.f;
+  EXPECT_FLOAT_EQ(expected, value);
+}
diff --git a/compiler/circle-mpqsolver/src/bisection/ErrorMetric.cpp b/compiler/circle-mpqsolver/src/bisection/ErrorMetric.cpp
new file mode 100644 (file)
index 0000000..74c7fd6
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ErrorMetric.h"
+
+#include <loco/IR/DataType.h>
+#include <loco/IR/DataTypeTraits.h>
+
+#include <cmath>
+#include <cassert>
+
+using namespace mpqsolver::bisection;
+
+/**
+ * @brief compare first and second operands using MAE (Mean Absolute Error metric)
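+ *
+ * The result is sum |first_i - second_i| / N, where the sum runs over every float element of
+ * every output of every sample and N is the total number of such elements.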
+ */
+float MAEMetric::compute(const WholeOutput &first, const WholeOutput &second) const
+{
+  assert(first.size() == second.size());
+
+  float error = 0.f;
+  size_t output_size = 0;
+
+  for (size_t sample_index = 0; sample_index < first.size(); ++sample_index)
+  {
+    assert(first[sample_index].size() == second[sample_index].size());
+    for (size_t out_index = 0; out_index < first[sample_index].size(); ++out_index)
+    {
+      const Buffer &first_elementary = first[sample_index][out_index];
+      const Buffer &second_elementary = second[sample_index][out_index];
+      assert(first_elementary.size() == second_elementary.size());
+      size_t cur_size = first_elementary.size() / loco::size(loco::DataType::FLOAT32);
+
+      const float *first_floats = reinterpret_cast<const float *>(first_elementary.data());
+      const float *second_floats = reinterpret_cast<const float *>(second_elementary.data());
+      for (size_t index = 0; index < cur_size; index++)
+      {
+        float ref_value = *(first_floats + index);
+        float cur_value = *(second_floats + index);
+        error += std::fabs(ref_value - cur_value);
+      }
+      output_size += cur_size;
+    }
+  }
+
+  return error / output_size;
+}
diff --git a/compiler/circle-mpqsolver/src/bisection/ErrorMetric.h b/compiler/circle-mpqsolver/src/bisection/ErrorMetric.h
new file mode 100644 (file)
index 0000000..9ef8662
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_BISECTION_ERROR_METRIC_H__
+#define __MPQSOLVER_BISECTION_ERROR_METRIC_H__
+
+#include <vector>
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+using Buffer = std::vector<char>;
+using Output = std::vector<Buffer>;
+using WholeOutput = std::vector<Output>;
+
+class ErrorMetric
+{
+public:
+  virtual ~ErrorMetric() = default;
+
+  /**
+   * @brief abstract method for comparing first and second operands
+   */
+  virtual float compute(const WholeOutput &first, const WholeOutput &second) const = 0;
+};
+
+// Mean Absolute Error
+class MAEMetric final : public ErrorMetric
+{
+public:
+  /**
+   * @brief compare first and second operands using MAE (Mean Absolute Error metric)
+   */
+  float compute(const WholeOutput &first, const WholeOutput &second) const;
+};
+
+} // namespace bisection
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_BISECTION_ERROR_METRIC_H__
diff --git a/compiler/circle-mpqsolver/src/bisection/Evaluator.cpp b/compiler/circle-mpqsolver/src/bisection/Evaluator.cpp
new file mode 100644 (file)
index 0000000..94d46a3
--- /dev/null
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Evaluator.h"
+
+#include <luci_interpreter/Interpreter.h>
+
+#include <dio_hdf5/HDF5Importer.h>
+
+using namespace mpqsolver::bisection;
+
+using Shape = std::vector<loco::Dimension>;
+
+namespace
+{
+
+using namespace luci;
+
+template <typename NodeT> size_t get_tensor_size(const NodeT *node)
+{
+  uint32_t tensor_size = loco::size(node->dtype());
+  for (uint32_t i = 0; i < node->rank(); ++i)
+    tensor_size *= node->dim(i).value();
+  return tensor_size;
+}
+
+WholeOutput compute_outputs(const luci::Module *module, const std::string &h5file)
+{
+  dio::hdf5::HDF5Importer importer{h5file};
+  importer.importGroup("value");
+
+  bool is_raw_data = importer.isRawData();
+
+  const auto num_records = importer.numData();
+  if (num_records == 0)
+    throw std::runtime_error("The input data file does not contain any record.");
+  const auto input_nodes = loco::input_nodes(module->graph());
+  const auto num_inputs = input_nodes.size();
+
+  WholeOutput dataset_output;
+
+  // Create interpreter.
+  luci_interpreter::Interpreter interpreter(module);
+  for (int32_t record_idx = 0; record_idx < num_records; record_idx++)
+  {
+    if (num_inputs != static_cast<uint32_t>(importer.numInputs(record_idx)))
+      throw std::runtime_error("Wrong number of inputs.");
+    for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
+    {
+      const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+      assert(input_node->index() == input_idx);
+
+      std::vector<char> input_data(get_tensor_size(input_node));
+
+      if (!is_raw_data)
+      {
+        loco::DataType dtype;
+        Shape shape;
+        importer.readTensor(record_idx, input_idx, &dtype, &shape, input_data.data());
+      }
+      else
+      {
+        // Skip type/shape check for raw data
+        importer.readTensor(record_idx, input_idx, input_data.data());
+      }
+
+      interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+    }
+
+    interpreter.interpret();
+
+    Output nn_output;
+
+    // Get output.
+    const auto output_nodes = loco::output_nodes(module->graph());
+    for (size_t i = 0; i < module->graph()->outputs()->size(); i++)
+    {
+      const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+      Buffer output_data(get_tensor_size(output_node));
+      interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+      // output
+      nn_output.push_back(output_data);
+    }
+    dataset_output.push_back(nn_output);
+  }
+
+  return dataset_output;
+}
+
+} // namespace
+
+DatasetEvaluator::DatasetEvaluator(const luci::Module *ref_module, const std::string &h5file,
+                                   const ErrorMetric &metric)
+  : _ref_module(ref_module), _h5file(h5file), _metric(&metric)
+{
+  _ref_output = compute_outputs(_ref_module, _h5file);
+}
+
+void DatasetEvaluator::validate(const luci::Module *trgt_fq_module) const
+{
+  const auto output_nodes = loco::output_nodes(trgt_fq_module->graph());
+  for (size_t out_index = 0; out_index < output_nodes.size(); ++out_index)
+  {
+    const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[out_index]);
+    loco::DataType out_dtype = output_node->dtype();
+    if (out_dtype != loco::DataType::FLOAT32)
+      throw std::runtime_error("Unsupported output dtype " + output_node->name());
+  }
+}
+
+float DatasetEvaluator::evaluate(const luci::Module *trgt_fq_module) const
+{
+  if (trgt_fq_module == nullptr)
+    throw std::runtime_error("Invalid target module");
+
+  if (_metric == nullptr)
+    throw std::runtime_error("Invalid metric");
+
+  validate(trgt_fq_module);
+
+  const WholeOutput &cur_output = compute_outputs(trgt_fq_module, _h5file);
+  float error = _metric->compute(_ref_output, cur_output);
+  return error;
+}
diff --git a/compiler/circle-mpqsolver/src/bisection/Evaluator.h b/compiler/circle-mpqsolver/src/bisection/Evaluator.h
new file mode 100644 (file)
index 0000000..144c86c
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_BISECTION_EVALUATOR_H__
+#define __MPQSOLVER_BISECTION_EVALUATOR_H__
+
+#include "ErrorMetric.h"
+
+#include <luci/IR/Module.h>
+#include <luci/CircleQuantizer.h>
+
+#include <string>
+#include <vector>
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+class DatasetEvaluator final
+{
+public:
+  /**
+   * @brief create Evaluator for comparing output of ref_module on h5file
+   */
+  DatasetEvaluator(const luci::Module *ref_module, const std::string &h5file,
+                   const ErrorMetric &metric);
+  DatasetEvaluator() = delete;
+  ~DatasetEvaluator() = default;
+
+  /**
+   * @brief evaluate trgt_fq_module (fake-quantized)
+   * returns the error-metric value
+   */
+  float evaluate(const luci::Module *trgt_fq_module) const;
+
+private:
+  /**
+   * @brief throws if there is something wrong with the module
+   */
+  void validate(const luci::Module *module) const;
+
+private:
+  const luci::Module *_ref_module = nullptr;
+  std::string _h5file;
+  WholeOutput _ref_output;
+  const ErrorMetric *_metric = nullptr;
+};
+
+} // namespace bisection
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_BISECTION_EVALUATOR_H__
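
A hedged usage sketch of the evaluator API declared above; the module pointers and the "input.h5" path are placeholders, and error handling is omitted.

```
// Illustrative only: driving DatasetEvaluator with the MAE metric.
// `ref_module` and `quantized_module` are assumed to be valid modules
// obtained elsewhere (e.g., via an importer).
#include "Evaluator.h"

float evaluate_sketch(const luci::Module *ref_module, const luci::Module *quantized_module)
{
  using namespace mpqsolver::bisection;

  MAEMetric metric;
  DatasetEvaluator evaluator(ref_module, "input.h5", metric);

  // Returns the error between reference and quantized outputs over the dataset.
  return evaluator.evaluate(quantized_module);
}
```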
diff --git a/compiler/circle-mpqsolver/src/bisection/Quantizer.cpp b/compiler/circle-mpqsolver/src/bisection/Quantizer.cpp
new file mode 100644 (file)
index 0000000..6fe1d56
--- /dev/null
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Quantizer.h"
+#include <luci/Service/Validate.h>
+
+#include <iostream>
+
+using namespace mpqsolver::bisection;
+using AlgorithmParameters = luci::CircleQuantizer::Options::AlgorithmParameters;
+using Algorithms = luci::CircleQuantizer::Options::Algorithm;
+
+namespace
+{
+
+bool make_model_fake_quantized(luci::Module *module)
+{
+  luci::CircleQuantizer quantizer;
+
+  auto options = quantizer.options();
+  options->enable(Algorithms::ConvertToFakeQuantizedModel);
+
+  for (size_t idx = 0; idx < module->size(); ++idx)
+  {
+    auto graph = module->graph(idx);
+    // quantize the graph
+    quantizer.quantize(graph);
+    if (!luci::validate(graph))
+    {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+} // namespace
+
+Quantizer::Quantizer(const std::string &input_dtype, const std::string &output_dtype)
+  : _input_dtype(input_dtype), _output_dtype(output_dtype)
+{
+}
+
+/**
+ * @brief quantize recorded module (min/max initialized) with specified parameters
+ * returns true on success
+ */
+bool Quantizer::quantize(luci::Module *module, const std::string &quant_dtype,
+                         LayerParams &layer_params)
+{
+  if (!module)
+    return false;
+
+  static const std::string default_dtype = "float32";
+  static const std::string granularity_type = "channel";
+
+  luci::CircleQuantizer quantizer;
+
+  auto options = quantizer.options();
+  options->enable(Algorithms::QuantizeWithMinMax);
+
+  options->param(AlgorithmParameters::Quantize_input_model_dtype, default_dtype);
+  options->param(AlgorithmParameters::Quantize_output_model_dtype, quant_dtype);
+  options->param(AlgorithmParameters::Quantize_granularity, granularity_type);
+  options->param(AlgorithmParameters::Quantize_input_type, _input_dtype);
+  options->param(AlgorithmParameters::Quantize_output_type, _output_dtype);
+  options->param(AlgorithmParameters::Quantize_TF_style_maxpool, "False");
+
+  if (!layer_params.empty())
+  {
+    try
+    {
+      options->layer_params(AlgorithmParameters::Quantize_layer_params, layer_params);
+    }
+    catch (const std::runtime_error &e)
+    {
+      std::cerr << e.what() << '\n';
+      return false;
+    }
+  }
+
+  for (size_t idx = 0; idx < module->size(); ++idx)
+  {
+    auto graph = module->graph(idx);
+    // quantize the graph
+    quantizer.quantize(graph);
+    if (!luci::validate(graph))
+    {
+      std::cerr << "ERROR: Quantized graph is invalid" << std::endl;
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/**
+ * @brief fake_quantize recorded module (min/max initialized) with specified parameters
+ * returns true on success
+ */
+bool Quantizer::fake_quantize(luci::Module *module, const std::string &quant_dtype,
+                              LayerParams &layer_params)
+{
+  if (!quantize(module, quant_dtype, layer_params))
+    return false;
+
+  if (!make_model_fake_quantized(module))
+    return false;
+
+  return true;
+}
diff --git a/compiler/circle-mpqsolver/src/bisection/Quantizer.h b/compiler/circle-mpqsolver/src/bisection/Quantizer.h
new file mode 100644 (file)
index 0000000..ceaabf5
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_BISECTION_QUANTIZER_H__
+#define __MPQSOLVER_BISECTION_QUANTIZER_H__
+
+#include <luci/IR/Module.h>
+#include <luci/CircleQuantizer.h>
+
+#include <string>
+#include <vector>
+#include <memory>
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+using LayerParam = luci::CircleQuantizer::Options::LayerParam;
+using LayerParams = std::vector<std::shared_ptr<LayerParam>>;
+
+class Quantizer
+{
+public:
+  Quantizer(const std::string &input_dtype, const std::string &output_type);
+
+  /**
+   * @brief quantize recorded module (min/max initialized) with specified parameters
+   * returns true on success
+   */
+  bool quantize(luci::Module *module, const std::string &quant_dtype, LayerParams &layer_params);
+
+  /**
+   * @brief fake_quantize recorded module (min/max initialized) with specified parameters
+   * returns true on success
+   */
+  bool fake_quantize(luci::Module *module, const std::string &quant_dtype,
+                     LayerParams &layer_params);
+
+private:
+  std::string _input_dtype = "uint8";
+  std::string _output_dtype = "uint8";
+};
+
+} // namespace bisection
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_BISECTION_QUANTIZER_H__
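
A hedged usage sketch of the Quantizer API declared above; `module` is assumed to be a module already annotated with min/max (e.g., by record-minmax).

```
// Illustrative only: quantizing a recorded module to uint8.
#include "Quantizer.h"

bool quantize_sketch(luci::Module *module)
{
  using namespace mpqsolver::bisection;

  Quantizer quantizer("uint8", "uint8"); // input/output dtypes of the quantized model
  LayerParams layer_params;              // empty: no per-layer overrides

  // quantize() produces a quantized module; fake_quantize() additionally
  // converts it back to a fake-quantized (float32) form for evaluation.
  return quantizer.quantize(module, "uint8", layer_params);
}
```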
diff --git a/compiler/circle-mpqsolver/src/bisection/Quantizer.test.cpp b/compiler/circle-mpqsolver/src/bisection/Quantizer.test.cpp
new file mode 100644 (file)
index 0000000..8575156
--- /dev/null
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <gtest/gtest.h>
+
+#include "Quantizer.h"
+#include "TestHelper.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <cmath>
+
+namespace
+{
+
+class AddGraph final : public SimpleGraph
+{
+protected:
+  void initInput(loco::Node *input) override
+  {
+    auto ci_input = loco::must_cast<luci::CircleNode *>(input);
+    initMinMax(ci_input);
+  }
+
+  void initMinMax(luci::CircleNode *node)
+  {
+    auto qparam = std::make_unique<luci::CircleQuantParam>();
+    qparam->min.assign(1, _a_min);
+    qparam->max.assign(1, _a_max);
+    node->quantparam(std::move(qparam));
+  }
+
+  loco::Node *insertGraphBody(loco::Node *input) override
+  {
+    _add = _g->nodes()->create<luci::CircleAdd>();
+    _beta = _g->nodes()->create<luci::CircleConst>();
+
+    _add->dtype(loco::DataType::FLOAT32);
+    _beta->dtype(loco::DataType::FLOAT32);
+
+    uint32_t channel_size = 16;
+    _add->shape({1, _channel_size, _width, _height});
+    _beta->shape({1, _channel_size, _width, _height});
+
+    _beta->size<loco::DataType::FLOAT32>(channel_size);
+    _add->x(input);
+    _add->y(_beta);
+    _add->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+    _add->name("add");
+    _beta->name("beta");
+    initMinMax(_add);
+
+    return _add;
+  }
+
+public:
+  float _a_min = -1.f;
+  float _a_max = 1.f;
+  luci::CircleAdd *_add = nullptr;
+  luci::CircleConst *_beta = nullptr;
+};
+
+} // namespace
+
+TEST(CircleMPQSolverQuantizerTest, verifyResultsTest)
+{
+  auto m = luci::make_module();
+  AddGraph g;
+  g.init();
+  auto add = g._add;
+  float range = g._a_max - g._a_min;
+  g.transfer_to(m.get());
+
+  std::string def_quant = "uint8";
+  mpqsolver::bisection::Quantizer quantizer(def_quant, def_quant);
+  mpqsolver::bisection::LayerParams params;
+  auto res = quantizer.quantize(m.get(), def_quant, params);
+  EXPECT_TRUE(res);
+  auto quant_param = add->quantparam();
+  EXPECT_TRUE(quant_param != nullptr);
+  EXPECT_TRUE(quant_param->scale.size() == 1);
+  EXPECT_FLOAT_EQ(quant_param->scale[0], range / 255.f);
+  EXPECT_TRUE(quant_param->zerop.size() == 1);
+  EXPECT_TRUE(quant_param->zerop[0] == 128);
+}
+
+TEST(CircleMPQSolverQuantizerTest, verifyResultsTest_NEG)
+{
+  std::string def_quant = "uint8";
+  mpqsolver::bisection::Quantizer quantizer(def_quant, def_quant);
+  mpqsolver::bisection::LayerParams params;
+  auto res = quantizer.quantize(nullptr, def_quant, params);
+  EXPECT_TRUE(!res);
+}
diff --git a/compiler/circle-mpqsolver/src/bisection/TestHelper.h b/compiler/circle-mpqsolver/src/bisection/TestHelper.h
new file mode 100644 (file)
index 0000000..f930738
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_TEST_HELPER_H__
+#define __MPQSOLVER_TEST_HELPER_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/Module.h>
+
+class SimpleGraph
+{
+public:
+  SimpleGraph() : _g(loco::make_graph()) {}
+
+public:
+  void init()
+  {
+    _input = _g->nodes()->create<luci::CircleInput>();
+    _output = _g->nodes()->create<luci::CircleOutput>();
+    _input->name("input");
+    _output->name("output");
+
+    auto graph_input = _g->inputs()->create();
+    _input->index(graph_input->index());
+    auto graph_output = _g->outputs()->create();
+    _output->index(graph_output->index());
+
+    graph_input->dtype(loco::DataType::FLOAT32);
+    _input->dtype(loco::DataType::FLOAT32);
+    _output->dtype(loco::DataType::FLOAT32);
+    graph_output->dtype(loco::DataType::FLOAT32);
+
+    graph_input->shape({1, _channel_size, _width, _height});
+    _input->shape({1, _channel_size, _width, _height});
+    _output->shape({1, _channel_size, _width, _height});
+    graph_output->shape({1, _channel_size, _width, _height});
+
+    auto graph_body = insertGraphBody(_input);
+    _output->from(graph_body);
+
+    initInput(_input);
+  }
+
+  virtual ~SimpleGraph() = default;
+  void transfer_to(luci::Module *module)
+  {
+    // WARNING: after _g is transferred, the _input and _output nodes
+    //          held by this helper will be invalid.
+    //          They are not cleared, as this is just a helper for unit tests.
+    module->add(std::move(_g));
+  }
+
+protected:
+  virtual loco::Node *insertGraphBody(loco::Node *input) = 0;
+  virtual void initInput(loco::Node *input){};
+
+public:
+  std::unique_ptr<loco::Graph> _g;
+  luci::CircleInput *_input = nullptr;
+  luci::CircleOutput *_output = nullptr;
+  uint32_t _channel_size = 16;
+  uint32_t _width = 4;
+  uint32_t _height = 4;
+};
+
+#endif //__MPQSOLVER_TEST_HELPER_H__
diff --git a/compiler/circle-mpqsolver/src/bisection/VISQErrorApproximator.cpp b/compiler/circle-mpqsolver/src/bisection/VISQErrorApproximator.cpp
new file mode 100644 (file)
index 0000000..822df89
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "VISQErrorApproximator.h"
+
+#include <fstream>
+
+using namespace mpqsolver::bisection;
+
+void VISQErrorApproximator::init(const std::string &visq_data_path)
+{
+  // read file
+  std::ifstream file(visq_data_path);
+  if (!init(file))
+  {
+    throw std::runtime_error("Invalid visq file " + visq_data_path);
+  }
+}
+
+bool VISQErrorApproximator::init(std::istream &)
+{
+  // TODO
+  return true;
+}
+
+float VISQErrorApproximator::approximate(const std::string &node_name) const
+{
+  auto iter = _layer_errors.find(node_name);
+  if (iter == _layer_errors.end())
+  {
+    return 0.f;
+  }
+
+  return iter->second;
+}
diff --git a/compiler/circle-mpqsolver/src/bisection/VISQErrorApproximator.h b/compiler/circle-mpqsolver/src/bisection/VISQErrorApproximator.h
new file mode 100644 (file)
index 0000000..855bca8
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_BISECTION_VISQ_ERROR_APPROXIMATOR_H__
+#define __MPQSOLVER_BISECTION_VISQ_ERROR_APPROXIMATOR_H__
+
+#include <string>
+#include <map>
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+class VISQErrorApproximator final
+{
+public:
+  /**
+   * @brief constructor of VISQErrorApproximator
+   */
+  VISQErrorApproximator() = default;
+
+  /**
+   * @brief initialize from visq_data_path (throws on failure)
+   */
+  void init(const std::string &visq_data_path);
+
+  /**
+   * @brief approximate error introduced while quantizing node into Q8
+   */
+  float approximate(const std::string &node_name) const;
+
+private:
+  /**
+   * @brief initialize from visq_data (returns success)
+   */
+  bool init(std::istream &visq_data);
+
+private:
+  std::string _visq_data_path;
+  std::map<std::string, float> _layer_errors;
+};
+
+} // namespace bisection
+} // namespace mpqsolver
+
+#endif // __MPQSOLVER_BISECTION_VISQ_ERROR_APPROXIMATOR_H__
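
A hedged usage sketch of the approximator declared above; "visq.json" and "conv1" are placeholder names, and the on-disk VISQ data format is defined outside this header.

```
// Illustrative only: querying per-layer quantization errors from VISQ data.
#include "VISQErrorApproximator.h"

#include <iostream>

void approximate_sketch()
{
  mpqsolver::bisection::VISQErrorApproximator approximator;
  approximator.init("visq.json"); // throws std::runtime_error on failure

  // Unknown layer names fall back to 0.f (see approximate() above).
  std::cout << approximator.approximate("conv1") << std::endl;
}
```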
index 29c6f37..351eda8 100644 (file)
@@ -20,7 +20,7 @@
 #include <fstream>
 #include <vector>
 
-class cirlce_operator_test : public ::testing::Test
+class circle_operator_test : public ::testing::Test
 {
 protected:
   bool initialize(void);
@@ -35,7 +35,7 @@ protected:
   std::string _result;
 };
 
-bool cirlce_operator_test::initialize(void)
+bool circle_operator_test::initialize(void)
 {
   char *path = std::getenv("ARTIFACTS_PATH");
   if (path == nullptr)
@@ -56,7 +56,7 @@ bool cirlce_operator_test::initialize(void)
   return true;
 }
 
-bool cirlce_operator_test::run(const std::string &command)
+bool circle_operator_test::run(const std::string &command)
 {
   std::vector<char> buffer(260);
   std::string result = "";
@@ -86,7 +86,7 @@ bool cirlce_operator_test::run(const std::string &command)
   return true;
 }
 
-bool cirlce_operator_test::load(const std::string &file)
+bool circle_operator_test::load(const std::string &file)
 {
   std::ifstream tmp(file.c_str());
   if (tmp.fail())
@@ -98,7 +98,7 @@ bool cirlce_operator_test::load(const std::string &file)
   return true;
 }
 
-TEST_F(cirlce_operator_test, valid_names)
+TEST_F(circle_operator_test, valid_names)
 {
   if (!initialize())
   {
@@ -118,7 +118,7 @@ TEST_F(cirlce_operator_test, valid_names)
   ASSERT_NE(std::string::npos, pos);
 }
 
-TEST_F(cirlce_operator_test, valid_codes)
+TEST_F(circle_operator_test, valid_codes)
 {
   if (!initialize())
   {
@@ -138,7 +138,7 @@ TEST_F(cirlce_operator_test, valid_codes)
   ASSERT_NE(std::string::npos, pos);
 }
 
-TEST_F(cirlce_operator_test, invalid_option_NEG)
+TEST_F(circle_operator_test, invalid_option_NEG)
 {
   if (!initialize())
   {
@@ -158,7 +158,7 @@ TEST_F(cirlce_operator_test, invalid_option_NEG)
   ASSERT_NE(std::string::npos, pos);
 }
 
-TEST_F(cirlce_operator_test, check_code_name)
+TEST_F(circle_operator_test, check_code_name)
 {
   if (!initialize())
   {
@@ -180,7 +180,7 @@ TEST_F(cirlce_operator_test, check_code_name)
   ASSERT_NE(std::string::npos, pos2);
 }
 
-TEST_F(cirlce_operator_test, nonexist_file_NEG)
+TEST_F(circle_operator_test, nonexist_file_NEG)
 {
   if (!initialize())
   {
@@ -200,7 +200,7 @@ TEST_F(cirlce_operator_test, nonexist_file_NEG)
   ASSERT_NE(std::string::npos, pos);
 }
 
-TEST_F(cirlce_operator_test, invalid_file_NEG)
+TEST_F(circle_operator_test, invalid_file_NEG)
 {
   if (!initialize())
   {
@@ -220,7 +220,7 @@ TEST_F(cirlce_operator_test, invalid_file_NEG)
   ASSERT_NE(std::string::npos, pos);
 }
 
-TEST_F(cirlce_operator_test, output_file)
+TEST_F(circle_operator_test, output_file)
 {
   if (!initialize())
   {
index 93ab84c..3f7f424 100644 (file)
@@ -14,6 +14,7 @@ target_link_libraries(circle-opselector luci_export)
 target_link_libraries(circle-opselector arser)
 target_link_libraries(circle-opselector vconone)
 target_link_libraries(circle-opselector luci_service)
+target_link_libraries(circle-opselector luci_partition)
 target_link_libraries(circle-opselector luci_profile)
 
 install(TARGETS circle-opselector DESTINATION bin)
@@ -33,4 +34,6 @@ target_link_libraries(circle-opselector-test luci_export)
 target_link_libraries(circle-opselector-test arser)
 target_link_libraries(circle-opselector-test vconone)
 target_link_libraries(circle-opselector-test luci_service)
+target_link_libraries(circle-opselector-test luci_partition)
 target_link_libraries(circle-opselector-test luci_profile)
+target_link_libraries(circle-opselector-test luci_testhelper)
index 4b39a6d..6549fc7 100644 (file)
  */
 
 #include "ModuleIO.h"
+#include "OpSelector.h"
 
+#include <luci/ConnectNode.h>
 #include <luci/Profile/CircleNodeID.h>
+#include <luci/Service/CircleNodeClone.h>
 
 #include <arser/arser.h>
 #include <vconone/vconone.h>
@@ -35,124 +38,6 @@ void print_version(void)
   std::cout << vconone::get_copyright() << std::endl;
 }
 
-std::vector<std::string> split_into_vector(const std::string &str, const char &delim)
-{
-  std::vector<std::string> ret;
-  std::istringstream is(str);
-  for (std::string item; std::getline(is, item, delim);)
-  {
-    ret.push_back(item);
-  }
-
-  // remove empty string
-  ret.erase(std::remove_if(ret.begin(), ret.end(), [](const std::string &s) { return s.empty(); }),
-            ret.end());
-
-  return ret;
-}
-
-bool is_number(const std::string &s)
-{
-  return !s.empty() && std::find_if(s.begin(), s.end(),
-                                    [](unsigned char c) { return !std::isdigit(c); }) == s.end();
-}
-
-bool is_number(const std::vector<std::string> &vec)
-{
-  for (const auto &s : vec)
-  {
-    if (not::is_number(s))
-    {
-      return false;
-    }
-  }
-  return true;
-}
-
-/**
- * @brief  Segmentation function for user's '--by_id' input
- *
- * @note   This function tokenizes the input data.s
- *         First, divide it into ',', and if token has '-', devide it once more into '-'.
- *         For example, if user input is '12,34,56', it is devided into [12,34,56].
- *         If input is '1-2,34,56', it is devided into [[1,2],34,56].
- *         And '-' means range so, if input is '2-7', it means all integer between 2-7.
- */
-std::vector<uint32_t> split_id_input(const std::string &str)
-{
-  std::vector<uint32_t> by_id;
-
-  // tokenize colon-separated string
-  auto colon_tokens = ::split_into_vector(str, ',');
-  if (colon_tokens.empty()) // input empty line like "".
-  {
-    std::cerr << "ERROR: Nothing was entered." << std::endl;
-    exit(EXIT_FAILURE);
-  }
-  for (const auto &ctok : colon_tokens)
-  {
-    auto dash_tokens = ::split_into_vector(ctok, '-');
-    if (not::is_number(dash_tokens))
-    {
-      std::cerr << "ERROR: To select operator by id, please use these args: [0-9], '-', ','"
-                << std::endl;
-      exit(EXIT_FAILURE);
-    }
-    // convert string into integer
-    std::vector<uint32_t> int_tokens;
-    try
-    {
-      std::transform(dash_tokens.begin(), dash_tokens.end(), std::back_inserter(int_tokens),
-                     [](const std::string &str) { return static_cast<uint32_t>(std::stoi(str)); });
-    }
-    catch (const std::out_of_range &)
-    {
-      // if input is big integer like '123467891234', stoi throw this exception.
-      std::cerr << "ERROR: Argument is out of range." << std::endl;
-      exit(EXIT_FAILURE);
-    }
-    catch (...)
-    {
-      std::cerr << "ERROR: Unknown error" << std::endl;
-      exit(EXIT_FAILURE);
-    }
-
-    switch (int_tokens.size())
-    {
-      case 0: // inputs like "-"
-      {
-        std::cerr << "ERROR: Nothing was entered" << std::endl;
-        exit(EXIT_FAILURE);
-      }
-      case 1: // inputs like "1", "2"
-      {
-        by_id.push_back(int_tokens.at(0));
-        break;
-      }
-      case 2: // inputs like "1-2", "11-50"
-      {
-        for (uint32_t i = int_tokens.at(0); i <= int_tokens.at(1); i++)
-        {
-          by_id.push_back(i);
-        }
-        break;
-      }
-      default: // inputs like "1-2-3"
-      {
-        std::cerr << "ERROR: Too many '-' in str." << std::endl;
-        exit(EXIT_FAILURE);
-      }
-    }
-  }
-
-  return by_id;
-}
-
-std::vector<std::string> split_name_input(const std::string &str)
-{
-  return ::split_into_vector(str, ',');
-}
-
 int entry(int argc, char **argv)
 {
   // TODO Add new option names!
@@ -184,11 +69,6 @@ int entry(int argc, char **argv)
   std::string input_path = arser.get<std::string>("input");
   std::string output_path = arser.get<std::string>("output");
 
-  std::string operator_input;
-
-  std::vector<uint32_t> by_id;
-  std::vector<std::string> by_name;
-
   if (!arser["--by_id"] && !arser["--by_name"] || arser["--by_id"] && arser["--by_name"])
   {
     std::cerr << "ERROR: Either option '--by_id' or '--by_name' must be specified" << std::endl;
@@ -196,69 +76,33 @@ int entry(int argc, char **argv)
     return EXIT_FAILURE;
   }
 
-  if (arser["--by_id"])
-  {
-    operator_input = arser.get<std::string>("--by_id");
-    by_id = split_id_input(operator_input);
-  }
-  if (arser["--by_name"])
-  {
-    operator_input = arser.get<std::string>("--by_name");
-    by_name = split_name_input(operator_input);
-  }
-
   // Import original circle file.
   auto module = opselector::getModule(input_path);
 
-  // Select nodes from user input.
-  std::vector<const luci::CircleNode *> selected_nodes;
-
-  // put selected nodes into vector.
-  if (by_id.size())
+  // TODO support two or more subgraphs
+  if (module.get()->size() != 1)
   {
-    loco::Graph *graph = module.get()->graph(0); // get main subgraph.
+    std::cerr << "ERROR: Not support two or more subgraphs" << std::endl;
+    return EXIT_FAILURE;
+  }
 
-    for (auto node : loco::all_nodes(graph))
-    {
-      auto cnode = loco::must_cast<const luci::CircleNode *>(node);
+  opselector::OpSelector op_selector{module.get()};
 
-      try
-      {
-        auto node_id = luci::get_node_id(cnode); // if the node is not operator, throw runtime_error
+  std::unique_ptr<luci::Module> new_module;
+  std::string operator_input;
 
-        for (auto selected_id : by_id)
-          if (selected_id == node_id) // find the selected id
-            selected_nodes.emplace_back(cnode);
-      }
-      catch (std::runtime_error)
-      {
-        continue;
-      }
-    }
-  }
-  if (by_name.size())
+  if (arser["--by_id"])
   {
-    loco::Graph *graph = module.get()->graph(0); // get main subgraph.
-
-    for (auto node : loco::all_nodes(graph))
-    {
-      auto cnode = loco::must_cast<const luci::CircleNode *>(node);
-      std::string node_name = cnode->name();
-
-      for (auto selected_name : by_name)
-        if (selected_name.compare(node_name) == 0) // find the selected name
-          selected_nodes.emplace_back(cnode);
-    }
+    operator_input = arser.get<std::string>("--by_id");
+    new_module = op_selector.select_by<opselector::SelectType::ID>(operator_input);
   }
-  if (selected_nodes.size() == 0)
+  if (arser["--by_name"])
   {
-    std::cerr << "ERROR: No operator selected" << std::endl;
-    exit(EXIT_FAILURE);
+    operator_input = arser.get<std::string>("--by_name");
+    new_module = op_selector.select_by<opselector::SelectType::NAME>(operator_input);
   }
-  // TODO implement node selections
 
-  // Export to output Circle file
-  assert(opselector::exportModule(module.get(), output_path));
+  assert(opselector::exportModule(new_module.get(), output_path));
 
   return 0;
 }
diff --git a/compiler/circle-opselector/src/Driver.test.cpp b/compiler/circle-opselector/src/Driver.test.cpp
deleted file mode 100644 (file)
index 6e56908..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Driver.test.h"
-#include "TestHelper.h"
-
-#include <gtest/gtest.h>
-
-TEST(DriverTest, NoArg_NEG)
-{
-  Argv<1> argv;
-  argv.add("circle-opselector");
-
-  ::testing::internal::CaptureStderr();
-  ::testing::internal::CaptureStdout();
-  int result = entry(1, argv.argv());
-  ::testing::internal::GetCapturedStdout();
-  ASSERT_EQ(EXIT_FAILURE, result);
-}
-
-TEST(DriverTest, Wrong_ID_NEG)
-{
-  std::string str1 = "1";
-  std::string empty = "";
-  std::string no_integer = "1531538X5";
-
-  ASSERT_EQ(true, is_number(str1));
-  ASSERT_EQ(false, is_number(empty));
-  ASSERT_EQ(false, is_number(no_integer));
-}
-
-TEST(DriverTest, Split)
-{
-  std::vector<uint32_t> vec1;
-  std::vector<uint32_t> vec2;
-
-  std::string hyphen = "1-3,8-10";
-  std::string comma = "1,2,3";
-
-  vec1.push_back(1);
-  vec1.push_back(2);
-  vec1.push_back(3);
-  vec1.push_back(8);
-  vec1.push_back(9);
-  vec1.push_back(10);
-
-  vec2.push_back(1);
-  vec2.push_back(2);
-  vec2.push_back(3);
-
-  ASSERT_EQ(vec1, split_id_input(hyphen));
-  ASSERT_EQ(vec2, split_id_input(comma));
-}
diff --git a/compiler/circle-opselector/src/Driver.test.h b/compiler/circle-opselector/src/Driver.test.h
deleted file mode 100644 (file)
index 06f1516..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLE_OPSELECTOR_DRIVER_TEST_H__
-#define __CIRCLE_OPSELECTOR_DRIVER_TEST_H__
-
-#include <vector>
-#include <string>
-
-int entry(int argc, char **argv);
-bool is_number(const std::string &s);
-std::vector<uint32_t> split_id_input(const std::string &str);
-
-#endif // __CIRCLE_OPSELECTOR_DRIVER_TEST_H__
diff --git a/compiler/circle-opselector/src/OpSelector.cpp b/compiler/circle-opselector/src/OpSelector.cpp
new file mode 100644 (file)
index 0000000..31cbf9c
--- /dev/null
@@ -0,0 +1,387 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OpSelector.h"
+
+#include <luci/ConnectNode.h>
+#include <luci/Profile/CircleNodeID.h>
+#include <luci/Service/CircleNodeClone.h>
+
+#include <algorithm>
+#include <cassert>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace
+{
+
+/**
+ * @brief Tokenize given string
+ *
+ * Assumes given string looks like below.
+ *
+ * - '1,2,5,7,9'
+ * - '1-5,6,7,9,12-14'
+ * - 'tensor_a,tensor_b,tensor_d'
+ *
+ * NOTE. '1-5' is the same as '1,2,3,4,5'.
+ *
+ * WARNING. SelectType::NAME doesn't allow '-' like 'tensor_a-tensor_c'.
+ */
+std::vector<std::string> split_into_vector(const std::string &str, const char &delim)
+{
+  std::vector<std::string> ret;
+  std::istringstream is(str);
+  for (std::string item; std::getline(is, item, delim);)
+  {
+    ret.push_back(item);
+  }
+
+  // Remove empty string
+  ret.erase(std::remove_if(ret.begin(), ret.end(), [](const std::string &s) { return s.empty(); }),
+            ret.end());
+
+  return ret;
+}
+
+bool is_number(const std::string &s)
+{
+  return !s.empty() && std::find_if(s.begin(), s.end(),
+                                    [](unsigned char c) { return !std::isdigit(c); }) == s.end();
+}
+
+bool is_number(const std::vector<std::string> &vec)
+{
+  for (const auto &s : vec)
+  {
+    if (not::is_number(s))
+    {
+      return false;
+    }
+  }
+  return true;
+}
+
+// TODO Move this class into a separate header for reuse
+class IsMultiOutputNode final : public luci::CircleNodeVisitor<bool>
+{
+public:
+  bool visit(const luci::CircleCustom *) final { return true; }
+  bool visit(const luci::CircleIf *) final { return true; }
+  bool visit(const luci::CircleNonMaxSuppressionV4 *) final { return true; }
+  bool visit(const luci::CircleNonMaxSuppressionV5 *) final { return true; }
+  bool visit(const luci::CircleSplit *) final { return true; }
+  bool visit(const luci::CircleSplitV *) final { return true; }
+  bool visit(const luci::CircleTopKV2 *) final { return true; }
+  bool visit(const luci::CircleUnique *) final { return true; }
+  bool visit(const luci::CircleUnpack *) final { return true; }
+  bool visit(const luci::CircleWhile *) final { return true; }
+  // default is false
+  bool visit(const luci::CircleNode *) final { return false; }
+};
+
+std::unique_ptr<loco::Graph> make_graph(const std::vector<const luci::CircleNode *> nodes)
+{
+  auto graph = loco::make_graph();
+
+  luci::CloneContext ctx;
+  // clone nodes
+  for (const auto &n : nodes)
+  {
+    auto clone = luci::clone_node(n, graph.get());
+    ctx.emplace(n, clone);
+  }
+  // set graph input
+  for (const auto &n : nodes)
+  {
+    for (uint32_t i = 0; i < n->arity(); i++)
+    {
+      auto arg = n->arg(i);
+      auto input_node = dynamic_cast<luci::CircleNode *>(arg);
+      auto ctx_it = ctx.find(input_node);
+      // check if the node already has been cloned
+      if (ctx_it != ctx.end())
+        continue;
+      // the arg is not a graph input if it is itself one of the selected nodes
+      if (std::find(nodes.begin(), nodes.end(), arg) != nodes.end())
+        continue;
+      auto circle_const = dynamic_cast<luci::CircleConst *>(arg);
+      if (circle_const != nullptr)
+      {
+        auto clone = luci::clone_node(circle_const, graph.get());
+        ctx.emplace(circle_const, clone);
+      }
+      else
+      {
+        // circle input
+        auto circle_input = graph->nodes()->create<luci::CircleInput>();
+        input_node = dynamic_cast<luci::CircleNode *>(arg);
+        if (not input_node)
+        {
+          throw std::runtime_error{"ERROR: Invalid graph"};
+        }
+        luci::copy_common_attributes(input_node, circle_input);
+        ctx.emplace(input_node, circle_input);
+        // graph input
+        auto graph_input = graph->inputs()->create();
+        graph_input->name(circle_input->name());
+        graph_input->dtype(circle_input->dtype());
+        // graph input shape
+        auto input_shape = std::make_unique<loco::TensorShape>();
+        input_shape->rank(circle_input->rank());
+        for (uint32_t i = 0; i < circle_input->rank(); i++)
+        {
+          if (circle_input->dim(i).known())
+          {
+            circle_input->dim(i).set(circle_input->dim(i).value());
+          }
+        }
+        graph_input->shape(std::move(input_shape));
+
+        circle_input->index(graph_input->index());
+      }
+    }
+  }
+  // set graph output
+  for (auto &n : nodes)
+  {
+    auto outputs = loco::succs(n);
+    bool beingUsed = false;
+    for (const auto &o : outputs)
+    {
+      if (std::find(nodes.begin(), nodes.end(), o) != nodes.end())
+      {
+        beingUsed = true;
+        break;
+      }
+    }
+    // the node does not become a graph output if it is consumed by another selected node
+    if (beingUsed)
+      continue;
+
+    IsMultiOutputNode multiout_visitor;
+    bool isMultiOut = n->accept(&multiout_visitor);
+    for (auto &o : outputs)
+    {
+      const luci::CircleNode *output_node = nullptr;
+      if (isMultiOut)
+      {
+        output_node = dynamic_cast<const luci::CircleNode *>(o);
+        if (not output_node)
+        {
+          throw std::runtime_error{"ERROR: Invalid graph"};
+        }
+      }
+      else
+      {
+        output_node = n;
+      }
+      // circle output
+      auto circle_output = graph->nodes()->create<luci::CircleOutput>();
+      luci::copy_common_attributes(output_node, circle_output);
+      // connect to cloned output node
+      circle_output->from(ctx.find(output_node)->second);
+      // graph output
+      auto graph_output = graph->outputs()->create();
+      graph_output->name(output_node->name());
+      graph_output->dtype(output_node->dtype());
+      // graph output shape
+      auto output_shape = std::make_unique<loco::TensorShape>();
+      output_shape->rank(circle_output->rank());
+      for (uint32_t i = 0; i < output_shape->rank(); i++)
+      {
+        if (circle_output->dim(i).known())
+        {
+          output_shape->dim(i).set(circle_output->dim(i).value());
+        }
+      }
+      graph_output->shape(std::move(output_shape));
+
+      circle_output->index(graph_output->index());
+      if (not isMultiOut)
+        break;
+    }
+  }
+  // connect nodes
+  for (const auto &n : nodes)
+  {
+    luci::clone_connect(n, ctx);
+  }
+
+  return graph;
+}
+
+} // namespace
+
+namespace opselector
+{
+
+OpSelector::OpSelector(const luci::Module *module) : _module{module}
+{
+  if (_module->size() != 1)
+  {
+    throw std::runtime_error{"ERROR: Not support two or more subgraphs"};
+  }
+}
+
+template <>
+std::vector<const luci::CircleNode *>
+OpSelector::select_by<SelectType::ID>(const std::vector<std::string> &comma_tokens)
+{
+  std::vector<uint32_t> by_id;
+
+  for (const auto &comma_token : comma_tokens)
+  {
+    auto dash_tokens = ::split_into_vector(comma_token, '-');
+    if (not::is_number(dash_tokens))
+    {
+      throw std::runtime_error{
+        "ERROR: To select operator by id, please use these args: [0-9], '-', ','"};
+    }
+
+    // Convert string into integer
+    std::vector<uint32_t> int_tokens;
+    try
+    {
+      std::transform(dash_tokens.begin(), dash_tokens.end(), std::back_inserter(int_tokens),
+                     [](const std::string &str) { return static_cast<uint32_t>(std::stoi(str)); });
+    }
+    catch (const std::out_of_range &)
+    {
+      // If the input is a big integer like '123467891234', stoi throws this exception.
+      throw std::runtime_error{"ERROR: Argument is out of range."};
+    }
+    catch (...)
+    {
+      throw std::runtime_error{"ERROR: Unknown error"};
+    }
+
+    switch (int_tokens.size())
+    {
+      case 0: // inputs like "-"
+      {
+        throw std::runtime_error{"ERROR: Nothing was entered"};
+      }
+      case 1: // inputs like "1", "2"
+      {
+        by_id.push_back(int_tokens.at(0));
+        break;
+      }
+      case 2: // inputs like "1-2", "11-50"
+      {
+        for (uint32_t i = int_tokens.at(0); i <= int_tokens.at(1); i++)
+        {
+          by_id.push_back(i);
+        }
+        break;
+      }
+      default: // inputs like "1-2-3"
+      {
+        throw std::runtime_error{"ERROR: Too many '-' in str."};
+      }
+    }
+  }
+
+  loco::Graph *graph = _module->graph(0);
+  std::vector<const luci::CircleNode *> selected_nodes;
+
+  for (auto node : loco::all_nodes(graph))
+  {
+    auto cnode = loco::must_cast<const luci::CircleNode *>(node);
+
+    try
+    {
+      auto node_id = luci::get_node_id(cnode);
+      for (auto selected_id : by_id)
+      {
+        if (selected_id == node_id)
+        {
+          selected_nodes.emplace_back(cnode);
+        }
+      }
+    }
+    catch (const std::runtime_error &)
+    {
+      continue;
+    }
+  }
+
+  return selected_nodes;
+}
+
+template <>
+std::vector<const luci::CircleNode *>
+OpSelector::select_by<SelectType::NAME>(const std::vector<std::string> &tokens)
+{
+  loco::Graph *graph = _module->graph(0);
+  std::vector<const luci::CircleNode *> selected_nodes;
+
+  for (auto node : loco::all_nodes(graph))
+  {
+    auto cnode = loco::must_cast<const luci::CircleNode *>(node);
+    std::string node_name = cnode->name();
+
+    for (auto selected_name : tokens)
+      if (selected_name.compare(node_name) == 0) // find the selected name
+        selected_nodes.emplace_back(cnode);
+  }
+
+  return selected_nodes;
+}
+
+template <SelectType SELECT_TYPE>
+std::unique_ptr<luci::Module> OpSelector::select_by(const std::string &str)
+{
+  auto colon_tokens = ::split_into_vector(str, ',');
+  if (colon_tokens.empty())
+  {
+    throw std::runtime_error{"ERROR: Nothing was entered."};
+  }
+
+  assert(_module->size() == 1);
+
+  auto selected_nodes = select_by<SELECT_TYPE>(colon_tokens);
+
+  // multiout node should be considered
+  IsMultiOutputNode multiout_visitor;
+  std::vector<const luci::CircleNode *> output_nodes;
+  for (const auto &node : selected_nodes)
+  {
+    if (node->accept(&multiout_visitor))
+    {
+      auto outputs = loco::succs(node);
+      for (auto &o : outputs)
+      {
+        output_nodes.push_back(dynamic_cast<luci::CircleNode *>(o));
+      }
+    }
+  }
+  selected_nodes.insert(selected_nodes.end(), output_nodes.begin(), output_nodes.end());
+
+  auto new_module = std::make_unique<luci::Module>();
+  new_module->add(::make_graph(selected_nodes));
+
+  return new_module;
+}
+
+template std::unique_ptr<luci::Module>
+OpSelector::select_by<SelectType::ID>(const std::string &str);
+
+template std::unique_ptr<luci::Module>
+OpSelector::select_by<SelectType::NAME>(const std::string &str);
+
+} // namespace opselector
diff --git a/compiler/circle-opselector/src/OpSelector.h b/compiler/circle-opselector/src/OpSelector.h
new file mode 100644 (file)
index 0000000..c4366fa
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_OPSELECTOR_OPSELECTOR_H__
+#define __CIRCLE_OPSELECTOR_OPSELECTOR_H__
+
+#include "SelectType.h"
+
+#include <luci/IR/Module.h>
+#include <luci/IR/CircleNodeDecl.h>
+
+#include <string>
+#include <vector>
+
+namespace opselector
+{
+
+class OpSelector final
+{
+private:
+  const luci::Module *_module;
+
+public:
+  OpSelector(const luci::Module *module);
+
+private:
+  template <SelectType SELECT_TYPE>
+  std::vector<const luci::CircleNode *> select_by(const std::vector<std::string> &tokens);
+
+public:
+  template <SelectType SELECT_TYPE> std::unique_ptr<luci::Module> select_by(const std::string &str);
+};
+
+extern template std::unique_ptr<luci::Module>
+OpSelector::select_by<SelectType::ID>(const std::string &str);
+extern template std::unique_ptr<luci::Module>
+OpSelector::select_by<SelectType::NAME>(const std::string &str);
+
+} // namespace opselector
+
+#endif // __CIRCLE_OPSELECTOR_OPSELECTOR_H__
diff --git a/compiler/circle-opselector/src/OpSelector.test.cpp b/compiler/circle-opselector/src/OpSelector.test.cpp
new file mode 100644 (file)
index 0000000..a5ccc03
--- /dev/null
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OpSelector.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ *  Conv-Donv graphlet
+ *
+ *   [Conv]
+ *      |
+ *   [Donv]
+ *
+ */
+class ConvDonvGraphlet
+{
+public:
+  void init(loco::Graph *g)
+  {
+    _conv_filter = g->nodes()->create<luci::CircleConst>();
+    _conv_filter->dtype(loco::DataType::FLOAT32);
+    _conv_filter->shape({16, 1, 1, 16});
+    _conv_filter->name("conv_filter");
+
+    _conv_bias = g->nodes()->create<luci::CircleConst>();
+    _conv_bias->dtype(loco::DataType::FLOAT32);
+    _conv_bias->shape({16});
+    _conv_bias->name("conv_bias");
+
+    _conv = g->nodes()->create<luci::CircleConv2D>();
+    _conv->padding(luci::Padding::SAME);
+    _conv->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _conv->dtype(loco::DataType::FLOAT32);
+    _conv->shape({1, 4, 4, 16});
+    _conv->name("conv");
+    _conv->filter(_conv_filter);
+    _conv->bias(_conv_bias);
+
+    _dconv_filter = g->nodes()->create<luci::CircleConst>();
+    _dconv_filter->dtype(loco::DataType::FLOAT32);
+    _dconv_filter->shape({16, 1, 1, 16});
+    _dconv_filter->name("dconv_filter");
+
+    _dconv_bias = g->nodes()->create<luci::CircleConst>();
+    _dconv_bias->dtype(loco::DataType::FLOAT32);
+    _dconv_bias->shape({16});
+    _dconv_bias->name("dconv_bias");
+
+    _dconv = g->nodes()->create<luci::CircleDepthwiseConv2D>();
+    _dconv->input(_conv);
+    _dconv->depthMultiplier(1);
+    _dconv->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _dconv->dtype(loco::DataType::FLOAT32);
+    _dconv->shape({1, 4, 4, 16});
+    _dconv->padding(luci::Padding::SAME);
+    _dconv->name("dconv");
+    _dconv->filter(_dconv_filter);
+    _dconv->bias(_dconv_bias);
+  }
+
+protected:
+  luci::CircleConv2D *_conv{nullptr};
+  luci::CircleConst *_conv_filter{nullptr};
+  luci::CircleConst *_conv_bias{nullptr};
+  luci::CircleDepthwiseConv2D *_dconv{nullptr};
+  luci::CircleConst *_dconv_filter{nullptr};
+  luci::CircleConst *_dconv_bias{nullptr};
+};
+
+class ConvDonvGraph : public luci::test::TestIOGraph, public ConvDonvGraphlet
+{
+public:
+  ConvDonvGraph()
+  {
+    luci::test::TestIOGraph::init({1, 4, 4, 16}, {1, 4, 4, 16});
+    ConvDonvGraphlet::init(g());
+
+    _conv->input(input());
+
+    output()->from(_dconv);
+  }
+
+  std::unique_ptr<loco::Graph> graph(void) { return std::move(_g); }
+};
+
+} // namespace
+
+TEST(OpSelectorTest, select_by_name)
+{
+  auto m = luci::make_module();
+
+  ConvDonvGraph g;
+  g.transfer_to(m.get());
+
+  opselector::OpSelector op_selector{m.get()};
+
+  // Select conv only
+  auto conv_module = op_selector.select_by<opselector::SelectType::NAME>("conv");
+  ASSERT_EQ(1, conv_module->size());
+
+  auto conv_graph = conv_module->graph(0);
+  ASSERT_EQ(1, conv_graph->outputs()->size());
+
+  auto output_node1 = luci::output_node(conv_graph, 0);
+  auto conv = loco::must_cast<luci::CircleConv2D *>(output_node1->from());
+  EXPECT_STREQ("conv", conv->name().c_str());
+  auto conv_filter = loco::must_cast<luci::CircleConst *>(conv->filter());
+  EXPECT_STREQ("conv_filter", conv_filter->name().c_str());
+  auto conv_bias = loco::must_cast<luci::CircleConst *>(conv->bias());
+  EXPECT_STREQ("conv_bias", conv_bias->name().c_str());
+
+  // Select dconv only
+  auto dconv_module = op_selector.select_by<opselector::SelectType::NAME>("dconv");
+  ASSERT_EQ(1, dconv_module->size());
+
+  auto dconv_graph = dconv_module->graph(0);
+  ASSERT_EQ(1, dconv_graph->outputs()->size());
+
+  auto output_node2 = luci::output_node(dconv_graph, 0);
+  auto dconv = loco::must_cast<luci::CircleDepthwiseConv2D *>(output_node2->from());
+  EXPECT_STREQ("dconv", dconv->name().c_str());
+  auto dconv_filter = loco::must_cast<luci::CircleConst *>(dconv->filter());
+  EXPECT_STREQ("dconv_filter", dconv_filter->name().c_str());
+  auto dconv_bias = loco::must_cast<luci::CircleConst *>(dconv->bias());
+  EXPECT_STREQ("dconv_bias", dconv_bias->name().c_str());
+}
+
+TEST(OpSelectorTest, select_by_name_NEG)
+{
+  auto m = luci::make_module();
+
+  ConvDonvGraph g;
+  g.transfer_to(m.get());
+
+  opselector::OpSelector op_selector{m.get()};
+
+  EXPECT_ANY_THROW(op_selector.select_by<opselector::SelectType::NAME>(","));
+}
diff --git a/compiler/circle-opselector/src/SelectType.h b/compiler/circle-opselector/src/SelectType.h
new file mode 100644 (file)
index 0000000..46d5f09
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_OPSELECTOR_SELECT_TYPE_H__
+#define __CIRCLE_OPSELECTOR_SELECT_TYPE_H__
+
+#include <string>
+
+namespace opselector
+{
+
+enum class SelectType
+{
+  ID,
+  NAME,
+};
+
+} // namespace opselector
+
+#endif // __CIRCLE_OPSELECTOR_SELECT_TYPE_H__
index ffe1b89..c4bd200 100644 (file)
@@ -49,7 +49,7 @@ foreach(IDX RANGE ${RECIPE_LENGTH_M1})
 
   add_custom_command(OUTPUT ${TFLITE_DST_PATH}
     COMMAND ${CMAKE_COMMAND} -E copy "${TFLITE_SRC_PATH}" "${TFLITE_DST_PATH}"
-    DEPENDS ${TFLITE_SRC_PATH} ${PARTITIONER_OUTPUT_PATH}
+    DEPENDS ${TFLITE_SRC_PATH}
     COMMENT "Copy ${RECIPE_NAME}.tflite"
   )
   list(APPEND TEST_DEPS ${TFLITE_DST_PATH})
@@ -60,7 +60,7 @@ foreach(IDX RANGE ${RECIPE_LENGTH_M1})
 
   add_custom_command(OUTPUT ${CIRCLE_DST_PATH}
     COMMAND ${CMAKE_COMMAND} -E copy "${CIRCLE_SRC_PATH}" "${CIRCLE_DST_PATH}"
-    DEPENDS ${CIRCLE_SRC_PATH} ${PARTITIONER_OUTPUT_PATH}
+    DEPENDS ${CIRCLE_SRC_PATH}
     COMMENT "Copy ${RECIPE_NAME}.circle"
   )
   list(APPEND TEST_DEPS ${CIRCLE_DST_PATH})
@@ -72,7 +72,7 @@ foreach(IDX RANGE ${RECIPE_LENGTH_M1})
 
   add_custom_command(OUTPUT ${PART_DST_PATH}
     COMMAND ${CMAKE_COMMAND} -E copy "${PART_SRC_PATH}" "${PART_DST_PATH}"
-    DEPENDS ${PART_SRC_PATH} ${PARTITIONER_OUTPUT_PATH}
+    DEPENDS ${PART_SRC_PATH}
     COMMENT "Copy ${PART_FILE}"
   )
   list(APPEND TEST_DEPS ${PART_DST_PATH})
@@ -111,3 +111,13 @@ add_test(NAME circle_part_value_test
           "$<TARGET_FILE:circle_part_driver>"
           ${PARTITION_LIST}
 )
+
+if(ONE_UBUNTU_CODENAME_JAMMY)
+  add_test(NAME circle_part_value_210_test
+    COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/part_eval_all.sh"
+            "${CMAKE_CURRENT_BINARY_DIR}"
+            "${NNCC_OVERLAY_DIR}/venv_2_10_1"
+            "$<TARGET_FILE:circle_part_driver>"
+            ${PARTITION_LIST}
+  )
+endif(ONE_UBUNTU_CODENAME_JAMMY)
index a3a2902..a9420d4 100644 (file)
@@ -82,6 +82,47 @@ macro(AddFakeQuant RECIPE)
   list(APPEND TEST_NAMES ${RECIPE})
 endmacro(AddFakeQuant)
 
+# Macro to quantize without quantize_dequantize_weights
+macro(AddSkipQDQW RECIPE)
+  cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+  set(QCONFIG_OPT "")
+  if(ARG_USE_QCONFIG)
+    set(QCONFIG_OPT "--config" "${ARTIFACTS_BIN_PATH}/${RECIPE}.qconf.json")
+  endif()
+
+  set(INPUT_DTYPE_OPT "")
+  if(ARG_INPUT_DTYPE)
+    set(INPUT_DTYPE_OPT "--input_type" "${ARG_INPUT_DTYPE}")
+  endif()
+
+  set(OUTPUT_DTYPE_OPT "")
+  if(ARG_OUTPUT_DTYPE)
+    set(OUTPUT_DTYPE_OPT "--output_type" "${ARG_OUTPUT_DTYPE}")
+  endif()
+
+  set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE}.circle")
+  set(RECORDED_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.recorded.circle")
+  set(QUANT_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.q.circle")
+
+  # Generate quantized .circle
+  add_custom_command(OUTPUT ${QUANT_CIRCLE_PATH}
+    COMMAND $<TARGET_FILE:record-minmax> --input_model ${CIRCLE_PATH} --output_model ${RECORDED_CIRCLE_PATH}
+    COMMAND $<TARGET_FILE:circle-quantizer>
+      --quantize_with_minmax float32 ${ARG_DTYPE} ${ARG_GRANULARITY}
+      ${QCONFIG_OPT} ${RECORDED_CIRCLE_PATH} ${QUANT_CIRCLE_PATH}
+      ${INPUT_DTYPE_OPT} ${OUTPUT_DTYPE_OPT}
+    DEPENDS
+      circle-quantizer
+      record-minmax
+      ${CIRCLE_PATH}
+    COMMENT "Generate ${RECIPE}.q.circle"
+  )
+
+  list(APPEND TEST_DEPS ${QUANT_CIRCLE_PATH})
+  list(APPEND TEST_NAMES ${RECIPE})
+endmacro(AddSkipQDQW)
+
 # Read "test.lst"
 include("test.lst")
 
index 6152549..396cf7c 100644 (file)
@@ -8,18 +8,18 @@ It tests non-functional conditions of a quantized circle model generated by circ
 
 2. Make a recipe (`test.recipe`) for fp32 model under the directory.
 
-3. Make a rule (`test.rule`) you want to test under the directory. (For more information on dredd-test-rules, see _dredd-rule-lib_ module.)
+3. Make a rule (`test.rule`) you want to test under the directory.
+(For more information on dredd-test-rules, see the _dredd-rule-lib_ module.)
 
 4. Add test to `test.lst` in this module with `Add` macro.
-
-```
-Add(RECIPE_DIR DTYPE dtype GRANULARITY granularity USE_QCONFIG)
-```
-
-- `RECIPE_DIR`: Path to the directory where the recipe file is saved.
-- `DTYPE`: Default quantization dtype (uint8, int16)
-- `GRANULARITY`: Quantization granularity (channel, layer)
-- `USE_QCONFIG`: (Optional) Whether to use a quantization configuration file or not. If this is set, `test.qconf.json` should exist under `RECIPE_DIR`
+   ```
+   Add(RECIPE_DIR DTYPE dtype GRANULARITY granularity USE_QCONFIG)
+   ```
+   - `RECIPE_DIR`: Path to the directory where the recipe file is saved.
+   - `DTYPE`: Default quantization dtype (uint8, int16)
+   - `GRANULARITY`: Quantization granularity (channel, layer)
+   - `USE_QCONFIG`: (Optional) Whether to use a quantization configuration file or not.
+   If this is set, `test.qconf.json` should exist under `RECIPE_DIR`
 
 ## Example
 
index 58f89c7..1464083 100644 (file)
@@ -70,6 +70,8 @@ Add(Quant_Conv_001 DTYPE uint8 GRANULARITY channel OUTPUT_DTYPE float32)
 Add(Quant_Conv_002 DTYPE uint8 GRANULARITY channel INPUT_DTYPE float32 OUTPUT_DTYPE float32)
 
 AddFakeQuant(Quant_Add_000)
+AddFakeQuant(Quant_DepthToSpace_000)
+AddFakeQuant(Quant_SpaceToDepth_000)
 
 ## CIRCLE RECIPE
 
@@ -78,3 +80,7 @@ Add(Quant_InstanceNorm_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
 
 # MPQ Test (default: s16, target: u8)
 Add(Quant_InstanceNorm_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+
+# Test for quantization without QuantizeDequantizeWeights
+AddSkipQDQW(Quant_Conv_005 DTYPE uint8 GRANULARITY channel)
+AddSkipQDQW(Quant_Conv_006 DTYPE int16 GRANULARITY channel)
index f1e31ed..33a845c 100644 (file)
@@ -104,8 +104,6 @@ void print_version(void)
 
 int entry(int argc, char **argv)
 {
-  // Simple argument parser (based on map)
-  std::map<std::string, OptionHook> argparse;
   luci::CircleQuantizer quantizer;
 
   auto options = quantizer.options();
@@ -177,10 +175,14 @@ int entry(int argc, char **argv)
           "destination_tensor_name(string)");
 
   arser.add_argument("--input_type")
-    .help("Input type of quantized model (uint8, int16, or float32)");
+    .help("Input type of quantized model (uint8, int16, int32, int64, float32, or bool). For "
+          "multiple inputs, "
+          "use comma-separated values. e.g., uint8,int16");
 
   arser.add_argument("--output_type")
-    .help("Output type of quantized model (uint8, int16, or float32)");
+    .help("Output type of quantized model (uint8, int16, int32, int64, float32, or bool). For "
+          "multiple outputs, "
+          "use comma-separated values. e.g., uint8,int16");
 
   arser.add_argument(cfg).help("Path to the quantization configuration file");
 
index 85140a8..b6e03e1 100644 (file)
@@ -2,7 +2,8 @@
 
 It tests the non-functional conditions of the optimized circle binary resulting from circle2circle.
 
-This test basically refers to the _TensorFlowLiteRecipes_ resource. So you should add what you want to test to both of the resource and `test.lst`.
+This test basically refers to the _TensorFlowLiteRecipes_ resource.
+So you should add what you want to test to both of the resource and `test.lst`.
 
 ## Example
 
index a6f2786..4fb1e78 100644 (file)
@@ -20,9 +20,12 @@ Add(Net_Conv_FakeQuant_000 PASS remove_fakequant)
 Add(Net_Conv_QuantDequant_000 PASS remove_quantdequant)
 Add(Net_Conv_Min_Max_000 PASS transform_min_max_to_relu6)
 Add(Net_Conv_Min_Relu_000 PASS transform_min_relu_to_relu6)
+Add(Net_Conv_PReluGraph_000 PASS fuse_prelu)
 Add(Net_Conv_Relu6_000 PASS fuse_activation_function)
+Add(Net_Duplicate_Weights_000 PASS remove_duplicate_const)
 Add(Net_DwConv_BN_000 PASS fuse_batchnorm_with_dwconv)
 Add(Net_DwConv_BN_001 PASS fuse_batchnorm_with_dwconv)
+Add(Net_FullyConnected_Add_000 PASS fold_fully_connected)
 Add(Net_Reshape_Reshape_000 PASS remove_redundant_reshape)
 Add(Net_Squeeze_Squeeze_000 PASS substitute_squeeze_to_reshape)
 Add(Net_TConv_Add_000 PASS fuse_add_with_tconv)
@@ -33,6 +36,7 @@ Add(Net_TConv_BN_001 PASS fuse_batchnorm_with_tconv)
 Add(Net_TConv_BN_002 PASS fuse_batchnorm_with_tconv)
 Add(Net_TConv_BN_003 PASS fuse_batchnorm_with_tconv)
 Add(Net_TConv_BN_004 PASS fuse_batchnorm_with_tconv)
+Add(Net_TConv_BN_005 PASS fuse_batchnorm_with_tconv)
 Add(Net_InstanceNorm_001 PASS fuse_instnorm)
 Add(Net_InstanceNorm_003 PASS fuse_instnorm)
 Add(Net_InstanceNorm_004 PASS fuse_instnorm)
@@ -49,6 +53,7 @@ Add(MaxPoolWithArgmax_000 PASS resolve_customop_max_pool_with_argmax)
 Add(MaxPoolWithArgmax_001 PASS resolve_customop_max_pool_with_argmax)
 Add(MaxPoolWithArgmax_002 PASS resolve_customop_max_pool_with_argmax)
 Add(FullyConnected_007 PASS replace_non_const_fc_with_batch_matmul)
+Add(FullyConnected_008 PASS replace_non_const_fc_with_batch_matmul)
 
 ## CIRCLE RECIPE
 
index 2899587..94831d1 100755 (executable)
@@ -21,7 +21,6 @@ source "${CONFIG_PATH}"
 
 echo "-- Found circle-inspect: ${CIRCLE_INSPECT_PATH}"
 echo "-- Found circle-verify: ${CIRCLE_VERIFY_PATH}"
-echo "-- Found circle2circle: ${CIRCLE2CIRCLE_PATH}"
 echo "-- Found common-artifacts: ${RESOURCE_DIR}"
 
 TESTED=()
index f5cf0d7..9afb67d 100644 (file)
@@ -77,10 +77,14 @@ int entry(int argc, char **argv)
   add_switch(arser, "--fold_dequantize", "This will fold dequantize op");
   add_switch(arser, "--fold_dwconv",
              "This will fold Depthwise Convolution operator with constant inputs");
+  add_switch(arser, "--fold_fully_connected",
+             "This will fold FullyConnected operator with constant inputs");
   add_switch(arser, "--fold_gather", "This will fold Gather operator");
   add_switch(arser, "--fold_sparse_to_dense", "This will fold SparseToDense operator");
   add_switch(arser, "--forward_reshape_to_unaryop",
              "This will move Reshape after UnaryOp for centain condition");
+  add_switch(arser, "--forward_transpose_op",
+             "This will move Transpose Op forward if possible (for further optimization)");
   add_switch(arser, "--fuse_activation_function",
              "This will fuse Activation function to a preceding operator");
   add_switch(arser, "--fuse_add_with_fully_connected",
@@ -106,6 +110,8 @@ int entry(int argc, char **argv)
              "when the impact is known to be acceptable.");
   add_switch(arser, "--fuse_preactivation_batchnorm",
              "This will fuse BatchNorm operators of pre-activations to Convolution operator");
+  add_switch(arser, "--fuse_prelu", "This will fuse operators to PReLU operator");
+  add_switch(arser, "--remove_duplicate_const", "This will remove all duplicate constant nodes");
   add_switch(arser, "--remove_fakequant", "This will remove FakeQuant operators");
   add_switch(arser, "--remove_quantdequant", "This will remove Quantize-Dequantize sequence");
   add_switch(arser, "--remove_redundant_quantize", "This will remove redundant Quantize operators");
@@ -149,6 +155,7 @@ int entry(int argc, char **argv)
   add_switch(arser, "--substitute_transpose_to_reshape",
              "This will convert single input Transpose to Reshape");
   add_switch(arser, "--expand_broadcast_const", "This will expand broadcastable constant inputs");
+  add_switch(arser, "--unroll_unidirseqlstm", "Unroll UnidirectionalSequenceLSTM operator.");
   add_switch(arser, "--convert_nchw_to_nhwc",
              "Experimental: This will convert NCHW operators to NHWC under the assumption that "
              "input model is NCHW.");
@@ -216,12 +223,16 @@ int entry(int argc, char **argv)
     options->enable(Algorithms::FoldDequantize);
   if (arser.get<bool>("--fold_dwconv"))
     options->enable(Algorithms::FoldDepthwiseConv2D);
+  if (arser.get<bool>("--fold_fully_connected"))
+    options->enable(Algorithms::FoldFullyConnected);
   if (arser.get<bool>("--fold_gather"))
     options->enable(Algorithms::FoldGather);
   if (arser.get<bool>("--fold_sparse_to_dense"))
     options->enable(Algorithms::FoldSparseToDense);
   if (arser.get<bool>("--forward_reshape_to_unaryop"))
     options->enable(Algorithms::ForwardReshapeToUnaryOp);
+  if (arser.get<bool>("--forward_transpose_op"))
+    options->enable(Algorithms::ForwardTransposeOp);
   if (arser.get<bool>("--fuse_activation_function"))
     options->enable(Algorithms::FuseActivationFunction);
   if (arser.get<bool>("--fuse_batchnorm_with_conv"))
@@ -244,8 +255,12 @@ int entry(int argc, char **argv)
     options->enable(Algorithms::MakeBatchNormGammaPositive);
   if (arser.get<bool>("--fuse_preactivation_batchnorm"))
     options->enable(Algorithms::FusePreActivationBatchNorm);
+  if (arser.get<bool>("--fuse_prelu"))
+    options->enable(Algorithms::FusePRelu);
   if (arser.get<bool>("--fuse_transpose_with_mean"))
     options->enable(Algorithms::FuseTransposeWithMean);
+  if (arser.get<bool>("--remove_duplicate_const"))
+    options->enable(Algorithms::RemoveDuplicateConst);
   if (arser.get<bool>("--remove_fakequant"))
     options->enable(Algorithms::RemoveFakeQuant);
   if (arser.get<bool>("--remove_quantdequant"))
@@ -300,6 +315,8 @@ int entry(int argc, char **argv)
     options->enable(Algorithms::TransformMinReluToRelu6Pass);
   if (arser.get<bool>("--expand_broadcast_const"))
     options->enable(Algorithms::ExpandBroadcastConst);
+  if (arser.get<bool>("--unroll_unidirseqlstm"))
+    options->enable(Algorithms::UnrollUnidirSeqLSTM);
 
   if (arser.get<bool>("--mute_warnings"))
     settings->set(luci::UserSettings::Key::MuteWarnings, true);
index 34a3a4d..7d89d45 100644 (file)
@@ -1,4 +1,9 @@
 #[[ Generate common python virtual environment ]]
+# NOTE find_package will try to use at least python3.8 as follows depending on platform version
+#   Ubuntu18.04; explicitly installed python3.8 (default is python3.6)
+#   Ubuntu20.04; default python3.8
+#   Ubuntu22.04; default python3.10
+#   refer to https://github.com/Samsung/ONE/issues/9962
 find_package(PythonInterp 3.8 QUIET)
 find_package(PythonLibs 3.8 QUIET)
 
@@ -14,6 +19,10 @@ endif()
 
 # Create python virtual environment with tensorflow 2.8.0
 set(VIRTUALENV_OVERLAY_TF_2_8_0 "${NNCC_OVERLAY_DIR}/venv_2_8_0")
+# TensorFlow 2.10.1 for Ubuntu22.04
+if(ONE_UBUNTU_CODENAME_JAMMY)
+  set(VIRTUALENV_OVERLAY_TF_2_10_1 "${NNCC_OVERLAY_DIR}/venv_2_10_1")
+endif(ONE_UBUNTU_CODENAME_JAMMY)
 
 add_custom_command(
   OUTPUT ${VIRTUALENV_OVERLAY_TF_2_8_0}
@@ -24,6 +33,15 @@ add_custom_command(
 set(REQUIREMENTS_FILE "requirements.txt")
 set(REQUIREMENTS_OVERLAY_PATH_TF_2_8_0 "${VIRTUALENV_OVERLAY_TF_2_8_0}/${REQUIREMENTS_FILE}")
 
+if(ONE_UBUNTU_CODENAME_JAMMY)
+  add_custom_command(
+    OUTPUT ${VIRTUALENV_OVERLAY_TF_2_10_1}
+    COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_10_1}
+  )
+  set(REQUIREMENTS_FILE "requirements.txt")
+  set(REQUIREMENTS_OVERLAY_PATH_TF_2_10_1 "${VIRTUALENV_OVERLAY_TF_2_10_1}/${REQUIREMENTS_FILE}")
+endif(ONE_UBUNTU_CODENAME_JAMMY)
+
 set(PYTHON_OVERLAY python3)
 if(PYTHON_EXECUTABLE MATCHES python3.8)
   set(PYTHON_OVERLAY python3.8)
@@ -43,6 +61,7 @@ add_custom_command(
   COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.8.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
   COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
   COMMAND ${CMAKE_COMMAND} -E echo "protobuf==3.20.1" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
+  COMMAND ${CMAKE_COMMAND} -E echo "pydot==1.4.2" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
   COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/${PYTHON_OVERLAY} -m pip --default-timeout=1000
           ${PIP_OPTION_TRUSTED_HOST} install --upgrade pip setuptools
   COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/${PYTHON_OVERLAY} -m pip --default-timeout=1000
@@ -55,6 +74,27 @@ add_custom_target(common_artifacts_python_deps ALL
           ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
 )
 
+if(ONE_UBUNTU_CODENAME_JAMMY)
+  add_custom_command(
+    OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_10_1}
+    COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_10_1}
+    COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.10.1" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_10_1}
+    COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==23.1.21" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_10_1}
+    COMMAND ${CMAKE_COMMAND} -E echo "protobuf==3.19.6" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_10_1}
+    COMMAND ${CMAKE_COMMAND} -E echo "pydot==1.4.2" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_10_1}
+    COMMAND ${VIRTUALENV_OVERLAY_TF_2_10_1}/bin/${PYTHON_OVERLAY} -m pip --default-timeout=1000
+            ${PIP_OPTION_TRUSTED_HOST} install --upgrade pip setuptools
+    COMMAND ${VIRTUALENV_OVERLAY_TF_2_10_1}/bin/${PYTHON_OVERLAY} -m pip --default-timeout=1000
+            ${PIP_OPTION_TRUSTED_HOST} install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_10_1} --upgrade
+    DEPENDS ${VIRTUALENV_OVERLAY_TF_2_10_1}
+  )
+
+  add_custom_target(common_artifacts_python_u22_deps ALL
+    DEPENDS ${VIRTUALENV_OVERLAY_TF_2_10_1}
+            ${REQUIREMENTS_OVERLAY_PATH_TF_2_10_1}
+  )
+endif(ONE_UBUNTU_CODENAME_JAMMY)
+
 #[[ Generate common resources ]]
 # TODO add pbtxt
 nnas_find_package(HDF5 QUIET)
@@ -86,7 +126,7 @@ set(TEST_RECIPE_FILENAME "test.recipe")
 set(TEST_RULE_FILENAME "test.rule")
 set(TEST_QCONFIG_FILENAME "test.qconf.json")
 
-set(MODEL2NNPKG "${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh")
+set(MODEL2NNPKG "${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.py")
 # Get test case list
 unset(RECIPES)
 file(GLOB TFLITE_SUBDIR RELATIVE ${TFLITE_RECIPE_REPO} ${TFLITE_RECIPE_REPO}/*)
@@ -270,8 +310,8 @@ foreach(RECIPE IN ITEMS ${RECIPES})
   list(APPEND TEST_DEPS ${NNPKG_DIR})
 
   add_custom_command(OUTPUT ${NNPKG_MODEL}
-    COMMAND ${MODEL2NNPKG} ${MODEL_PATH}
-    DEPENDS ${MODEL2NNPKG} ${MODEL_PATH} ${NNPKG_DIR}
+    COMMAND ${PYTHON_EXECUTABLE} ${MODEL2NNPKG} -m ${MODEL_PATH}
+    DEPENDS ${MODEL2NNPKG} ${MODEL_PATH}
     COMMENT "Generate ${RECIPE} nnpackage"
   )
   list(APPEND TEST_DEPS ${NNPKG_MODEL})
@@ -281,7 +321,6 @@ foreach(RECIPE IN ITEMS ${RECIPES})
     set(TC_DIRECTORY "${NNPKG_DIR}/metadata/tc")
     add_custom_command(OUTPUT ${TC_DIRECTORY}
       COMMAND ${CMAKE_COMMAND} -E make_directory ${TC_DIRECTORY}
-      DEPENDS ${NNPKG_DIR}
       COMMENT "Generate ${RECIPE} nnpackage test directory"
     )
     list(APPEND TEST_DEPS ${TC_DIRECTORY})
index 2275a42..f238e79 100644 (file)
@@ -5,7 +5,6 @@
 
 #[[ optimize : Exclude from circle optimization(circle2circle) ]]
 ## TensorFlowLiteRecipes
-optimize(UnidirectionalSequenceLSTM_001) # This recipe contains is_variable Tensor
 
 ## CircleRecipes
 
@@ -136,8 +135,7 @@ tcgenerate(Tile_000)
 tcgenerate(Tile_U8_000)
 tcgenerate(TopKV2_000)
 tcgenerate(TopKV2_001)
-tcgenerate(UnidirectionalSequenceLSTM_000) # runtime and luci-interpreter doesn't support UnidirectionalSequenceLSTM op yet
-tcgenerate(UnidirectionalSequenceLSTM_001) # runtime and luci-interpreter doesn't support UnidirectionalSequenceLSTM op yet
+tcgenerate(UnidirectionalSequenceLSTM_000) # This model is just for Op creation, cannot run
 tcgenerate(Unique_000)
 tcgenerate(Unique_001)
 tcgenerate(Unique_002)
diff --git a/compiler/dalgona-test/.gitignore b/compiler/dalgona-test/.gitignore
new file mode 100644 (file)
index 0000000..19f2918
--- /dev/null
@@ -0,0 +1 @@
+test.local.lst
diff --git a/compiler/dalgona-test/CMakeLists.txt b/compiler/dalgona-test/CMakeLists.txt
new file mode 100644 (file)
index 0000000..a233831
--- /dev/null
@@ -0,0 +1,69 @@
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+unset(DALGONA_SINGLE_OP_TEST)
+
+macro(singleOpTest NAME)
+  list(APPEND DALGONA_SINGLE_OP_TEST ${NAME})
+endmacro(singleOpTest)
+
+# Read "test.lst"
+include("test.lst")
+# Read "test.local.lst" if exists
+include("test.local.lst" OPTIONAL)
+
+unset(TEST_DEPS)
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+# Place test scripts in one place
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/GenH5RandomInputs.py" "${CMAKE_CURRENT_BINARY_DIR}/GenH5RandomInputs.py" COPYONLY)
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/SingleOperatorTest.py" "${CMAKE_CURRENT_BINARY_DIR}/SingleOperatorTest.py" COPYONLY)
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/RandomDataGenerator.py" "${CMAKE_CURRENT_BINARY_DIR}/RandomDataGenerator.py" COPYONLY)
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/TestUtil.py" "${CMAKE_CURRENT_BINARY_DIR}/TestUtil.py" COPYONLY)
+
+###
+### Generate test.config
+###
+set(TEST_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/test.config")
+
+add_custom_command(
+  OUTPUT ${TEST_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E echo 'DALGONA_PATH=\"$<TARGET_FILE:dalgona>\"' >> ${TEST_CONFIG}
+  DEPENDS dalgona
+  COMMENT "Generate test configuration"
+)
+
+# Import pics module
+get_target_property(PICS_BIN_PATH pics BINARY_DIR)
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/circle
+                   COMMAND ${CMAKE_COMMAND} -E create_symlink
+                   ${PICS_BIN_PATH}/circle ${CMAKE_CURRENT_BINARY_DIR}/circle)
+
+list(APPEND TEST_DEPS "${TEST_CONFIG}" "${CMAKE_CURRENT_BINARY_DIR}/circle")
+
+# This enforces CMake to generate all the dependencies during "build" phase
+add_custom_target(dalgona_test_deps ALL DEPENDS ${TEST_DEPS})
+
+# Run tests
+add_test(
+  NAME dalgona_single_op_test
+  COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/TestSingleOp.sh"
+          "${TEST_CONFIG}"
+          "${ARTIFACTS_BIN_PATH}"
+          "${NNCC_OVERLAY_DIR}/venv_2_8_0"
+          ${DALGONA_SINGLE_OP_TEST}
+)
+
+if(ONE_UBUNTU_CODENAME_JAMMY)
+  add_test(
+    NAME dalgona_single_op_210_test
+    COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/TestSingleOp.sh"
+            "${TEST_CONFIG}"
+            "${ARTIFACTS_BIN_PATH}"
+            "${NNCC_OVERLAY_DIR}/venv_2_10_1"
+            ${DALGONA_SINGLE_OP_TEST}
+  )
+endif(ONE_UBUNTU_CODENAME_JAMMY)
diff --git a/compiler/dalgona-test/GenH5RandomInputs.py b/compiler/dalgona-test/GenH5RandomInputs.py
new file mode 100644 (file)
index 0000000..795cd5a
--- /dev/null
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+
+# Copyright 2022 Samsung Electronics Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import h5py as h5
+import numpy as np
+import argparse
+
+from circle.Model import Model
+from RandomDataGenerator import RandomDataGenerator
+
+#
+# This script generates a pack of random input data (.h5) expected by the input circle model
+#
+# Basic usage:
+#   gen_h5_random_inputs.py --model <path/to/circle/model> --num_data <number/of/data> --output <path/to/output/data>
+#   ex: gen_h5_random_inputs.py --model add.circle --num_data 3 --output add.circle.input.h5
+#   (This will create add.circle.input.h5 composed of three random inputs in the same directory as the model)
+parser = argparse.ArgumentParser()
+parser.add_argument('--model', type=str, required=True)
+parser.add_argument('--num_data', type=int, required=True)
+parser.add_argument('--output', type=str, required=True)
+args = parser.parse_args()
+
+model = args.model
+num_data = args.num_data
+output_path = args.output
+
+with open(model, 'rb') as f:
+    buf = f.read()
+    circle_model = Model.GetRootAsModel(buf, 0)
+
+# Assume one subgraph
+assert (circle_model.SubgraphsLength() == 1)
+graph = circle_model.Subgraphs(0)
+inputs = graph.InputsAsNumpy()
+
+# Create h5 file
+h5_file = h5.File(output_path, 'w')
+group = h5_file.create_group("value")
+group.attrs['desc'] = "Input data for " + model
+
+# Generate random data
+for i in range(num_data):
+    sample = group.create_group(str(i))
+    for j in range(len(inputs)):
+        input_index = inputs[j]
+        tensor = graph.Tensors(input_index)
+        g = RandomDataGenerator(tensor.ShapeAsNumpy())
+        input_data = g.gen(tensor.Type())
+        sample.create_dataset(str(j), data=input_data)
+
+h5_file.close()
diff --git a/compiler/dalgona-test/RandomDataGenerator.py b/compiler/dalgona-test/RandomDataGenerator.py
new file mode 100644 (file)
index 0000000..6fa9ab0
--- /dev/null
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+
+# Copyright 2022 Samsung Electronics Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import numpy as np
+from circle.TensorType import TensorType
+
+
+class RandomDataGenerator:
+    def __init__(self, shape):
+        self.shape = shape
+
+    def _unsupported_types(self):
+        raise RuntimeError('Unsupported data type')
+
+    def _gen_uint8(self):
+        return np.random.randint(0, high=256, size=self.shape, dtype=np.uint8)
+
+    def _gen_int16(self):
+        return np.random.randint(-32767, high=32768, size=self.shape, dtype=np.int16)
+
+    def _gen_float32(self):
+        return np.array(10 * np.random.random_sample(self.shape) - 5, np.float32)
+
+    def gen(self, dtype):
+        gen_book = dict()
+        gen_book[TensorType.UINT8] = self._gen_uint8
+        gen_book[TensorType.INT16] = self._gen_int16
+        gen_book[TensorType.FLOAT32] = self._gen_float32
+
+        return gen_book.get(dtype, self._unsupported_types)()
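+
+# Example usage (illustrative; this mirrors how GenH5RandomInputs.py drives the class):
+#   g = RandomDataGenerator(tensor.ShapeAsNumpy())
+#   input_data = g.gen(tensor.Type())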
diff --git a/compiler/dalgona-test/SingleOperatorTest.py b/compiler/dalgona-test/SingleOperatorTest.py
new file mode 100644 (file)
index 0000000..9de77dc
--- /dev/null
@@ -0,0 +1,210 @@
+#!/usr/bin/env python3
+
+# Copyright 2022 Samsung Electronics Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+""""Test for a model with a single operator"""
+
+from TestUtil import *
+
+from circle import Model
+from circle import BuiltinOptions
+from circle import BuiltinOperator
+from circle import Conv2DOptions
+from circle import DepthwiseConv2DOptions
+from circle import AddOptions
+from circle import FullyConnectedOptions
+from circle import TransposeConvOptions
+from circle import InstanceNormOptions
+from circle import SplitOptions
+
+
+class SingleOperatorTest(object):
+    def StartAnalysis(self, args):
+        """Called when the analysis starts"""
+        with open(args, 'rb') as f:
+            buffer = f.read()
+            self._model = Model.Model.GetRootAsModel(buffer, 0)
+
+        # Check model has one subgraph
+        assertTrue(self._model.SubgraphsLength() == 1, "Model has more than one subgraph")
+        graph = self._model.Subgraphs(0)
+
+        # Check model has one operator
+        assertTrue(graph.OperatorsLength() == 1, "Model has more than one operator")
+        self._op = graph.Operators(0)
+
+    def DefaultOpPost(self, name, opcode, inputs, output):
+        raise SystemExit('NYI operator: ' + str(opcode))
+
+    def testConv2D(self, padding, stride, dilation, fused_act):
+        # Check opcode
+        opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+        checkOpcode(opcode.BuiltinCode(), BuiltinOperator.BuiltinOperator.CONV_2D)
+
+        # Check option
+        checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+                               BuiltinOptions.BuiltinOptions.Conv2DOptions)
+
+        self._opt = self._op.BuiltinOptions()
+        opt = Conv2DOptions.Conv2DOptions()
+        opt.Init(self._opt.Bytes, self._opt.Pos)
+        checkPadding(padding, opt.Padding())
+        assertTrue(opt.StrideW() == stride['w'], "Stride_w mismatches")
+        assertTrue(opt.StrideH() == stride['h'], "Stride_h mismatches")
+        assertTrue(opt.DilationWFactor() == dilation['w'], "Dilation_w mismatches")
+        assertTrue(opt.DilationHFactor() == dilation['h'], "Dilation_h mismatches")
+        checkActivation(fused_act, opt.FusedActivationFunction())
+
+    def Conv2DPre(self, name, input, filter, bias, padding, stride, dilation, fused_act):
+        self.testConv2D(padding, stride, dilation, fused_act)
+
+    def Conv2DPost(self, name, input, filter, bias, padding, stride, dilation, output,
+                   fused_act):
+        self.testConv2D(padding, stride, dilation, fused_act)
+
+    def testAdd(self, fused_act):
+        # Check opcode
+        opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+        checkOpcode(opcode.BuiltinCode(), BuiltinOperator.BuiltinOperator.ADD)
+
+        # Check option
+        checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+                               BuiltinOptions.BuiltinOptions.AddOptions)
+
+        self._opt = self._op.BuiltinOptions()
+        opt = AddOptions.AddOptions()
+        opt.Init(self._opt.Bytes, self._opt.Pos)
+        checkActivation(fused_act, opt.FusedActivationFunction())
+
+    def AddPre(self, name, x, y, fused_act):
+        self.testAdd(fused_act)
+
+    def AddPost(self, name, x, y, output, fused_act):
+        self.testAdd(fused_act)
+
+    def testDepthwiseConv2D(self, padding, stride, depth_multiplier, dilation, fused_act):
+        # Check opcode
+        opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+        checkOpcode(opcode.BuiltinCode(),
+                    BuiltinOperator.BuiltinOperator.DEPTHWISE_CONV_2D)
+
+        # Check option
+        checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+                               BuiltinOptions.BuiltinOptions.DepthwiseConv2DOptions)
+
+        self._opt = self._op.BuiltinOptions()
+        opt = DepthwiseConv2DOptions.DepthwiseConv2DOptions()
+        opt.Init(self._opt.Bytes, self._opt.Pos)
+        checkPadding(padding, opt.Padding())
+        assertTrue(opt.StrideW() == stride['w'], "Stride_w mismatches")
+        assertTrue(opt.StrideH() == stride['h'], "Stride_h mismatches")
+        assertTrue(opt.DepthMultiplier() == depth_multiplier,
+                   "Depth multiplier mismatches")
+        assertTrue(opt.DilationWFactor() == dilation['w'], "Dilation_w mismatches")
+        assertTrue(opt.DilationHFactor() == dilation['h'], "Dilation_h mismatches")
+        checkActivation(fused_act, opt.FusedActivationFunction())
+
+    def DepthwiseConv2DPre(self, name, input, filter, bias, padding, stride,
+                           depth_multiplier, dilation, fused_act):
+        self.testDepthwiseConv2D(padding, stride, depth_multiplier, dilation, fused_act)
+
+    def DepthwiseConv2DPost(self, name, input, filter, bias, padding, stride,
+                            depth_multiplier, dilation, output, fused_act):
+        self.testDepthwiseConv2D(padding, stride, depth_multiplier, dilation, fused_act)
+
+    def testFullyConnected(self, fused_act):
+        # Check opcode
+        opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+        checkOpcode(opcode.BuiltinCode(), BuiltinOperator.BuiltinOperator.FULLY_CONNECTED)
+
+        # Check option
+        checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+                               BuiltinOptions.BuiltinOptions.FullyConnectedOptions)
+
+        self._opt = self._op.BuiltinOptions()
+        opt = FullyConnectedOptions.FullyConnectedOptions()
+        opt.Init(self._opt.Bytes, self._opt.Pos)
+        checkActivation(fused_act, opt.FusedActivationFunction())
+
+    def FullyConnectedPre(self, name, input, weights, bias, fused_act):
+        self.testFullyConnected(fused_act)
+
+    def FullyConnectedPost(self, name, input, weights, bias, output, fused_act):
+        self.testFullyConnected(fused_act)
+
+    def testTransposeConv(self, padding, stride):
+        # Check opcode
+        opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+        checkOpcode(opcode.BuiltinCode(), BuiltinOperator.BuiltinOperator.TRANSPOSE_CONV)
+
+        # Check option
+        checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+                               BuiltinOptions.BuiltinOptions.TransposeConvOptions)
+
+        self._opt = self._op.BuiltinOptions()
+        opt = TransposeConvOptions.TransposeConvOptions()
+        opt.Init(self._opt.Bytes, self._opt.Pos)
+        checkPadding(padding, opt.Padding())
+        assertTrue(opt.StrideW() == stride['w'], "Stride_w mismatches")
+        assertTrue(opt.StrideH() == stride['h'], "Stride_h mismatches")
+
+    def TransposeConvPre(self, name, input, filter, output_shape, bias, padding, stride):
+        self.testTransposeConv(padding, stride)
+
+    def TransposeConvPost(self, name, input, filter, output_shape, bias, padding, stride,
+                          output):
+        self.testTransposeConv(padding, stride)
+
+    def testInstanceNorm(self, epsilon, fused_act):
+        # Check opcode
+        opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+        checkOpcode(opcode.BuiltinCode(), BuiltinOperator.BuiltinOperator.INSTANCE_NORM)
+
+        # Check option
+        checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+                               BuiltinOptions.BuiltinOptions.InstanceNormOptions)
+
+        self._opt = self._op.BuiltinOptions()
+        opt = InstanceNormOptions.InstanceNormOptions()
+        opt.Init(self._opt.Bytes, self._opt.Pos)
+        assertTrue(opt.Epsilon() == epsilon, "epsilon mismatches")
+        checkActivation(fused_act, opt.FusedActivationFunction())
+
+    def InstanceNormPre(self, name, input, gamma, beta, epsilon, fused_act):
+        self.testInstanceNorm(epsilon, fused_act)
+
+    def InstanceNormPost(self, name, input, gamma, beta, epsilon, output, fused_act):
+        self.testInstanceNorm(epsilon, fused_act)
+
+    def testSplit(self, num_split):
+        # Check opcode
+        opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+        checkOpcode(opcode.BuiltinCode(), BuiltinOperator.BuiltinOperator.SPLIT)
+
+        # Check option
+        checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+                               BuiltinOptions.BuiltinOptions.SplitOptions)
+
+        self._opt = self._op.BuiltinOptions()
+        opt = SplitOptions.SplitOptions()
+        opt.Init(self._opt.Bytes, self._opt.Pos)
+        assertTrue(opt.NumSplits() == num_split, "num_split mismatches")
+
+    def SplitPre(self, name, split_dim, input, num_split):
+        self.testSplit(num_split)
+
+    def SplitPost(self, name, split_dim, input, num_split, outputs):
+        self.testSplit(num_split)
+        assertTrue(num_split == len(outputs), "num_split mismatches with outputs")
diff --git a/compiler/dalgona-test/TestSingleOp.sh b/compiler/dalgona-test/TestSingleOp.sh
new file mode 100755 (executable)
index 0000000..fad26fb
--- /dev/null
@@ -0,0 +1,97 @@
+#!/bin/bash
+
+# Copyright 2022 Samsung Electronics Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+# This script tests the basic behavior of dalgona
+#
+# HOW TO USE
+#
+# ./TestSingleOp.sh <path/to/test.config> <path/to/work_dir> <path/to/venv> <TEST 1> <TEST 2> ...
+# test.config : set ${DALGONA_PATH}
+# work_dir : archive of common-artifacts (ex: build/compiler/common-artifacts)
+# venv : virtual environment for python execution
+
+CONFIG_PATH="$1"; shift
+BIN_PATH=$(dirname "$CONFIG_PATH")
+GEN_SCRIPT_PATH="${BIN_PATH}/GenH5RandomInputs.py"
+TEST_SCRIPT_PATH="${BIN_PATH}/SingleOperatorTest.py"
+WORKDIR="$1"; shift
+VIRTUALENV="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found DALGONA: ${DALGONA_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+for TESTCASE in "$@"; do
+  TESTED+=("${TESTCASE}")
+
+  TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+
+  PASSED_TAG="${BIN_PATH}/${TESTCASE}.passed"
+  rm -f "${PASSED_TAG}"
+
+  cat > "${BIN_PATH}/${TESTCASE}.log" <(
+    exec 2>&1
+    set -ex
+
+    # Generate random h5 input data
+    source "${VIRTUALENV}/bin/activate"
+    "${VIRTUALENV}/bin/python" "${GEN_SCRIPT_PATH}" \
+    --model "${TESTCASE_FILE}.circle" \
+    --num_data 3 \
+    --output "${BIN_PATH}/${TESTCASE}.circle.input.h5"
+    if [[ $? -ne 0 ]]; then
+      echo "FAILED TO GENERATE INPUT"
+      continue
+    fi
+
+    # Run dalgona with test script(SingleOperatorTest.py)
+    "${DALGONA_PATH}" \
+      --input_model "${TESTCASE_FILE}.circle" \
+      --input_data "${BIN_PATH}/${TESTCASE}.circle.input.h5" \
+      --analysis "${TEST_SCRIPT_PATH}" \
+      --analysis_args "${TESTCASE_FILE}.circle"
+
+    if [[ $? -eq 0 ]]; then
+      touch "${PASSED_TAG}"
+    fi
+  )
+
+  if [[ -f "${PASSED_TAG}" ]]; then
+    PASSED+=("$TESTCASE")
+  else
+    FAILED+=("$TESTCASE")
+  fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+  echo "FAILED"
+  for TEST in "${FAILED[@]}"
+  do
+    echo "- ${TEST}"
+  done
+  exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/dalgona-test/TestUtil.py b/compiler/dalgona-test/TestUtil.py
new file mode 100644 (file)
index 0000000..d646528
--- /dev/null
@@ -0,0 +1,58 @@
+# Copyright 2022 Samsung Electronics Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from circle import ActivationFunctionType
+from circle import BuiltinOptions
+from circle import Padding
+
+
+def assertTrue(cond, msg):
+    assert cond, msg
+
+
+def checkPadding(pad, exp_pad):
+    if pad == 'SAME':
+        assertTrue(exp_pad == Padding.Padding.SAME, "Padding mismatches")
+    elif pad == 'VALID':
+        assertTrue(exp_pad == Padding.Padding.VALID, "Padding mismatches")
+    else:
+        raise SystemExit('Unsupported padding')
+
+
+def checkActivation(act, exp_act):
+    act_functions = {
+        'relu': ActivationFunctionType.ActivationFunctionType.RELU,
+        'relu6': ActivationFunctionType.ActivationFunctionType.RELU6,
+        'relu_n1_to_1': ActivationFunctionType.ActivationFunctionType.RELU_N1_TO_1,
+        'tanh': ActivationFunctionType.ActivationFunctionType.TANH,
+        'none': ActivationFunctionType.ActivationFunctionType.NONE,
+        'sign_bit': ActivationFunctionType.ActivationFunctionType.SIGN_BIT,
+    }
+
+    try:
+        assertTrue(act_functions[act] == exp_act, "Activation function mismatches")
+    except KeyError:
+        raise SystemExit('Unsupported activation functions')
+
+
+def checkOpcode(opcode, exp_opcode):
+    assertTrue(opcode == exp_opcode,
+               "Opcode mismatches (" + str(opcode) + ", " + str(exp_opcode) + ")")
+
+
+def checkBuiltinOptionType(option, exp_option):
+    assertTrue(
+        option == exp_option,
+        "Built-in option type mismatches (" + str(option) + ", " + str(exp_option) + ")")
diff --git a/compiler/dalgona-test/requires.cmake b/compiler/dalgona-test/requires.cmake
new file mode 100644 (file)
index 0000000..ab75e09
--- /dev/null
@@ -0,0 +1,3 @@
+require("dalgona")
+require("common-artifacts")
+require("pics")
diff --git a/compiler/dalgona-test/test.lst b/compiler/dalgona-test/test.lst
new file mode 100644 (file)
index 0000000..36c4070
--- /dev/null
@@ -0,0 +1,6 @@
+singleOpTest(Conv2D_000)
+singleOpTest(Conv2D_001)
+singleOpTest(Conv2D_002)
+singleOpTest(Conv2D_003)
+singleOpTest(Split_000)
+singleOpTest(InstanceNorm_000)
diff --git a/compiler/dalgona/CMakeLists.txt b/compiler/dalgona/CMakeLists.txt
new file mode 100644 (file)
index 0000000..bd06424
--- /dev/null
@@ -0,0 +1,63 @@
+# NOTE find_package will try to use at least python3.8 as follows depending on platform version
+#   Ubuntu18.04; explicitly installed python3.8 (default is python3.6)
+#   Ubuntu20.04; default python3.8
+#   Ubuntu22.04; default python3.10
+#   refer to https://github.com/Samsung/ONE/issues/9962
+find_package(PythonInterp 3.8 QUIET)
+find_package(PythonLibs 3.8 QUIET)
+
+if(NOT ${PYTHONINTERP_FOUND})
+  message(STATUS "Build dalgona: FAILED (Python3 is missing)")
+  return()
+endif()
+
+if(${PYTHON_VERSION_MINOR} LESS 8)
+  message(STATUS "Build dalgona: FAILED (Install Python version higher than or equal to 3.8)")
+  return()
+endif()
+
+nnas_find_package(Pybind11)
+if(NOT Pybind11_FOUND)
+  message(STATUS "Build dalgona: FAILED (Pybind11 is missing)")
+  return()
+endif(NOT Pybind11_FOUND)
+
+set(DRIVER "driver/Driver.cpp")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_compile_options(-fvisibility=hidden)
+
+add_executable(dalgona ${DRIVER} ${SOURCES})
+target_include_directories(dalgona PRIVATE include)
+target_include_directories(dalgona PRIVATE ${PYTHON_INCLUDE_DIRS})
+target_include_directories(dalgona PRIVATE ${Pybind11_INCLUDE_DIRS})
+
+target_link_libraries(dalgona INTERFACE pybind11::embed)
+target_link_libraries(dalgona PRIVATE ${PYTHON_LIBRARIES})
+target_link_libraries(dalgona PRIVATE arser)
+target_link_libraries(dalgona PRIVATE safemain)
+target_link_libraries(dalgona PRIVATE foder)
+target_link_libraries(dalgona PRIVATE luci_import)
+target_link_libraries(dalgona PRIVATE luci_interpreter)
+target_link_libraries(dalgona PRIVATE dio_hdf5)
+target_link_libraries(dalgona PRIVATE nncc_common)
+
+install(TARGETS dalgona DESTINATION bin)
+
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+# dalgona is executable, so we do not link it to the test.
+# Instead, we use TEST_SOURCES to specify sources used for tests.
+set(TEST_SOURCES
+    "src/StringUtils.cpp"
+    "src/RandomUtils.cpp")
+
+nnas_find_package(GTest REQUIRED)
+GTest_AddTest(dalgona_unit_test ${TESTS} ${TEST_SOURCES})
+target_include_directories(dalgona_unit_test PRIVATE src)
+target_link_libraries(dalgona_unit_test luci_lang)
diff --git a/compiler/dalgona/README.md b/compiler/dalgona/README.md
new file mode 100644 (file)
index 0000000..0fd0f0b
--- /dev/null
@@ -0,0 +1,104 @@
+# dalgona
+
+## What is dalgona?
+
+_dalgona_ is a tool for dynamic analysis of deep neural networks.
+
+## How it works?
+
+_dalgona_ runs a user's custom analysis code (written in "Python") while performing inference. The analysis code has the form of hooks, called before/after each operator is executed. Intermediate execution results (values of activations) are passed to the hooks, so users can analyze the distribution of activations inside the hooks. The analysis results can be exported as files, log messages, or any other form, and used for various purposes (model compression, optimization, etc.).
+
+NOTE Inference is performed by `luci-interpreter`.
+
+## Possible applications
+- Finding quantization parameters based on the distribution of activations
+- Finding sparse activations by observing the portion of zero values
+- Finding the distribution of conditional variables in If-statement and While-statement
+- Visualization of activation data with Python libraries
+
+## Prerequisite
+- Python 3.8 (python3.8, python3.8-dev packages)
+- Circle model (target to analyze)
+- Input data of the model (hdf5 format. See _rawdata2hdf5_ or _gen_h5_explicit_inputs.py_ for more details.)
+- Analysis code (Python code)
+
+## Example
+```
+dalgona \
+ --input_model model.circle
+ --input_data data.h5
+ --analysis analysis/AnalysisTemplate.py
+```
+
+## Arguments
+```
+  --help            Show help message and exit
+  --input_model     Input model filepath (.circle)
+  --input_data      Input data filepath (.h5) (if not given, random data will be used)
+  --analysis        Analysis code filepath (.py)
+  --analysis_args   (optional) String argument passed to the analysis code
+```
+
+## How to write analysis code?
+
+_dalgona_ provides hooks which are called before/after an operator is executed.
+Users can access tensors relevant to the corresponding operator inside the hooks.
+The information of each operator is passed as the arguments of the hook.
+For example, for a Conv2D operator, _dalgona_ provides following hooks.
+
+```
+  def Conv2DPre(self, name, input, filter, bias, padding, stride, dilation, fused_act)
+  def Conv2DPost(self, name, input, filter, bias, padding, stride, dilation, output, fused_act)
+```
+
+`Conv2DPre`/`Conv2DPost` are called before/after Conv2D is executed, respectively. Users can write codes to analyze the distribution of intermediate tensors using the provided arguments.
+
+(Note that Conv2DPost has one more argument "output", which is the execution result of the operator)
+
+Details about the arguments of each hook can be found in the section "Arguments of Hooks".
+
+We provide a template for the analysis code in `analysis/AnalysisTemplate.py`. Users can copy the template file and modify it to write their own custom analysis code.
+
+| List of hooks | Explanation |
+| --------------|------------ |
+| StartAnalysis(self) | Called when the analysis starts |
+| EndAnalysis(self) | Called when the analysis ends |
+| StartNetworkExecution(self, inputs) | Called when the execution of a network starts |
+| EndNetworkExecution(self, outputs) | Called when the execution of a network ends |
+| DefaultOpPre(self, name, opcode, inputs) | Default hook called before an operator is executed |
+| DefaultOpPost(self, name, opcode, inputs, output) | Default hook called after an operator is executed |
+| \<OPCODE\>Pre/Post | Hooks called before/after the corresponding operator is executed. |
+
+## Arguments of Hooks
+
+Arguments are implemented with built-in Python types, as listed below; a short usage sketch follows the list.
+
+Tensor
+- Type: dict
+- {name:str, data: np.ndarray, quantparam: QuantParam, is_const: bool}
+
+QuantParam
+- Type: dict
+- {scale: list, zero_point: list, quantized_dimension: int}
+
+Padding
+- Type: string
+- Values: 'SAME', 'VALID'
+
+Stride
+- Type: dict
+- {w: int, h: int}
+
+Dilation
+- Type: dict
+- {w: int, h: int}
+
+FusedActivationFunction
+- Type: string
+- Values: 'none', 'relu', 'relu_n1_to_1', 'relu6'
+
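+As a quick illustration, here is a minimal, hypothetical analysis class (not shipped with dalgona) that reads these structures inside a hook:
+
+```
+class MyAnalysis(object):
+    def Conv2DPost(self, name, input, filter, bias, padding, stride, dilation, output, fused_act):
+        # "input", "filter", "bias" and "output" are Tensor dicts as described above
+        print(name, output['data'].shape, output['data'].dtype)
+        # quantparam may be empty for float32 models; stride and dilation are {w, h} dicts
+        print(output['quantparam'], stride['w'], dilation['h'], padding, fused_act)
+```
+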
+## What's different from Hook APIs in TensorFlow or PyTorch?
+
+Basically, dalgona works in the same way as hooks in TensorFlow or PyTorch. It calls user-defined functions before/after each operator is executed.
+
+A major difference is that dalgona runs with a model designed for inference (i.e., circle, which can be directly converted from tflite).
diff --git a/compiler/dalgona/analysis/AnalysisTemplate.py b/compiler/dalgona/analysis/AnalysisTemplate.py
new file mode 100644 (file)
index 0000000..b1ffac4
--- /dev/null
@@ -0,0 +1,125 @@
+# Copyright 2022 Samsung Electronics Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""
+A template for analysis code.
+This template shows how to access the information of each operator inside hooks.
+Users can write their own hooks by modifying this file.
+
+NOTE See "Arguments of Hooks" section in README to understand argument types (Tensor, Stride, ..)
+NOTE See "tests/SingleOperatorTest.py" for more operators.
+"""
+
+
+class AnalysisTemplate(object):
+    def StartAnalysis(self, args: str):
+        """
+        Called when the analysis starts
+        args: string given by --analysis_args option
+        """
+        print("Analysis started.")
+        print("args", args)
+
+    def EndAnalysis(self):
+        """
+        Called when the analysis ends
+        """
+        print("Analysis ended.")
+
+    def StartNetworkExecution(self, inputs: list):
+        """
+        Called when the execution of a network starts
+        inputs: list of Tensor
+        """
+        print("Network execution started.")
+
+    def EndNetworkExecution(self, outputs: list):
+        """
+        Called when the execution of a network ends
+        outputs: list of Tensor
+        """
+        print("Network execution ended.")
+
+    def DefaultOpPre(self, name: str, opcode: str, inputs: list):
+        """
+        Default hook called before an operator is executed
+        name: output tensor name (string)
+        opcode: opcode name (string)
+        inputs: list of Tensor
+        """
+        print("name", name)
+        print("opcode", opcode)
+        print("inputs", inputs)
+
+    def DefaultOpPost(self, name: str, opcode: str, inputs: list, output: dict):
+        """
+        Default hook called after an operator is executed
+        name: output tensor name (string)
+        opcode: opcode name (string)
+        inputs: list of Tensor
+        output: Tensor
+        """
+        print("name", name)
+        print("opcode", opcode)
+        print("inputs", inputs)
+        print("output", output)
+
+    def Conv2DPre(self, name: str, input: dict, filter: dict, bias: dict, padding: str,
+                  stride: dict, dilation: dict, fused_act: str):
+        """
+        Called before Conv2D layer execution
+        name: output tensor name (string)
+        opcode: opcode name (string)
+        input: Tensor
+        filter: Tensor
+        bias: Tensor
+        padding: Padding (string)
+        stride: Stride
+        dilation: Dilation
+        fused_act: Fused activation functions (string)
+        """
+        print("name", name)
+        print("input", input)
+        print("filter", filter)
+        print("bias", bias)
+        print("padding", padding)
+        print("stride", stride)
+        print("dilation", dilation)
+        print("fused activation", fused_act)
+
+    def Conv2DPost(self, name: str, input: dict, filter: dict, bias: dict, padding: str,
+                   stride: dict, dilation: dict, output: dict, fused_act: str):
+        """
+        Called after Conv2D layer execution
+        name: output tensor name (string)
+        opcode: opcode name (string)
+        input: Tensor
+        filter: Tensor
+        bias: Tensor
+        padding: Padding (string)
+        stride: Stride
+        dilation: Dilation
+        output: Tensor
+        fused_act: Fused activation functions (string)
+        """
+        print("name", name)
+        print("input", input)
+        print("filter", filter)
+        print("bias", bias)
+        print("padding", padding)
+        print("stride", stride)
+        print("dilation", dilation)
+        print("output shape", output['data'].shape)
+        print("output type", output['data'].dtype)
+        print("fused activation", fused_act)
diff --git a/compiler/dalgona/driver/Driver.cpp b/compiler/dalgona/driver/Driver.cpp
new file mode 100644 (file)
index 0000000..8bba0b7
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dalgona.h"
+
+#include <arser/arser.h>
+#include <pybind11/embed.h>
+
+namespace py = pybind11;
+
+using namespace dalgona;
+
+int entry(const int argc, char **argv)
+{
+  arser::Arser arser("Dalgona: Dynamic analysis tool for DNN");
+
+  arser.add_argument("--input_model")
+    .nargs(1)
+    .type(arser::DataType::STR)
+    .required(true)
+    .help("Input model filepath (.circle)");
+
+  arser.add_argument("--input_data")
+    .nargs(1)
+    .type(arser::DataType::STR)
+    .help("Input data filepath (.h5) (if not given, random data will be used)");
+
+  arser.add_argument("--analysis")
+    .nargs(1)
+    .type(arser::DataType::STR)
+    .required(true)
+    .help("Analysis code filepath (.py)");
+
+  arser.add_argument("--analysis_args")
+    .nargs(1)
+    .type(arser::DataType::STR)
+    .help("String argument passed to the analysis code");
+
+  try
+  {
+    arser.parse(argc, argv);
+  }
+  catch (const std::runtime_error &err)
+  {
+    std::cout << err.what() << std::endl;
+    std::cout << arser;
+    return EXIT_FAILURE;
+  }
+
+  auto input_model_path = arser.get<std::string>("--input_model");
+  auto analysis_path = arser.get<std::string>("--analysis");
+  std::string analysis_args = "";
+  if (arser["--analysis_args"])
+    analysis_args = arser.get<std::string>("--analysis_args");
+
+  // Initialize python interpreter
+  py::scoped_interpreter guard{};
+
+  Dalgona dalgona;
+
+  // Initialize interpreter and operator hooks
+  dalgona.initialize(input_model_path);
+
+  // Run analysis
+  if (arser["--input_data"])
+  {
+    const auto input_data_path = arser.get<std::string>("--input_data");
+    dalgona.runAnalysisWithH5Input(input_data_path, analysis_path, analysis_args);
+  }
+  else
+  {
+    std::cout << "--input_data was not specified. Run with a random input." << std::endl;
+    dalgona.runAnalysisWithRandomInput(analysis_path, analysis_args);
+  }
+
+  return EXIT_SUCCESS;
+}
diff --git a/compiler/dalgona/include/Dalgona.h b/compiler/dalgona/include/Dalgona.h
new file mode 100644 (file)
index 0000000..353e268
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_H__
+#define __DALGONA_H__
+
+#include <luci/IR/Module.h>
+#include <luci_interpreter/Interpreter.h>
+
+#include "PythonHooks.h"
+
+#include <memory>
+
+namespace dalgona
+{
+
+class Dalgona
+{
+public:
+  explicit Dalgona() = default;
+
+  ~Dalgona() = default;
+
+  void initialize(const std::string &input_model_path);
+
+  // Run analysis with hdf5 input
+  void runAnalysisWithH5Input(const std::string &input_data_path, const std::string &analysis_path,
+                              const std::string &analysis_args);
+
+  // Run analysis with random input
+  void runAnalysisWithRandomInput(const std::string &analysis_path,
+                                  const std::string &analysis_args);
+
+private:
+  std::unique_ptr<luci::Module> _module{nullptr};
+  std::unique_ptr<luci_interpreter::Interpreter> _interpreter{nullptr};
+  std::unique_ptr<PythonHooks> _hooks{nullptr};
+};
+
+} // namespace dalgona
+
+#endif // __DALGONA_H__
diff --git a/compiler/dalgona/include/PythonHooks.h b/compiler/dalgona/include/PythonHooks.h
new file mode 100644 (file)
index 0000000..8c100aa
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_PYTHON_HOOKS_H__
+#define __DALGONA_PYTHON_HOOKS_H__
+
+#include <loco/IR/Graph.h>
+#include <luci_interpreter/Interpreter.h>
+
+#include <pybind11/embed.h>
+
+#include <string>
+
+namespace py = pybind11;
+
+namespace dalgona
+{
+
+class PythonHooks : public luci_interpreter::ExecutionObserver
+{
+public:
+  PythonHooks(luci_interpreter::Interpreter *interpreter) : _interpreter(interpreter)
+  {
+    // Do nothing
+  }
+
+  // Called when the analysis starts
+  void importAnalysis(const std::string &analysis_path, py::object &globals,
+                      const std::string &analysis_args);
+
+  // Called after the analysis is done
+  void endAnalysis();
+
+  // Called before a network is started to be executed
+  void startNetworkExecution(loco::Graph *graph);
+
+  // Called after a network is executed
+  void endNetworkExecution(loco::Graph *graph);
+
+  // Called before an operator is executed
+  void preOperatorExecute(const luci::CircleNode *node) override;
+
+  // Called after an operator is executed
+  void postOperatorExecute(const luci::CircleNode *node) override;
+
+private:
+  luci_interpreter::Interpreter *_interpreter = nullptr;
+  py::object _analysis;
+};
+
+} // namespace dalgona
+
+#endif // __DALGONA_PYTHON_HOOKS_H__
diff --git a/compiler/dalgona/requires.cmake b/compiler/dalgona/requires.cmake
new file mode 100644 (file)
index 0000000..185476b
--- /dev/null
@@ -0,0 +1,6 @@
+require("safemain")
+require("arser")
+require("foder")
+require("luci")
+require("luci-interpreter")
+require("dio-hdf5")
diff --git a/compiler/dalgona/src/Dalgona.cpp b/compiler/dalgona/src/Dalgona.cpp
new file mode 100644 (file)
index 0000000..1d060b4
--- /dev/null
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dalgona.h"
+#include "PythonHooks.h"
+#include "RandomUtils.h"
+
+#include <luci/Importer.h>
+#include <foder/FileLoader.h>
+#include <dio_hdf5/HDF5Importer.h>
+
+#include <pybind11/embed.h>
+
+#include <iostream>
+#include <limits>
+
+using Shape = std::vector<loco::Dimension>;
+using DataType = loco::DataType;
+
+namespace py = pybind11;
+
+namespace
+{
+
+uint32_t numElements(const luci::CircleNode *node)
+{
+  assert(node != nullptr); // FIX_CALLER_UNLESS
+
+  uint32_t num_elements = 1;
+  for (uint32_t i = 0; i < node->rank(); i++)
+    num_elements *= node->dim(i).value();
+
+  return num_elements;
+}
+
+// Return tensor's size in bytes
+template <typename NodeT> size_t getByteSize(const NodeT *node)
+{
+  assert(node != nullptr); // FIX_CALLER_UNLESS
+
+  uint32_t dtype_size = loco::size(node->dtype());
+  return static_cast<size_t>(dtype_size) * static_cast<size_t>(numElements(node));
+}
+
+// Throw an exception if the input meets one of the following conditions.
+// 1. It has an unknown dimension
+// 2. Its number of elements is 0
+void checkInputDimension(const luci::CircleInput *input)
+{
+  assert(input != nullptr); // FIX_CALLER_UNLESS
+
+  for (uint32_t i = 0; i < input->rank(); i++)
+    if (!input->dim(i).known())
+      throw std::runtime_error(input->name() + " has unknown dimension");
+
+  if (numElements(input) == 0)
+    throw std::runtime_error(input->name() + " is a zero-sized input");
+}
+
+// Check that the given data type and shape match those of CircleInput
+// Throw an exception if the type or shape does not match
+void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype, const Shape &shape)
+{
+  assert(input_node != nullptr); // FIX_CALLER_UNLESS
+
+  // Type check
+  if (dtype != input_node->dtype())
+    throw std::runtime_error("Wrong input type.");
+
+  if (shape.size() != input_node->rank())
+    throw std::runtime_error("Input rank mismatch.");
+
+  for (uint32_t i = 0; i < shape.size(); i++)
+  {
+    if (not(shape.at(i) == input_node->dim(i)))
+      throw std::runtime_error("Input shape mismatch.");
+  }
+}
+
+} // namespace
+
+namespace dalgona
+{
+
+void Dalgona::initialize(const std::string &input_model_path)
+{
+  // Load model from the file
+  foder::FileLoader loader{input_model_path};
+  std::vector<char> model_data = loader.load();
+
+  // Verify flatbuffers
+  flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model_data.data()),
+                                 model_data.size()};
+  if (not circle::VerifyModelBuffer(verifier))
+    throw std::runtime_error("Failed to verify circle '" + input_model_path + "'");
+
+  auto circle_model = circle::GetModel(model_data.data());
+
+  if (not circle_model)
+    throw std::runtime_error("Failed to load '" + input_model_path + "'");
+
+  _module = luci::Importer().importModule(circle_model);
+
+  if (not _module)
+    throw std::runtime_error("ERROR: Failed to load '" + input_model_path + "'");
+
+  // Initialize interpreter
+  _interpreter = std::make_unique<luci_interpreter::Interpreter>(_module.get());
+
+  _hooks = std::make_unique<PythonHooks>(_interpreter.get());
+
+  _interpreter->attachObserver(_hooks.get());
+}
+
+void Dalgona::runAnalysisWithH5Input(const std::string &input_data_path,
+                                     const std::string &analysis_path,
+                                     const std::string &analysis_args)
+{
+  py::object scope = py::module::import("__main__").attr("__dict__");
+  _hooks->importAnalysis(analysis_path, scope, analysis_args);
+
+  try
+  {
+    dio::hdf5::HDF5Importer importer(input_data_path);
+    importer.importGroup("value");
+
+    bool is_raw_data = importer.isRawData();
+
+    const auto num_records = importer.numData();
+    if (num_records == 0)
+      throw std::runtime_error("The input data file does not contain any record.");
+
+    const auto input_nodes = loco::input_nodes(_module->graph());
+    const auto num_inputs = input_nodes.size();
+
+    for (int32_t record_idx = 0; record_idx < num_records; record_idx++)
+    {
+      if (num_inputs != static_cast<uint32_t>(importer.numInputs(record_idx)))
+        throw std::runtime_error("Wrong number of inputs.");
+
+      std::cout << "Running " << record_idx << "'th data" << std::endl;
+
+      for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
+      {
+        const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+        assert(input_node->index() == input_idx);
+        checkInputDimension(input_node);
+        std::vector<char> input_data(getByteSize(input_node));
+
+        if (is_raw_data)
+        {
+          // Skip type/shape check for raw data
+          importer.readTensor(record_idx, input_idx, input_data.data());
+        }
+        else
+        {
+          DataType dtype;
+          Shape shape;
+          importer.readTensor(record_idx, input_idx, &dtype, &shape, input_data.data());
+
+          // Check the type and the shape of the input data is valid
+          verifyTypeShape(input_node, dtype, shape);
+        }
+
+        _interpreter->writeInputTensor(input_node, input_data.data(), input_data.size());
+      }
+
+      _hooks->startNetworkExecution(_module->graph());
+      _interpreter->interpret();
+      _hooks->endNetworkExecution(_module->graph());
+    }
+
+    std::cout << "Finished executing " << num_records << "'th data" << std::endl;
+    _hooks->endAnalysis();
+  }
+  catch (const H5::Exception &e)
+  {
+    H5::Exception::printErrorStack();
+    throw std::runtime_error("HDF5 error occurred.");
+  }
+}
+
+void Dalgona::runAnalysisWithRandomInput(const std::string &analysis_path,
+                                         const std::string &analysis_args)
+{
+  py::object scope = py::module::import("__main__").attr("__dict__");
+  _hooks->importAnalysis(analysis_path, scope, analysis_args);
+
+  const auto input_nodes = loco::input_nodes(_module->graph());
+  const auto num_inputs = input_nodes.size();
+
+  for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
+  {
+    const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+    assert(input_node->index() == input_idx);
+    checkInputDimension(input_node);
+
+    uint32_t num_elems = numElements(input_node);
+    switch (input_node->dtype())
+    {
+      case DataType::FLOAT32:
+      {
+        // Synced with record-minmax (-5,5)
+        auto input_data = genRandomFloatData(num_elems, -5, 5);
+        _interpreter->writeInputTensor(input_node, input_data.data(),
+                                       input_data.size() * sizeof(float));
+        break;
+      }
+      case DataType::U8:
+      {
+        auto input_data = genRandomIntData<uint8_t>(num_elems, std::numeric_limits<uint8_t>::min(),
+                                                    std::numeric_limits<uint8_t>::max());
+        _interpreter->writeInputTensor(input_node, input_data.data(),
+                                       input_data.size() * sizeof(uint8_t));
+        break;
+      }
+      case DataType::S16:
+      {
+        auto input_data = genRandomIntData<int16_t>(num_elems, std::numeric_limits<int16_t>::min(),
+                                                    std::numeric_limits<int16_t>::max());
+        _interpreter->writeInputTensor(input_node, input_data.data(),
+                                       input_data.size() * sizeof(int16_t));
+        break;
+      }
+      case DataType::S32:
+      {
+        // Synced with record-minmax (0, 100)
+        auto input_data = genRandomIntData<int32_t>(num_elems, 0, 100);
+        _interpreter->writeInputTensor(input_node, input_data.data(),
+                                       input_data.size() * sizeof(int32_t));
+        break;
+      }
+      case DataType::S64:
+      {
+        // Synced with record-minmax (0, 100)
+        auto input_data = genRandomIntData<int64_t>(num_elems, 0, 100);
+        _interpreter->writeInputTensor(input_node, input_data.data(),
+                                       input_data.size() * sizeof(int64_t));
+        break;
+      }
+      case DataType::BOOL:
+      {
+        // Bool is represented as uint8 (0 or 1)
+        auto input_data = genRandomIntData<uint8_t>(num_elems, 0, 1);
+        _interpreter->writeInputTensor(input_node, input_data.data(),
+                                       input_data.size() * sizeof(uint8_t));
+        break;
+      }
+      default:
+        throw std::runtime_error("Unsupported input data type in " + input_node->name());
+    }
+  }
+
+  _hooks->startNetworkExecution(_module->graph());
+  _interpreter->interpret();
+  _hooks->endNetworkExecution(_module->graph());
+
+  std::cout << "Finished executing a random input" << std::endl;
+  _hooks->endAnalysis();
+}
+
+} // namespace dalgona
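For reference, a minimal sketch of an HDF5 input file that runAnalysisWithH5Input() above could consume. Only the top-level group name "value" is confirmed by this file; the per-record/per-input layout (one subgroup per record index, one dataset per input index) and the h5py script itself are assumptions modeled on ONE's other HDF5-driven tools, not part of this commit.

import h5py          # assumption: h5py is available on the host
import numpy as np

# Assumed layout: /value/<record_index>/<input_index>
with h5py.File("input.h5", "w") as f:
    value = f.create_group("value")        # group opened via importGroup("value")
    record0 = value.create_group("0")      # first (and only) record
    data = np.random.uniform(-5, 5, size=(1, 3, 224, 224)).astype(np.float32)
    record0.create_dataset("0", data=data) # data for the model's first input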
diff --git a/compiler/dalgona/src/PostOperatorHook.h b/compiler/dalgona/src/PostOperatorHook.h
new file mode 100644 (file)
index 0000000..daf32f6
--- /dev/null
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_POST_OPERATOR_HOOK_H__
+#define __DALGONA_POST_OPERATOR_HOOK_H__
+
+#include "Utils.h"
+#include "StringUtils.h"
+
+#include <loco/IR/Node.h>
+#include <luci_interpreter/Interpreter.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <pybind11/embed.h>
+#include <vector>
+
+namespace py = pybind11;
+using namespace py::literals;
+
+namespace dalgona
+{
+
+// Invoke a user-written Python hook after an operator is executed
+class PostOperatorHook final : public luci::CircleNodeVisitor<void>
+{
+
+// This macro creates three variables used for post-operator hooks.
+// 1. hook: Python function to be invoked (type: py::object)
+// 2. inputs: input data (type: std::vector of numpy array)
+// 3. output: output data (type: numpy array)
+#define POST_OPERATOR_HOOK_PROLOGUE(OP_NAME)                \
+  if (!py::hasattr(_analysis, #OP_NAME "Post"))             \
+  {                                                         \
+    visit(loco::must_cast<const luci::CircleNode *>(node)); \
+    return;                                                 \
+  }                                                         \
+  py::object hook = _analysis.attr(#OP_NAME "Post");        \
+  auto inputs = inputsPyArray(node, _interpreter);          \
+  auto output = outputPyArray(node, _interpreter);
+
+// Multi-output version of POST_OPERATOR_HOOK_PROLOGUE
+#define POST_OPERATOR_HOOK_PROLOGUE_MULTI_OUTS(OP_NAME)     \
+  if (!py::hasattr(_analysis, #OP_NAME "Post"))             \
+  {                                                         \
+    visit(loco::must_cast<const luci::CircleNode *>(node)); \
+    return;                                                 \
+  }                                                         \
+  py::object hook = _analysis.attr(#OP_NAME "Post");        \
+  auto inputs = inputsPyArray(node, _interpreter);          \
+  auto outputs = outputsPyArray(node, _interpreter);
+
+private:
+  py::object _analysis;
+  luci_interpreter::Interpreter *_interpreter{nullptr};
+
+public:
+  explicit PostOperatorHook(py::object analysis, luci_interpreter::Interpreter *interpreter)
+    : _analysis(analysis), _interpreter(interpreter)
+  {
+    // Do nothing
+  }
+
+  // default
+  void visit(const luci::CircleNode *node)
+  {
+    if (not py::hasattr(_analysis, "DefaultOpPost"))
+      return;
+
+    py::object hook = _analysis.attr("DefaultOpPost");
+    auto inputs = inputsPyArray(node, _interpreter);
+    auto output = outputPyArray(node, _interpreter);
+
+    py::list input_list;
+    for (uint32_t i = 0; i < inputs.size(); i++)
+    {
+      input_list.append(inputs[i]);
+    }
+
+    pySafeCall(hook,
+               node->name(),             // name
+               toString(node->opcode()), // opcode
+               input_list,               // list of inputs
+               output                    // output
+    );
+  }
+
+  void visit(const luci::CircleConv2D *node)
+  {
+    POST_OPERATOR_HOOK_PROLOGUE(Conv2D)
+
+    auto padding = node->padding();
+    auto stride = node->stride();
+    auto dilation = node->dilation();
+
+    auto py_stride = py::dict("w"_a = stride->w(), "h"_a = stride->h());
+    auto py_dilation = py::dict("w"_a = dilation->w(), "h"_a = dilation->h());
+
+    auto fused_act = node->fusedActivationFunction();
+
+    pySafeCall(hook,
+               node->name(),                                      // name
+               inputs[0],                                         // input
+               inputs[1],                                         // filter
+               inputs[2],                                         // bias
+               padding == luci::Padding::SAME ? "SAME" : "VALID", // padding
+               py_stride,                                         // stride
+               py_dilation,                                       // dilation
+               output,                                            // output
+               toString(fused_act)                                // fused activation
+    );
+  }
+
+  void visit(const luci::CircleDepthwiseConv2D *node)
+  {
+    POST_OPERATOR_HOOK_PROLOGUE(DepthwiseConv2D)
+
+    auto padding = node->padding();
+    auto stride = node->stride();
+    auto dilation = node->dilation();
+    auto depthMultiplier = node->depthMultiplier();
+
+    auto py_stride = py::dict("w"_a = stride->w(), "h"_a = stride->h());
+    auto py_dilation = py::dict("w"_a = dilation->w(), "h"_a = dilation->h());
+
+    auto fused_act = node->fusedActivationFunction();
+
+    pySafeCall(hook,
+               node->name(),                                      // name
+               inputs[0],                                         // input
+               inputs[1],                                         // filter
+               inputs[2],                                         // bias
+               padding == luci::Padding::SAME ? "SAME" : "VALID", // padding
+               py_stride,                                         // stride
+               depthMultiplier,                                   // depthMultiplier
+               py_dilation,                                       // dilation
+               output,                                            // output
+               toString(fused_act)                                // fused activation
+    );
+  }
+
+  void visit(const luci::CircleAdd *node)
+  {
+    POST_OPERATOR_HOOK_PROLOGUE(Add)
+
+    auto fused_act = node->fusedActivationFunction();
+
+    pySafeCall(hook,
+               node->name(),       // name
+               inputs[0],          // x
+               inputs[1],          // y
+               output,             // output
+               toString(fused_act) // fused activation
+    );
+  }
+
+  void visit(const luci::CircleFullyConnected *node)
+  {
+    POST_OPERATOR_HOOK_PROLOGUE(FullyConnected)
+
+    auto fused_act = node->fusedActivationFunction();
+
+    pySafeCall(hook,
+               node->name(),       // name
+               inputs[0],          // input
+               inputs[1],          // weights
+               inputs[2],          // bias
+               output,             // output
+               toString(fused_act) // fused activation
+    );
+  }
+
+  void visit(const luci::CircleTransposeConv *node)
+  {
+    POST_OPERATOR_HOOK_PROLOGUE(TransposeConv)
+
+    auto padding = node->padding();
+    auto stride = node->stride();
+
+    auto py_stride = py::dict("w"_a = stride->w(), "h"_a = stride->h());
+
+    pySafeCall(hook,
+               node->name(),                                      // name
+               inputs[2],                                         // input
+               inputs[1],                                         // filter
+               inputs[0],                                         // output shape
+               inputs.size() == 4 ? inputs[3] : none(),           // bias
+               padding == luci::Padding::SAME ? "SAME" : "VALID", // padding
+               py_stride,                                         // stride
+               output                                             // output
+    );
+  }
+
+  void visit(const luci::CircleInstanceNorm *node)
+  {
+    POST_OPERATOR_HOOK_PROLOGUE(InstanceNorm)
+
+    auto epsilon = node->epsilon();
+
+    auto fused_act = node->fusedActivationFunction();
+
+    pySafeCall(hook,
+               node->name(),       // name
+               inputs[0],          // input
+               inputs[1],          // gamma
+               inputs[2],          // beta
+               epsilon,            // epsilon
+               output,             // output
+               toString(fused_act) // fused activation
+    );
+  }
+
+  void visit(const luci::CircleSplit *node)
+  {
+    POST_OPERATOR_HOOK_PROLOGUE_MULTI_OUTS(Split)
+
+    py::list output_list;
+    for (uint32_t i = 0; i < outputs.size(); i++)
+    {
+      output_list.append(outputs[i]);
+    }
+
+    auto num_split = node->num_split();
+
+    pySafeCall(hook,
+               node->name(), // name
+               inputs[0],    // split_dim
+               inputs[1],    // input
+               num_split,    // num_split
+               output_list   // list of outputs
+    );
+  }
+
+#undef POST_OPERATOR_HOOK_PROLOGUE_MULTI_OUTS
+};
+
+} // namespace dalgona
+
+#endif // __DALGONA_POST_OPERATOR_HOOK_H__
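The Python side of these post-operator hooks can be sketched as follows (illustrative, not part of this commit). The method names and the argument order are dictated by the pySafeCall invocations above; the parameter names are free to choose because the hooks are called positionally. The pre-operator hooks in PreOperatorHook.h receive the same arguments minus the output(s).

# MyAnalysis.py (illustrative sketch of per-operator post hooks)
class MyAnalysis:
    # Fallback for operators without a dedicated hook
    def DefaultOpPost(self, name, opcode, inputs, output):
        print(name, opcode, output["name"])

    # Conv2D-specific hook: stride/dilation are dicts with "w"/"h" keys,
    # padding is "SAME" or "VALID", fused_act is a lower-case string
    def Conv2DPost(self, name, input, filter, bias, padding, stride, dilation, output, fused_act):
        print(name, padding, stride["w"], stride["h"], fused_act)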
diff --git a/compiler/dalgona/src/PreOperatorHook.h b/compiler/dalgona/src/PreOperatorHook.h
new file mode 100644 (file)
index 0000000..eb6a95e
--- /dev/null
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_PRE_OPERATOR_HOOK_H__
+#define __DALGONA_PRE_OPERATOR_HOOK_H__
+
+#include "Utils.h"
+#include "StringUtils.h"
+
+#include <loco/IR/Node.h>
+#include <luci_interpreter/Interpreter.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <pybind11/embed.h>
+#include <vector>
+
+namespace py = pybind11;
+using namespace py::literals;
+
+namespace dalgona
+{
+
+// Invoke a user-written Python hook before an operator is executed
+class PreOperatorHook final : public luci::CircleNodeVisitor<void>
+{
+
+// This macro creates two variables used for pre-operator hooks.
+// 1. hook: Python function to be invoked (type: py::object)
+// 2. inputs: input data (type: std::vector of numpy array)
+#define PRE_OPERATOR_HOOK_PROLOGUE(OP_NAME)                 \
+  if (!py::hasattr(_analysis, #OP_NAME "Pre"))              \
+  {                                                         \
+    visit(loco::must_cast<const luci::CircleNode *>(node)); \
+    return;                                                 \
+  }                                                         \
+  py::object hook = _analysis.attr(#OP_NAME "Pre");         \
+  auto inputs = inputsPyArray(node, _interpreter);
+
+private:
+  py::object _analysis;
+  luci_interpreter::Interpreter *_interpreter{nullptr};
+
+public:
+  explicit PreOperatorHook(py::object analysis, luci_interpreter::Interpreter *interpreter)
+    : _analysis(analysis), _interpreter(interpreter)
+  {
+    // Do nothing
+  }
+
+  // default
+  void visit(const luci::CircleNode *node)
+  {
+    if (not py::hasattr(_analysis, "DefaultOpPre"))
+      return;
+
+    py::object hook = _analysis.attr("DefaultOpPre");
+    auto inputs = inputsPyArray(node, _interpreter);
+
+    py::list input_list;
+    for (uint32_t i = 0; i < inputs.size(); i++)
+    {
+      input_list.append(inputs[i]);
+    }
+
+    pySafeCall(hook,
+               node->name(),             // name
+               toString(node->opcode()), // opcode
+               input_list                // list of inputs
+    );
+  }
+
+  void visit(const luci::CircleConv2D *node)
+  {
+    PRE_OPERATOR_HOOK_PROLOGUE(Conv2D)
+
+    auto padding = node->padding();
+    auto stride = node->stride();
+    auto dilation = node->dilation();
+
+    auto py_stride = py::dict("w"_a = stride->w(), "h"_a = stride->h());
+    auto py_dilation = py::dict("w"_a = dilation->w(), "h"_a = dilation->h());
+
+    auto fused_act = node->fusedActivationFunction();
+
+    pySafeCall(hook,
+               node->name(),                                      // name
+               inputs[0],                                         // input
+               inputs[1],                                         // filter
+               inputs[2],                                         // bias
+               padding == luci::Padding::SAME ? "SAME" : "VALID", // padding
+               py_stride,                                         // stride
+               py_dilation,                                       // dilation
+               toString(fused_act)                                // fused activation
+    );
+  }
+
+  void visit(const luci::CircleDepthwiseConv2D *node)
+  {
+    PRE_OPERATOR_HOOK_PROLOGUE(DepthwiseConv2D)
+
+    auto padding = node->padding();
+    auto stride = node->stride();
+    auto dilation = node->dilation();
+    auto depthMultiplier = node->depthMultiplier();
+
+    auto py_stride = py::dict("w"_a = stride->w(), "h"_a = stride->h());
+    auto py_dilation = py::dict("w"_a = dilation->w(), "h"_a = dilation->h());
+
+    auto fused_act = node->fusedActivationFunction();
+
+    pySafeCall(hook,
+               node->name(),                                      // name
+               inputs[0],                                         // input
+               inputs[1],                                         // filter
+               inputs[2],                                         // bias
+               padding == luci::Padding::SAME ? "SAME" : "VALID", // padding
+               py_stride,                                         // stride
+               depthMultiplier,                                   // depthMultiplier
+               py_dilation,                                       // dilation
+               toString(fused_act)                                // fused activation
+    );
+  }
+
+  void visit(const luci::CircleAdd *node)
+  {
+    PRE_OPERATOR_HOOK_PROLOGUE(Add)
+
+    auto fused_act = node->fusedActivationFunction();
+
+    pySafeCall(hook,
+               node->name(),       // name
+               inputs[0],          // x
+               inputs[1],          // y
+               toString(fused_act) // fused activation
+    );
+  }
+
+  void visit(const luci::CircleFullyConnected *node)
+  {
+    PRE_OPERATOR_HOOK_PROLOGUE(FullyConnected)
+
+    auto fused_act = node->fusedActivationFunction();
+
+    pySafeCall(hook,
+               node->name(),       // name
+               inputs[0],          // input
+               inputs[1],          // weights
+               inputs[2],          // bias
+               toString(fused_act) // fused activation
+    );
+  }
+
+  void visit(const luci::CircleTransposeConv *node)
+  {
+    PRE_OPERATOR_HOOK_PROLOGUE(TransposeConv)
+
+    auto padding = node->padding();
+    auto stride = node->stride();
+
+    auto py_stride = py::dict("w"_a = stride->w(), "h"_a = stride->h());
+
+    pySafeCall(hook,
+               node->name(),                                      // name
+               inputs[2],                                         // input
+               inputs[1],                                         // filter
+               inputs[0],                                         // output shape
+               inputs.size() == 4 ? inputs[3] : none(),           // bias
+               padding == luci::Padding::SAME ? "SAME" : "VALID", // padding
+               py_stride                                          // stride
+    );
+  }
+
+  void visit(const luci::CircleInstanceNorm *node)
+  {
+    PRE_OPERATOR_HOOK_PROLOGUE(InstanceNorm)
+
+    auto epsilon = node->epsilon();
+
+    auto fused_act = node->fusedActivationFunction();
+
+    pySafeCall(hook,
+               node->name(),       // name
+               inputs[0],          // input
+               inputs[1],          // gamma
+               inputs[2],          // beta
+               epsilon,            // epsilon
+               toString(fused_act) // fused activation
+    );
+  }
+
+  void visit(const luci::CircleSplit *node)
+  {
+    PRE_OPERATOR_HOOK_PROLOGUE(Split)
+
+    auto num_split = node->num_split();
+
+    pySafeCall(hook,
+               node->name(), // name
+               inputs[0],    // split_dim
+               inputs[1],    // input
+               num_split     // num_split
+    );
+  }
+};
+
+} // namespace dalgona
+
+#endif // __DALGONA_PRE_OPERATOR_HOOK_H__
diff --git a/compiler/dalgona/src/PythonHooks.cpp b/compiler/dalgona/src/PythonHooks.cpp
new file mode 100644 (file)
index 0000000..8447699
--- /dev/null
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PythonHooks.h"
+#include "PostOperatorHook.h"
+#include "PreOperatorHook.h"
+#include "Utils.h"
+
+#include <loco/IR/Graph.h>
+
+namespace dalgona
+{
+
+void PythonHooks::preOperatorExecute(const luci::CircleNode *node)
+{
+  PreOperatorHook hook(_analysis, _interpreter);
+  node->accept(&hook);
+}
+
+void PythonHooks::postOperatorExecute(const luci::CircleNode *node)
+{
+  PostOperatorHook hook(_analysis, _interpreter);
+  node->accept(&hook);
+}
+
+void PythonHooks::importAnalysis(const std::string &analysis_path, py::object &globals,
+                                 const std::string &analysis_args)
+{
+  const auto base_filename = analysis_path.substr(analysis_path.find_last_of("/\\") + 1);
+  // The module name must be the same as the name of the Python file
+  // ex: base_filename = MyAnalysis.py -> module_name = MyAnalysis
+  const auto module_name = base_filename.substr(0, base_filename.find_last_of('.'));
+
+  py::dict locals;
+  locals["path"] = py::cast(analysis_path);
+
+  py::eval<py::eval_statements>("import sys\n"
+                                "import os\n"
+                                "sys.path.append(os.path.dirname(path))\n"
+                                "import " +
+                                  module_name +
+                                  "\n"
+                                  "analysis = " +
+                                  module_name + "." + module_name + "()",
+                                globals, locals);
+
+  _analysis = locals["analysis"];
+
+  if (py::hasattr(_analysis, "StartAnalysis"))
+    pySafeCall(_analysis.attr("StartAnalysis"), analysis_args);
+}
+
+void PythonHooks::startNetworkExecution(loco::Graph *graph)
+{
+  if (!py::hasattr(_analysis, "StartNetworkExecution"))
+    return;
+
+  assert(graph != nullptr); // FIX_CALLER_UNLESS
+
+  const auto input_nodes = loco::input_nodes(graph);
+  py::list inputs;
+  // Assumption: input_nodes is iterated in the same order as the model inputs
+  for (const auto input_node : input_nodes)
+  {
+    auto circle_node = loco::must_cast<luci::CircleInput *>(input_node);
+    inputs.append(outputPyArray(circle_node, _interpreter));
+  }
+  pySafeCall(_analysis.attr("StartNetworkExecution"), inputs);
+}
+
+void PythonHooks::endNetworkExecution(loco::Graph *graph)
+{
+  if (!py::hasattr(_analysis, "EndNetworkExecution"))
+    return;
+
+  assert(graph != nullptr); // FIX_CALLER_UNLESS
+
+  const auto output_nodes = loco::output_nodes(graph);
+  py::list outputs;
+  // Assumption: output_nodes is iterated in the same order as the model outputs
+  for (const auto output_node : output_nodes)
+  {
+    auto circle_node = loco::must_cast<luci::CircleOutput *>(output_node);
+    outputs.append(
+      outputPyArray(loco::must_cast<luci::CircleNode *>(circle_node->from()), _interpreter));
+  }
+  pySafeCall(_analysis.attr("EndNetworkExecution"), outputs);
+}
+
+void PythonHooks::endAnalysis()
+{
+  if (py::hasattr(_analysis, "EndAnalysis"))
+    pySafeCall(_analysis.attr("EndAnalysis"));
+}
+
+} // namespace dalgona
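As the comment in importAnalysis() notes, the analysis must live in a Python file whose base name matches the class it defines (e.g. MyAnalysis.py defining class MyAnalysis). A sketch of the lifecycle hooks this file invokes, with illustrative bodies only:

# MyAnalysis.py (illustrative sketch of the lifecycle hooks)
class MyAnalysis:
    def StartAnalysis(self, args):
        # args: the analysis_args string forwarded by importAnalysis()
        self.num_runs = 0

    def StartNetworkExecution(self, inputs):
        # inputs: list of tensor dicts for the model inputs
        self.num_runs += 1

    def EndNetworkExecution(self, outputs):
        # outputs: list of tensor dicts for the model outputs
        pass

    def EndAnalysis(self):
        print("networks executed:", self.num_runs)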
diff --git a/compiler/dalgona/src/RandomUtils.cpp b/compiler/dalgona/src/RandomUtils.cpp
new file mode 100644 (file)
index 0000000..a8e32b3
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "RandomUtils.h"
+
+#include <random>
+#include <vector>
+#include <cassert>
+
+namespace dalgona
+{
+
+std::vector<float> genRandomFloatData(uint32_t num_elements, float min, float max)
+{
+  if (min > max)
+    throw std::invalid_argument("min is greater than max");
+
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_real_distribution<> dist(min, max);
+  std::vector<float> buffer(num_elements);
+
+  // Write random data
+  for (auto &iter : buffer)
+    iter = static_cast<float>(dist(gen));
+
+  return buffer;
+}
+
+} // namespace dalgona
diff --git a/compiler/dalgona/src/RandomUtils.h b/compiler/dalgona/src/RandomUtils.h
new file mode 100644 (file)
index 0000000..6f6f48f
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_RANDOM_UTILS_H__
+#define __DALGONA_RANDOM_UTILS_H__
+
+#include <cstdint>
+#include <vector>
+#include <random>
+#include <stdexcept>
+
+namespace dalgona
+{
+
+template <typename T> std::vector<T> genRandomIntData(uint32_t num_elements, T min, T max)
+{
+  if (min > max)
+    throw std::invalid_argument("min is greater than max");
+
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_int_distribution<T> dist(min, max);
+  std::vector<T> buffer(num_elements);
+
+  // Write random data
+  for (auto &iter : buffer)
+    iter = dist(gen);
+
+  return buffer;
+}
+
+std::vector<float> genRandomFloatData(uint32_t num_elements, float min, float max);
+
+} // namespace dalgona
+
+#endif // __DALGONA_RANDOM_UTILS_H__
diff --git a/compiler/dalgona/src/RandomUtils.test.cpp b/compiler/dalgona/src/RandomUtils.test.cpp
new file mode 100644 (file)
index 0000000..b04d8b8
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "RandomUtils.h"
+
+#include <gtest/gtest.h>
+
+using namespace dalgona;
+
+TEST(DalgonaUtilTest, gen_random_int32)
+{
+  const uint32_t num_elements = 10;
+  const int32_t min = -5;
+  const int32_t max = 5;
+  std::vector<int32_t> buffer = genRandomIntData<int32_t>(num_elements, min, max);
+
+  EXPECT_EQ(num_elements, buffer.size());
+  for (auto val : buffer)
+  {
+    EXPECT_TRUE(val >= min and val <= max);
+  }
+}
+
+TEST(DalgonaUtilTest, gen_random_int32_NEG)
+{
+  const uint32_t num_elements = 10;
+  const int32_t min = 5;
+  const int32_t max = -5;
+  EXPECT_ANY_THROW(genRandomIntData<int32_t>(num_elements, min, max));
+}
+
+TEST(DalgonaUtilTest, gen_random_float)
+{
+  const uint32_t num_elements = 10;
+  const float min = -5;
+  const float max = 5;
+  std::vector<float> buffer = genRandomFloatData(num_elements, min, max);
+
+  EXPECT_EQ(num_elements, buffer.size());
+  for (auto val : buffer)
+  {
+    EXPECT_TRUE(val >= min and val <= max);
+  }
+}
+
+TEST(DalgonaUtilTest, gen_random_float_NEG)
+{
+  const uint32_t num_elements = 10;
+  const float min = 5;
+  const float max = -5;
+  EXPECT_ANY_THROW(genRandomFloatData(num_elements, min, max));
+}
diff --git a/compiler/dalgona/src/StringUtils.cpp b/compiler/dalgona/src/StringUtils.cpp
new file mode 100644 (file)
index 0000000..423b488
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "StringUtils.h"
+
+#include <luci/IR/CircleNodeDecl.h>
+
+#include <cassert>
+
+namespace dalgona
+{
+
+const std::string toString(luci::CircleOpcode opcode)
+{
+  static const char *names[] = {
+#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) #CIRCLE_CLASS,
+#define CIRCLE_VNODE(OPCODE, CIRCLE_CLASS) #CIRCLE_CLASS,
+#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_NODE
+#undef CIRCLE_VNODE
+  };
+
+  auto const node_name = names[static_cast<int>(opcode)];
+
+  assert(std::string(node_name).substr(0, 6) == "Circle"); // FIX_ME_UNLESS
+
+  // Return substring of class name ("Circle" is sliced out)
+  // Ex: Return "Conv2D" for "CircleConv2D" node
+  return std::string(node_name).substr(6);
+}
+
+const std::string toString(luci::FusedActFunc fused_act)
+{
+  switch (fused_act)
+  {
+    case (luci::FusedActFunc::UNDEFINED):
+      return std::string("undefined");
+    case (luci::FusedActFunc::NONE):
+      return std::string("none");
+    case (luci::FusedActFunc::RELU):
+      return std::string("relu");
+    case (luci::FusedActFunc::RELU_N1_TO_1):
+      return std::string("relu_n1_to_1");
+    case (luci::FusedActFunc::RELU6):
+      return std::string("relu6");
+    case (luci::FusedActFunc::TANH):
+      return std::string("tanh");
+    case (luci::FusedActFunc::SIGN_BIT):
+      return std::string("sign_bit");
+    default:
+      throw std::runtime_error("Unsupported activation function");
+  }
+}
+
+} // namespace dalgona
diff --git a/compiler/dalgona/src/StringUtils.h b/compiler/dalgona/src/StringUtils.h
new file mode 100644 (file)
index 0000000..ad9d061
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_STRING_UTILS_H__
+#define __DALGONA_STRING_UTILS_H__
+
+#include <luci/IR/CircleOpcode.h>
+#include <luci/IR/AttrFusedActFunc.h>
+
+#include <string>
+
+namespace dalgona
+{
+
+const std::string toString(luci::CircleOpcode opcode);
+
+const std::string toString(luci::FusedActFunc fused_act);
+
+} // namespace dalgona
+
+#endif // __DALGONA_STRING_UTILS_H__
diff --git a/compiler/dalgona/src/StringUtils.test.cpp b/compiler/dalgona/src/StringUtils.test.cpp
new file mode 100644 (file)
index 0000000..e795a47
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "StringUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/AttrFusedActFunc.h>
+
+#include <gtest/gtest.h>
+
+using namespace dalgona;
+
+TEST(DalgonaUtilTest, toString_basic)
+{
+  luci::CircleConv2D node;
+
+  EXPECT_EQ("Conv2D", toString(node.opcode()));
+}
+
+TEST(DalgonaUtilTest, toString_fused_act_func)
+{
+  EXPECT_EQ("undefined", toString(luci::FusedActFunc::UNDEFINED));
+  EXPECT_EQ("none", toString(luci::FusedActFunc::NONE));
+  EXPECT_EQ("relu", toString(luci::FusedActFunc::RELU));
+  EXPECT_EQ("relu6", toString(luci::FusedActFunc::RELU6));
+  EXPECT_EQ("relu_n1_to_1", toString(luci::FusedActFunc::RELU_N1_TO_1));
+  EXPECT_EQ("tanh", toString(luci::FusedActFunc::TANH));
+  EXPECT_EQ("sign_bit", toString(luci::FusedActFunc::SIGN_BIT));
+}
diff --git a/compiler/dalgona/src/Utils.cpp b/compiler/dalgona/src/Utils.cpp
new file mode 100644 (file)
index 0000000..f65d9c2
--- /dev/null
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Utils.h"
+#include "StringUtils.h"
+
+#include <luci_interpreter/core/Tensor.h>
+#include <luci/IR/CircleOpcode.h>
+#include <luci/IR/CircleNodeDecl.h>
+
+#include <pybind11/numpy.h>
+#include <stdexcept>
+#include <vector>
+
+using Tensor = luci_interpreter::Tensor;
+
+namespace py = pybind11;
+using namespace py::literals;
+
+#define THROW_UNLESS(COND, MSG) \
+  if (not(COND))                \
+    throw std::runtime_error(MSG);
+
+namespace
+{
+
+py::array numpyArray(const Tensor *tensor)
+{
+  assert(tensor != nullptr); // FIX_CALLER_UNLESS
+
+  const auto tensor_shape = tensor->shape();
+
+  uint32_t size = 1;
+  std::vector<uint32_t> shape(tensor_shape.num_dims());
+  for (int i = 0; i < tensor_shape.num_dims(); i++)
+  {
+    THROW_UNLESS(tensor_shape.dim(i) >= 0, "Negative dimension detected in " + tensor->name());
+
+    shape[i] = tensor_shape.dim(i);
+    size *= shape[i];
+  }
+
+  if (size == 0)
+    return py::none();
+
+  switch (tensor->element_type())
+  {
+    case loco::DataType::FLOAT32:
+      return py::array_t<float, py::array::c_style>(shape, tensor->data<float>());
+    case loco::DataType::S16:
+      return py::array_t<int16_t, py::array::c_style>(shape, tensor->data<int16_t>());
+    case loco::DataType::S32:
+      return py::array_t<int32_t, py::array::c_style>(shape, tensor->data<int32_t>());
+    case loco::DataType::S64:
+      return py::array_t<int64_t, py::array::c_style>(shape, tensor->data<int64_t>());
+    case loco::DataType::U8:
+      return py::array_t<uint8_t, py::array::c_style>(shape, tensor->data<uint8_t>());
+    default:
+      throw std::runtime_error("Unsupported data type");
+  }
+}
+
+py::dict quantparam(const Tensor *tensor)
+{
+  assert(tensor != nullptr); // FIX_CALLER_UNLESS
+
+  auto scale = tensor->scales();
+  auto zp = tensor->zero_points();
+
+  py::list py_scale;
+  for (auto s : scale)
+  {
+    py_scale.append(s);
+  }
+
+  py::list py_zp;
+  for (auto z : zp)
+  {
+    py_zp.append(z);
+  }
+
+  auto quantparam = py::dict("scale"_a = py_scale, "zero_point"_a = py_zp,
+                             "quantized_dimension"_a = tensor->quantized_dimension());
+  return quantparam;
+}
+
+} // namespace
+
+namespace dalgona
+{
+
+py::object none() { return py::none(); }
+
+std::vector<py::dict> inputsPyArray(const luci::CircleNode *node,
+                                    luci_interpreter::Interpreter *interpreter)
+{
+  assert(node != nullptr);        // FIX_CALLER_UNLESS
+  assert(interpreter != nullptr); // FIX_CALLER_UNLESS
+
+  std::vector<py::dict> inputs;
+  for (uint32_t i = 0; i < node->arity(); ++i)
+  {
+    const auto input_tensor = interpreter->getTensor(node->arg(i));
+    auto circle_node = static_cast<luci::CircleNode *>(node->arg(i));
+
+    // skip invalid inputs (e.g., non-existing bias in TCONV)
+    if (circle_node->opcode() == luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE)
+      continue;
+
+    auto py_input =
+      py::dict("name"_a = circle_node->name(), "data"_a = numpyArray(input_tensor),
+               "quantparam"_a = quantparam(input_tensor),
+               "is_const"_a = circle_node->opcode() == luci::CircleOpcode::CIRCLECONST);
+    inputs.push_back(py_input);
+  }
+  return inputs;
+}
+
+std::vector<py::dict> outputsPyArray(const luci::CircleNode *node,
+                                     luci_interpreter::Interpreter *interpreter)
+{
+  std::vector<py::dict> outputs;
+  for (auto succ : loco::succs(node))
+  {
+    const auto output_tensor = interpreter->getTensor(succ);
+    auto circle_node = static_cast<luci::CircleNode *>(succ);
+
+    auto opcode_str = toString(circle_node->opcode());
+    // Check if node is a multi-output node
+    // Assumption: Multi-output virtual nodes have an 'Out' suffix
+    // TODO Fix this if the assumption changes
+    THROW_UNLESS(opcode_str.substr(opcode_str.length() - 3) == "Out",
+                 "Invalid output detected in " + node->name());
+
+    auto py_output =
+      py::dict("name"_a = circle_node->name(), "data"_a = numpyArray(output_tensor),
+               "quantparam"_a = quantparam(output_tensor),
+               "is_const"_a = circle_node->opcode() == luci::CircleOpcode::CIRCLECONST);
+    outputs.push_back(py_output);
+  }
+  return outputs;
+}
+
+// Note: Only returns 1 output
+py::dict outputPyArray(const luci::CircleNode *node, luci_interpreter::Interpreter *interpreter)
+{
+  assert(node != nullptr);        // FIX_CALLER_UNLESS
+  assert(interpreter != nullptr); // FIX_CALLER_UNLESS
+
+  const auto tensor = interpreter->getTensor(node);
+
+  THROW_UNLESS(tensor != nullptr, "Null tensor detected in " + node->name());
+
+  auto py_output = py::dict("name"_a = node->name(), "data"_a = numpyArray(tensor),
+                            "quantparam"_a = quantparam(tensor),
+                            "is_const"_a = node->opcode() == luci::CircleOpcode::CIRCLECONST);
+  return py_output;
+}
+
+} // namespace dalgona
+
+#undef THROW_UNLESS
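Every dict built above carries the same four keys ("name", "data", "quantparam", "is_const"), and "quantparam" is itself a dict with "scale", "zero_point" and "quantized_dimension". An illustrative sketch of a hook body unpacking them (the method belongs to an analysis class such as the one sketched earlier):

# Illustrative only: unpacking the dicts produced by inputsPyArray/outputPyArray
def DefaultOpPost(self, name, opcode, inputs, output):
    for t in inputs:
        data = t["data"]          # numpy array, or None for zero-sized tensors
        qparam = t["quantparam"]  # {"scale": [...], "zero_point": [...], "quantized_dimension": n}
        if t["is_const"] and data is not None:
            print(name, "const input", t["name"], data.shape, qparam["scale"])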
diff --git a/compiler/dalgona/src/Utils.h b/compiler/dalgona/src/Utils.h
new file mode 100644 (file)
index 0000000..9ca920f
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_UTILS_H__
+#define __DALGONA_UTILS_H__
+
+#include <luci_interpreter/Interpreter.h>
+
+#include <pybind11/embed.h>
+
+namespace py = pybind11;
+
+namespace dalgona
+{
+
+template <typename... Args> void pySafeCall(py::object func, Args... args)
+{
+  try
+  {
+    func(args...);
+  }
+  catch (py::error_already_set &e)
+  {
+    throw std::runtime_error(e.what());
+  }
+}
+
+py::dict outputPyArray(const luci::CircleNode *node, luci_interpreter::Interpreter *interpreter);
+
+// Return a vector of Tensors (py::dict) which correspond to the node's inputs
+std::vector<py::dict> inputsPyArray(const luci::CircleNode *node,
+                                    luci_interpreter::Interpreter *interpreter);
+
+// Return a vector of Tensors (py::dict) which correspond to the outputs of a multi-output node
+// (ex: SPLIT)
+std::vector<py::dict> outputsPyArray(const luci::CircleNode *node,
+                                     luci_interpreter::Interpreter *interpreter);
+
+py::object none();
+
+} // namespace dalgona
+
+#endif // __DALGONA_UTILS_H__
index c25dc5f..a920e08 100755 (executable)
@@ -234,4 +234,22 @@ tensor_dtype()
   echo ${ACTUAL}
 }
 
+const_count()
+{
+  argc_check $# 1
+  file_path_check ${COMPILED_FILE}
+  file_path_check ${INSPECT_PROG_PATH}
+
+  set -o pipefail
+
+  RESULT=`init_error_log ; ${INSPECT_PROG_PATH} --constants ${COMPILED_FILE}`
+  check_success_exit_code $? 0
+
+  # note : grep's exit code is 2 in case of error.
+  ACTUAL=`init_error_log ; echo "${RESULT}" | grep -wc "$1"`
+  check_error_exit_code $? 2
+
+  echo ${ACTUAL}
+}
+
 # TODO define more quality test functions
index a820aba..176e6cc 100644 (file)
@@ -264,7 +264,6 @@ struct GraphOutputIndexQueryService : public DialectService
   virtual GraphOutputIndex index(const Node *node) const = 0;
 };
 
-// TODO Use "const Graph *"
 std::vector<Node *> output_nodes(Graph *);
 
 /**
index 837d293..99de8fc 100644 (file)
@@ -79,6 +79,18 @@ TEST(GraphTest, create_and_destroy_node)
   auto pull = g->nodes()->create<loco::Pull>();
 
   ASSERT_NO_THROW(g->nodes()->destroy(pull));
+}
+
+TEST(GraphTest, DISABLED_create_and_destroy_node_again)
+{
+  auto g = loco::make_graph();
+
+  auto pull = g->nodes()->create<loco::Pull>();
+
+  ASSERT_NO_THROW(g->nodes()->destroy(pull));
+  // NOTE calling destroy again raises Segmentation fault
+  //      refer https://github.com/Samsung/ONE/issues/9968
+  // TODO fix this
   ASSERT_THROW(g->nodes()->destroy(pull), std::invalid_argument);
 }
 
@@ -139,7 +151,6 @@ TEST(GraphTest, consturctor_with_param_node)
   ASSERT_FLOAT_EQ(test_node->f(), 11.11);
 
   ASSERT_NO_THROW(g->nodes()->destroy(test_node));
-  ASSERT_THROW(g->nodes()->destroy(test_node), std::invalid_argument);
 }
 
 TEST(GraphTest, getters_over_const_instance)
index 38e5a7a..682237f 100644 (file)
@@ -42,6 +42,10 @@ using namespace locomotiv;
 void execute_node(loco::TensorBroadcast *tensor_broadcast)
 {
   auto input_data = annot_data(tensor_broadcast->input());
+  if (input_data == nullptr)
+  {
+    throw std::runtime_error("Annotation is required for TensorBroadcast input");
+  }
 
   // Calculate output shape
   Shape input_shape = *(input_data->shape());
index 5d4ad5d..fb07128 100644 (file)
@@ -53,6 +53,7 @@ void execute_node(loco::TensorConstantPad *pad)
 
   auto constant_node = pad->constant();
   auto constant_data = annot_data(constant_node);
+  validate(constant_data != nullptr, "constant is not found");
   validate(constant_data->dtype() == input_data->dtype(), "constant and input have same data type");
   validate(constant_data->shape()->rank() == 1 && constant_data->shape()->dim(0) == 1,
            "constant should have one rank with one dimension at zero axis");
diff --git a/compiler/luci-compute/CMakeLists.txt b/compiler/luci-compute/CMakeLists.txt
new file mode 100644 (file)
index 0000000..b7ddb44
--- /dev/null
@@ -0,0 +1,23 @@
+nnas_find_package(TensorFlowSource EXACT 2.8.0 QUIET)
+nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.8.0 QUIET)
+nnas_find_package(TensorFlowRuySource EXACT 2.8.0 QUIET)
+
+if(NOT TensorFlowSource_FOUND)
+  message(STATUS "Build luci-compute: FAILED (missing TensorFlowSource 2.8.0)")
+  return()
+endif(NOT TensorFlowSource_FOUND)
+
+if(NOT TensorFlowGEMMLowpSource_FOUND)
+  message(STATUS "Build luci-compute: FAILED (missing TensorFlowGEMMLowpSource 2.8.0)")
+  return()
+endif(NOT TensorFlowGEMMLowpSource_FOUND)
+
+if(NOT TensorFlowRuySource_FOUND)
+  message(STATUS "Build luci-compute: FAILED (missing TensorFlowRuySource 2.8.0)")
+  return()
+endif(NOT TensorFlowRuySource_FOUND)
+
+add_library(luci_compute INTERFACE)
+target_include_directories(luci_compute SYSTEM INTERFACE "${TensorFlowSource_DIR}")
+target_include_directories(luci_compute SYSTEM INTERFACE "${TensorFlowGEMMLowpSource_DIR}")
+target_include_directories(luci_compute SYSTEM INTERFACE "${TensorFlowRuySource_DIR}")
diff --git a/compiler/luci-compute/README.md b/compiler/luci-compute/README.md
new file mode 100644 (file)
index 0000000..caa4ab4
--- /dev/null
@@ -0,0 +1,3 @@
+# luci-compute
+
+_luci-compute_ provides computation kernels for _luci_ and related modules.
index c67754a..e8cd4f8 100644 (file)
@@ -3,11 +3,7 @@ set(SRCS_EVAL_TESTER
    )
 
 add_executable(luci_eval_driver ${SRCS_EVAL_TESTER})
-target_link_libraries(luci_eval_driver PRIVATE oops)
-target_link_libraries(luci_eval_driver PRIVATE loco)
 target_link_libraries(luci_eval_driver PRIVATE luci_import)
-target_link_libraries(luci_eval_driver PRIVATE luci_export)
-target_link_libraries(luci_eval_driver PRIVATE luci_lang)
 target_link_libraries(luci_eval_driver PRIVATE luci_interpreter)
 target_link_libraries(luci_eval_driver PRIVATE safemain)
 
index 2904d9d..847eb87 100644 (file)
@@ -1,5 +1,3 @@
-require("oops")
-require("loco")
 require("luci")
 require("luci-interpreter")
 require("safemain")
index 0ed3543..8d844ab 100644 (file)
 
 #include <luci/ImporterEx.h>
 #include <luci_interpreter/Interpreter.h>
-#include <luci/CircleExporter.h>
-#include <luci/CircleFileExpContract.h>
 
 #include <cstdlib>
 #include <fstream>
+#include <iostream>
 #include <vector>
 #include <string>
 
index 4dd77ff..f8a4a80 100644 (file)
@@ -15,7 +15,7 @@
  */
 
 #ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
-#define LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
 
 #include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
 
index 1e6c41e..db31cbf 100644 (file)
@@ -1,3 +1,4 @@
+REGISTER_KERNEL(Abs)
 REGISTER_KERNEL(Add)
 REGISTER_KERNEL(ArgMax)
 REGISTER_KERNEL(AveragePool2D)
@@ -50,6 +51,7 @@ REGISTER_KERNEL(Pow)
 REGISTER_KERNEL(PRelu)
 REGISTER_KERNEL(Quantize)
 REGISTER_KERNEL(ReduceMax)
+REGISTER_KERNEL(ReduceProd)
 REGISTER_KERNEL(Relu)
 REGISTER_KERNEL(Relu6)
 REGISTER_KERNEL(Reshape)
@@ -74,5 +76,6 @@ REGISTER_KERNEL(SVDF)
 REGISTER_KERNEL(Tanh)
 REGISTER_KERNEL(Transpose)
 REGISTER_KERNEL(TransposeConv)
+REGISTER_KERNEL(UnidirectionalSequenceLSTM)
 REGISTER_KERNEL(Unpack)
 REGISTER_KERNEL(While)
index f0df58d..fe3f73f 100644 (file)
@@ -1,3 +1,4 @@
+REGISTER_KERNEL(Abs)
 REGISTER_KERNEL(Add)
 REGISTER_KERNEL(ArgMax)
 REGISTER_KERNEL(AveragePool2D)
index 4dd77ff..f8a4a80 100644 (file)
@@ -15,7 +15,7 @@
  */
 
 #ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
-#define LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
 
 #include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
 
index 6ad1f32..14bc75e 100644 (file)
@@ -53,7 +53,10 @@ void BuddyMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
   while (l < 32 && !_free_blocks[l])
     l++;
 
-  assert(l < 32);
+  if (l >= 32)
+  {
+    throw std::runtime_error{"Memory limit exceeded"};
+  }
 
   Block *tmp;
   tmp = _free_blocks[l];
index 6c0220c..45f3bfe 100644 (file)
@@ -218,6 +218,15 @@ struct TransposeConvParams
   int32_t stride_width;
 };
 
+struct UnidirectionalSequenceLSTMParams
+{
+  Activation activation;
+  float cell_clip;
+  float proj_clip;
+  bool time_major;
+  bool asymmetric_quantize_inputs;
+};
+
 struct UnpackParams
 {
   int axis;
diff --git a/compiler/luci-interpreter/src/kernels/Abs.cpp b/compiler/luci-interpreter/src/kernels/Abs.cpp
new file mode 100644 (file)
index 0000000..5c63315
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Abs.h"
+
+#include "kernels/Utils.h"
+
+#include <cmath> // abs for float
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Abs::Abs(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Abs::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  output()->resize(input()->shape());
+}
+
+void Abs::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      eval<float>();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+template <typename T> void Abs::eval() const
+{
+  const auto *input_data = input()->data<T>();
+  auto *output_data = output()->data<T>();
+
+  const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+
+  for (int i = 0; i < size; ++i)
+  {
+    output_data[i] = std::abs(input_data[i]);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Abs.h b/compiler/luci-interpreter/src/kernels/Abs.h
new file mode 100644 (file)
index 0000000..b5b874a
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ABS_H
+#define LUCI_INTERPRETER_KERNELS_ABS_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Abs : public Kernel
+{
+public:
+  Abs(const Tensor *input, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  template <typename T> void eval() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ABS_H
diff --git a/compiler/luci-interpreter/src/kernels/Abs.test.cpp b/compiler/luci-interpreter/src/kernels/Abs.test.cpp
new file mode 100644 (file)
index 0000000..2c42ab7
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Abs.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<T> input_data, std::initializer_list<T> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  constexpr DataType element_type = getElementType<T>();
+  Tensor input_tensor =
+    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(element_type);
+
+  Abs kernel(&input_tensor, &output_tensor);
+
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(AbsTest, FloatSimple)
+{
+  Check<float>(/*input_shape=*/{2, 3},
+               /*output_shape=*/{2, 3},
+               /*input_data=*/
+               {
+                 0.0f, -1.0f, 3.0f,  // Row 1
+                 1.0f, -1.0f, -2.0f, // Row 2
+               },
+               /*output_data=*/
+               {
+                 0.0f, 1.0f, 3.0f, // Row 1
+                 1.0f, 1.0f, 2.0f, // Row 2
+               });
+
+  SUCCEED();
+}
+
+TEST(AbsTest, Type_Mismatch_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<loco::DataType::S32>({3}, {1, -3, 2}, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+  Abs kernel(&input_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/ReduceProd.cpp b/compiler/luci-interpreter/src/kernels/ReduceProd.cpp
new file mode 100644 (file)
index 0000000..f3fc7d3
--- /dev/null
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReduceProd.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reduce.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Returns the number of axes that will be reduced. Removes duplicates.
+static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
+{
+  int reduction_count = num_axes;
+  for (int i = 0; i < num_axes; ++i)
+  {
+    int current = axes_data[i] >= 0 ? axes_data[i] : axes_data[i] + input_num_dims;
+    assert(current >= 0 && current < input_num_dims);
+    for (int j = 0; j < i; j++)
+    {
+      int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims;
+      // This checks for a duplicate axis
+      if (current == previous)
+      {
+        --reduction_count;
+        break;
+      }
+    }
+  }
+  return reduction_count;
+}
+
+static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
+                            bool keep_dims)
+{
+  int input_num_dims = input_shape.num_dims();
+  if (input_num_dims == 0)
+  {
+    return Shape(0);
+  }
+
+  if (keep_dims)
+  {
+    Shape output_shape(input_num_dims);
+    for (int idx = 0; idx < input_num_dims; ++idx)
+    {
+      bool is_axis = false;
+      for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+      {
+        if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+        {
+          is_axis = true;
+          break;
+        }
+      }
+      if (is_axis)
+      {
+        output_shape.dim(idx) = 1;
+      }
+      else
+      {
+        output_shape.dim(idx) = input_shape.dim(idx);
+      }
+    }
+    return output_shape;
+  }
+  else
+  {
+    int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims);
+    Shape output_shape(input_num_dims - num_reduce_axes);
+    int num_skip_axes = 0;
+    for (int idx = 0; idx < input_num_dims; ++idx)
+    {
+      bool is_axis = false;
+      for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+      {
+        if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+        {
+          ++num_skip_axes;
+          is_axis = true;
+          break;
+        }
+      }
+      if (!is_axis)
+      {
+        output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx);
+      }
+    }
+    return output_shape;
+  }
+}
+
+ReduceProd::ReduceProd(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+                       Tensor *resolved_axes, const ReducerParams &params)
+  : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes}, params)
+{
+}
+
+void ReduceProd::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
+
+  const Shape &input_shape = input()->shape();
+  int input_num_dims = input_shape.num_dims();
+
+  const auto *axes_data = getTensorData<int32_t>(axes());
+  int num_axes = axes()->shape().num_elements();
+  LUCI_INTERPRETER_CHECK(num_axes <= 4);
+
+  // We compute shapes of outputs in configure, assuming that outputs have
+  // static shape
+  // TODO Support dynamic shape
+  Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims);
+  output()->resize(output_shape);
+
+  auto temp_index = getOutputTensors()[1];
+  auto resolved_axes = getOutputTensors()[2];
+
+  temp_index->resize(Shape(input_num_dims));
+  resolved_axes->resize(Shape(num_axes));
+}
+
+void ReduceProd::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    // TODO Support quantized kernels
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+void ReduceProd::evalFloat() const
+{
+  const auto *axes_data = getTensorData<int32_t>(axes());
+  int num_axes = axes()->shape().num_elements();
+
+  auto temp_index = getOutputTensors()[1];
+  auto resolved_axes = getOutputTensors()[2];
+
+  int num_resolved_axis = 0;
+  LUCI_INTERPRETER_CHECK(
+    tflite::reference_ops::ResolveAxis(input()->shape().num_dims(), axes_data, num_axes,
+                                       getTensorData<int>(resolved_axes), &num_resolved_axis));
+
+  float init_value = 1.0;
+  tflite::reference_ops::ReduceGeneric<float>(
+    getTensorData<float>(input()), getTensorShape(input()).DimsData(), input()->shape().num_dims(),
+    getTensorData<float>(output()), getTensorShape(output()).DimsData(),
+    output()->shape().num_dims(), axes_data, num_axes, _params.keep_dims,
+    getTensorData<int>(temp_index), getTensorData<int>(resolved_axes), init_value,
+    [](const float current, const float in) -> float { return current * in; });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
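getOutputShape() above resolves negative axes, de-duplicates repeated ones, and either collapses each reduced dimension to 1 (keep_dims) or drops it. A standalone sketch of the same rule using std::vector instead of the interpreter's Shape class, for illustration only:

    #include <cstdio>
    #include <vector>

    // keep_dims=true keeps the rank and sets reduced dims to 1;
    // keep_dims=false removes the reduced dims entirely (duplicate axes collapse naturally).
    std::vector<int> reduced_shape(const std::vector<int> &in, const std::vector<int> &axes,
                                   bool keep_dims)
    {
      std::vector<int> out;
      const int rank = static_cast<int>(in.size());
      for (int d = 0; d < rank; ++d)
      {
        bool is_axis = false;
        for (int a : axes)
          if (a == d || a + rank == d) // also resolves negative axes
            is_axis = true;
        if (!is_axis)
          out.push_back(in[d]);
        else if (keep_dims)
          out.push_back(1);
      }
      return out;
    }

    int main()
    {
      // {4, 3, 2} reduced over axes {0, 2}: keep_dims=true -> 1 3 1, keep_dims=false -> 3
      for (bool kd : {true, false})
      {
        for (int d : reduced_shape({4, 3, 2}, {0, 2}, kd))
          std::printf("%d ", d);
        std::printf("\n");
      }
      return 0;
    }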
diff --git a/compiler/luci-interpreter/src/kernels/ReduceProd.h b/compiler/luci-interpreter/src/kernels/ReduceProd.h
new file mode 100644 (file)
index 0000000..d2f58cc
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_REDUCE_PROD_H
+#define LUCI_INTERPRETER_KERNELS_REDUCE_PROD_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ReduceProd : public KernelWithParams<ReducerParams>
+{
+public:
+  ReduceProd(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+             Tensor *resolved_axes, const ReducerParams &params);
+
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *axes() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_REDUCE_PROD_H
diff --git a/compiler/luci-interpreter/src/kernels/ReduceProd.test.cpp b/compiler/luci-interpreter/src/kernels/ReduceProd.test.cpp
new file mode 100644 (file)
index 0000000..fa46f39
--- /dev/null
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReduceProd.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ReduceProdTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ReduceProdTest, FloatNotKeepDims)
+{
+  std::vector<float> input_data = {1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,
+                                   9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+                                   17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+  std::vector<int32_t> axis_data{1, 0, -3, -3};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data, _memory_manager.get());
+  Tensor temp_index(DataType::S32, Shape({}), {}, "");
+  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  ReducerParams params{};
+  params.keep_dims = false;
+
+  ReduceProd kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes,
+                    params);
+  kernel.configure();
+  _memory_manager->allocate_memory(temp_index);
+  _memory_manager->allocate_memory(resolved_axes);
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{3.162341376e+11, 1.9619905536e+12};
+  std::initializer_list<int32_t> ref_output_shape{2};
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(ReduceProdTest, FloatKeepDims)
+{
+  std::vector<float> input_data = {1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,
+                                   9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+                                   17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+  std::vector<int32_t> axis_data{0, 2};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+  Tensor temp_index(DataType::S32, Shape({}), {}, "");
+  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  ReducerParams params{};
+  params.keep_dims = true;
+
+  ReduceProd kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes,
+                    params);
+  kernel.configure();
+  _memory_manager->allocate_memory(temp_index);
+  _memory_manager->allocate_memory(resolved_axes);
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{7.74592e+06, 1.197504e+08, 6.6889152e+08};
+  std::initializer_list<int32_t> ref_output_shape{1, 3, 1};
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(ReduceProdTest, Input_Output_Type_NEG)
+{
+  std::vector<float> input_data = {1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,
+                                   9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+                                   17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+  std::vector<int32_t> axis_data{0, 2};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+  Tensor temp_index(DataType::S32, Shape({}), {}, "");
+  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+  ReducerParams params{};
+  params.keep_dims = true;
+
+  ReduceProd kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes,
+                    params);
+
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ReduceProdTest, Invalid_Axes_Type_NEG)
+{
+  std::vector<float> input_data = {1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,
+                                   9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+                                   17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+  std::vector<int64_t> axis_data{0, 2};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S64>({2}, axis_data, _memory_manager.get());
+  Tensor temp_index(DataType::S32, Shape({}), {}, "");
+  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  ReducerParams params{};
+  params.keep_dims = true;
+
+  ReduceProd kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes,
+                    params);
+
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
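The reference values in FloatKeepDims can be checked by hand: with axes {0, 2}, each output element is the product of the eight inputs sharing the middle index. A quick standalone check, not part of the test suite:

    #include <cstdio>

    int main()
    {
      // Inputs 1..24 laid out as shape {4, 3, 2}; reduce over axes 0 and 2.
      float data[4][3][2];
      float v = 1.0f;
      for (int i = 0; i < 4; ++i)
        for (int j = 0; j < 3; ++j)
          for (int k = 0; k < 2; ++k)
            data[i][j][k] = v++;

      for (int j = 0; j < 3; ++j)
      {
        double prod = 1.0;
        for (int i = 0; i < 4; ++i)
          for (int k = 0; k < 2; ++k)
            prod *= data[i][j][k];
        // Prints 7.74592e+06, 1.197504e+08, 6.68892e+08 (matching ref_output_data)
        std::printf("j=%d: %g\n", j, prod);
      }
      return 0;
    }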
index 40d79aa..b124e24 100644 (file)
@@ -26,30 +26,6 @@ namespace luci_interpreter
 namespace kernels
 {
 
-namespace
-{
-TfLiteFusedActivation get_tflite_activation(Activation activation)
-{
-  switch (activation)
-  {
-    case luci::FusedActFunc::RELU:
-      return kTfLiteActRelu;
-    case luci::FusedActFunc::RELU6:
-      return kTfLiteActRelu6;
-    case luci::FusedActFunc::RELU_N1_TO_1:
-      return kTfLiteActReluN1To1;
-    case luci::FusedActFunc::TANH:
-      return kTfLiteActTanh;
-    case luci::FusedActFunc::SIGN_BIT:
-      return kTfLiteActSignBit;
-    case luci::FusedActFunc::NONE:
-      return kTfLiteActNone;
-    default:
-      throw std::runtime_error("Unsupported activation type");
-  }
-}
-} // namespace
-
 SVDF::SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
            const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
            Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
@@ -191,7 +167,7 @@ void SVDF::evalInteger() const
   TfLiteSVDFParams params_svdf{};
   params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
   params_svdf.rank = params().svdf_rank;
-  params_svdf.activation = get_tflite_activation(params().activation);
+  params_svdf.activation = getTfLiteActivation(params().activation);
 
   auto scratchpad_activation_state = getOutputTensors()[1];
   // Note: it is expected that activation_state input variable tensor reset to zero,
@@ -219,7 +195,7 @@ void SVDF::evalFloat() const
   TfLiteSVDFParams params_svdf{};
   params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
   params_svdf.rank = params().svdf_rank;
-  params_svdf.activation = get_tflite_activation(params().activation);
+  params_svdf.activation = getTfLiteActivation(params().activation);
 
   auto scratchpad_activation_state = getOutputTensors()[1];
   // Note: it is expected that activation_state input variable tensor reset to zero,
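The two hunks above switch SVDF from its file-local get_tflite_activation() to a shared getTfLiteActivation() helper, presumably declared in kernels/Utils.h (that change is not part of this hunk). Under that assumption, the shared helper is expected to provide the same mapping the deleted local function did:

    // Expected mapping from the fused-activation enum to TfLiteFusedActivation;
    // this mirrors the removed local helper and is a sketch, not the Utils.h source.
    TfLiteFusedActivation getTfLiteActivation(Activation activation)
    {
      switch (activation)
      {
        case luci::FusedActFunc::RELU:
          return kTfLiteActRelu;
        case luci::FusedActFunc::RELU6:
          return kTfLiteActRelu6;
        case luci::FusedActFunc::RELU_N1_TO_1:
          return kTfLiteActReluN1To1;
        case luci::FusedActFunc::TANH:
          return kTfLiteActTanh;
        case luci::FusedActFunc::SIGN_BIT:
          return kTfLiteActSignBit;
        case luci::FusedActFunc::NONE:
          return kTfLiteActNone;
        default:
          throw std::runtime_error("Unsupported activation type");
      }
    }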
index c4fa169..d47a0bd 100644 (file)
@@ -17,6 +17,7 @@
 #include "kernels/Tanh.h"
 
 #include "kernels/Utils.h"
+#include <limits> // std::numeric_limits
 
 #include <tensorflow/lite/kernels/internal/reference/tanh.h>
 
index 1f5a0c3..b9c942e 100644 (file)
@@ -22,6 +22,7 @@
 #include "luci_interpreter/MemoryManager.h"
 
 #include <type_traits>
+#include <limits> // std::numeric_limits
 
 #include <gtest/gtest.h>
 #include <gmock/gmock.h>
index 1b5f9d9..08bfbf3 100644 (file)
@@ -22,6 +22,7 @@
 #include <tensorflow/lite/kernels/internal/reference/transpose_conv.h>
 
 #include <stdexcept>
+#include <limits> // std::numeric_limits
 
 namespace luci_interpreter
 {
diff --git a/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.cpp b/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.cpp
new file mode 100644 (file)
index 0000000..f049bee
--- /dev/null
@@ -0,0 +1,892 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/UnidirectionalSequenceLSTM.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/tensor_utils.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace lstm
+{
+namespace
+{
+
+using namespace tflite;
+
+void UpdateLstmCellFloat(int n_batch, int n_cell, float *cell_state, const float *input_gate,
+                         float *forget_gate, const float *cell_gate, bool use_cifg, float clip)
+{
+// NOTE the tflite source is kept as-is, but it fails to build with gcc-8 and above
+// TODO remove #pragma
+#pragma GCC diagnostic ignored "-Wrestrict"
+  tensor_utils::VectorVectorCwiseProduct(forget_gate, cell_state, n_batch * n_cell, cell_state);
+
+  if (use_cifg)
+  {
+    // With CIFG, input_gate = 1-forget_gate. Use the forget_gate array as
+    // scratch, as input_gate array is not allocated in this case. (Be careful
+    // not to write to the scratch before reading the forget gate data.)
+    float *scratch = forget_gate;
+    tensor_utils::Sub1Vector(forget_gate, n_batch * n_cell, scratch);
+    tensor_utils::VectorVectorCwiseProductAccumulate(cell_gate, scratch, n_batch * n_cell,
+                                                     cell_state);
+  }
+  else
+  {
+    tensor_utils::VectorVectorCwiseProductAccumulate(cell_gate, input_gate, n_batch * n_cell,
+                                                     cell_state);
+  }
+  if (clip > 0.0f)
+  {
+    tensor_utils::CwiseClipping(cell_state, n_batch * n_cell, clip);
+  }
+}
+
+void CalculateLstmOutputFloat(int n_batch, int n_cell, int n_output, const float *cell_state,
+                              const float *output_gate, TfLiteFusedActivation activation,
+                              const float *projection_weights, const float *projection_bias,
+                              const float proj_clip, float *output_state, float *scratch)
+{
+  tensor_utils::ApplyActivationToVector(cell_state, n_batch * n_cell, activation, scratch);
+  tensor_utils::VectorVectorCwiseProduct(output_gate, scratch, n_batch * n_cell, scratch);
+
+  const bool use_projection = (projection_weights != nullptr);
+  const bool use_projection_bias = (projection_bias != nullptr);
+
+  if (use_projection)
+  {
+    if (use_projection_bias)
+    {
+      tensor_utils::VectorBatchVectorAssign(projection_bias, n_output, n_batch, output_state);
+    }
+    else
+    {
+      std::fill_n(output_state, n_batch * n_output, 0.0f);
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(projection_weights, n_output, n_cell, scratch,
+                                                      n_batch, output_state);
+    if (proj_clip > 0.0f)
+    {
+      tensor_utils::CwiseClipping(output_state, n_batch * n_output, proj_clip);
+    }
+  }
+  else
+  {
+    std::copy_n(scratch, n_batch * n_output, output_state);
+  }
+}
+
+inline void CalculateLstmGateFloat(const float *input, const float *input_to_gate_weights,
+                                   const float *aux_input, const float *aux_input_to_gate_weights,
+                                   const float *output_state,
+                                   const float *recurrent_to_gate_weights, const float *cell_state,
+                                   const float *cell_to_gate_weights,
+                                   const float *layer_norm_coefficients, const float *gate_bias,
+                                   const int n_batch, const int n_input, const int n_aux_input,
+                                   const int n_output, const int n_cell,
+                                   const TfLiteFusedActivation activation, float *gate,
+                                   const bool is_input_all_zeros, const bool is_aux_input_all_zeros)
+{
+  const bool use_peephole = (cell_to_gate_weights != nullptr);
+  const bool use_layer_norm = (layer_norm_coefficients != nullptr);
+
+  // Initialize scratch buffers with bias for regular lstm or initialize with
+  // zero for layer norm lstm.
+  if (use_layer_norm)
+  {
+    std::fill_n(gate, n_cell * n_batch, 0.0f);
+  }
+  else
+  {
+    tensor_utils::VectorBatchVectorAssign(gate_bias, n_cell, n_batch, gate);
+  }
+  // For each batch and cell: compute input_weight * input.
+  // Skip if input is all zeros.
+  if (!is_input_all_zeros)
+  {
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(input_to_gate_weights, n_cell, n_input, input,
+                                                      n_batch, gate);
+  }
+  // For each batch and cell: compute aux_input_weight * aux_input.
+  // Skip if auxiliary input is not available or all zeros.
+  if (!is_aux_input_all_zeros)
+  {
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(aux_input_to_gate_weights, n_cell,
+                                                      n_aux_input, aux_input, n_batch, gate);
+  }
+  // For each batch and cell: compute recurrent_weight * output_state.
+  tensor_utils::MatrixBatchVectorMultiplyAccumulate(recurrent_to_gate_weights, n_cell, n_output,
+                                                    output_state, n_batch, gate);
+  // For each batch and cell: compute cell_weight .* cell_state (peephole LSTM)
+  if (use_peephole)
+  {
+    tensor_utils::VectorBatchVectorCwiseProductAccumulate(cell_to_gate_weights, n_cell, cell_state,
+                                                          n_batch, gate);
+  }
+  // Do layer normalization (if layer norm LSTM)
+  if (use_layer_norm)
+  {
+    tensor_utils::MeanStddevNormalization(gate, gate, n_cell, n_batch);
+    tensor_utils::VectorBatchVectorCwiseProduct(layer_norm_coefficients, n_cell, gate, n_batch,
+                                                gate);
+    tensor_utils::VectorBatchVectorAdd(gate_bias, n_cell, n_batch, gate);
+  }
+  // Apply activation
+  tensor_utils::ApplyActivationToVector(gate, n_batch * n_cell, activation, gate);
+}
+
+inline void LstmStepFloat(
+  const float *input_ptr, const float *input_to_input_weights_ptr,
+  const float *input_to_forget_weights_ptr, const float *input_to_cell_weights_ptr,
+  const float *input_to_output_weights_ptr, const float *aux_input_ptr,
+  const float *aux_input_to_input_weights_ptr, const float *aux_input_to_forget_weights_ptr,
+  const float *aux_input_to_cell_weights_ptr, const float *aux_input_to_output_weights_ptr,
+  const float *recurrent_to_input_weights_ptr, const float *recurrent_to_forget_weights_ptr,
+  const float *recurrent_to_cell_weights_ptr, const float *recurrent_to_output_weights_ptr,
+  const float *cell_to_input_weights_ptr, const float *cell_to_forget_weights_ptr,
+  const float *cell_to_output_weights_ptr, const float *input_layer_norm_coefficients_ptr,
+  const float *forget_layer_norm_coefficients_ptr, const float *cell_layer_norm_coefficients_ptr,
+  const float *output_layer_norm_coefficients_ptr, const float *input_gate_bias_ptr,
+  const float *forget_gate_bias_ptr, const float *cell_gate_bias_ptr,
+  const float *output_gate_bias_ptr, const float *projection_weights_ptr,
+  const float *projection_bias_ptr, const TfLiteLSTMParams *params, int n_batch, int n_cell,
+  int n_input, int n_aux_input, int n_output, int output_batch_leading_dim, float *output_state_ptr,
+  float *cell_state_ptr, float *scratch0, float *scratch1, float *scratch2, float *scratch3,
+  float *output_ptr)
+{
+  // Since we have already checked that weights are all there or none, we can
+  // check the existence of only one to get the condition.
+  const bool use_cifg = (input_to_input_weights_ptr == nullptr);
+
+  // Make named scratch buffers.
+  float *input_gate_scratch = scratch0;
+  float *forget_gate_scratch = scratch1;
+  float *cell_gate_scratch = scratch2;
+  float *output_gate_scratch = scratch3;
+
+  // Check if inputs are all zeros so we can skip some computations.
+  const bool is_input_all_zeros = tensor_utils::IsZeroVector(input_ptr, n_batch * n_input);
+  const bool is_aux_input_all_zeros =
+    (aux_input_ptr == nullptr || tensor_utils::IsZeroVector(aux_input_ptr, n_batch * n_aux_input));
+  if (!use_cifg)
+  {
+    // Calculate the input gate. (If not CIFG.)
+    CalculateLstmGateFloat(input_ptr, input_to_input_weights_ptr, aux_input_ptr,
+                           aux_input_to_input_weights_ptr, output_state_ptr,
+                           recurrent_to_input_weights_ptr, cell_state_ptr,
+                           cell_to_input_weights_ptr, input_layer_norm_coefficients_ptr,
+                           input_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell,
+                           /*activation=*/kTfLiteActSigmoid, input_gate_scratch, is_input_all_zeros,
+                           is_aux_input_all_zeros);
+  }
+  // Calculate the forget gate.
+  CalculateLstmGateFloat(input_ptr, input_to_forget_weights_ptr, aux_input_ptr,
+                         aux_input_to_forget_weights_ptr, output_state_ptr,
+                         recurrent_to_forget_weights_ptr, cell_state_ptr,
+                         cell_to_forget_weights_ptr, forget_layer_norm_coefficients_ptr,
+                         forget_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell,
+                         /*activation=*/kTfLiteActSigmoid, forget_gate_scratch, is_input_all_zeros,
+                         is_aux_input_all_zeros);
+  // Calculate the cell update gate.
+  CalculateLstmGateFloat(
+    input_ptr, input_to_cell_weights_ptr, aux_input_ptr, aux_input_to_cell_weights_ptr,
+    output_state_ptr, recurrent_to_cell_weights_ptr, /*cell_state=*/nullptr,
+    /*cell_to_gate_weights=*/nullptr, cell_layer_norm_coefficients_ptr, cell_gate_bias_ptr, n_batch,
+    n_input, n_aux_input, n_output, n_cell, params->activation, cell_gate_scratch,
+    is_input_all_zeros, is_aux_input_all_zeros);
+  // Update the cell state.
+  UpdateLstmCellFloat(n_batch, n_cell, cell_state_ptr, input_gate_scratch, forget_gate_scratch,
+                      cell_gate_scratch, use_cifg, params->cell_clip);
+  // Calculate output gate.
+  CalculateLstmGateFloat(input_ptr, input_to_output_weights_ptr, aux_input_ptr,
+                         aux_input_to_output_weights_ptr, output_state_ptr,
+                         recurrent_to_output_weights_ptr, cell_state_ptr,
+                         cell_to_output_weights_ptr, output_layer_norm_coefficients_ptr,
+                         output_gate_bias_ptr, n_batch, n_input, n_aux_input, n_output, n_cell,
+                         /*activation=*/kTfLiteActSigmoid, output_gate_scratch, is_input_all_zeros,
+                         is_aux_input_all_zeros);
+  // Update the output state.
+  CalculateLstmOutputFloat(n_batch, n_cell, n_output, cell_state_ptr, output_gate_scratch,
+                           params->activation, projection_weights_ptr, projection_bias_ptr,
+                           params->proj_clip, output_state_ptr, scratch2);
+  // Copy output state to the output. Note that the output's rows may not be
+  // contiguous (output_batch_leading_dim != n_output).
+  for (int b = 0; b < n_batch; b++)
+  {
+    std::copy_n(output_state_ptr + b * n_output, n_output,
+                output_ptr + b * output_batch_leading_dim);
+  }
+}
+
+} // namespace
+
+void EvalFloat(const Tensor *input,
+
+               const Tensor *input_to_input_weights, const Tensor *input_to_forget_weights,
+               const Tensor *input_to_cell_weights, const Tensor *input_to_output_weights,
+
+               const Tensor *recurrent_to_input_weights, const Tensor *recurrent_to_forget_weights,
+               const Tensor *recurrent_to_cell_weights, const Tensor *recurrent_to_output_weights,
+
+               const Tensor *cell_to_input_weights, const Tensor *cell_to_forget_weights,
+               const Tensor *cell_to_output_weights,
+
+               const Tensor *input_layer_norm_coefficients,
+               const Tensor *forget_layer_norm_coefficients,
+               const Tensor *cell_layer_norm_coefficients,
+               const Tensor *output_layer_norm_coefficients,
+
+               const Tensor *aux_input, const Tensor *aux_input_to_input_weights,
+               const Tensor *aux_input_to_forget_weights, const Tensor *aux_input_to_cell_weights,
+               const Tensor *aux_input_to_output_weights,
+
+               const Tensor *input_gate_bias, const Tensor *forget_gate_bias,
+               const Tensor *cell_gate_bias, const Tensor *output_gate_bias,
+
+               const Tensor *projection_weights, const Tensor *projection_bias,
+               const TfLiteLSTMParams *params,
+
+               bool forward_sequence, bool time_major, int output_offset,
+
+               Tensor *scratch_buffer, Tensor *output_state, Tensor *cell_state, Tensor *output)
+{
+  const Shape &input_shape = input->shape();
+  assert(input_shape.num_dims() >= 2 && input_shape.num_dims() <= 3);
+  int max_time, n_batch;
+  if (input_shape.num_dims() == 3)
+  {
+    max_time = (time_major) ? input_shape.dim(0) : input_shape.dim(1);
+    n_batch = (time_major) ? input_shape.dim(1) : input_shape.dim(0);
+  }
+  else
+  {
+    max_time = 1;
+    n_batch = input_shape.dim(0);
+  }
+  const int n_input = input_shape.dim(input_shape.num_dims() - 1);
+
+  int aux_input_temp = 0;
+  if (aux_input)
+  {
+    const Shape &aux_input_shape = aux_input->shape();
+    aux_input_temp = aux_input_shape.dim(aux_input_shape.num_dims() - 1);
+  }
+  const int aux_input_size = aux_input_temp;
+
+  // n_cell and n_output will be the same size when there is no projection.
+  const Shape &input_to_output_weights_shape = input_to_output_weights->shape();
+  const Shape &recurrent_to_output_weights_shape = recurrent_to_output_weights->shape();
+  const int n_cell = input_to_output_weights_shape.dim(0);
+  const int n_output = recurrent_to_output_weights_shape.dim(1);
+
+  // Since we have already checked that weights are all there or none, we can
+  // check the existence of only one to get the condition.
+  const bool use_cifg = (input_to_input_weights == nullptr);
+
+  // Index the scratch buffer pointers into the global scratch buffer.
+  float *scratch_buffer_ptr = getTensorData<float>(scratch_buffer);
+  float *input_gate_scratch = nullptr;
+  float *cell_gate_scratch = nullptr;
+  float *forget_gate_scratch = nullptr;
+  float *output_gate_scratch = nullptr;
+  if (use_cifg)
+  {
+    cell_gate_scratch = scratch_buffer_ptr;
+    forget_gate_scratch = scratch_buffer_ptr + n_cell * n_batch;
+    output_gate_scratch = scratch_buffer_ptr + 2 * n_cell * n_batch;
+  }
+  else
+  {
+    input_gate_scratch = scratch_buffer_ptr;
+    cell_gate_scratch = scratch_buffer_ptr + n_cell * n_batch;
+    forget_gate_scratch = scratch_buffer_ptr + 2 * n_cell * n_batch;
+    output_gate_scratch = scratch_buffer_ptr + 3 * n_cell * n_batch;
+  }
+
+  const Shape &output_shape = output->shape();
+  const int output_batch_leading_dim = output_shape.dim(output_shape.num_dims() - 1);
+  if (time_major)
+  {
+    // Loop through the sequence.
+    const int input_step = n_batch * n_input;
+    const int output_step = n_batch * output_batch_leading_dim;
+    for (int t = 0; t < max_time; t++)
+    {
+      // If this is the forward_sequence, step forward, otherwise step
+      // backwards.
+      const int t_rel = forward_sequence ? t : max_time - t - 1;
+      const float *input_ptr = getTensorData<float>(input) + t_rel * input_step;
+      const float *aux_input_ptr = nullptr;
+      if (aux_input)
+      {
+        aux_input_ptr = getTensorData<float>(aux_input) + t_rel * input_step;
+      }
+      float *output_ptr = getTensorData<float>(output) + t_rel * output_step + output_offset;
+
+      LstmStepFloat(
+        input_ptr, getTensorData<float>(input_to_input_weights),
+        getTensorData<float>(input_to_forget_weights), getTensorData<float>(input_to_cell_weights),
+        getTensorData<float>(input_to_output_weights), aux_input_ptr,
+        getTensorData<float>(aux_input_to_input_weights),
+        getTensorData<float>(aux_input_to_forget_weights),
+        getTensorData<float>(aux_input_to_cell_weights),
+        getTensorData<float>(aux_input_to_output_weights),
+        getTensorData<float>(recurrent_to_input_weights),
+        getTensorData<float>(recurrent_to_forget_weights),
+        getTensorData<float>(recurrent_to_cell_weights),
+        getTensorData<float>(recurrent_to_output_weights),
+        getTensorData<float>(cell_to_input_weights), getTensorData<float>(cell_to_forget_weights),
+        getTensorData<float>(cell_to_output_weights),
+        getTensorData<float>(input_layer_norm_coefficients),
+        getTensorData<float>(forget_layer_norm_coefficients),
+        getTensorData<float>(cell_layer_norm_coefficients),
+        getTensorData<float>(output_layer_norm_coefficients), getTensorData<float>(input_gate_bias),
+        getTensorData<float>(forget_gate_bias), getTensorData<float>(cell_gate_bias),
+        getTensorData<float>(output_gate_bias), getTensorData<float>(projection_weights),
+        getTensorData<float>(projection_bias), params, n_batch, n_cell, n_input, aux_input_size,
+        n_output, output_batch_leading_dim, getTensorData<float>(output_state),
+        getTensorData<float>(cell_state), input_gate_scratch, forget_gate_scratch,
+        cell_gate_scratch, output_gate_scratch, output_ptr);
+    }
+  }
+  else
+  {
+    for (int b = 0; b < n_batch; b++)
+    {
+      const int input_step = n_input;
+      const int output_step = output_batch_leading_dim;
+      for (int t = 0; t < max_time; t++)
+      {
+        // If this is the forward_sequence, step forward, otherwise step
+        // backwards.
+        const int t_rel = forward_sequence ? t : max_time - t - 1;
+        const int time_offset = b * max_time + t_rel;
+        const float *input_ptr = getTensorData<float>(input) + time_offset * input_step;
+        const float *aux_input_ptr = nullptr;
+        if (aux_input)
+        {
+          aux_input_ptr = getTensorData<float>(aux_input) + time_offset * input_step;
+        }
+        float *output_ptr =
+          getTensorData<float>(output) + time_offset * output_step + output_offset;
+
+        // Offset the {output,cell}_state pointers to the right batch.
+        float *output_state_ptr = getTensorData<float>(output_state) + b * output_batch_leading_dim;
+        float *cell_state_ptr = getTensorData<float>(cell_state) + b * n_cell;
+        // Offset the scratch pointers to the right batch.
+        float *input_gate_scratch_ptr =
+          input_gate_scratch ? input_gate_scratch + b * n_cell : nullptr;
+        float *forget_gate_scratch_ptr = forget_gate_scratch + b * n_cell;
+        float *cell_gate_scratch_ptr = cell_gate_scratch + b * n_cell;
+        float *output_gate_scratch_ptr = output_gate_scratch + b * n_cell;
+
+        LstmStepFloat(
+          input_ptr, getTensorData<float>(input_to_input_weights),
+          getTensorData<float>(input_to_forget_weights),
+          getTensorData<float>(input_to_cell_weights),
+          getTensorData<float>(input_to_output_weights), aux_input_ptr,
+          getTensorData<float>(aux_input_to_input_weights),
+          getTensorData<float>(aux_input_to_forget_weights),
+          getTensorData<float>(aux_input_to_cell_weights),
+          getTensorData<float>(aux_input_to_output_weights),
+          getTensorData<float>(recurrent_to_input_weights),
+          getTensorData<float>(recurrent_to_forget_weights),
+          getTensorData<float>(recurrent_to_cell_weights),
+          getTensorData<float>(recurrent_to_output_weights),
+          getTensorData<float>(cell_to_input_weights), getTensorData<float>(cell_to_forget_weights),
+          getTensorData<float>(cell_to_output_weights),
+          getTensorData<float>(input_layer_norm_coefficients),
+          getTensorData<float>(forget_layer_norm_coefficients),
+          getTensorData<float>(cell_layer_norm_coefficients),
+          getTensorData<float>(output_layer_norm_coefficients),
+          getTensorData<float>(input_gate_bias), getTensorData<float>(forget_gate_bias),
+          getTensorData<float>(cell_gate_bias), getTensorData<float>(output_gate_bias),
+          getTensorData<float>(projection_weights), getTensorData<float>(projection_bias), params,
+          /*n_batch=*/1, n_cell, n_input, aux_input_size, n_output, output_batch_leading_dim,
+          output_state_ptr, cell_state_ptr, input_gate_scratch_ptr, forget_gate_scratch_ptr,
+          cell_gate_scratch_ptr, output_gate_scratch_ptr, output_ptr);
+      }
+    }
+  }
+}
+
+} // namespace lstm
+} // namespace kernels
+} // namespace luci_interpreter
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+UnidirectionalSequenceLSTM::UnidirectionalSequenceLSTM(
+  const Tensor *input,
+
+  const Tensor *input_to_input_weights, const Tensor *input_to_forget_weights,
+  const Tensor *input_to_cell_weights, const Tensor *input_to_output_weights,
+
+  const Tensor *recurrent_to_input_weights, const Tensor *recurrent_to_forget_weights,
+  const Tensor *recurrent_to_cell_weights, const Tensor *recurrent_to_output_weights,
+
+  const Tensor *cell_to_input_weights, const Tensor *cell_to_forget_weights,
+  const Tensor *cell_to_output_weights,
+
+  const Tensor *input_gate_bias, const Tensor *forget_gate_bias, const Tensor *cell_gate_bias,
+  const Tensor *output_gate_bias,
+
+  const Tensor *projection_weights, const Tensor *projection_bias,
+
+  const Tensor *output_state, const Tensor *cell_state, const Tensor *input_layer_norm_coefficients,
+  const Tensor *forget_layer_norm_coefficients, const Tensor *cell_layer_norm_coefficients,
+  const Tensor *output_layer_norm_coefficients,
+
+  Tensor *output, Tensor *scratchpad_1, Tensor *scratchpad_2, Tensor *scratchpad_3,
+  const UnidirectionalSequenceLSTMParams &params)
+  : KernelWithParams<UnidirectionalSequenceLSTMParams>(
+      {input,
+       input_to_input_weights,
+       input_to_forget_weights,
+       input_to_cell_weights,
+       input_to_output_weights,
+
+       recurrent_to_input_weights,
+       recurrent_to_forget_weights,
+       recurrent_to_cell_weights,
+       recurrent_to_output_weights,
+
+       cell_to_input_weights,
+       cell_to_forget_weights,
+       cell_to_output_weights,
+
+       input_gate_bias,
+       forget_gate_bias,
+       cell_gate_bias,
+       output_gate_bias,
+
+       projection_weights,
+       projection_bias,
+
+       output_state,
+       cell_state,
+
+       input_layer_norm_coefficients,
+       forget_layer_norm_coefficients,
+       cell_layer_norm_coefficients,
+       output_layer_norm_coefficients},
+      {output, scratchpad_1, scratchpad_2, scratchpad_3}, params)
+{
+  // Do nothing
+}
+
+// Check that the input tensor dimensions match each other.
+void UnidirectionalSequenceLSTM::check_input_tensor_dimensions(int n_input, int n_output,
+                                                               int n_cell, bool use_layer_norm,
+                                                               bool is_integer)
+{
+  // Making sure clipping parameters have valid values.
+  // == 0 means no clipping
+  //  > 0 means clipping
+  LUCI_INTERPRETER_CHECK(params().cell_clip >= 0);
+  LUCI_INTERPRETER_CHECK(params().proj_clip >= 0);
+
+  if (input_to_input_weights() != nullptr)
+  {
+    const Shape &input_to_input_weights_shape = input_to_input_weights()->shape();
+    LUCI_INTERPRETER_CHECK(input_to_input_weights_shape.num_dims() == 2);
+    LUCI_INTERPRETER_CHECK(input_to_input_weights_shape.dim(0) == n_cell);
+    LUCI_INTERPRETER_CHECK(input_to_input_weights_shape.dim(1) == n_input);
+  }
+
+  const Shape &input_to_forget_weights_shape = input_to_forget_weights()->shape();
+  LUCI_INTERPRETER_CHECK(input_to_forget_weights_shape.num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(input_to_forget_weights_shape.dim(0) == n_cell);
+  LUCI_INTERPRETER_CHECK(input_to_forget_weights_shape.dim(1) == n_input);
+
+  const Shape &input_to_cell_weights_shape = input_to_cell_weights()->shape();
+  LUCI_INTERPRETER_CHECK(input_to_cell_weights_shape.num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(input_to_cell_weights_shape.dim(0) == n_cell);
+  LUCI_INTERPRETER_CHECK(input_to_cell_weights_shape.dim(1) == n_input);
+
+  if (recurrent_to_input_weights() != nullptr)
+  {
+    const Shape &recurrent_to_input_weights_shape = recurrent_to_input_weights()->shape();
+    LUCI_INTERPRETER_CHECK(recurrent_to_input_weights_shape.num_dims() == 2);
+    LUCI_INTERPRETER_CHECK(recurrent_to_input_weights_shape.dim(0) == n_cell);
+    LUCI_INTERPRETER_CHECK(recurrent_to_input_weights_shape.dim(1) == n_output);
+  }
+
+  const Shape &recurrent_to_forget_weights_shape = recurrent_to_forget_weights()->shape();
+  LUCI_INTERPRETER_CHECK(recurrent_to_forget_weights_shape.num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(recurrent_to_forget_weights_shape.dim(0) == n_cell);
+  LUCI_INTERPRETER_CHECK(recurrent_to_forget_weights_shape.dim(1) == n_output);
+
+  const Shape &recurrent_to_cell_weights_shape = recurrent_to_cell_weights()->shape();
+  LUCI_INTERPRETER_CHECK(recurrent_to_cell_weights_shape.num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(recurrent_to_cell_weights_shape.dim(0) == n_cell);
+  LUCI_INTERPRETER_CHECK(recurrent_to_cell_weights_shape.dim(1) == n_output);
+
+  // We make sure the input-gate's parameters are either both present (regular
+  // LSTM) or not at all (CIFG-LSTM).
+  const bool cifg_weights_all_or_none =
+    ((input_to_input_weights() != nullptr) && (recurrent_to_input_weights() != nullptr)) ||
+    ((input_to_input_weights() == nullptr) && (recurrent_to_input_weights() == nullptr));
+  LUCI_INTERPRETER_CHECK(cifg_weights_all_or_none == true);
+
+  if (cell_to_input_weights() != nullptr)
+  {
+    const Shape &cell_to_input_weights_shape = cell_to_input_weights()->shape();
+    LUCI_INTERPRETER_CHECK(cell_to_input_weights_shape.num_dims() == 1);
+    LUCI_INTERPRETER_CHECK(cell_to_input_weights_shape.dim(0) == n_cell);
+    LUCI_INTERPRETER_CHECK(is_integer
+                             ? cell_to_input_weights()->element_type() == loco::DataType::S16
+                             : cell_to_input_weights()->element_type() ==
+                                 input_to_forget_weights()->element_type());
+  }
+
+  if (cell_to_forget_weights() != nullptr)
+  {
+    const Shape &cell_to_forget_weights_shape = cell_to_forget_weights()->shape();
+    LUCI_INTERPRETER_CHECK(cell_to_forget_weights_shape.num_dims() == 1);
+    LUCI_INTERPRETER_CHECK(cell_to_forget_weights_shape.dim(0) == n_cell);
+    LUCI_INTERPRETER_CHECK(is_integer
+                             ? cell_to_forget_weights()->element_type() == loco::DataType::S16
+                             : cell_to_forget_weights()->element_type() ==
+                                 input_to_forget_weights()->element_type());
+  }
+
+  if (cell_to_output_weights() != nullptr)
+  {
+    const Shape &cell_to_output_weights_shape = cell_to_output_weights()->shape();
+    LUCI_INTERPRETER_CHECK(cell_to_output_weights_shape.num_dims() == 1);
+    LUCI_INTERPRETER_CHECK(cell_to_output_weights_shape.dim(0) == n_cell);
+    LUCI_INTERPRETER_CHECK(is_integer
+                             ? cell_to_output_weights()->element_type() == loco::DataType::S16
+                             : cell_to_output_weights()->element_type() ==
+                                 input_to_forget_weights()->element_type());
+  }
+
+  // Making sure the peephole weights are either all present or all absent.
+  const bool use_cifg = (input_to_input_weights() == nullptr);
+  const bool peephole_weights_all_or_none =
+    ((cell_to_input_weights() != nullptr || use_cifg) && (cell_to_forget_weights() != nullptr) &&
+     (cell_to_output_weights() != nullptr)) ||
+    ((cell_to_input_weights() == nullptr) && (cell_to_forget_weights() == nullptr) &&
+     (cell_to_output_weights() == nullptr));
+  LUCI_INTERPRETER_CHECK(peephole_weights_all_or_none == true);
+
+  // Make sure the input gate bias is present only when not a CIFG-LSTM.
+  if (use_cifg)
+  {
+    LUCI_INTERPRETER_CHECK(input_gate_bias() == nullptr);
+  }
+  else
+  {
+    const Shape &input_gate_bias_shape = input_gate_bias()->shape();
+    LUCI_INTERPRETER_CHECK(input_gate_bias_shape.num_dims() == 1);
+    LUCI_INTERPRETER_CHECK(input_gate_bias_shape.dim(0) == n_cell);
+    if (is_integer)
+    {
+      LUCI_INTERPRETER_CHECK(input_gate_bias()->element_type() == loco::DataType::S32);
+    }
+    else
+    {
+      LUCI_INTERPRETER_CHECK(input_gate_bias()->element_type() == loco::DataType::FLOAT32);
+    }
+  }
+
+  const Shape &forget_gate_bias_shape = forget_gate_bias()->shape();
+  LUCI_INTERPRETER_CHECK(forget_gate_bias_shape.num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(forget_gate_bias_shape.dim(0) == n_cell);
+  if (is_integer)
+  {
+    LUCI_INTERPRETER_CHECK(forget_gate_bias()->element_type() == loco::DataType::S32);
+  }
+  else
+  {
+    LUCI_INTERPRETER_CHECK(forget_gate_bias()->element_type() == loco::DataType::FLOAT32);
+  }
+
+  const Shape &cell_gate_bias_shape = cell_gate_bias()->shape();
+  LUCI_INTERPRETER_CHECK(cell_gate_bias_shape.num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(cell_gate_bias_shape.dim(0) == n_cell);
+  if (is_integer)
+  {
+    LUCI_INTERPRETER_CHECK(cell_gate_bias()->element_type() == loco::DataType::S32);
+  }
+  else
+  {
+    LUCI_INTERPRETER_CHECK(cell_gate_bias()->element_type() == loco::DataType::FLOAT32);
+  }
+
+  const Shape &output_gate_bias_shape = output_gate_bias()->shape();
+  LUCI_INTERPRETER_CHECK(output_gate_bias_shape.num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(output_gate_bias_shape.dim(0) == n_cell);
+  if (is_integer)
+  {
+    LUCI_INTERPRETER_CHECK(output_gate_bias()->element_type() == loco::DataType::S32);
+  }
+  else
+  {
+    LUCI_INTERPRETER_CHECK(output_gate_bias()->element_type() == loco::DataType::FLOAT32);
+  }
+
+  if (projection_weights() != nullptr)
+  {
+    const Shape &projection_weights_shape = projection_weights()->shape();
+    LUCI_INTERPRETER_CHECK(projection_weights_shape.num_dims() == 2);
+    LUCI_INTERPRETER_CHECK(projection_weights_shape.dim(0) == n_output);
+    LUCI_INTERPRETER_CHECK(projection_weights_shape.dim(1) == n_cell);
+  }
+
+  if (projection_bias() != nullptr)
+  {
+    const Shape &projection_bias_shape = projection_bias()->shape();
+    LUCI_INTERPRETER_CHECK(projection_bias_shape.num_dims() == 1);
+    LUCI_INTERPRETER_CHECK(projection_bias_shape.dim(0) == n_output);
+    if (is_integer)
+    {
+      LUCI_INTERPRETER_CHECK(projection_bias()->element_type() == loco::DataType::S32);
+    }
+    else
+    {
+      LUCI_INTERPRETER_CHECK(projection_bias()->element_type() == loco::DataType::FLOAT32);
+    }
+  }
+
+  // Making sure the projection tensors are consistent:
+  // 1) If projection weight is not present, then projection bias should not be
+  // present.
+  // 2) If projection weight is present, then projection bias is optional.
+  // TODO(ghodrat): make sure this is correct.
+  const bool projection_tensors_consistent =
+    ((projection_weights() != nullptr) || (projection_bias() == nullptr));
+  LUCI_INTERPRETER_CHECK(projection_tensors_consistent == true);
+
+  if (use_layer_norm)
+  {
+    if (use_cifg)
+    {
+      LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients() == nullptr);
+    }
+    else
+    {
+      LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients() != nullptr);
+
+      const Shape &input_layer_norm_coefficients_shape = input_layer_norm_coefficients()->shape();
+      LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients_shape.num_dims() == 1);
+      LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients_shape.dim(0) == n_cell);
+      if (is_integer)
+      {
+        LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients()->element_type() ==
+                               loco::DataType::S16);
+      }
+      else
+      {
+        LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients()->element_type() ==
+                               loco::DataType::FLOAT32);
+      }
+    }
+
+    const Shape &forget_layer_norm_coefficients_shape = forget_layer_norm_coefficients()->shape();
+    LUCI_INTERPRETER_CHECK(forget_layer_norm_coefficients_shape.num_dims() == 1);
+    LUCI_INTERPRETER_CHECK(forget_layer_norm_coefficients_shape.dim(0) == n_cell);
+    if (is_integer)
+    {
+      LUCI_INTERPRETER_CHECK(forget_layer_norm_coefficients()->element_type() ==
+                             loco::DataType::S16);
+    }
+    else
+    {
+      LUCI_INTERPRETER_CHECK(forget_layer_norm_coefficients()->element_type() ==
+                             loco::DataType::FLOAT32);
+    }
+
+    const Shape &cell_layer_norm_coefficients_shape = cell_layer_norm_coefficients()->shape();
+    LUCI_INTERPRETER_CHECK(cell_layer_norm_coefficients_shape.num_dims() == 1);
+    LUCI_INTERPRETER_CHECK(cell_layer_norm_coefficients_shape.dim(0) == n_cell);
+    if (is_integer)
+    {
+      LUCI_INTERPRETER_CHECK(cell_layer_norm_coefficients()->element_type() == loco::DataType::S16);
+    }
+    else
+    {
+      LUCI_INTERPRETER_CHECK(cell_layer_norm_coefficients()->element_type() ==
+                             loco::DataType::FLOAT32);
+    }
+
+    const Shape &output_layer_norm_coefficients_shape = output_layer_norm_coefficients()->shape();
+    LUCI_INTERPRETER_CHECK(output_layer_norm_coefficients_shape.num_dims() == 1);
+    LUCI_INTERPRETER_CHECK(output_layer_norm_coefficients_shape.dim(0) == n_cell);
+    if (is_integer)
+    {
+      LUCI_INTERPRETER_CHECK(output_layer_norm_coefficients()->element_type() ==
+                             loco::DataType::S16);
+    }
+    else
+    {
+      LUCI_INTERPRETER_CHECK(output_layer_norm_coefficients()->element_type() ==
+                             loco::DataType::FLOAT32);
+    }
+  }
+}
+
+void UnidirectionalSequenceLSTM::configure()
+{
+  LUCI_INTERPRETER_CHECK(getInputTensors().size() == 24);
+  LUCI_INTERPRETER_CHECK(getOutputTensors().size() >= 1);
+
+  // TODO support U8
+  LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::FLOAT32);
+  const bool is_integer = false;
+  const bool use_layer_norm = (forget_layer_norm_coefficients() != nullptr);
+
+  // Inferring batch size, number of outputs, sequence length and
+  // number of cells from the input tensors.
+  const Shape &input_shape = input()->shape();
+  LUCI_INTERPRETER_CHECK(input_shape.num_dims() > 1);
+  const bool time_major = params().time_major;
+  const int n_batch = time_major ? input_shape.dim(1) : input_shape.dim(0);
+  // NOTE dim(2) is accessed below, so we need to check that it is valid
+  LUCI_INTERPRETER_CHECK(input_shape.num_dims() > 2);
+  const int n_input = input_shape.dim(2);
+
+  const Shape &input_to_output_weights_shape = input_to_output_weights()->shape();
+  const int n_cell = input_to_output_weights_shape.dim(0);
+  LUCI_INTERPRETER_CHECK(input_to_output_weights_shape.num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(input_to_output_weights_shape.dim(1) == n_input);
+
+  const Shape &recurrent_to_output_weights_shape = recurrent_to_output_weights()->shape();
+  LUCI_INTERPRETER_CHECK(recurrent_to_output_weights_shape.num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(recurrent_to_output_weights_shape.dim(0) == n_cell);
+
+  const int n_output = recurrent_to_output_weights_shape.dim(1);
+
+  // Check that the input tensor dimensions match each other.
+  check_input_tensor_dimensions(n_input, n_output, n_cell, use_layer_norm, is_integer);
+
+  // Check the shape of input state tensors.
+  // These tensors may be 1D or 2D. It's fine as long as the total size is
+  // correct.
+  const Shape &output_state_shape = output_state()->shape();
+  const Shape &cell_state_shape = cell_state()->shape();
+  LUCI_INTERPRETER_CHECK(output_state_shape.num_elements() == n_batch * n_output);
+  LUCI_INTERPRETER_CHECK(cell_state_shape.num_elements() == n_batch * n_cell);
+
+  // Resize the output tensors.
+  Shape output_shape = Shape(input_shape.num_dims());
+  for (int i = 0; i < input_shape.num_dims() - 1; i++)
+  {
+    output_shape.dim(i) = input_shape.dim(i);
+  }
+  output_shape.dim(input_shape.num_dims() - 1) = n_output;
+  output()->resize(output_shape);
+
+  // TODO import integer
+
+  // output_state and cell_state are variable tensors; use scratchpads.
+  getOutputTensors()[1]->resize(output_state_shape);
+  getOutputTensors()[2]->resize(cell_state_shape);
+
+  const bool use_cifg = (input_to_input_weights() == nullptr);
+  if (use_cifg)
+    getOutputTensors()[3]->resize({n_batch, n_cell * 3});
+  else
+    getOutputTensors()[3]->resize({n_batch, n_cell * 4});
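+  // The factor is the number of gate activations kept in the scratch buffer:
+  // with CIFG the input gate is coupled to the forget gate, so only three gate
+  // buffers are needed per batch row; otherwise all four gates need scratch space
+  // (this follows the TFLite reference LSTM kernel this code is adapted from).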
+
+  // hybrid not supported
+  if (input_to_output_weights()->element_type() == loco::DataType::U8 &&
+      input()->element_type() == loco::DataType::FLOAT32)
+  {
+    throw std::runtime_error("Hybrid type is not currently supported");
+  }
+  // TODO support hybrid
+  // TODO support U8
+}
+
+void UnidirectionalSequenceLSTM::execute() const
+{
+  switch (input()->element_type())
+  {
+    case loco::DataType::FLOAT32:
+      evalFloat();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type");
+  }
+}
+
+void UnidirectionalSequenceLSTM::evalFloat() const
+{
+  const bool time_major = params().time_major;
+  const bool use_layer_norm = (forget_layer_norm_coefficients() != nullptr);
+
+  const Tensor *t_input_layer_norm_coefficients =
+    use_layer_norm ? input_layer_norm_coefficients() : nullptr;
+  const Tensor *t_forget_layer_norm_coefficients =
+    use_layer_norm ? forget_layer_norm_coefficients() : nullptr;
+  const Tensor *t_cell_layer_norm_coefficients =
+    use_layer_norm ? cell_layer_norm_coefficients() : nullptr;
+  const Tensor *t_output_layer_norm_coefficients =
+    use_layer_norm ? output_layer_norm_coefficients() : nullptr;
+
+  Tensor *sp_output_state = getOutputTensors()[1];
+  Tensor *sp_cell_state = getOutputTensors()[2];
+  Tensor *sp_scratch_buffer = getOutputTensors()[3];
+
+  // Note: the output_state input variable tensor is expected to be reset to zero,
+  // and this variable tensor is expected to have no backing buffer.
+  auto scratchpad_data = getTensorData<float>(sp_output_state);
+  std::fill_n(scratchpad_data, sp_output_state->shape().num_elements(), 0);
+  scratchpad_data = getTensorData<float>(sp_cell_state);
+  std::fill_n(scratchpad_data, sp_cell_state->shape().num_elements(), 0);
+  scratchpad_data = getTensorData<float>(sp_scratch_buffer);
+  std::fill_n(scratchpad_data, sp_scratch_buffer->shape().num_elements(), 0);
+
+  TfLiteLSTMParams lstm_params{};
+  lstm_params.activation = getTfLiteActivation(params().activation);
+  lstm_params.cell_clip = params().cell_clip;
+  lstm_params.proj_clip = params().proj_clip;
+  lstm_params.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+
+  lstm::EvalFloat(input(), input_to_input_weights(), input_to_forget_weights(),
+                  input_to_cell_weights(), input_to_output_weights(),
+
+                  recurrent_to_input_weights(), recurrent_to_forget_weights(),
+                  recurrent_to_cell_weights(), recurrent_to_output_weights(),
+
+                  cell_to_input_weights(), cell_to_forget_weights(), cell_to_output_weights(),
+
+                  t_input_layer_norm_coefficients, t_forget_layer_norm_coefficients,
+                  t_cell_layer_norm_coefficients, t_output_layer_norm_coefficients,
+                  /*aux_input=*/nullptr,
+                  /*aux_input_to_input_weights=*/nullptr,
+                  /*aux_input_to_forget_weights=*/nullptr,
+                  /*aux_input_to_cell_weights=*/nullptr,
+                  /*aux_input_to_output_weights=*/nullptr, input_gate_bias(), forget_gate_bias(),
+                  cell_gate_bias(), output_gate_bias(),
+
+                  projection_weights(), projection_bias(), &lstm_params,
+                  /*forward_sequence=*/true, time_major,
+                  /*output_offset=*/0, sp_scratch_buffer, sp_output_state, sp_cell_state, output());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.h b/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.h
new file mode 100644 (file)
index 0000000..b812511
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_UNIDIRECTIONALSEQUENCELSTM_H
+#define LUCI_INTERPRETER_KERNELS_UNIDIRECTIONALSEQUENCELSTM_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class UnidirectionalSequenceLSTM : public KernelWithParams<UnidirectionalSequenceLSTMParams>
+{
+public:
+  UnidirectionalSequenceLSTM(
+    const Tensor *input,
+
+    const Tensor *input_to_input_weights, const Tensor *input_to_forget_weights,
+    const Tensor *input_to_cell_weights, const Tensor *input_to_output_weights,
+
+    const Tensor *recurrent_to_input_weights, const Tensor *recurrent_to_forget_weights,
+    const Tensor *recurrent_to_cell_weights, const Tensor *recurrent_to_output_weights,
+
+    const Tensor *cell_to_input_weights, const Tensor *cell_to_forget_weights,
+    const Tensor *cell_to_output_weights,
+
+    const Tensor *input_gate_bias, const Tensor *forget_gate_bias, const Tensor *cell_gate_bias,
+    const Tensor *output_gate_bias,
+
+    const Tensor *projection_weights, const Tensor *projection_bias,
+
+    const Tensor *output_state, const Tensor *cell_state,
+
+    const Tensor *input_layer_norm_coefficients, const Tensor *forget_layer_norm_coefficients,
+    const Tensor *cell_layer_norm_coefficients, const Tensor *output_layer_norm_coefficients,
+
+    Tensor *output, Tensor *scratchpad_1, Tensor *scratchpad_2, Tensor *scratchpad_3,
+    const UnidirectionalSequenceLSTMParams &params);
+
+  const Tensor *input() const { return _inputs[0]; }
+
+  const Tensor *input_to_input_weights() const { return _inputs[1]; }
+  const Tensor *input_to_forget_weights() const { return _inputs[2]; }
+  const Tensor *input_to_cell_weights() const { return _inputs[3]; }
+  const Tensor *input_to_output_weights() const { return _inputs[4]; }
+
+  const Tensor *recurrent_to_input_weights() const { return _inputs[5]; }
+  const Tensor *recurrent_to_forget_weights() const { return _inputs[6]; }
+  const Tensor *recurrent_to_cell_weights() const { return _inputs[7]; }
+  const Tensor *recurrent_to_output_weights() const { return _inputs[8]; }
+
+  const Tensor *cell_to_input_weights() const { return _inputs[9]; }
+  const Tensor *cell_to_forget_weights() const { return _inputs[10]; }
+  const Tensor *cell_to_output_weights() const { return _inputs[11]; }
+
+  const Tensor *input_gate_bias() const { return _inputs[12]; }
+  const Tensor *forget_gate_bias() const { return _inputs[13]; }
+  const Tensor *cell_gate_bias() const { return _inputs[14]; }
+  const Tensor *output_gate_bias() const { return _inputs[15]; }
+
+  const Tensor *projection_weights() const { return _inputs[16]; }
+  const Tensor *projection_bias() const { return _inputs[17]; }
+
+  const Tensor *output_state() const { return _inputs[18]; }
+  const Tensor *cell_state() const { return _inputs[19]; }
+
+  const Tensor *input_layer_norm_coefficients() const { return _inputs[20]; }
+  const Tensor *forget_layer_norm_coefficients() const { return _inputs[21]; }
+  const Tensor *cell_layer_norm_coefficients() const { return _inputs[22]; }
+  const Tensor *output_layer_norm_coefficients() const { return _inputs[23]; }
+
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+
+private:
+  void check_input_tensor_dimensions(int n_input, int n_output, int n_cell, bool use_layer_norm,
+                                     bool is_integer);
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_UNIDIRECTIONALSEQUENCELSTM_H
diff --git a/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.test.cpp b/compiler/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.test.cpp
new file mode 100644 (file)
index 0000000..df059cf
--- /dev/null
@@ -0,0 +1,565 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/UnidirectionalSequenceLSTM.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class UnidirectionalSequenceLSTMTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// NOTE from NoCifgNoPeepholeNoProjectionNoClippingUnidirectionalLstmTest
+TEST_F(UnidirectionalSequenceLSTMTest, FloatTest)
+{
+  const int32_t n_batch = 1;
+  const int32_t n_input = 2;
+  const int32_t n_cell = 4;
+  const int32_t n_output = 4;
+  const int32_t sequence_length = 3;
+
+  std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589,  -0.34550029,
+                                               0.04266912,  -0.15680569, -0.34856534, 0.43890524};
+
+  std::vector<float> input_to_cell_weights = {-0.50013041, 0.1370284,  0.11810488, 0.2013163,
+                                              -0.20583314, 0.44344562, 0.22077113, -0.29909778};
+
+  std::vector<float> input_to_forget_weights = {0.09701663,  0.20334584, -0.50592935, -0.31343272,
+                                                -0.40032279, 0.44781327, 0.01387155,  -0.35593212};
+
+  std::vector<float> input_to_output_weights = {-0.25065863, -0.28290087, 0.04613829, 0.40525138,
+                                                0.44272184,  0.03897077,  -0.1556896, 0.19487578};
+
+  std::vector<float> input_gate_bias = {0., 0., 0., 0.};
+  std::vector<float> forget_gate_bias = {1., 1., 1., 1.};
+  std::vector<float> cell_gate_bias = {0., 0., 0., 0.};
+  std::vector<float> output_gate_bias = {0., 0., 0., 0.};
+
+  std::vector<float> recurrent_to_input_weights = {
+    -0.0063535,  -0.2042388,  0.31454784,  -0.35746509, 0.28902304, 0.08183324,
+    -0.16555229, 0.02286911,  -0.13566875, 0.03034258,  0.48091322, -0.12528998,
+    0.24077177,  -0.51332325, -0.33502164, 0.10629296};
+
+  std::vector<float> recurrent_to_forget_weights = {
+    -0.48684245, -0.06655136, 0.42224967,  0.2112639,   0.27654213, 0.20864892,
+    -0.07646349, 0.45877004,  0.00141793,  -0.14609534, 0.36447752, 0.09196436,
+    0.28053468,  0.01560611,  -0.20127171, -0.01140004};
+
+  std::vector<float> recurrent_to_cell_weights = {
+    -0.3407414,  0.24443203,  -0.2078532,  0.26320225,  0.05695659, -0.00123841,
+    -0.4744786,  -0.35869038, -0.06418842, -0.13502428, -0.501764,  0.22830659,
+    -0.46367589, 0.26016325,  -0.03894562, -0.16368064};
+
+  std::vector<float> recurrent_to_output_weights = {
+    0.43385774,  -0.17194885, 0.2718237,  0.09215671,  0.24107647, -0.39835793,
+    0.18212086,  0.01301402,  0.48572797, -0.50656658, 0.20047462, -0.20607421,
+    -0.51818722, -0.15390486, 0.0468148,  0.39922136};
+
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Shape input_to_cell_weights_shape{n_cell, n_input};
+  Shape input_to_forget_weights_shape{n_cell, n_input};
+  Shape input_to_output_weights_shape{n_cell, n_input};
+
+  Shape input_gate_bias_shape{n_cell};
+  Shape forget_gate_bias_shape{n_cell};
+  Shape cell_gate_bias_shape{n_cell};
+  Shape output_gate_bias_shape{n_cell};
+
+  Shape recurrent_to_input_weights_shape{n_cell, n_output};
+  Shape recurrent_to_cell_weights_shape{n_cell, n_output};
+  Shape recurrent_to_forget_weights_shape{n_cell, n_output};
+  Shape recurrent_to_output_weights_shape{n_cell, n_output};
+
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+  Tensor input_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_cell_weights_shape, input_to_cell_weights, _memory_manager.get());
+  Tensor input_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_forget_weights_shape, input_to_forget_weights, _memory_manager.get());
+  Tensor input_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_output_weights_shape, input_to_output_weights, _memory_manager.get());
+
+  Tensor input_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_gate_bias_shape, input_gate_bias, _memory_manager.get());
+  Tensor forget_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    forget_gate_bias_shape, forget_gate_bias, _memory_manager.get());
+  Tensor cell_gate_bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(cell_gate_bias_shape, cell_gate_bias, _memory_manager.get());
+  Tensor output_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    output_gate_bias_shape, output_gate_bias, _memory_manager.get());
+
+  Tensor recurrent_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_input_weights_shape, recurrent_to_input_weights, _memory_manager.get());
+  Tensor recurrent_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_cell_weights_shape, recurrent_to_cell_weights, _memory_manager.get());
+  Tensor recurrent_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_forget_weights_shape, recurrent_to_forget_weights, _memory_manager.get());
+  Tensor recurrent_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_output_weights_shape, recurrent_to_output_weights, _memory_manager.get());
+
+  std::vector<float> input_data{2., 3., 3., 4., 1., 1.};
+  Shape input_shape{sequence_length, n_batch, n_input};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+  Shape output_state_shape{n_batch, n_output};
+  Tensor output_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  output_state_tensor.resize(output_state_shape);
+
+  Shape cell_state_shape{n_batch, n_cell};
+  Tensor cell_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  cell_state_tensor.resize(cell_state_shape);
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 0.0;
+  params.proj_clip = 0.0;
+  params.time_major = true;
+  params.asymmetric_quantize_inputs = false;
+
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_forget_weights_tensor,
+    &input_to_cell_weights_tensor, &input_to_output_weights_tensor,
+    &recurrent_to_input_weights_tensor, &recurrent_to_forget_weights_tensor,
+    &recurrent_to_cell_weights_tensor, &recurrent_to_output_weights_tensor, nullptr, nullptr,
+    nullptr, &input_gate_bias_tensor, &forget_gate_bias_tensor, &cell_gate_bias_tensor,
+    &output_gate_bias_tensor, nullptr, nullptr, &output_state_tensor, &cell_state_tensor, nullptr,
+    nullptr, nullptr, nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params);
+
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(output_state_tensor);
+  _memory_manager->allocate_memory(cell_state_tensor);
+  _memory_manager->allocate_memory(scratchpad_1);
+  _memory_manager->allocate_memory(scratchpad_2);
+  _memory_manager->allocate_memory(scratchpad_3);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{-0.02973187, 0.1229473,  0.20885126, -0.15358765,
+                                     -0.03716109, 0.12507336, 0.41193449, -0.20860538,
+                                     -0.15053082, 0.09120187, 0.24278517, -0.12222792};
+
+  std::vector<int32_t> ref_output_shape{sequence_length, n_batch, n_output};
+  const float tolerance = 1e-5;
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(UnidirectionalSequenceLSTMTest, FloatTest_batch)
+{
+  const int32_t n_batch = 1;
+  const int32_t n_input = 2;
+  const int32_t n_cell = 4;
+  const int32_t n_output = 4;
+  const int32_t sequence_length = 3;
+
+  std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589,  -0.34550029,
+                                               0.04266912,  -0.15680569, -0.34856534, 0.43890524};
+
+  std::vector<float> input_to_cell_weights = {-0.50013041, 0.1370284,  0.11810488, 0.2013163,
+                                              -0.20583314, 0.44344562, 0.22077113, -0.29909778};
+
+  std::vector<float> input_to_forget_weights = {0.09701663,  0.20334584, -0.50592935, -0.31343272,
+                                                -0.40032279, 0.44781327, 0.01387155,  -0.35593212};
+
+  std::vector<float> input_to_output_weights = {-0.25065863, -0.28290087, 0.04613829, 0.40525138,
+                                                0.44272184,  0.03897077,  -0.1556896, 0.19487578};
+
+  std::vector<float> input_gate_bias = {0., 0., 0., 0.};
+  std::vector<float> forget_gate_bias = {1., 1., 1., 1.};
+  std::vector<float> cell_gate_bias = {0., 0., 0., 0.};
+  std::vector<float> output_gate_bias = {0., 0., 0., 0.};
+
+  std::vector<float> recurrent_to_input_weights = {
+    -0.0063535,  -0.2042388,  0.31454784,  -0.35746509, 0.28902304, 0.08183324,
+    -0.16555229, 0.02286911,  -0.13566875, 0.03034258,  0.48091322, -0.12528998,
+    0.24077177,  -0.51332325, -0.33502164, 0.10629296};
+
+  std::vector<float> recurrent_to_forget_weights = {
+    -0.48684245, -0.06655136, 0.42224967,  0.2112639,   0.27654213, 0.20864892,
+    -0.07646349, 0.45877004,  0.00141793,  -0.14609534, 0.36447752, 0.09196436,
+    0.28053468,  0.01560611,  -0.20127171, -0.01140004};
+
+  std::vector<float> recurrent_to_cell_weights = {
+    -0.3407414,  0.24443203,  -0.2078532,  0.26320225,  0.05695659, -0.00123841,
+    -0.4744786,  -0.35869038, -0.06418842, -0.13502428, -0.501764,  0.22830659,
+    -0.46367589, 0.26016325,  -0.03894562, -0.16368064};
+
+  std::vector<float> recurrent_to_output_weights = {
+    0.43385774,  -0.17194885, 0.2718237,  0.09215671,  0.24107647, -0.39835793,
+    0.18212086,  0.01301402,  0.48572797, -0.50656658, 0.20047462, -0.20607421,
+    -0.51818722, -0.15390486, 0.0468148,  0.39922136};
+
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Shape input_to_cell_weights_shape{n_cell, n_input};
+  Shape input_to_forget_weights_shape{n_cell, n_input};
+  Shape input_to_output_weights_shape{n_cell, n_input};
+
+  Shape input_gate_bias_shape{n_cell};
+  Shape forget_gate_bias_shape{n_cell};
+  Shape cell_gate_bias_shape{n_cell};
+  Shape output_gate_bias_shape{n_cell};
+
+  Shape recurrent_to_input_weights_shape{n_cell, n_output};
+  Shape recurrent_to_cell_weights_shape{n_cell, n_output};
+  Shape recurrent_to_forget_weights_shape{n_cell, n_output};
+  Shape recurrent_to_output_weights_shape{n_cell, n_output};
+
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+  Tensor input_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_cell_weights_shape, input_to_cell_weights, _memory_manager.get());
+  Tensor input_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_forget_weights_shape, input_to_forget_weights, _memory_manager.get());
+  Tensor input_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_output_weights_shape, input_to_output_weights, _memory_manager.get());
+
+  Tensor input_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_gate_bias_shape, input_gate_bias, _memory_manager.get());
+  Tensor forget_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    forget_gate_bias_shape, forget_gate_bias, _memory_manager.get());
+  Tensor cell_gate_bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(cell_gate_bias_shape, cell_gate_bias, _memory_manager.get());
+  Tensor output_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    output_gate_bias_shape, output_gate_bias, _memory_manager.get());
+
+  Tensor recurrent_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_input_weights_shape, recurrent_to_input_weights, _memory_manager.get());
+  Tensor recurrent_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_cell_weights_shape, recurrent_to_cell_weights, _memory_manager.get());
+  Tensor recurrent_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_forget_weights_shape, recurrent_to_forget_weights, _memory_manager.get());
+  Tensor recurrent_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_output_weights_shape, recurrent_to_output_weights, _memory_manager.get());
+
+  std::vector<float> input_data{2., 3., 3., 4., 1., 1.};
+  Shape input_shape{n_batch, sequence_length, n_input};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+  Shape output_state_shape{n_batch, n_output};
+  Tensor output_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  output_state_tensor.resize(output_state_shape);
+
+  Shape cell_state_shape{n_batch, n_cell};
+  Tensor cell_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  cell_state_tensor.resize(cell_state_shape);
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 0.0;
+  params.proj_clip = 0.0;
+  params.time_major = false;
+  params.asymmetric_quantize_inputs = false;
+
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_forget_weights_tensor,
+    &input_to_cell_weights_tensor, &input_to_output_weights_tensor,
+    &recurrent_to_input_weights_tensor, &recurrent_to_forget_weights_tensor,
+    &recurrent_to_cell_weights_tensor, &recurrent_to_output_weights_tensor, nullptr, nullptr,
+    nullptr, &input_gate_bias_tensor, &forget_gate_bias_tensor, &cell_gate_bias_tensor,
+    &output_gate_bias_tensor, nullptr, nullptr, &output_state_tensor, &cell_state_tensor, nullptr,
+    nullptr, nullptr, nullptr, &output_tensor, &output_state_tensor, &cell_state_tensor,
+    &scratchpad_1, params);
+
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(output_state_tensor);
+  _memory_manager->allocate_memory(cell_state_tensor);
+  _memory_manager->allocate_memory(scratchpad_1);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{-0.02973187, 0.1229473,  0.20885126, -0.15358765,
+                                     -0.03716109, 0.12507336, 0.41193449, -0.20860538,
+                                     -0.15053082, 0.09120187, 0.24278517, -0.12222792};
+
+  std::vector<int32_t> ref_output_shape{n_batch, sequence_length, n_output};
+  const float tolerance = 1e-5;
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(UnidirectionalSequenceLSTMTest, FloatTest_simple)
+{
+  const int32_t n_batch = 1;
+  const int32_t n_input = 1;
+  const int32_t n_cell = 1;
+  const int32_t n_output = 1;
+  const int32_t sequence_length = 1;
+
+  std::vector<float> input_to_input_weights = {0.329067};
+  std::vector<float> input_to_forget_weights = {0.308059};
+  std::vector<float> input_to_cell_weights = {0.152916};
+  std::vector<float> input_to_output_weights = {-0.476033};
+
+  std::vector<float> input_gate_bias = {0.};
+  std::vector<float> forget_gate_bias = {1.};
+  std::vector<float> cell_gate_bias = {0.};
+  std::vector<float> output_gate_bias = {0.};
+
+  std::vector<float> recurrent_to_input_weights = {0.207806};
+  std::vector<float> recurrent_to_forget_weights = {0.028718};
+  std::vector<float> recurrent_to_cell_weights = {-0.182756};
+  std::vector<float> recurrent_to_output_weights = {-0.960517};
+
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Shape input_to_cell_weights_shape{n_cell, n_input};
+  Shape input_to_forget_weights_shape{n_cell, n_input};
+  Shape input_to_output_weights_shape{n_cell, n_input};
+
+  Shape input_gate_bias_shape{n_cell};
+  Shape forget_gate_bias_shape{n_cell};
+  Shape cell_gate_bias_shape{n_cell};
+  Shape output_gate_bias_shape{n_cell};
+
+  Shape recurrent_to_input_weights_shape{n_cell, n_output};
+  Shape recurrent_to_cell_weights_shape{n_cell, n_output};
+  Shape recurrent_to_forget_weights_shape{n_cell, n_output};
+  Shape recurrent_to_output_weights_shape{n_cell, n_output};
+
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+  Tensor input_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_cell_weights_shape, input_to_cell_weights, _memory_manager.get());
+  Tensor input_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_forget_weights_shape, input_to_forget_weights, _memory_manager.get());
+  Tensor input_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_output_weights_shape, input_to_output_weights, _memory_manager.get());
+
+  Tensor input_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_gate_bias_shape, input_gate_bias, _memory_manager.get());
+  Tensor forget_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    forget_gate_bias_shape, forget_gate_bias, _memory_manager.get());
+  Tensor cell_gate_bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(cell_gate_bias_shape, cell_gate_bias, _memory_manager.get());
+  Tensor output_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    output_gate_bias_shape, output_gate_bias, _memory_manager.get());
+
+  Tensor recurrent_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_input_weights_shape, recurrent_to_input_weights, _memory_manager.get());
+  Tensor recurrent_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_cell_weights_shape, recurrent_to_cell_weights, _memory_manager.get());
+  Tensor recurrent_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_forget_weights_shape, recurrent_to_forget_weights, _memory_manager.get());
+  Tensor recurrent_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_output_weights_shape, recurrent_to_output_weights, _memory_manager.get());
+
+  std::vector<float> input_data{0.03653763};
+  Shape input_shape{n_batch, sequence_length, n_input};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+  Shape output_state_shape{n_batch, n_output};
+  Tensor output_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  output_state_tensor.resize(output_state_shape);
+
+  Shape cell_state_shape{n_batch, n_cell};
+  Tensor cell_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  cell_state_tensor.resize(cell_state_shape);
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 10.0;
+  params.proj_clip = 0.0;
+  params.time_major = false;
+  params.asymmetric_quantize_inputs = false;
+
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_forget_weights_tensor,
+    &input_to_cell_weights_tensor, &input_to_output_weights_tensor,
+    &recurrent_to_input_weights_tensor, &recurrent_to_forget_weights_tensor,
+    &recurrent_to_cell_weights_tensor, &recurrent_to_output_weights_tensor, nullptr, nullptr,
+    nullptr, &input_gate_bias_tensor, &forget_gate_bias_tensor, &cell_gate_bias_tensor,
+    &output_gate_bias_tensor, nullptr, nullptr, &output_state_tensor, &cell_state_tensor, nullptr,
+    nullptr, nullptr, nullptr, &output_tensor, &output_state_tensor, &cell_state_tensor,
+    &scratchpad_1, params);
+
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(output_state_tensor);
+  _memory_manager->allocate_memory(cell_state_tensor);
+  _memory_manager->allocate_memory(scratchpad_1);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{0.00139296};
+  std::vector<int32_t> ref_output_shape{n_batch, sequence_length, n_output};
+  const float tolerance = 1e-5;
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(UnidirectionalSequenceLSTMTest, Unsupported_Type_Configure_NEG)
+{
+  const int32_t n_batch = 1;
+  const int32_t n_input = 2;
+  const int32_t n_cell = 4;
+  const int32_t n_output = 4;
+  const int32_t sequence_length = 3;
+
+  std::vector<int8_t> input_data{2, 3, 3, 4, 1, 1}; // int8 is not supported as of now
+  Shape input_shape{sequence_length, n_batch, n_input};
+  Tensor input_tensor =
+    makeInputTensor<DataType::S8>(input_shape, input_data, _memory_manager.get());
+
+  std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589,  -0.34550029,
+                                               0.04266912,  -0.15680569, -0.34856534, 0.43890524};
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 0.0;
+  params.proj_clip = 0.0;
+  params.time_major = true;
+  params.asymmetric_quantize_inputs = false;
+
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    nullptr, nullptr, nullptr, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, nullptr,
+    nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params);
+
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(UnidirectionalSequenceLSTMTest, Invalid_Input_Shape_NEG)
+{
+  const int32_t n_batch = 1;
+  const int32_t n_input = 2;
+  const int32_t n_cell = 4;
+  const int32_t n_output = 4;
+  const int32_t sequence_length = 3;
+
+  std::vector<float> input_data{2., 3., 3., 4., 1., 1.};
+  Shape input_shape{sequence_length, n_input}; // wrong: rank-2, but a 3D [sequence, batch, input] shape is required
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+  std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589,  -0.34550029,
+                                               0.04266912,  -0.15680569, -0.34856534, 0.43890524};
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 0.0;
+  params.proj_clip = 0.0;
+  params.time_major = true;
+  params.asymmetric_quantize_inputs = false;
+
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    nullptr, nullptr, nullptr, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, nullptr,
+    nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params);
+
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(UnidirectionalSequenceLSTMTest, Invalid_Input_Shape_2_NEG)
+{
+  const int32_t n_batch = 1;
+  const int32_t n_input = 2;
+  const int32_t n_cell = 4;
+  const int32_t n_output = 4;
+  const int32_t sequence_length = 3;
+
+  std::vector<float> input_data{2., 3., 3., 4., 1., 1.};
+  Shape input_shape{sequence_length, n_batch, n_input};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+  std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589,  -0.34550029,
+                                               0.04266912,  -0.15680569, -0.34856534, 0.43890524};
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 0.0;
+  params.proj_clip = 0.0;
+  params.time_major = true;
+  params.asymmetric_quantize_inputs = false;
+
+  // NOTE provide wrongly shaped inputs: the {n_cell, n_input} weight tensor is
+  // reused for the recurrent weights, biases and states, so configure() must fail
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    nullptr, nullptr, nullptr, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, nullptr,
+    nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params);
+
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
index 5d8e5db..a04dbcc 100644 (file)
@@ -27,6 +27,27 @@ namespace luci_interpreter
 namespace kernels
 {
 
+TfLiteFusedActivation getTfLiteActivation(Activation activation)
+{
+  switch (activation)
+  {
+    case luci::FusedActFunc::RELU:
+      return kTfLiteActRelu;
+    case luci::FusedActFunc::RELU6:
+      return kTfLiteActRelu6;
+    case luci::FusedActFunc::RELU_N1_TO_1:
+      return kTfLiteActReluN1To1;
+    case luci::FusedActFunc::TANH:
+      return kTfLiteActTanh;
+    case luci::FusedActFunc::SIGN_BIT:
+      return kTfLiteActSignBit;
+    case luci::FusedActFunc::NONE:
+      return kTfLiteActNone;
+    default:
+      throw std::runtime_error("Unsupported activation type");
+  }
+}
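+// getTfLiteActivation is used, for example, by UnidirectionalSequenceLSTM::evalFloat
+// above to translate params().activation into TfLiteLSTMParams::activation.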
+
 template <typename T>
 void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
 {
index ebeb20e..e975585 100644 (file)
@@ -21,6 +21,7 @@
 #include "core/KernelParams.h"
 #include "luci_interpreter/core/Tensor.h"
 
+#include <tensorflow/lite/kernels/internal/tensor_utils.h>
 #include <tensorflow/lite/kernels/internal/types.h>
 
 #include <cassert>
@@ -76,6 +77,8 @@ inline int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2
   return ((d0 * shape.dim(1) + d1) * shape.dim(2) + d2) * shape.dim(3) + d3;
 }
 
+TfLiteFusedActivation getTfLiteActivation(Activation activation);
+
 template <typename T>
 void calculateActivationRange(Activation activation, T *activation_min, T *activation_max);
 
index 4020709..ba99a57 100644 (file)
@@ -187,7 +187,11 @@ void GraphLoader::loadTensors()
     const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i));
 
     if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node))
-      throw std::runtime_error("Unsupported Custom operator. " + node->name());
+    {
+      const auto *cnode = loco::must_cast<const luci::CircleCustom *>(node);
+      throw std::runtime_error("Unsupported Custom operator. " + cnode->custom_code() + " in " +
+                               node->name());
+    }
 
     if (!isTensorProducingNode(node))
       continue;
index 8483a9a..c1e2c63 100644 (file)
 #include "loader/KernelBuilder.h"
 #include "loader/nodes/Builders.h"
 
+#include <luci/IR/CircleOpcode.h>
+#include <luci/IR/CircleNodeDecl.h>
+
 #include <stdexcept>
 
+namespace
+{
+
+// TODO Extract this helper function
+const std::string toString(luci::CircleOpcode opcode)
+{
+  static const char *names[] = {
+#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) #CIRCLE_CLASS,
+#define CIRCLE_VNODE(OPCODE, CIRCLE_CLASS) #CIRCLE_CLASS,
+#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_NODE
+#undef CIRCLE_VNODE
+  };
+
+  auto const node_name = names[static_cast<int>(opcode)];
+
+  assert(std::string(node_name).substr(0, 6) == "Circle"); // FIX_ME_UNLESS
+
+  // Return substring of class name ("Circle" is sliced out)
+  // Ex: Return "Conv2D" for "CircleConv2D" node
+  return std::string(node_name).substr(6);
+}
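+// e.g. toString(node->opcode()) yields "Conv2D" for a CircleConv2D node; it is used
+// below to build a readable "Unsupported operator" message.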
+
+} // namespace
+
 namespace luci_interpreter
 {
 
@@ -97,7 +125,7 @@ std::unique_ptr<Kernel> KernelBuilder::build(const luci::CircleNode *node)
     return specific_builder(node, *this);
 
   std::string msg = "Unsupported operator: ";
-  msg += std::to_string(static_cast<uint32_t>(node->opcode())) + " " + std::string(node->name());
+  msg += toString(node->opcode()) + " in " + std::string(node->name());
   throw std::invalid_argument(msg.c_str());
 }
 
diff --git a/compiler/luci-interpreter/src/loader/nodes/Abs.cpp b/compiler/luci-interpreter/src/loader/nodes/Abs.cpp
new file mode 100644 (file)
index 0000000..3947111
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Abs.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleAbs(const luci::CircleNode *circle_node,
+                                               KernelBuilderHelper &helper)
+{
+  const auto *node = loco::must_cast<const luci::CircleAbs *>(circle_node);
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->x());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Abs>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ReduceProd.cpp b/compiler/luci-interpreter/src/loader/nodes/ReduceProd.cpp
new file mode 100644 (file)
index 0000000..1610e20
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ReduceProd.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleReduceProd(const luci::CircleNode *circle_node,
+                                                      KernelBuilderHelper &helper)
+{
+  const auto *node = loco::must_cast<const luci::CircleReduceProd *>(circle_node);
+  assert(node->arity() == 2);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *axes = helper.getInputTensor(node->reduction_indices());
+  Tensor *output = helper.getOutputTensor(node);
+
+  auto temp_index_unique =
+    std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+  temp_index_unique->set_observable(false);
+  temp_index_unique->set_data_buffer(nullptr);
+  Tensor *temp_index =
+    helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique));
+
+  auto resolved_axes_unique =
+    std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+  resolved_axes_unique->set_observable(false);
+  resolved_axes_unique->set_data_buffer(nullptr);
+  Tensor *resolved_axes =
+    helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique));
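+  // temp_index and resolved_axes are internal scratch tensors for the reduction;
+  // they start with an empty shape, are expected to be resized by the kernel, and
+  // are not observable as graph outputs.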
+
+  ReducerParams params{};
+  params.keep_dims = node->keep_dims();
+
+  return std::make_unique<kernels::ReduceProd>(input, axes, output, temp_index, resolved_axes,
+                                               params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/UnidirectionalSequenceLSTM.cpp b/compiler/luci-interpreter/src/loader/nodes/UnidirectionalSequenceLSTM.cpp
new file mode 100644 (file)
index 0000000..f4cf0b8
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/UnidirectionalSequenceLSTM.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel>
+build_kernel_CircleUnidirectionalSequenceLSTM(const luci::CircleNode *circle_node,
+                                              KernelBuilderHelper &helper)
+{
+  const auto *node = loco::must_cast<const luci::CircleUnidirectionalSequenceLSTM *>(circle_node);
+  assert(node->arity() == 24);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *input_to_input_weights =
+    helper.getOptionalInputTensor(node->input_to_input_weights());
+  const Tensor *input_to_cell_weights = helper.getInputTensor(node->input_to_cell_weights());
+  const Tensor *input_to_forget_weights = helper.getInputTensor(node->input_to_forget_weights());
+  const Tensor *input_to_output_weights = helper.getInputTensor(node->input_to_output_weights());
+  const Tensor *recurrent_to_input_weights =
+    helper.getOptionalInputTensor(node->recurrent_to_input_weights());
+  const Tensor *recurrent_to_cell_weights =
+    helper.getInputTensor(node->recurrent_to_cell_weights());
+  const Tensor *recurrent_to_forget_weights =
+    helper.getInputTensor(node->recurrent_to_forget_weights());
+  const Tensor *recurrent_to_output_weights =
+    helper.getInputTensor(node->recurrent_to_output_weights());
+  const Tensor *cell_to_input_weights =
+    helper.getOptionalInputTensor(node->cell_to_input_weights());
+  const Tensor *cell_to_forget_weights =
+    helper.getOptionalInputTensor(node->cell_to_forget_weights());
+  const Tensor *cell_to_output_weights =
+    helper.getOptionalInputTensor(node->cell_to_output_weights());
+  const Tensor *input_gate_bias = helper.getOptionalInputTensor(node->input_gate_bias());
+  const Tensor *forget_gate_bias = helper.getInputTensor(node->forget_gate_bias());
+  const Tensor *cell_gate_bias = helper.getInputTensor(node->cell_gate_bias());
+  const Tensor *output_gate_bias = helper.getInputTensor(node->output_gate_bias());
+  const Tensor *projection_weights = helper.getOptionalInputTensor(node->projection_weights());
+  const Tensor *projection_bias = helper.getOptionalInputTensor(node->projection_bias());
+  const Tensor *output_state = helper.getInputTensor(node->output_state());
+  const Tensor *cell_state = helper.getInputTensor(node->cell_state());
+  const Tensor *input_layer_norm_coefficients =
+    helper.getOptionalInputTensor(node->input_layer_norm_coefficients());
+  const Tensor *forget_layer_norm_coefficients =
+    helper.getOptionalInputTensor(node->forget_layer_norm_coefficients());
+  const Tensor *cell_layer_norm_coefficients =
+    helper.getOptionalInputTensor(node->cell_layer_norm_coefficients());
+  const Tensor *output_layer_norm_coefficients =
+    helper.getOptionalInputTensor(node->output_layer_norm_coefficients());
+  Tensor *output = helper.getOutputTensor(node);
+
+  // scratch pad tensors
+  // NOTE more scratch pads would be needed to support hybrid or integer kernels
+  auto sp_output_state =
+    std::make_unique<Tensor>(output_state->element_type(), Shape({}), AffineQuantization{}, "");
+  sp_output_state->set_observable(false);
+  sp_output_state->set_data_buffer(nullptr);
+  Tensor *tmp_1 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(sp_output_state));
+
+  auto sp_cell_state =
+    std::make_unique<Tensor>(cell_state->element_type(), Shape({}), AffineQuantization{}, "");
+  sp_cell_state->set_observable(false);
+  sp_cell_state->set_data_buffer(nullptr);
+  Tensor *tmp_2 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(sp_cell_state));
+
+  auto sp_3 = std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, "");
+  sp_3->set_observable(false);
+  sp_3->set_data_buffer(nullptr);
+  Tensor *tmp_3 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(sp_3));
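+  // These three scratch pads become the kernel's extra output tensors [1], [2] and [3]:
+  // working copies of output_state and cell_state plus the gate scratch buffer,
+  // all of which are resized in UnidirectionalSequenceLSTM::configure().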
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = node->fusedActivationFunction();
+  params.cell_clip = node->cell_clip();
+  params.proj_clip = node->proj_clip();
+  params.time_major = node->time_major();
+  params.asymmetric_quantize_inputs = node->asymmetric_quantize_inputs();
+
+  return std::make_unique<kernels::UnidirectionalSequenceLSTM>(
+    input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights,
+    input_to_output_weights, recurrent_to_input_weights, recurrent_to_forget_weights,
+    recurrent_to_cell_weights, recurrent_to_output_weights, cell_to_input_weights,
+    cell_to_forget_weights, cell_to_output_weights, input_gate_bias, forget_gate_bias,
+    cell_gate_bias, output_gate_bias, projection_weights, projection_bias, output_state, cell_state,
+    input_layer_norm_coefficients, forget_layer_norm_coefficients, cell_layer_norm_coefficients,
+    output_layer_norm_coefficients, output, tmp_1, tmp_2, tmp_3, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/CMakeLists.txt b/compiler/luci-micro/CMakeLists.txt
deleted file mode 100644 (file)
index 642cf14..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-set(ARM_C_COMPILER "arm-none-eabi-gcc")
-set(ARM_ASM_COMPILER "arm-none-eabi-gcc")
-set(ARM_CXX_COMPILER "arm-none-eabi-g++")
-set(ARM_OBJCOPY "arm-none-eabi-objcopy")
-
-find_program(ARM_C_COMPILER_PATH ${ARM_C_COMPILER})
-
-if(NOT ARM_C_COMPILER_PATH)
-  message(STATUS "Build luci-micro: FALSE(ARM compiler is NOT FOUND)")
-  return()
-endif()
-
-set(CMAKE_ARM_OPTIONS
-  -DLUCI_INTERPRETER_STATIC=ON
-  -DLUCI_STATIC=ON
-  -DBUILD_CMSIS_NN_FUNCTIONS=ON
-  -DTARGET_CPU=cortex-m7
-  "-DCMAKE_TOOLCHAIN_FILE=${NNAS_PROJECT_SOURCE_DIR}/infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake"
-  "-DLUCI_INTERPRETER_PAL_DIR=${CMAKE_CURRENT_SOURCE_DIR}/../luci-interpreter/pal/mcu"
-  "-DNNAS_PROJECT_SOURCE_DIR=${NNAS_PROJECT_SOURCE_DIR}"
-  "-DNNAS_EXTERNALS_DIR=${NNAS_EXTERNALS_DIR}"
-  -DC_COMPILER=${ARM_C_COMPILER}
-  -DCXX_COMPILER=${ARM_CXX_COMPILER}
-  -DASM_COMPILER=${ARM_ASM_COMPILER}
-  -DOBJCOPY=${ARM_OBJCOPY}
-  -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-  -DENABLE_TEST=OFF
-  -DBUILD_GTEST=OFF
-  "-DNNAS_ROOT=${NNAS_PROJECT_SOURCE_DIR}"
-  -DENABLE_STRICT_BUILD=OFF
-)
-
-set(MICRO_ARM_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/standalone_arm")
-file(MAKE_DIRECTORY "${MICRO_ARM_BUILD_DIR}")
-
-set(MICRO_ARM_BUILD_DEPENDENCY "${MICRO_ARM_BUILD_DIR}/CMakeCache.txt")
-
-add_custom_command(
-  OUTPUT "${MICRO_ARM_BUILD_DEPENDENCY}"
-  COMMAND "${CMAKE_COMMAND}" "${CMAKE_CURRENT_SOURCE_DIR}/standalone" ${CMAKE_ARM_OPTIONS}
-  WORKING_DIRECTORY "${MICRO_ARM_BUILD_DIR}"
-  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/standalone/CMakeLists.txt"
-  VERBATIM
-)
-
-add_custom_target(luci_interpreter_micro_arm_cmake DEPENDS "${MICRO_ARM_BUILD_DEPENDENCY}")
-
-set(MICRO_ARM_BINARY "${MICRO_ARM_BUILD_DIR}/compiler/luci-interpreter/src/libluci_interpreter.a")
-
-add_custom_command(
-  OUTPUT "${MICRO_ARM_BINARY}"
-  COMMAND "${CMAKE_MAKE_PROGRAM}" luci_interpreter -j ${CPU_COUNT}
-  WORKING_DIRECTORY "${MICRO_ARM_BUILD_DIR}"
-  DEPENDS luci_interpreter_micro_arm_cmake
-  VERBATIM
-)
-
-add_custom_target(luci_interpreter_micro_arm DEPENDS "${MICRO_ARM_BINARY}")
diff --git a/compiler/luci-micro/luci-interpreter/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/CMakeLists.txt
deleted file mode 100644 (file)
index 1f7acee..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-set(LUCI_INTERPRETER_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")
-set(LUCI_INTERPRETER_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src")
-if (NOT LUCI_INTERPRETER_PAL_DIR)
-    set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pal/linux")
-endif()
-
-set(KERNEL_REGISTER_FILE ${LUCI_INTERPRETER_PAL_DIR}/KernelsToBuild.lst)
-
-if (NOT DEFINED CUSTOM_LUCI_INTERPRETER_SUFFIX)
-    set(LUCI_INTERPRETER_SUFFIX "")
-else()
-    set(LUCI_INTERPRETER_SUFFIX ${CUSTOM_LUCI_INTERPRETER_SUFFIX})
-endif()
-
-add_subdirectory(src)
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h
deleted file mode 100644 (file)
index 205baa6..0000000
+++ /dev/null
@@ -1,144 +0,0 @@
-/* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci_interpreter/MemoryManager.h"
-
-#ifndef LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
-#define LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
-
-namespace luci_interpreter
-{
-
-class BuddyMemoryManager : public IMemoryManager
-{
-public:
-  BuddyMemoryManager(uint8_t *memory_start, int32_t memSize);
-
-  void allocate_memory(luci_interpreter::Tensor &tensor) final;
-  void release_memory(luci_interpreter::Tensor &tensor) final;
-
-private:
-  struct Block
-  {
-    Block *next_free;
-    bool is_free;
-    uint32_t size;
-    // debug field
-    Block *self;
-  };
-
-  Block *_start_block;
-  int32_t _num_blocks;
-  uint32_t _size;
-  Block *_free_blocks[32]{};
-
-  static int32_t lowerLog2(uint32_t val)
-  {
-    int32_t i = 0;
-    while (val >>= 1)
-      i++;
-
-    return i;
-  }
-
-  void addToBlocks(Block *block, int32_t l)
-  {
-    if (!block)
-      return;
-
-    block->next_free = _free_blocks[l];
-    _free_blocks[l] = block;
-  }
-
-  void removeFromBlocks(const Block *block, int32_t l)
-  {
-    if (!block)
-      return;
-
-    Block *tmp = _free_blocks[l];
-
-    if (block == tmp)
-    {
-      _free_blocks[l] = block->next_free;
-      return;
-    }
-
-    while (tmp)
-    {
-      if (tmp->next_free == block)
-      {
-        tmp->next_free = block->next_free;
-        return;
-      }
-
-      tmp = tmp->next_free;
-    }
-  }
-
-  void divideBlock(Block *block, int32_t l)
-  {
-    int32_t size = ((block->size + sizeof(Block)) / 2) - sizeof(Block);
-
-    removeFromBlocks(block, l);
-
-    // there is no need to add to the free_blocks list here
-    block->is_free = true;
-    block->size = size;
-    block->self = block;
-
-    Block *buddy;
-    buddy = (Block *)((uint8_t *)block + sizeof(Block) + size);
-    buddy->is_free = true;
-    buddy->size = size;
-    buddy->self = buddy;
-
-    addToBlocks(buddy, l - 1);
-  }
-
-  Block *mergeBlock(Block *block)
-  {
-    Block *buddy;
-
-    const int32_t l = lowerLog2(block->size + sizeof(Block));
-
-    const int64_t address = ((uint8_t *)block - (uint8_t *)_start_block);
-    buddy = (Block *)((address ^ (1 << l)) + (uint8_t *)_start_block);
-
-    if (!buddy->is_free || buddy->size != block->size)
-      return nullptr;
-
-    if (block > buddy)
-    {
-      Block *x = block;
-      block = buddy;
-      buddy = x;
-    }
-
-    removeFromBlocks(block, l);
-    removeFromBlocks(buddy, l);
-
-    block->size = block->size * 2 + sizeof(Block);
-    block->is_free = true;
-    block->self = block;
-
-    addToBlocks(block, l + 1);
-
-    return block;
-  }
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h
deleted file mode 100644 (file)
index 375b1ae..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
-#define __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
-
-#include <luci/Import/GraphBuilderRegistry.h>
-
-namespace luci_interpreter
-{
-
-/**
- * @brief Creates and returns a GraphBuilderSource that avoids copying constant buffers from the
- * model's file.
- *
- * @warning Use this source only when the model's buffer outlives the Interpreter.
- */
-std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying();
-
-} // namespace luci_interpreter
-
-#endif // __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
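
A usage sketch for the helper above. The luci::Importer constructor taking a GraphBuilderSource and the importModule() signature are assumptions about the luci import API and are not part of this header; as the @warning says, 'model_data' must stay alive for as long as the imported module is interpreted:

    #include <luci_interpreter/GraphBuilderRegistry.h>

    #include <luci/Importer.h>

    #include <memory>

    // Assumed flow: import the circle model without copying its constant buffers.
    std::unique_ptr<luci::Module> import_without_copies(const circle::Model *model_data)
    {
      auto source = luci_interpreter::source_without_constant_copying();
      luci::Importer importer(source.get());
      return importer.importModule(model_data);
    }
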
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h
deleted file mode 100644 (file)
index 8e2f457..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_INTERPRETER_H
-#define LUCI_INTERPRETER_INTERPRETER_H
-
-#include "luci_interpreter/core/Tensor.h"
-
-#include <luci/IR/Nodes/CircleInput.h>
-#include <luci/IR/Nodes/CircleOutput.h>
-
-#include "luci_interpreter/MemoryManager.h"
-#include <luci/IR/Module.h>
-
-#include <memory>
-#include <vector>
-#include <unordered_map>
-
-namespace luci_interpreter
-{
-
-class ExecutionObserver
-{
-public:
-  virtual ~ExecutionObserver();
-
-  // Called when the value of a tensor has been updated during execution.
-  virtual void postTensorWrite(const luci::CircleNode *node, const Tensor *tensor);
-
-  // Called before / after executing an operator.
-  // Note that these methods are not called for auxiliary operators (CircleInput, CircleOutput,
-  // CircleConst and Circle*Out).
-  virtual void preOperatorExecute(const luci::CircleNode *node);
-  virtual void postOperatorExecute(const luci::CircleNode *node);
-};
-
-class Interpreter
-{
-public:
-  explicit Interpreter(const luci::Module *module);
-
-  explicit Interpreter(const luci::Module *module, IMemoryManager *memory_manager);
-
-  ~Interpreter();
-
-  void writeInputTensor(const luci::CircleInput *input_node, const void *data, size_t data_size);
-
-  void readOutputTensor(const luci::CircleOutput *output_node, void *data, size_t data_size);
-
-  void interpret();
-
-  void attachObserver(ExecutionObserver *observer);
-
-  const Tensor *getTensor(const loco::Node *node) { return _node_to_tensor[node]; }
-
-private:
-  // _default_memory_manager must be declared before _runtime_module: members are destroyed in
-  // reverse declaration order, so the memory manager outlives the runtime module it serves.
-  std::unique_ptr<IMemoryManager> _default_memory_manager = nullptr;
-  std::unique_ptr<class RuntimeModule> _runtime_module;
-
-  // Observer functionality support.
-  std::unique_ptr<struct RuntimeToIR> _runtime_to_ir;
-  std::unordered_map<const loco::Node *, Tensor *> _node_to_tensor;
-  std::unique_ptr<class EventNotifier> _event_notifier;
-  std::vector<ExecutionObserver *> _observers;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_INTERPRETER_H
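
A minimal end-to-end sketch of the interpreter API declared above. Obtaining the CircleInput/CircleOutput nodes from the module and knowing the output element count are left to the caller; the value 10 below is purely illustrative:

    #include <luci_interpreter/Interpreter.h>
    #include <luci_interpreter/SimpleMemoryManager.h>

    #include <vector>

    std::vector<float> run_once(const luci::Module *module, const luci::CircleInput *input_node,
                                const luci::CircleOutput *output_node, const std::vector<float> &input)
    {
      luci_interpreter::SimpleMemoryManager mm;
      luci_interpreter::Interpreter interpreter(module, &mm);

      interpreter.writeInputTensor(input_node, input.data(), input.size() * sizeof(float));
      interpreter.interpret();

      std::vector<float> output(10); // illustrative output size; in practice taken from the model
      interpreter.readOutputTensor(output_node, output.data(), output.size() * sizeof(float));
      return output;
    }
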
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h
deleted file mode 100644 (file)
index f32c520..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_MEMORY_MANAGER_H
-#define LUCI_INTERPRETER_MEMORY_MANAGER_H
-
-#include "luci_interpreter/core/DataType.h"
-#include "luci_interpreter/core/Tensor.h"
-
-namespace luci_interpreter
-{
-
-class IMemoryManager
-{
-public:
-  virtual void allocate_memory(luci_interpreter::Tensor &tensor) = 0;
-  virtual void release_memory(luci_interpreter::Tensor &tensor) = 0;
-
-  virtual ~IMemoryManager() = default;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_MEMORY_MANAGER_H
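
The interface above is the whole contract a memory manager has to satisfy. A minimal heap-backed sketch (the class name is illustrative; the Tensor and DataType accessors it relies on are declared in core/Tensor.h and core/DataType.h later in this diff):

    #include <luci_interpreter/MemoryManager.h>

    #include <cstddef>
    #include <cstdint>

    // Illustrative manager that backs every tensor with a plain new[]/delete[] buffer.
    class HeapMemoryManager : public luci_interpreter::IMemoryManager
    {
    public:
      void allocate_memory(luci_interpreter::Tensor &tensor) final
      {
        const size_t bytes =
          tensor.shape().num_elements() * luci_interpreter::getDataTypeSize(tensor.element_type());
        tensor.set_data_buffer(new uint8_t[bytes]);
      }

      void release_memory(luci_interpreter::Tensor &tensor) final
      {
        delete[] tensor.data<uint8_t>();
        tensor.set_data_buffer(nullptr);
      }
    };
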
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h
deleted file mode 100644 (file)
index 658a1c6..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
-#define LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
-
-#include "luci_interpreter/MemoryManager.h"
-
-namespace luci_interpreter
-{
-
-class SimpleMemoryManager : public IMemoryManager
-{
-public:
-  void allocate_memory(luci_interpreter::Tensor &tensor) final;
-  void release_memory(luci_interpreter::Tensor &tensor) final;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h
deleted file mode 100644 (file)
index ded7bde..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
-#define LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
-
-#include "luci_interpreter/MemoryManager.h"
-
-namespace luci_interpreter
-{
-
-// Allocates tensors inside a static buffer, using offsets defined in the luci model.
-class StaticMemoryManager : public IMemoryManager
-{
-public:
-  StaticMemoryManager() = delete;
-
-  explicit StaticMemoryManager(uint8_t *buffer_ptr) : _buffer_ptr(buffer_ptr)
-  { /* Do nothing */
-  }
-
-  void allocate_memory(luci_interpreter::Tensor &tensor) final;
-  void release_memory(luci_interpreter::Tensor &tensor) final;
-
-private:
-  // Stores a pointer to the beginning of the allocated memory buffer.
-  uint8_t *_buffer_ptr;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
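
A sketch of how the offset-based scheme above is typically used. Interpreting Tensor::get_offset() (declared in core/Tensor.h below) as a byte offset into the caller's buffer is an assumption; the actual behaviour lives in the deleted StaticMemoryManager.cpp, which is not shown in this hunk:

    #include <luci_interpreter/core/Tensor.h>

    #include <cassert>
    #include <cstdint>

    // Hypothetical helper mirroring what a static manager does: place the tensor at its
    // pre-planned offset inside a caller-provided buffer instead of heap-allocating it.
    void place_at_planned_offset(uint8_t *buffer_ptr, luci_interpreter::Tensor &tensor)
    {
      assert(tensor.get_offset() >= 0); // offsets are pre-computed, e.g. by an execution planner
      tensor.set_data_buffer(buffer_ptr + tensor.get_offset());
    }
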
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h
deleted file mode 100644 (file)
index 397bbed..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
-#define LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
-
-#include "luci_interpreter/MemoryManager.h"
-
-namespace luci_interpreter
-{
-// Memory manager for use in kernel tests. It eliminates the need to manually delete the
-// allocated memory in tests: this manager remembers all of its allocations and frees them
-// in its destructor.
-class TestMemoryManager : public IMemoryManager
-{
-public:
-  void allocate_memory(luci_interpreter::Tensor &tensor) final;
-  void release_memory(luci_interpreter::Tensor &tensor) final;
-
-  ~TestMemoryManager() override
-  {
-    for (auto allocation : allocations)
-    {
-      delete[] allocation;
-    }
-  }
-
-private:
-  std::vector<uint8_t *> allocations;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
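
A sketch of the intended use in a kernel test; the tensor shape, values, and name are illustrative:

    #include <luci_interpreter/TestMemoryManager.h>
    #include <luci_interpreter/core/Tensor.h>

    void example_kernel_test_setup()
    {
      luci_interpreter::TestMemoryManager mm;

      luci_interpreter::Tensor input(luci_interpreter::DataType::FLOAT32,
                                     luci_interpreter::Shape({1, 4}),
                                     luci_interpreter::AffineQuantization{}, "input");
      mm.allocate_memory(input); // freed automatically when 'mm' goes out of scope

      const float data[4] = {1.0f, 2.0f, 3.0f, 4.0f};
      input.writeData(data, sizeof(data));
    }
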
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/DataType.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/DataType.h
deleted file mode 100644 (file)
index 27bf719..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_CORE_DATATYPE_H
-#define LUCI_INTERPRETER_CORE_DATATYPE_H
-
-#include <loco/IR/DataType.h>
-#include <loco/IR/DataTypeTraits.h>
-
-#include <cstddef>
-
-namespace luci_interpreter
-{
-
-using DataType = loco::DataType;
-
-template <DataType DT> using DataTypeImpl = loco::DataTypeImpl<DT>;
-
-inline size_t getDataTypeSize(DataType data_type) { return loco::size(data_type); }
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_CORE_DATATYPE_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h
deleted file mode 100644 (file)
index bb9ff6d..0000000
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_CORE_TENSOR_H
-#define LUCI_INTERPRETER_CORE_TENSOR_H
-
-#include "luci_interpreter/core/DataType.h"
-
-#include <cassert>
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-namespace luci_interpreter
-{
-
-class Shape
-{
-public:
-  explicit Shape(int rank) : _dims(rank, 0) {}
-
-  Shape(std::initializer_list<int32_t> dims) : _dims(dims.begin(), dims.end()) {}
-
-  int num_dims() const { return _dims.size(); }
-
-  int32_t dim(int i) const
-  {
-    assert(i >= 0 && i < static_cast<int>(_dims.size()));
-    return _dims[i];
-  }
-
-  int32_t &dim(int i)
-  {
-    assert(i >= 0 && i < static_cast<int>(_dims.size()));
-    return _dims[i];
-  }
-
-  int32_t num_elements() const
-  {
-    int32_t result = 1;
-    for (const int32_t dim : _dims)
-    {
-      result *= dim;
-    }
-    return result;
-  }
-
-  bool operator==(const Shape &other) const { return _dims == other._dims; }
-
-  bool operator!=(const Shape &other) const { return !operator==(other); }
-
-private:
-  std::vector<int32_t> _dims;
-};
-
-// Tensor affine quantization parameters.
-//
-// The relationship between real and quantized values:
-//   real_value = (quantized_value - zero_point) * scale
-//
-// In per-tensor case, 'scale' and 'zero_point' are one element each.
-// In per-channel case, 'scale' and 'zero_point' are N elements each, where N is the size
-// of the quantized dimension.
-//
-// Note that due to historical and performance reasons, per-tensor quantization uses unsigned
-// integer types, while per-channel uses signed types assuming 'zero_point' == 0.
-struct AffineQuantization
-{
-  std::vector<float> scale;
-  std::vector<int32_t> zero_point;
-  int32_t quantized_dimension;
-};
-
-class Tensor
-{
-public:
-  Tensor(DataType element_type, Shape shape, AffineQuantization quantization, std::string name);
-
-  DataType element_type() const { return _element_type; }
-
-  const Shape &shape() const { return _shape; }
-
-  float scale() const
-  {
-    assert(_quantization.scale.size() == 1);
-    return _quantization.scale[0];
-  }
-
-  int32_t zero_point() const
-  {
-    assert(_quantization.zero_point.size() == 1);
-    return _quantization.zero_point[0];
-  }
-
-  const std::vector<float> &scales() const { return _quantization.scale; }
-
-  const std::vector<int32_t> &zero_points() const { return _quantization.zero_point; }
-
-  int32_t quantized_dimension() const { return _quantization.quantized_dimension; }
-
-  template <typename T> const T *data() const
-  {
-    static_assert(std::is_same<uint8_t, char>::value or
-                  std::is_same<uint8_t, unsigned char>::value);
-    return reinterpret_cast<const T *>(_data);
-  }
-
-  template <typename T> T *data()
-  {
-    static_assert(std::is_same<uint8_t, char>::value or
-                  std::is_same<uint8_t, unsigned char>::value);
-    return reinterpret_cast<T *>(_data);
-  }
-
-  const std::string &name() const { return _name; }
-
-  void readData(void *data_ptr, size_t data_size) const;
-
-  void writeData(const void *data_ptr, size_t data_size);
-
-  void resize(const Shape &new_shape);
-
-  void set_data_buffer(uint8_t *buffer)
-  {
-    if (buffer == nullptr)
-    {
-      _data_allocated = false;
-    }
-    else
-    {
-      _data_allocated = true;
-    }
-    _data = buffer;
-  }
-
-  bool is_observable() const { return _is_observable; }
-
-  void set_observable(bool value) { _is_observable = value; }
-
-  bool is_allocatable() const { return _is_allocatable; }
-
-  void set_allocatable(bool value) { _is_allocatable = value; }
-
-  bool is_data_allocated() const { return _data_allocated; }
-
-  int32_t get_offset() const { return _offset; }
-
-  void set_offset(int32_t offset) { _offset = offset; }
-
-private:
-  DataType _element_type;
-  Shape _shape;
-  AffineQuantization _quantization;
-  uint8_t *_data;
-  std::string _name;
-  bool _data_allocated;
-  // Writes to this tensor are reported to registered observers only if it is observable.
-  // This is needed for tensors used in kernel implementations but absent from the original model.
-  bool _is_observable = true;
-  // The memory manager is called for a tensor only if it is "allocatable".
-  // Kernel configuration may disable allocation of tensors that are not needed for a
-  // particular operation.
-  bool _is_allocatable = true;
-  // Used by static memory manager.
-  // Stores the offset from the beginning of the allocated memory buffer.
-  int32_t _offset = -1;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_CORE_TENSOR_H
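
To make the AffineQuantization comment in Tensor.h above concrete, a small worked sketch of the real-to-quantized mapping; the scale and zero point are illustrative:

    #include <cstdint>

    // real_value = (quantized_value - zero_point) * scale
    // e.g. with scale = 0.5f and zero_point = 10, quantized 14 maps to (14 - 10) * 0.5f = 2.0f
    float dequantize(uint8_t quantized_value, float scale, int32_t zero_point)
    {
      return static_cast<float>(static_cast<int32_t>(quantized_value) - zero_point) * scale;
    }
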
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
deleted file mode 100644 (file)
index f0df58d..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-REGISTER_KERNEL(Add)
-REGISTER_KERNEL(ArgMax)
-REGISTER_KERNEL(AveragePool2D)
-REGISTER_KERNEL(BatchToSpaceND)
-REGISTER_KERNEL(Cast)
-REGISTER_KERNEL(Concatenation)
-REGISTER_KERNEL(Conv2D)
-REGISTER_KERNEL(DepthToSpace)
-REGISTER_KERNEL(DepthwiseConv2D)
-REGISTER_KERNEL(Dequantize)
-REGISTER_KERNEL(Div)
-REGISTER_KERNEL(Elu)
-REGISTER_KERNEL(Exp)
-REGISTER_KERNEL(ExpandDims)
-REGISTER_KERNEL(Fill)
-REGISTER_KERNEL(Floor)
-REGISTER_KERNEL(FloorDiv)
-REGISTER_KERNEL(Equal)
-REGISTER_KERNEL(FullyConnected)
-REGISTER_KERNEL(Greater)
-REGISTER_KERNEL(GreaterEqual)
-REGISTER_KERNEL(If)
-REGISTER_KERNEL(InstanceNorm)
-REGISTER_KERNEL(L2Normalize)
-REGISTER_KERNEL(L2Pool2D)
-REGISTER_KERNEL(LeakyRelu)
-REGISTER_KERNEL(Less)
-REGISTER_KERNEL(LessEqual)
-REGISTER_KERNEL(LogicalAnd)
-REGISTER_KERNEL(LogicalNot)
-REGISTER_KERNEL(LogicalOr)
-REGISTER_KERNEL(Logistic)
-REGISTER_KERNEL(Maximum)
-REGISTER_KERNEL(MaxPool2D)
-REGISTER_KERNEL(Minimum)
-REGISTER_KERNEL(MirrorPad)
-REGISTER_KERNEL(Mul)
-REGISTER_KERNEL(Neg)
-REGISTER_KERNEL(NotEqual)
-REGISTER_KERNEL(Pad)
-REGISTER_KERNEL(PadV2)
-REGISTER_KERNEL(PRelu)
-REGISTER_KERNEL(Quantize)
-REGISTER_KERNEL(Reshape)
-REGISTER_KERNEL(ResizeBilinear)
-REGISTER_KERNEL(ResizeNearestNeighbor)
-REGISTER_KERNEL(Rsqrt)
-REGISTER_KERNEL(Shape)
-REGISTER_KERNEL(Softmax)
-REGISTER_KERNEL(SpaceToBatchND)
-REGISTER_KERNEL(SpaceToDepth)
-REGISTER_KERNEL(StridedSlice)
-REGISTER_KERNEL(Sqrt)
-REGISTER_KERNEL(Square)
-REGISTER_KERNEL(SquaredDifference)
-REGISTER_KERNEL(Squeeze)
-REGISTER_KERNEL(Sub)
-REGISTER_KERNEL(SVDF)
-REGISTER_KERNEL(Tanh)
-REGISTER_KERNEL(Transpose)
-REGISTER_KERNEL(TransposeConv)
-REGISTER_KERNEL(While)
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h
deleted file mode 100644 (file)
index 4dd77ff..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
-#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
-
-#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
-
-namespace luci_interpreter_pal
-{
-template <typename T>
-static inline void
-BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
-               const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
-               const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
-               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
-{
-  tflite::reference_ops::BatchToSpaceND(
-    unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
-    unextended_input3_shape, crops_data, unextended_output_shape, output_data);
-}
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h
deleted file mode 100644 (file)
index cfb84ea..0000000
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
-#define LUCI_INTERPRETER_PAL_CONV2D_H
-
-#include <tensorflow/lite/kernels/internal/reference/conv.h>
-#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
-#include <arm_nn_types.h>
-#include <arm_nnfunctions.h>
-
-namespace luci_interpreter_pal
-{
-static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
-                        const float *input_data, const tflite::RuntimeShape &filter_shape,
-                        const float *filter_data, const tflite::RuntimeShape &bias_shape,
-                        const float *bias_data, const tflite::RuntimeShape &output_shape,
-                        float *output_data, const tflite::RuntimeShape &scratchpad_shape,
-                        float *scratchpad_data)
-{
-  (void)scratchpad_shape;
-  (void)scratchpad_data;
-  tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
-                              bias_shape, bias_data, output_shape, output_data,
-                              tflite::RuntimeShape(), nullptr);
-}
-
-static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
-                        const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
-                        const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
-                        const int32 *bias_data, const tflite::RuntimeShape &output_shape,
-                        uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
-                        uint8 *scratchpad_data)
-{
-  (void)scratchpad_shape;
-  (void)scratchpad_data;
-  tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
-                              bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
-                              scratchpad_data, nullptr);
-}
-
-static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
-                                  const int32_t *shifts, const tflite::RuntimeShape &input_shape,
-                                  const int8 *input_data, const tflite::RuntimeShape &filter_shape,
-                                  const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
-                                  const int32 *bias_data, const tflite::RuntimeShape &output_shape,
-                                  int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
-                                  int8 *scratchpad_data)
-{
-  if (scratchpad_data)
-  {
-    cmsis_nn_conv_params conv_params;
-    conv_params.dilation.h = params.dilation_height_factor;
-    conv_params.dilation.w = params.dilation_width_factor;
-
-    assert(conv_params.dilation.h == 1);
-    assert(conv_params.dilation.w == 1);
-
-    conv_params.input_offset = params.input_offset;
-    conv_params.output_offset = params.output_offset;
-    conv_params.stride.h = params.stride_height;
-    conv_params.stride.w = params.stride_width;
-    conv_params.padding.h = params.padding_values.height;
-    conv_params.padding.w = params.padding_values.width;
-    conv_params.activation.min = params.quantized_activation_min;
-    conv_params.activation.max = params.quantized_activation_max;
-
-    cmsis_nn_per_channel_quant_params quant_params;
-    quant_params.multiplier = const_cast<int32_t *>(mult);
-    quant_params.shift = const_cast<int32_t *>(shifts);
-
-    assert(conv_params.activation.min <= conv_params.activation.max);
-    assert(input_shape.DimensionsCount() == 4);
-    assert(filter_shape.DimensionsCount() == 4);
-    assert(output_shape.DimensionsCount() == 4);
-    const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
-    const int input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
-    const int output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
-    if (bias_data)
-    {
-      assert(bias_shape.FlatSize() == output_depth);
-    }
-
-    cmsis_nn_dims input_dims;
-    input_dims.n = batch_size;
-    input_dims.h = input_shape.Dims(1);
-    input_dims.w = input_shape.Dims(2);
-    input_dims.c = input_depth;
-
-    cmsis_nn_dims filter_dims;
-    filter_dims.n = output_depth;
-    filter_dims.h = filter_shape.Dims(1);
-    filter_dims.w = filter_shape.Dims(2);
-    filter_dims.c = input_depth;
-
-    cmsis_nn_dims bias_dims;
-    bias_dims.n = 1;
-    bias_dims.h = 1;
-    bias_dims.w = 1;
-    bias_dims.c = output_depth;
-
-    cmsis_nn_dims output_dims;
-    output_dims.n = batch_size;
-    output_dims.h = output_shape.Dims(1);
-    output_dims.w = output_shape.Dims(2);
-    output_dims.c = output_depth;
-
-    cmsis_nn_context ctx;
-    ctx.buf = scratchpad_data;
-    ctx.size = scratchpad_shape.Dims(0);
-
-    auto res = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data,
-                                       &filter_dims, filter_data, &bias_dims, bias_data,
-                                       &output_dims, output_data);
-    assert(res == ARM_MATH_SUCCESS);
-  }
-  else
-  {
-    tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
-                                                  filter_shape, filter_data, bias_shape, bias_data,
-                                                  output_shape, output_data);
-  }
-}
-
-static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
-                                         const luci_interpreter::DataType &input_data_type,
-                                         const tflite::ConvParams &params,
-                                         const tflite::RuntimeShape &input_shape,
-                                         const tflite::RuntimeShape &filter_shape,
-                                         const tflite::RuntimeShape &output_shape)
-{
-  cmsis_nn_conv_params conv_params;
-  conv_params.dilation.h = params.dilation_height_factor;
-  conv_params.dilation.w = params.dilation_width_factor;
-
-  if (input_data_type == loco::DataType::S8 && conv_params.dilation.h == 1 &&
-      conv_params.dilation.w == 1)
-  {
-    const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
-    const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
-    const int32_t output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
-    const int32_t filter_height = filter_shape.Dims(1);
-    const int32_t filter_width = filter_shape.Dims(2);
-    const int32_t output_height = output_shape.Dims(1);
-    const int32_t output_width = output_shape.Dims(2);
-
-    conv_params.input_offset = params.input_offset;
-    conv_params.output_offset = params.output_offset;
-    conv_params.stride.h = params.stride_height;
-    conv_params.stride.w = params.stride_width;
-    conv_params.padding.h = params.padding_values.height;
-    conv_params.padding.w = params.padding_values.width;
-
-    cmsis_nn_dims input_dims;
-    input_dims.n = batches;
-    input_dims.h = input_shape.Dims(1);
-    input_dims.w = input_shape.Dims(2);
-    input_dims.c = input_depth;
-
-    cmsis_nn_dims filter_dims;
-    filter_dims.n = output_depth;
-    filter_dims.h = filter_height;
-    filter_dims.w = filter_width;
-    filter_dims.c = input_depth;
-
-    cmsis_nn_dims output_dims;
-    output_dims.n = batches;
-    output_dims.h = output_height;
-    output_dims.w = output_width;
-    output_dims.c = output_depth;
-
-    const int32_t buf_size = arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims,
-                                                                     &filter_dims, &output_dims);
-
-    luci_interpreter::Shape scratchpad_shape{buf_size};
-    scratchpad->resize(scratchpad_shape);
-  }
-  else
-  {
-    scratchpad->set_allocatable(false);
-  }
-}
-
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h
deleted file mode 100644 (file)
index 120dcd8..0000000
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
-#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
-
-#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
-#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
-#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
-#include <arm_nnfunctions.h>
-
-namespace luci_interpreter_pal
-{
-template <typename T>
-static inline void
-DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
-                        const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
-                        const T *input_data, const tflite::RuntimeShape &filter_shape,
-                        const T *filter_data, const tflite::RuntimeShape &bias_shape,
-                        const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
-                        T *output_data, const tflite::RuntimeShape &scratchpad_shape,
-                        T *scratchpad_data)
-{
-  {
-    // MARK: At this moment this operation is not supported
-    assert(false && "DepthwiseConvPerChannel NYI");
-    (void)params;
-    (void)output_multiplier;
-    (void)output_shift;
-    (void)input_shape;
-    (void)output_data;
-    (void)input_data;
-    (void)filter_shape;
-    (void)filter_data;
-    (void)bias_shape;
-    (void)bias_data;
-    (void)output_shape;
-    (void)output_data;
-    (void)scratchpad_shape;
-    (void)scratchpad_data;
-  }
-}
-
-template <>
-inline void DepthwiseConvPerChannel<int8_t>(
-  const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
-  const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
-  const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
-  const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
-  const tflite::RuntimeShape &output_shape, int8_t *output_data,
-  const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
-{
-  if (scratchpad_data)
-  {
-    cmsis_nn_dw_conv_params dw_conv_params;
-    dw_conv_params.dilation.h = params.dilation_height_factor;
-    dw_conv_params.dilation.w = params.dilation_width_factor;
-    assert(dw_conv_params.dilation.h == 1);
-    assert(dw_conv_params.dilation.w == 1);
-
-    dw_conv_params.input_offset = params.input_offset;
-    dw_conv_params.output_offset = params.output_offset;
-    dw_conv_params.stride.h = params.stride_height;
-    dw_conv_params.stride.w = params.stride_width;
-    dw_conv_params.padding.h = params.padding_values.height;
-    dw_conv_params.padding.w = params.padding_values.width;
-
-    dw_conv_params.activation.min = params.quantized_activation_min;
-    dw_conv_params.activation.max = params.quantized_activation_max;
-    dw_conv_params.ch_mult = params.depth_multiplier;
-
-    cmsis_nn_per_channel_quant_params quant_params;
-    int32_t output_multiplier = params.output_multiplier;
-    int32_t output_shift = params.output_shift;
-
-    quant_params.multiplier = &output_multiplier;
-    quant_params.shift = &output_shift;
-
-    assert(dw_conv_params.activation.min <= dw_conv_params.activation.max);
-    const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
-    const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
-    if (bias_data)
-    {
-      assert(bias_shape.FlatSize() == output_depth);
-    }
-
-    cmsis_nn_dims input_dims;
-    input_dims.n = batch_size;
-    input_dims.h = input_shape.Dims(1);
-    input_dims.w = input_shape.Dims(2);
-    input_dims.c = input_shape.Dims(3);
-
-    cmsis_nn_dims filter_dims;
-    filter_dims.n = filter_shape.Dims(0);
-    filter_dims.h = filter_shape.Dims(1);
-    filter_dims.w = filter_shape.Dims(2);
-    filter_dims.c = output_depth;
-
-    cmsis_nn_dims bias_dims;
-    bias_dims.n = 1;
-    bias_dims.h = 1;
-    bias_dims.w = 1;
-    bias_dims.c = output_depth;
-
-    cmsis_nn_dims output_dims;
-    output_dims.n = batch_size;
-    output_dims.h = output_shape.Dims(1);
-    output_dims.w = output_shape.Dims(2);
-    output_dims.c = output_depth;
-
-    cmsis_nn_context ctx;
-    ctx.buf = scratchpad_data;
-    ctx.size = scratchpad_shape.Dims(0);
-
-    auto res = arm_depthwise_conv_wrapper_s8(&ctx, &dw_conv_params, &quant_params, &input_dims,
-                                             input_data, &filter_dims, filter_data, &bias_dims,
-                                             bias_data, &output_dims, output_data);
-    assert(res == ARM_MATH_SUCCESS);
-  }
-  else
-  {
-    tflite::reference_integer_ops::DepthwiseConvPerChannel(
-      params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
-      bias_shape, bias_data, output_shape, output_data);
-  }
-}
-
-static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
-                                         const tflite::DepthwiseParams &params,
-                                         const luci_interpreter::DataType &input_data_type,
-                                         const tflite::RuntimeShape &input_shape,
-                                         const tflite::RuntimeShape &filter_shape,
-                                         const tflite::RuntimeShape &output_shape)
-{
-  cmsis_nn_dw_conv_params dw_conv_params;
-  dw_conv_params.dilation.h = params.dilation_height_factor;
-  dw_conv_params.dilation.w = params.dilation_width_factor;
-
-  if (input_data_type == loco::DataType::S8 && dw_conv_params.dilation.h == 1 &&
-      dw_conv_params.dilation.w == 1)
-  {
-    const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
-    const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
-
-    cmsis_nn_dims input_dims;
-    input_dims.n = batch_size;
-    input_dims.h = input_shape.Dims(1);
-    input_dims.w = input_shape.Dims(2);
-    input_dims.c = input_shape.Dims(3);
-
-    cmsis_nn_dims filter_dims;
-    filter_dims.n = filter_shape.Dims(0);
-    filter_dims.h = filter_shape.Dims(1);
-    filter_dims.w = filter_shape.Dims(2);
-    filter_dims.c = output_depth;
-
-    cmsis_nn_dims output_dims;
-    output_dims.n = batch_size;
-    output_dims.h = output_shape.Dims(1);
-    output_dims.w = output_shape.Dims(2);
-    output_dims.c = output_depth;
-
-    const int32_t buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size(
-      &dw_conv_params, &input_dims, &filter_dims, &output_dims);
-
-    auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
-
-    luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size};
-    scratchpad->resize(scratchpad_shape);
-  }
-  else
-  {
-    scratchpad->set_allocatable(false);
-  }
-}
-
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h
deleted file mode 100644 (file)
index 15ff032..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
-#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
-
-#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
-
-namespace luci_interpreter_pal
-{
-
-template <typename T>
-static inline void Dequantize(tflite::DequantizationParams &params,
-                              const tflite::RuntimeShape &input_shape, const T *input_data,
-                              const tflite::RuntimeShape &output_shape, float *output_data)
-{
-  tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape,
-                                               output_data);
-}
-
-static inline void Dequantize(tflite::DequantizationParams &params,
-                              const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
-                              const tflite::RuntimeShape &output_shape, float *output_data)
-{
-  tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
-}
-
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h
deleted file mode 100644 (file)
index 6046789..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
-#define LUCI_INTERPRETER_PAL_QUANTIZE_H
-
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
-
-namespace luci_interpreter_pal
-{
-template <typename T>
-static inline void Quantize(tflite::QuantizationParams &params,
-                            const tflite::RuntimeShape &input_shape, const float *input_data,
-                            const tflite::RuntimeShape &output_shape, T *output_data)
-{
-  tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
-}
-
-template <typename Input, typename Output>
-static inline void Requantize(const Input *input_data, int32_t size,
-                              int32_t effective_scale_multiplier, int32_t effective_scale_shift,
-                              int32_t input_zero_point, int32_t output_zero_point,
-                              Output *output_data)
-{
-  tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier,
-                                    effective_scale_shift, input_zero_point, output_zero_point,
-                                    output_data);
-}
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h
deleted file mode 100644 (file)
index cc9f0fd..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
-#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
-
-#include <tensorflow/lite/kernels/internal/reference/resize_bilinear.h>
-
-namespace luci_interpreter_pal
-{
-template <typename T>
-static inline void
-ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
-               const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
-               const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
-               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
-{
-  tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
-                                        output_size_shape, output_size_data,
-                                        unextended_output_shape, output_data);
-}
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h
deleted file mode 100644 (file)
index f4d5a6e..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
-#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
-
-#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
-
-namespace luci_interpreter_pal
-{
-template <typename T>
-static inline void
-ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
-                      const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
-                      const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
-                      const tflite::RuntimeShape &unextended_output_shape, T *output_data)
-{
-  tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
-                                               output_size_shape, output_size_data,
-                                               unextended_output_shape, output_data);
-}
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h
deleted file mode 100644 (file)
index a4a5b2a..0000000
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_SVDF_H
-#define LUCI_INTERPRETER_PAL_SVDF_H
-
-#include <arm_nn_types.h>
-#include <arm_nnfunctions.h>
-
-namespace luci_interpreter_pal
-{
-static inline void
-IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
-            const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
-            const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
-            const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
-            const int32_t *bias_data, int16_t *activation_state_data,
-            const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
-            int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
-            int scale_2_b, int32_t input_zp, int32_t output_zp)
-{
-  const int32_t rank = params.rank;
-  const int32_t batch_size = input_shape.Dims(0);
-  const int32_t num_filters = weight_feature_shape.Dims(0);
-  const int32_t memory_size = weight_time_shape.Dims(1);
-
-  cmsis_nn_dims input_dims;
-  input_dims.n = input_shape.Dims(0);
-  input_dims.h = input_shape.Dims(1);
-
-  cmsis_nn_dims weights_feature_dims;
-  weights_feature_dims.n = weight_feature_shape.Dims(0);
-  weights_feature_dims.h = weight_feature_shape.Dims(1);
-
-  cmsis_nn_dims weights_time_dims;
-  weights_time_dims.n = weight_time_shape.Dims(0);
-  weights_time_dims.h = weight_time_shape.Dims(1);
-
-  cmsis_nn_dims bias_dims;
-  bias_dims.n = bias_shape.Dims(0);
-
-  cmsis_nn_dims state_dims;
-  state_dims.n = batch_size;
-  state_dims.h = memory_size * num_filters;
-
-  cmsis_nn_dims output_dims;
-  output_dims.n = output_shape.Dims(0);
-  output_dims.h = output_shape.Dims(1);
-
-  cmsis_nn_svdf_params svdf_params;
-  svdf_params.rank = params.rank;
-  svdf_params.input_offset = input_zp;
-  svdf_params.output_offset = output_zp;
-
-  svdf_params.input_activation.min = INT16_MIN;
-  svdf_params.input_activation.max = INT16_MAX;
-
-  svdf_params.output_activation.min = INT8_MIN;
-  svdf_params.output_activation.max = INT8_MAX;
-
-  cmsis_nn_per_tensor_quant_params in_quant_params;
-  in_quant_params.multiplier = scale_1_a;
-  in_quant_params.shift = scale_1_b;
-
-  cmsis_nn_per_tensor_quant_params out_quant_params;
-  out_quant_params.multiplier = scale_2_a;
-  out_quant_params.shift = scale_2_b;
-
-  cmsis_nn_context scratch_ctx;
-  scratch_ctx.buf = scratchpad_data;
-
-  cmsis_nn_context scratch_output_ctx;
-  scratch_output_ctx.buf = output_temp_data;
-
-  arm_svdf_s8(&scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params, &out_quant_params,
-              &input_dims, input_data, &state_dims, activation_state_data, &weights_feature_dims,
-              weight_feature_data, &weights_time_dims, weight_time_data, &bias_dims, bias_data,
-              &output_dims, output_data);
-}
-static inline void
-FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
-          const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
-          const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
-          const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
-          const float *bias_data, float *scratchpad_data, float *activation_state_data,
-          const tflite::RuntimeShape &output_shape, float *output_data)
-{
-  const int32_t rank = params.rank;
-  const int32_t batch_size = input_shape.Dims(0);
-  const int32_t input_size = input_shape.Dims(1);
-  const int32_t num_filters = weight_feature_shape.Dims(0);
-  const int32_t num_units = num_filters / rank;
-  const int32_t memory_size = weight_time_shape.Dims(1);
-
-  // Left shift the activation_state.
-  {
-    float *new_state_start = activation_state_data;
-    const float *old_state_start = activation_state_data + 1;
-    const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size;
-    while (old_state_start != old_state_end)
-    {
-      *new_state_start++ = *old_state_start++;
-    }
-  }
-
-  // Note: no need to clear the latest activation, matmul is not accumulative.
-
-  // Compute conv1d(inputs, weights_feature).
-  // The activation_state's rightmost column is used to save current cycle
-  // activation. This is achieved by starting at state_ptr[memory_size - 1] and
-  // having the stride equal to memory_size.
-
-  // Perform batched matrix vector multiply operation:
-  {
-    const float *matrix = weight_feature_data;
-    const float *vector = input_data;
-    float *result = &activation_state_data[memory_size - 1];
-    float *result_in_batch = result;
-    for (int i = 0; i < batch_size; ++i)
-    {
-      const float *matrix_ptr = matrix;
-      for (int j = 0; j < num_filters; ++j)
-      {
-        float dot_prod = 0.0f;
-        const float *vector_in_batch = vector + i * input_size;
-        for (int k = 0; k < input_size; ++k)
-        {
-          dot_prod += *matrix_ptr++ * *vector_in_batch++;
-        }
-        *result_in_batch = dot_prod;
-        result_in_batch += memory_size;
-      }
-    }
-  }
-
-  tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
-    batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
-    params.activation, activation_state_data, scratchpad_data, output_data);
-}
-
-static inline void SetupScratchpadTensor(
-  const luci_interpreter::DataType &input_data_type,
-  const luci_interpreter::DataType &weight_feature_data_type,
-  luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
-  luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
-  luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
-  const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
-  const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
-{
-  if (input_data_type == loco::DataType::FLOAT32 &&
-      (weight_feature_data_type == loco::DataType::S8 ||
-       weight_feature_data_type == loco::DataType::U8))
-  {
-    (void)input_shape;
-    (void)weight_time_shape;
-    (void)scratchpad_3;
-    (void)scratchpad_4;
-    (void)scratchpad_5;
-    (void)scratchpad_6;
-
-    throw std::runtime_error("Hybrid type is not supported for cmsisnn");
-  }
-
-  // Resize scratchpad_1 tensor
-  scratchpad_1->resize({batch_size, num_filters});
-
-  if (input_data_type == loco::DataType::S8)
-  {
-    // Resize scratchpad_2 for full_integer op
-    scratchpad_2->resize({batch_size, num_units});
-  }
-}
-
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h
deleted file mode 100644 (file)
index fdddaa9..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
-#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
-
-#include <tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h>
-
-namespace luci_interpreter_pal
-{
-template <typename T>
-static inline void
-SpaceToBatchND(const tflite::SpaceToBatchParams &params,
-               const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
-               const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
-               const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data,
-               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
-{
-  tflite::reference_ops::SpaceToBatchND(
-    params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
-    unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
-}
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake
deleted file mode 100644 (file)
index a68b363..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-macro(initialize_pal)
-    nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
-    nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
-    nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
-    nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
-    nnas_find_package(CMSISSource EXACT 5.8.0 QUIET)
-
-    if (NOT TensorFlowSource_FOUND)
-        message(STATUS "Skipping luci-interpreter: TensorFlow not found")
-        return()
-    endif ()
-
-    if (NOT TensorFlowGEMMLowpSource_FOUND)
-        message(STATUS "Skipping luci-interpreter: gemmlowp not found")
-        return()
-    endif ()
-
-    if (NOT TensorFlowEigenSource_FOUND)
-        message(STATUS "Skipping luci-interpreter: Eigen not found")
-        return()
-    endif ()
-
-    if (NOT TensorFlowRuySource_FOUND)
-        message(STATUS "Skipping luci-interpreter: Ruy not found")
-        return()
-    endif ()
-
-    if (NOT CMSISSource_FOUND)
-        message(STATUS "Skipping luci-interpreter: CMSISSource not found")
-        return()
-    endif ()
-
-    set(PAL_INITIALIZED TRUE)
-endmacro()
-
-macro(add_pal_to_target TGT)
-    target_include_directories(${TGT} PRIVATE "${PAL}")
-    target_include_directories(${TGT} PRIVATE
-            "${TensorFlowRuySource_DIR}"
-            "${TensorFlowGEMMLowpSource_DIR}"
-            "${TensorFlowEigenSource_DIR}"
-            "${TensorFlowSource_DIR}")
-    target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
-
-    file(GLOB_RECURSE PAL_SOURCES "${CMSISSource_DIR}/CMSIS/NN/Source/*.c")
-    list(APPEND PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
-            ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
-            ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
-    add_library(luci_interpreter_cmsisnn_pal STATIC ${PAL_SOURCES})
-    set_property(TARGET luci_interpreter_cmsisnn_pal PROPERTY POSITION_INDEPENDENT_CODE ON)
-    target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE
-            "${TensorFlowRuySource_DIR}"
-            "${TensorFlowGEMMLowpSource_DIR}"
-            "${TensorFlowEigenSource_DIR}"
-            "${TensorFlowSource_DIR}"
-    )
-
-    add_subdirectory(${CMSISSource_DIR}/CMSIS/NN ${CMAKE_CURRENT_BINARY_DIR}/CMSISNN)
-    target_include_directories(luci_interpreter_cmsisnn_pal PUBLIC
-            "${CMSISSource_DIR}/CMSIS/NN/Include"
-            "${CMSISSource_DIR}/CMSIS/DSP/Include"
-            "${CMSISSource_DIR}/CMSIS/Core/Include")
-
-    target_link_libraries(${TGT} PRIVATE luci_interpreter_cmsisnn_pal)
-endmacro()
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst b/compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst
deleted file mode 100644 (file)
index 8e20559..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-REGISTER_KERNEL(Add)
-REGISTER_KERNEL(ArgMax)
-REGISTER_KERNEL(AveragePool2D)
-REGISTER_KERNEL(BatchMatMul)
-REGISTER_KERNEL(BatchToSpaceND)
-REGISTER_KERNEL(Cast)
-REGISTER_KERNEL(Concatenation)
-REGISTER_KERNEL(Conv2D)
-REGISTER_KERNEL(DepthToSpace)
-REGISTER_KERNEL(DepthwiseConv2D)
-REGISTER_KERNEL(Dequantize)
-REGISTER_KERNEL(Div)
-REGISTER_KERNEL(Elu)
-REGISTER_KERNEL(Exp)
-REGISTER_KERNEL(ExpandDims)
-REGISTER_KERNEL(Fill)
-REGISTER_KERNEL(Floor)
-REGISTER_KERNEL(FloorDiv)
-REGISTER_KERNEL(Equal)
-REGISTER_KERNEL(FullyConnected)
-REGISTER_KERNEL(Gather)
-REGISTER_KERNEL(Greater)
-REGISTER_KERNEL(GreaterEqual)
-REGISTER_KERNEL(If)
-REGISTER_KERNEL(InstanceNorm)
-REGISTER_KERNEL(L2Normalize)
-REGISTER_KERNEL(L2Pool2D)
-REGISTER_KERNEL(LeakyRelu)
-REGISTER_KERNEL(Less)
-REGISTER_KERNEL(LessEqual)
-REGISTER_KERNEL(LocalResponseNormalization)
-REGISTER_KERNEL(LogicalAnd)
-REGISTER_KERNEL(LogicalNot)
-REGISTER_KERNEL(LogicalOr)
-REGISTER_KERNEL(Logistic)
-REGISTER_KERNEL(LogSoftmax)
-REGISTER_KERNEL(Maximum)
-REGISTER_KERNEL(MaxPool2D)
-REGISTER_KERNEL(Mean)
-REGISTER_KERNEL(Minimum)
-REGISTER_KERNEL(MirrorPad)
-REGISTER_KERNEL(Mul)
-REGISTER_KERNEL(Neg)
-REGISTER_KERNEL(NotEqual)
-REGISTER_KERNEL(OneHot)
-REGISTER_KERNEL(Pack)
-REGISTER_KERNEL(Pad)
-REGISTER_KERNEL(PadV2)
-REGISTER_KERNEL(Pow)
-REGISTER_KERNEL(PRelu)
-REGISTER_KERNEL(Quantize)
-REGISTER_KERNEL(Relu)
-REGISTER_KERNEL(Relu6)
-REGISTER_KERNEL(Reshape)
-REGISTER_KERNEL(ResizeBilinear)
-REGISTER_KERNEL(ResizeNearestNeighbor)
-REGISTER_KERNEL(ReverseV2)
-REGISTER_KERNEL(Rsqrt)
-REGISTER_KERNEL(Shape)
-REGISTER_KERNEL(Slice)
-REGISTER_KERNEL(Softmax)
-REGISTER_KERNEL(SpaceToBatchND)
-REGISTER_KERNEL(SpaceToDepth)
-REGISTER_KERNEL(Split)
-REGISTER_KERNEL(SplitV)
-REGISTER_KERNEL(StridedSlice)
-REGISTER_KERNEL(Sqrt)
-REGISTER_KERNEL(Square)
-REGISTER_KERNEL(SquaredDifference)
-REGISTER_KERNEL(Squeeze)
-REGISTER_KERNEL(Sub)
-REGISTER_KERNEL(SVDF)
-REGISTER_KERNEL(Tanh)
-REGISTER_KERNEL(Transpose)
-REGISTER_KERNEL(TransposeConv)
-REGISTER_KERNEL(Unpack)
-REGISTER_KERNEL(While)
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h
deleted file mode 100644 (file)
index 0ffba14..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_SVDF_H
-#define LUCI_INTERPRETER_PAL_SVDF_H
-
-#include <tensorflow/lite/kernels/internal/reference/svdf.h>
-
-namespace luci_interpreter_pal
-{
-static inline void
-IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
-            const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
-            const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
-            const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
-            const int32_t *bias_data, int16_t *activation_state_data,
-            const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
-            int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
-            int scale_2_b, int32_t input_zp, int32_t output_zp)
-{
-  tflite::reference_ops::EvalIntegerSVDF(&params, input_shape, input_data, weight_feature_shape,
-                                         weight_feature_data, weight_time_shape, weight_time_data,
-                                         bias_shape, bias_data, activation_state_data, output_shape,
-                                         output_data, scratchpad_data, output_temp_data, scale_1_a,
-                                         scale_1_b, scale_2_a, scale_2_b, input_zp, output_zp);
-}
-static inline void
-FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
-          const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
-          const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
-          const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
-          const float *bias_data, float *scratchpad_data, float *activation_state_data,
-          const tflite::RuntimeShape &output_shape, float *output_data)
-{
-  tflite::reference_ops::EvalFloatSVDF(&params, input_shape, input_data, weight_feature_shape,
-                                       weight_feature_data, weight_time_shape, weight_time_data,
-                                       bias_shape, bias_data, scratchpad_data,
-                                       activation_state_data, output_shape, output_data);
-}
-
-static inline void SetupScratchpadTensor(
-  const luci_interpreter::DataType &input_data_type,
-  const luci_interpreter::DataType &weight_feature_data_type,
-  luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
-  luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
-  luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
-  const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
-  const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
-{
-
-  if (input_data_type == loco::DataType::FLOAT32 &&
-      (weight_feature_data_type == loco::DataType::S8 ||
-       weight_feature_data_type == loco::DataType::U8))
-  {
-    (void)input_shape;
-    (void)weight_time_shape;
-    (void)scratchpad_3;
-    (void)scratchpad_4;
-    (void)scratchpad_5;
-    (void)scratchpad_6;
-
-    throw std::runtime_error("Hybrid type is not currently supported for linux platform");
-  }
-
-  // Resize scratchpad_1 tensor
-  scratchpad_1->resize({batch_size, num_filters});
-
-  if (input_data_type == loco::DataType::S8)
-  {
-    // Resize scratchpad_2 for full_integer op
-    scratchpad_2->resize({batch_size, num_units});
-  }
-}
-
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake b/compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake
deleted file mode 100644 (file)
index 185700c..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-macro(initialize_pal)
-    nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
-    nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
-    nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
-    nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
-
-    if (NOT TensorFlowSource_FOUND)
-        message(STATUS "Skipping luci-interpreter: TensorFlow not found")
-        return()
-    endif ()
-
-    if (NOT TensorFlowGEMMLowpSource_FOUND)
-        message(STATUS "Skipping luci-interpreter: gemmlowp not found")
-        return()
-    endif ()
-
-    if (NOT TensorFlowEigenSource_FOUND)
-        message(STATUS "Skipping luci-interpreter: Eigen not found")
-        return()
-    endif ()
-
-    if (NOT TensorFlowRuySource_FOUND)
-        message(STATUS "Skipping luci-interpreter: Ruy not found")
-        return()
-    endif ()
-
-    find_package(Threads REQUIRED)
-
-    set(PAL_INITIALIZED TRUE)
-endmacro()
-
-macro(add_pal_to_target TGT)
-    target_include_directories(${TGT} PRIVATE "${PAL}")
-    target_include_directories(${TGT} SYSTEM PRIVATE
-            "${TensorFlowRuySource_DIR}"
-            "${TensorFlowGEMMLowpSource_DIR}"
-            "${TensorFlowEigenSource_DIR}"
-            "${TensorFlowSource_DIR}")
-    target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
-
-    # TODO Revisit this decision;
-    # instead, add the sources with visitors to this library
-    set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
-            ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc
-            ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
-
-    if(BUILD_ARM32_NEON)
-        # NOTE may need to revise this list for version upgrade
-        set(PAL_SOURCES ${PAL_SOURCES}
-                ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc
-                ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/cpu_check.cc
-                ${TensorFlowRuySource_DIR}/ruy/allocator.cc
-                ${TensorFlowRuySource_DIR}/ruy/block_map.cc
-                ${TensorFlowRuySource_DIR}/ruy/blocking_counter.cc
-                ${TensorFlowRuySource_DIR}/ruy/context_get_ctx.cc
-                ${TensorFlowRuySource_DIR}/ruy/cpuinfo.cc
-                ${TensorFlowRuySource_DIR}/ruy/ctx.cc
-                ${TensorFlowRuySource_DIR}/ruy/denormal.cc
-                ${TensorFlowRuySource_DIR}/ruy/frontend.cc
-                ${TensorFlowRuySource_DIR}/ruy/pack_arm.cc
-                ${TensorFlowRuySource_DIR}/ruy/prepacked_cache.cc
-                ${TensorFlowRuySource_DIR}/ruy/prepare_packed_matrices.cc
-                ${TensorFlowRuySource_DIR}/ruy/system_aligned_alloc.cc
-                ${TensorFlowRuySource_DIR}/ruy/thread_pool.cc
-                ${TensorFlowRuySource_DIR}/ruy/trmul.cc
-                ${TensorFlowRuySource_DIR}/ruy/tune.cc
-                ${TensorFlowRuySource_DIR}/ruy/wait.cc
-                ${TensorFlowRuySource_DIR}/ruy/kernel_arm32.cc
-                )
-    endif(BUILD_ARM32_NEON)
-
-    add_library(luci_interpreter_linux_pal STATIC ${PAL_SOURCES})
-    set_target_properties(luci_interpreter_linux_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
-    target_include_directories(luci_interpreter_linux_pal SYSTEM PRIVATE
-            "${TensorFlowRuySource_DIR}"
-            "${TensorFlowGEMMLowpSource_DIR}"
-            "${TensorFlowEigenSource_DIR}"
-            "${TensorFlowSource_DIR}"
-    )
-
-    target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_linux_pal)
-endmacro()
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst b/compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst
deleted file mode 100644 (file)
index f0df58d..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-REGISTER_KERNEL(Add)
-REGISTER_KERNEL(ArgMax)
-REGISTER_KERNEL(AveragePool2D)
-REGISTER_KERNEL(BatchToSpaceND)
-REGISTER_KERNEL(Cast)
-REGISTER_KERNEL(Concatenation)
-REGISTER_KERNEL(Conv2D)
-REGISTER_KERNEL(DepthToSpace)
-REGISTER_KERNEL(DepthwiseConv2D)
-REGISTER_KERNEL(Dequantize)
-REGISTER_KERNEL(Div)
-REGISTER_KERNEL(Elu)
-REGISTER_KERNEL(Exp)
-REGISTER_KERNEL(ExpandDims)
-REGISTER_KERNEL(Fill)
-REGISTER_KERNEL(Floor)
-REGISTER_KERNEL(FloorDiv)
-REGISTER_KERNEL(Equal)
-REGISTER_KERNEL(FullyConnected)
-REGISTER_KERNEL(Greater)
-REGISTER_KERNEL(GreaterEqual)
-REGISTER_KERNEL(If)
-REGISTER_KERNEL(InstanceNorm)
-REGISTER_KERNEL(L2Normalize)
-REGISTER_KERNEL(L2Pool2D)
-REGISTER_KERNEL(LeakyRelu)
-REGISTER_KERNEL(Less)
-REGISTER_KERNEL(LessEqual)
-REGISTER_KERNEL(LogicalAnd)
-REGISTER_KERNEL(LogicalNot)
-REGISTER_KERNEL(LogicalOr)
-REGISTER_KERNEL(Logistic)
-REGISTER_KERNEL(Maximum)
-REGISTER_KERNEL(MaxPool2D)
-REGISTER_KERNEL(Minimum)
-REGISTER_KERNEL(MirrorPad)
-REGISTER_KERNEL(Mul)
-REGISTER_KERNEL(Neg)
-REGISTER_KERNEL(NotEqual)
-REGISTER_KERNEL(Pad)
-REGISTER_KERNEL(PadV2)
-REGISTER_KERNEL(PRelu)
-REGISTER_KERNEL(Quantize)
-REGISTER_KERNEL(Reshape)
-REGISTER_KERNEL(ResizeBilinear)
-REGISTER_KERNEL(ResizeNearestNeighbor)
-REGISTER_KERNEL(Rsqrt)
-REGISTER_KERNEL(Shape)
-REGISTER_KERNEL(Softmax)
-REGISTER_KERNEL(SpaceToBatchND)
-REGISTER_KERNEL(SpaceToDepth)
-REGISTER_KERNEL(StridedSlice)
-REGISTER_KERNEL(Sqrt)
-REGISTER_KERNEL(Square)
-REGISTER_KERNEL(SquaredDifference)
-REGISTER_KERNEL(Squeeze)
-REGISTER_KERNEL(Sub)
-REGISTER_KERNEL(SVDF)
-REGISTER_KERNEL(Tanh)
-REGISTER_KERNEL(Transpose)
-REGISTER_KERNEL(TransposeConv)
-REGISTER_KERNEL(While)
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h
deleted file mode 100644 (file)
index 4dd77ff..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
-#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
-
-#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
-
-namespace luci_interpreter_pal
-{
-template <typename T>
-static inline void
-BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
-               const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
-               const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
-               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
-{
-  tflite::reference_ops::BatchToSpaceND(
-    unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
-    unextended_input3_shape, crops_data, unextended_output_shape, output_data);
-}
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h
deleted file mode 100644 (file)
index 1397687..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
-#define LUCI_INTERPRETER_PAL_CONV2D_H
-
-#include <tensorflow/lite/kernels/internal/reference/conv.h>
-#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
-
-namespace luci_interpreter_pal
-{
-static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
-                        const float *input_data, const tflite::RuntimeShape &filter_shape,
-                        const float *filter_data, const tflite::RuntimeShape &bias_shape,
-                        const float *bias_data, const tflite::RuntimeShape &output_shape,
-                        float *output_data, const tflite::RuntimeShape &scratchpad_shape,
-                        float *scratchpad_data)
-{
-  (void)scratchpad_shape;
-  (void)scratchpad_data;
-  tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
-                              bias_shape, bias_data, output_shape, output_data,
-                              tflite::RuntimeShape(), nullptr);
-}
-
-static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
-                        const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
-                        const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
-                        const int32 *bias_data, const tflite::RuntimeShape &output_shape,
-                        uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
-                        uint8 *scratchpad_data)
-{
-  (void)scratchpad_shape;
-  (void)scratchpad_data;
-  tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
-                              bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
-                              scratchpad_data, nullptr);
-}
-
-static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
-                                  const int32_t *shifts, const tflite::RuntimeShape &input_shape,
-                                  const int8 *input_data, const tflite::RuntimeShape &filter_shape,
-                                  const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
-                                  const int32 *bias_data, const tflite::RuntimeShape &output_shape,
-                                  int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
-                                  int8 *scratchpad_data)
-{
-  (void)scratchpad_shape;
-  (void)scratchpad_data;
-  tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
-                                                filter_shape, filter_data, bias_shape, bias_data,
-                                                output_shape, output_data);
-}
-
-static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
-                                         const luci_interpreter::DataType &input_data_type,
-                                         const tflite::ConvParams &params,
-                                         const tflite::RuntimeShape &input_shape,
-                                         const tflite::RuntimeShape &filter_shape,
-                                         const tflite::RuntimeShape &output_shape)
-{
-  (void)input_data_type;
-  (void)params;
-  (void)input_shape;
-  (void)filter_shape;
-  (void)output_shape;
-  scratchpad->set_allocatable(false);
-}
-
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h
deleted file mode 100644 (file)
index 15ff032..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
-#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
-
-#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
-
-namespace luci_interpreter_pal
-{
-
-template <typename T>
-static inline void Dequantize(tflite::DequantizationParams &params,
-                              const tflite::RuntimeShape &input_shape, const T *input_data,
-                              const tflite::RuntimeShape &output_shape, float *output_data)
-{
-  tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape,
-                                               output_data);
-}
-
-static inline void Dequantize(tflite::DequantizationParams &params,
-                              const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
-                              const tflite::RuntimeShape &output_shape, float *output_data)
-{
-  tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
-}
-
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h
deleted file mode 100644 (file)
index 6046789..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
-#define LUCI_INTERPRETER_PAL_QUANTIZE_H
-
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
-
-namespace luci_interpreter_pal
-{
-template <typename T>
-static inline void Quantize(tflite::QuantizationParams &params,
-                            const tflite::RuntimeShape &input_shape, const float *input_data,
-                            const tflite::RuntimeShape &output_shape, T *output_data)
-{
-  tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
-}
-
-template <typename Input, typename Output>
-static inline void Requantize(const Input *input_data, int32_t size,
-                              int32_t effective_scale_multiplier, int32_t effective_scale_shift,
-                              int32_t input_zero_point, int32_t output_zero_point,
-                              Output *output_data)
-{
-  tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier,
-                                    effective_scale_shift, input_zero_point, output_zero_point,
-                                    output_data);
-}
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h
deleted file mode 100644 (file)
index 3bba668..0000000
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_SVDF_H
-#define LUCI_INTERPRETER_PAL_SVDF_H
-
-#include <tensorflow/lite/kernels/internal/reference/svdf.h>
-
-namespace luci_interpreter_pal
-{
-static inline void
-IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
-            const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
-            const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
-            const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
-            const int32_t *bias_data, int16_t *activation_state_data,
-            const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
-            int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
-            int scale_2_b, int32_t input_zp, int32_t output_zp)
-{
-  const int n_rank = params.rank;
-  const int n_batch = input_shape.Dims(0);
-  const int n_input = input_shape.Dims(1);
-  const int n_filter = weight_feature_shape.Dims(0);
-  const int n_unit = n_filter / n_rank;
-  const int n_memory = weight_time_shape.Dims(1);
-
-  // Left shift the activation_state.
-  {
-    int16_t *new_state_start = activation_state_data;
-    const int16_t *old_state_start = activation_state_data + 1;
-    const int16_t *old_state_end = activation_state_data + n_batch * n_filter * n_memory;
-    while (old_state_start != old_state_end)
-    {
-      *new_state_start++ = *old_state_start++;
-    }
-  }
-
-  // Note: no need to clear the latest activation, matmul is not accumulative.
-
-  // Feature matmul.
-  {
-    const int32_t output_max = std::numeric_limits<int16_t>::max();
-    const int32_t output_min = std::numeric_limits<int16_t>::min();
-    int16_t *result_in_batch = activation_state_data + (n_memory - 1);
-    for (int b = 0; b < n_batch; b++)
-    {
-      const int8_t *matrix_ptr = weight_feature_data;
-      for (int r = 0; r < n_filter; r++)
-      {
-        int32_t dot_prod = 0;
-        const int8_t *vector_in_batch = input_data + b * n_input;
-        for (int c = 0; c < n_input; c++)
-        {
-          dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp);
-        }
-        dot_prod = tflite::MultiplyByQuantizedMultiplier(dot_prod, scale_1_a, scale_1_b);
-        dot_prod = std::min(std::max(output_min, dot_prod), output_max);
-        // This assumes state is symmetrically quantized. Otherwise last bit of
-        // state should be initialized to its zero point and accumulate the
-        // dot_prod.
-        // Equivalent to the following:
-        //     result_in_batch = zero point, which happens to be zero.
-        //     result_in_batch += dot_prod.
-        *result_in_batch = dot_prod;
-        result_in_batch += n_memory;
-      }
-    }
-  }
-
-  // Time.
-  {
-    for (int b = 0; b < n_batch; ++b)
-    {
-      int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
-
-      // Perform batched vector dot product:
-      const int16_t *vector1_ptr = weight_time_data;
-      const int16_t *vector2_ptr = activation_state_data + b * n_memory * n_filter;
-
-      for (int i = 0; i < n_filter; i++)
-      {
-        *scratch_ptr_batch = 0;
-        for (int j = 0; j < n_memory; j++)
-        {
-          *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
-        }
-        scratch_ptr_batch++;
-      }
-    }
-  }
-
-  // Reduce, add bias, rescale, activation.
-  {
-    // Add bias.
-    if (bias_data)
-    {
-      // Vector batch assign:
-      for (int i = 0; i < n_batch; ++i)
-      {
-        int32_t *output_ptr = output_temp_data + i * n_unit;
-        const int32_t *bias_ptr = bias_data;
-        for (int j = 0; j < n_unit; ++j)
-        {
-          *output_ptr++ = *bias_ptr++;
-        }
-      }
-    }
-    else
-    {
-      int32_t *output_ptr = output_temp_data;
-      for (int i = 0; i < n_batch * n_unit; ++i)
-      {
-        *output_ptr++ = 0;
-      }
-    }
-
-    // Reduce.
-    for (int b = 0; b < n_batch; ++b)
-    {
-      int32_t *output_temp_ptr = output_temp_data + b * n_unit;
-      int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
-
-      // Reduction sum vector
-      for (int i = 0; i < n_unit; ++i)
-      {
-        for (int j = 0; j < n_rank; ++j)
-        {
-          output_temp_ptr[i] += *scratch_ptr_batch++;
-        }
-      }
-    }
-
-    // Rescale.
-    const int32_t output_max = std::numeric_limits<int8_t>::max();
-    const int32_t output_min = std::numeric_limits<int8_t>::min();
-    for (int i = 0; i < n_batch * n_unit; ++i)
-    {
-      int32_t x1 = output_temp_data[i];
-      int32_t x2 = tflite::MultiplyByQuantizedMultiplier(x1, scale_2_a, scale_2_b);
-      int32_t x3 = x2 + output_zp;
-      int32_t x4 = std::min(std::max(output_min, x3), output_max);
-      output_data[i] = static_cast<int8_t>(x4);
-    }
-  }
-}
-static inline void
-FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
-          const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
-          const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
-          const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
-          const float *bias_data, float *scratchpad_data, float *activation_state_data,
-          const tflite::RuntimeShape &output_shape, float *output_data)
-{
-  const int32_t rank = params.rank;
-  const int32_t batch_size = input_shape.Dims(0);
-  const int32_t input_size = input_shape.Dims(1);
-  const int32_t num_filters = weight_feature_shape.Dims(0);
-  const int32_t num_units = num_filters / rank;
-  const int32_t memory_size = weight_time_shape.Dims(1);
-
-  // Left shift the activation_state.
-  {
-    float *new_state_start = activation_state_data;
-    const float *old_state_start = activation_state_data + 1;
-    const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size;
-    while (old_state_start != old_state_end)
-    {
-      *new_state_start++ = *old_state_start++;
-    }
-  }
-
-  // Note: no need to clear the latest activation, matmul is not accumulative.
-
-  // Compute conv1d(inputs, weights_feature).
-  // The activation_state's rightmost column is used to save current cycle
-  // activation. This is achieved by starting at state_ptr[memory_size - 1] and
-  // having the stride equal to memory_size.
-
-  // Perform batched matrix vector multiply operation:
-  {
-    const float *matrix = weight_feature_data;
-    const float *vector = input_data;
-    float *result = &activation_state_data[memory_size - 1];
-    float *result_in_batch = result;
-    for (int i = 0; i < batch_size; ++i)
-    {
-      const float *matrix_ptr = matrix;
-      for (int j = 0; j < num_filters; ++j)
-      {
-        float dot_prod = 0.0f;
-        const float *vector_in_batch = vector + i * input_size;
-        for (int k = 0; k < input_size; ++k)
-        {
-          dot_prod += *matrix_ptr++ * *vector_in_batch++;
-        }
-        *result_in_batch = dot_prod;
-        result_in_batch += memory_size;
-      }
-    }
-  }
-
-  tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
-    batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
-    params.activation, activation_state_data, scratchpad_data, output_data);
-}
-
-static inline void SetupScratchpadTensor(
-  const luci_interpreter::DataType &input_data_type,
-  const luci_interpreter::DataType &weight_feature_data_type,
-  luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
-  luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
-  luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
-  const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
-  const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
-{
-
-  if (input_data_type == loco::DataType::FLOAT32 &&
-      (weight_feature_data_type == loco::DataType::S8 ||
-       weight_feature_data_type == loco::DataType::U8))
-  {
-    (void)input_shape;
-    (void)weight_time_shape;
-    (void)scratchpad_3;
-    (void)scratchpad_4;
-    (void)scratchpad_5;
-    (void)scratchpad_6;
-
-    throw std::runtime_error("Hybrid type is not currently supported for mcu platform");
-  }
-
-  // Resize scratchpad_1 tensor
-  scratchpad_1->resize({batch_size, num_filters});
-
-  if (input_data_type == loco::DataType::S8)
-  {
-    // Resize scratchpad_2 for full_integer op
-    scratchpad_2->resize({batch_size, num_units});
-  }
-}
-
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h
deleted file mode 100644 (file)
index 9838b54..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
-#define LUCI_INTERPRETER_PAL_SOFTMAX_H
-
-#include <tensorflow/lite/kernels/internal/reference/softmax.h>
-
-namespace luci_interpreter_pal
-{
-static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
-                                              float beta)
-{
-  // Do nothing for mcu
-  (void)data;
-  (void)input_scale;
-  (void)beta;
-}
-
-static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
-{
-  int32 input_beta_multiplier;
-  int input_beta_left_shift;
-  static const int kScaledDiffIntegerBits = 5;
-  tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits,
-                                   &input_beta_multiplier, &input_beta_left_shift);
-
-  params->input_multiplier = input_beta_multiplier;
-  params->input_left_shift = input_beta_left_shift;
-  params->diff_min =
-    -tflite::CalculateInputRadius(kScaledDiffIntegerBits, params->input_left_shift);
-}
-
-template <typename T>
-static inline void Softmax(const tflite::SoftmaxParams &params,
-                           const tflite::RuntimeShape &input_shape, const T *input_data,
-                           const tflite::RuntimeShape &output_shape, T *output_data)
-{
-  // MARK: At this moment this operation is not supported on mcu
-  assert(false && "Softmax NYI");
-  (void)params;
-  (void)input_shape;
-  (void)input_data;
-  (void)output_shape;
-  (void)output_data;
-}
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/pal.cmake b/compiler/luci-micro/luci-interpreter/pal/mcu/pal.cmake
deleted file mode 100644 (file)
index 907d51d..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-macro(initialize_pal)
-    nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
-    nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
-    nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
-    nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
-
-    if (NOT TensorFlowSource_FOUND)
-        message(STATUS "Skipping luci-interpreter: TensorFlow not found")
-        return()
-    endif ()
-
-    if (NOT TensorFlowGEMMLowpSource_FOUND)
-        message(STATUS "Skipping luci-interpreter: gemmlowp not found")
-        return()
-    endif ()
-
-    if (NOT TensorFlowEigenSource_FOUND)
-        message(STATUS "Skipping luci-interpreter: Eigen not found")
-        return()
-    endif ()
-
-    if (NOT TensorFlowRuySource_FOUND)
-        message(STATUS "Skipping luci-interpreter: Ruy not found")
-        return()
-    endif ()
-    #find_package(Threads REQUIRED)
-
-    set(PAL_INITIALIZED TRUE)
-endmacro()
-
-macro(add_pal_to_target TGT)
-    target_include_directories(${TGT} PRIVATE "${PAL}")
-    target_include_directories(${TGT} PRIVATE
-            "${TensorFlowRuySource_DIR}"
-            "${TensorFlowGEMMLowpSource_DIR}"
-            "${TensorFlowEigenSource_DIR}"
-            "${TensorFlowSource_DIR}")
-    target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
-
-    # TODO Revisit this decision;
-    # instead, add the sources with visitors to this library
-    set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
-            ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
-            ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
-    add_library(luci_interpreter_mcu_pal STATIC ${PAL_SOURCES})
-    set_target_properties(luci_interpreter_mcu_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
-    target_include_directories(luci_interpreter_mcu_pal PRIVATE
-            "${TensorFlowRuySource_DIR}"
-            "${TensorFlowGEMMLowpSource_DIR}"
-            "${TensorFlowEigenSource_DIR}"
-            "${TensorFlowSource_DIR}"
-    )
-
-    target_link_libraries(${TGT} PRIVATE luci_interpreter_mcu_pal)
-    #target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_mcu_pal)
-endmacro()
diff --git a/compiler/luci-micro/luci-interpreter/requires.cmake b/compiler/luci-micro/luci-interpreter/requires.cmake
deleted file mode 100644 (file)
index f411f38..0000000
+++ /dev/null
@@ -1 +0,0 @@
-require(luci)
diff --git a/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.cpp
deleted file mode 100644 (file)
index 6ad1f32..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci_interpreter/BuddyMemoryManager.h"
-
-namespace luci_interpreter
-{
-
-BuddyMemoryManager::BuddyMemoryManager(uint8_t *memory_start, int32_t memSize)
-{
-  int32_t p = lowerLog2(memSize);
-
-  // We assume that the requested size of memory does not exceed 4 GB
-  assert(p < 32);
-  memSize = 1 << p;
-
-  _start_block = reinterpret_cast<Block *>(memory_start);
-  _start_block->size = memSize - sizeof(Block);
-  _start_block->is_free = true;
-  _start_block->self = _start_block;
-  _num_blocks = 0;
-  _size = _start_block->size;
-
-  for (auto &_free_block : _free_blocks)
-    _free_block = nullptr;
-
-  addToBlocks(_start_block, p);
-}
-
-void BuddyMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
-{
-  const size_t element_size = getDataTypeSize(tensor.element_type());
-  const int32_t num_elements = tensor.shape().num_elements();
-  auto size = num_elements * element_size;
-  auto footprint = size + sizeof(Block);
-  auto l = (footprint & (footprint - 1)) == 0
-             ? lowerLog2(footprint)
-             : lowerLog2(footprint) + 1; // check footprint is pow_of_2
-
-  while (l < 32 && !_free_blocks[l])
-    l++;
-
-  assert(l < 32);
-
-  Block *tmp;
-  tmp = _free_blocks[l];
-  removeFromBlocks(tmp, l);
-
-  while ((tmp->size + sizeof(Block)) / 2 >= size + sizeof(Block))
-  {
-    divideBlock(tmp, l);
-    l--;
-  }
-
-  tmp->is_free = false;
-  tmp->self = tmp;
-  _num_blocks++;
-
-  auto *data = (uint8_t *)(tmp + 1);
-  tensor.set_data_buffer(data);
-}
-
-void BuddyMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
-{
-  auto data = tensor.data<void>();
-  auto *tmp = (Block *)((uint8_t *)data - sizeof(Block));
-
-  assert(tmp->self == tmp);
-
-  tmp->is_free = true;
-  addToBlocks(tmp, lowerLog2(tmp->size + sizeof(Block)));
-
-  while (tmp)
-    if (tmp->size == _size)
-      break;
-    else
-      tmp = mergeBlock(tmp);
-
-  _num_blocks--;
-  tensor.set_data_buffer(nullptr);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp b/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp
deleted file mode 100644 (file)
index 29fb767..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci_interpreter/BuddyMemoryManager.h"
-#include <gtest/gtest.h>
-
-namespace luci_interpreter
-{
-namespace
-{
-
-using namespace testing;
-
-TEST(BuddyMemoryManager, basic)
-{
-  auto mem_pool = std::make_unique<uint8_t[]>(200);
-  auto buddy_memory_manager = std::make_unique<BuddyMemoryManager>(mem_pool.get(), 130);
-  Tensor first_tensor(DataType::U8, Shape({8}), AffineQuantization{}, "first_tensor");
-
-  buddy_memory_manager->allocate_memory(first_tensor);
-
-  uint8_t data_1[] = {1, 2, 3, 4, 5, 6, 7, 8};
-
-  first_tensor.writeData(data_1, 8);
-  uint8_t array_1[8];
-  first_tensor.readData(array_1, 8);
-  for (int i = 0; i < 8; i++)
-  {
-    EXPECT_EQ(data_1[i], array_1[i]);
-  }
-
-  Tensor second_tensor(DataType::U8, Shape({2, 5}), AffineQuantization{}, "second_tensor");
-  buddy_memory_manager->allocate_memory(second_tensor);
-
-  uint8_t data_2[2][5] = {{11, 22, 33, 44, 55}, {12, 23, 34, 45, 56}};
-  second_tensor.writeData(data_2, 10);
-
-  uint8_t array_2[2][5];
-  second_tensor.readData(array_2, 10);
-  for (int i = 0; i < 2; i++)
-  {
-    for (int j = 0; j < 5; j++)
-    {
-      EXPECT_EQ(data_2[i][j], array_2[i][j]);
-    }
-  }
-
-  buddy_memory_manager->release_memory(first_tensor);
-  EXPECT_EQ(first_tensor.data<void>(), nullptr);
-
-  buddy_memory_manager->release_memory(second_tensor);
-  EXPECT_EQ(second_tensor.data<void>(), nullptr);
-}
-
-} // namespace
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/CMakeLists.txt
deleted file mode 100644 (file)
index 997b75a..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-include("${LUCI_INTERPRETER_PAL_DIR}/pal.cmake")
-
-initialize_pal()
-
-if (NOT PAL_INITIALIZED)
-  message("PAL Failed to initialize, skip luci-interpreter")
-  return()
-endif()
-
-message(STATUS "LUCI INTERPRETER BEGIN")
-
-set(LUCI_INTERPRETER_BINARY "luci_interpreter${LUCI_INTERPRETER_SUFFIX}")
-set(LUCI_INTERPRETER_CORE "luci_interpreter_core${LUCI_INTERPRETER_SUFFIX}")
-set(LUCI_INTERPRETER_KERNELS "luci_interpreter_kernels${LUCI_INTERPRETER_SUFFIX}")
-set(LUCI_INTERPRETER_LOADER "luci_interpreter_loader${LUCI_INTERPRETER_SUFFIX}")
-set(LUCI_INTERPRETER_IMPORT "luci_interpreter_import${LUCI_INTERPRETER_SUFFIX}")
-
-add_subdirectory(core)
-message(STATUS "LUCI INTERPRETER CORE")
-add_subdirectory(kernels)
-message(STATUS "LUCI INTERPRETER KERNELS")
-add_subdirectory(loader)
-message(STATUS "LUCI INTERPRETER LOADER")
-add_subdirectory(import)
-message(STATUS "LUCI INTERPRETER IMPORT")
-
-message(STATUS "LUCI INTERPTER INITALIZED")
-
-set(SOURCES
-    "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/Interpreter.h"
-    Interpreter.cpp "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h" SimpleMemoryManager.cpp
-        "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h" TestMemoryManager.cpp
-        "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/BuddyMemoryManager.h" BuddyMemoryManager.cpp
-        "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/StaticMemoryManager.h" StaticMemoryManager.cpp)
-
-if (NOT LUCI_INTERPRETER_STATIC)
-  add_library(${LUCI_INTERPRETER_BINARY} SHARED ${SOURCES})
-else ()
-  add_library(${LUCI_INTERPRETER_BINARY} STATIC ${SOURCES})
-endif ()
-
-set(TEST_SOURCES BuddyMemoryManager.test.cpp)
-
-target_include_directories(${LUCI_INTERPRETER_BINARY} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
-target_include_directories(${LUCI_INTERPRETER_BINARY} PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}")
-target_link_libraries(${LUCI_INTERPRETER_BINARY}
-    PUBLIC luci_lang ${LUCI_INTERPRETER_LOADER} ${LUCI_INTERPRETER_CORE}
-    PRIVATE nncc_common)
-
-install(TARGETS ${LUCI_INTERPRETER_BINARY} DESTINATION lib)
-install(DIRECTORY include/ DESTINATION include
-        FILES_MATCHING PATTERN "*.h")
-
-if(NOT ENABLE_TEST)
-  return()
-endif(NOT ENABLE_TEST)
-
-nnas_find_package(GTest REQUIRED)
-
-GTest_AddTest(buddy_manager_test ${TEST_SOURCES})
-target_link_libraries(buddy_manager_test ${LUCI_INTERPRETER_BINARY})
diff --git a/compiler/luci-micro/luci-interpreter/src/Interpreter.cpp b/compiler/luci-micro/luci-interpreter/src/Interpreter.cpp
deleted file mode 100644 (file)
index 8cf272e..0000000
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci_interpreter/Interpreter.h"
-#include "luci_interpreter/SimpleMemoryManager.h"
-
-#include "loader/ModuleLoader.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace
-{
-
-class EventNotifierImpl final : public EventNotifier
-{
-public:
-  EventNotifierImpl(const RuntimeToIR &runtime_to_ir,
-                    const std::vector<ExecutionObserver *> &observers)
-    : _runtime_to_ir(runtime_to_ir), _observers(observers)
-  {
-  }
-
-  void postTensorWrite(const Tensor *tensor) override
-  {
-    assert(tensor != nullptr);
-    for (const auto &observer : _observers)
-    {
-      observer->postTensorWrite(_runtime_to_ir.tensor_to_node.at(tensor), tensor);
-    }
-  }
-
-  void preOperatorExecute(const Kernel *kernel) override
-  {
-    assert(kernel != nullptr);
-    for (const auto &observer : _observers)
-    {
-      observer->preOperatorExecute(_runtime_to_ir.kernel_to_node.at(kernel));
-    }
-  }
-
-  void postOperatorExecute(const Kernel *kernel) override
-  {
-    assert(kernel != nullptr);
-    for (const auto &observer : _observers)
-    {
-      observer->postOperatorExecute(_runtime_to_ir.kernel_to_node.at(kernel));
-    }
-  }
-
-private:
-  const RuntimeToIR &_runtime_to_ir;
-  const std::vector<ExecutionObserver *> &_observers;
-};
-
-} // namespace
-
-Interpreter::Interpreter(const luci::Module *module)
-{
-  _runtime_to_ir = std::make_unique<RuntimeToIR>();
-  _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
-  _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
-
-  _default_memory_manager = std::make_unique<SimpleMemoryManager>();
-
-  ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor,
-                      _default_memory_manager.get());
-  loader.load();
-}
-
-Interpreter::Interpreter(const luci::Module *module,
-                         luci_interpreter::IMemoryManager *memory_manager)
-{
-  assert(memory_manager && "Use Interpreter::Interpreter(module) constructor instead");
-
-  _runtime_to_ir = std::make_unique<RuntimeToIR>();
-  _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
-  _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
-
-  ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor,
-                      memory_manager);
-  loader.load();
-}
-
-Interpreter::~Interpreter() = default;
-
-void Interpreter::writeInputTensor(const luci::CircleInput *input_node, const void *data,
-                                   size_t data_size)
-{
-  Tensor *tensor = _runtime_module->getInputTensors()[input_node->index()];
-  if (tensor == nullptr)
-  {
-    const std::string &name = input_node->name();
-    throw std::runtime_error("Cannot find tensor for input node named \"" + name + "\".");
-  }
-  if (data != nullptr)
-    tensor->writeData(data, data_size);
-}
-
-void Interpreter::readOutputTensor(const luci::CircleOutput *output_node, void *data,
-                                   size_t data_size)
-{
-  Tensor *tensor = _runtime_module->getOutputTensors()[output_node->index()];
-  if (tensor == nullptr)
-  {
-    const std::string &name = output_node->name();
-    throw std::runtime_error("Cannot find tensor for output node named \"" + name + "\".");
-  }
-  if (data != nullptr)
-    tensor->readData(data, data_size);
-}
-
-void Interpreter::interpret() { _runtime_module->execute(); }
-
-void Interpreter::attachObserver(ExecutionObserver *observer)
-{
-  if (std::find(_observers.cbegin(), _observers.cend(), observer) != _observers.cend())
-    throw std::runtime_error("Observer is already attached.");
-  _observers.push_back(observer);
-}
-
-ExecutionObserver::~ExecutionObserver() = default;
-
-void ExecutionObserver::postTensorWrite(const luci::CircleNode *, const Tensor *) {}
-
-void ExecutionObserver::preOperatorExecute(const luci::CircleNode *) {}
-
-void ExecutionObserver::postOperatorExecute(const luci::CircleNode *) {}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp
deleted file mode 100644 (file)
index 230e398..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci_interpreter/SimpleMemoryManager.h"
-
-namespace luci_interpreter
-{
-
-void SimpleMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
-{
-  if (!tensor.is_allocatable())
-  {
-    return;
-  }
-  if (tensor.is_data_allocated())
-  {
-    release_memory(tensor);
-  }
-  const auto element_size = getDataTypeSize(tensor.element_type());
-  const auto num_elements = tensor.shape().num_elements();
-
-  auto *data = new uint8_t[num_elements * element_size];
-  tensor.set_data_buffer(data);
-}
-
-void SimpleMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
-{
-  if (!tensor.is_data_allocated())
-  {
-    tensor.set_data_buffer(nullptr);
-    return;
-  }
-  auto data = tensor.data<uint8_t>();
-  delete[] data;
-  tensor.set_data_buffer(nullptr);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp
deleted file mode 100644 (file)
index 73a8199..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci_interpreter/StaticMemoryManager.h"
-
-namespace luci_interpreter
-{
-
-void StaticMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
-{
-  if (!tensor.is_allocatable())
-  {
-    return;
-  }
-  int32_t offset = tensor.get_offset();
-  assert(offset >= 0);
-  auto tensor_ptr = _buffer_ptr + offset;
-  tensor.set_data_buffer(tensor_ptr);
-}
-
-void StaticMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
-{
-  tensor.set_data_buffer(nullptr);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp
deleted file mode 100644 (file)
index 3beeee5..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci_interpreter/TestMemoryManager.h"
-
-namespace luci_interpreter
-{
-
-void TestMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
-{
-  if (!tensor.is_allocatable())
-  {
-    return;
-  }
-  if (tensor.is_data_allocated())
-  {
-    release_memory(tensor);
-  }
-  const auto element_size = getDataTypeSize(tensor.element_type());
-  const auto num_elements = tensor.shape().num_elements();
-
-  auto *data = new uint8_t[num_elements * element_size];
-  allocations.push_back(data);
-  tensor.set_data_buffer(data);
-}
-
-void TestMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
-{
-  tensor.set_data_buffer(nullptr);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt
deleted file mode 100644 (file)
index c2471e0..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-set(SOURCES
-    "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/DataType.h"
-    "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/Tensor.h"
-    EventNotifier.h
-    Kernel.h
-    KernelParams.h
-    RuntimeGraph.h
-    RuntimeGraph.cpp
-    RuntimeModule.h
-    Tensor.cpp)
-
-add_library(${LUCI_INTERPRETER_CORE} STATIC ${SOURCES})
-if (NOT NNCC_LIBRARY_NO_PIC)
-    set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-endif(NOT NNCC_LIBRARY_NO_PIC)
-target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
-target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
-target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC luci_lang)
-target_link_libraries(${LUCI_INTERPRETER_CORE} PRIVATE nncc_common)
diff --git a/compiler/luci-micro/luci-interpreter/src/core/EventNotifier.h b/compiler/luci-micro/luci-interpreter/src/core/EventNotifier.h
deleted file mode 100644 (file)
index 5c4fbd3..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H
-#define LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H
-
-namespace luci_interpreter
-{
-
-// Used at execution stage to tell the interpreter that the runtime state has changed in some way.
-class EventNotifier
-{
-public:
-  virtual ~EventNotifier() = default;
-
-  virtual void postTensorWrite(const Tensor *tensor) = 0;
-  virtual void preOperatorExecute(const Kernel *kernel) = 0;
-  virtual void postOperatorExecute(const Kernel *kernel) = 0;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/core/Kernel.h b/compiler/luci-micro/luci-interpreter/src/core/Kernel.h
deleted file mode 100644 (file)
index a7c4a42..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_CORE_KERNEL_H
-#define LUCI_INTERPRETER_CORE_KERNEL_H
-
-#include "luci_interpreter/core/Tensor.h"
-
-#include <vector>
-
-namespace luci_interpreter
-{
-
-// Base class for all kernels.
-class Kernel
-{
-protected:
-  Kernel(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs)
-    : _inputs(std::move(inputs)), _outputs(std::move(outputs))
-  {
-  }
-
-public:
-  virtual ~Kernel() = default;
-
-  const std::vector<const Tensor *> &getInputTensors() const { return _inputs; }
-  const std::vector<Tensor *> &getOutputTensors() const { return _outputs; }
-
-  // Configures the kernel.
-  // This function is currently called once for each kernel during interpreter construction,
-  // which makes it a convenient place for preparing (resizing) output tensors.
-  virtual void configure() = 0;
-
-  // Executes the kernel.
-  virtual void execute() const = 0;
-
-protected:
-  // NOTE Prefer not to use these in derived classes.
-  const std::vector<const Tensor *> _inputs;
-  const std::vector<Tensor *> _outputs;
-};
-
-// Base class for kernels with parameters.
-template <typename Params> class KernelWithParams : public Kernel
-{
-protected:
-  KernelWithParams(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs,
-                   const Params &params)
-    : Kernel(std::move(inputs), std::move(outputs)), _params(params)
-  {
-  }
-
-public:
-  const Params &params() const { return _params; }
-
-protected:
-  const Params _params;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_CORE_KERNEL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/core/KernelParams.h b/compiler/luci-micro/luci-interpreter/src/core/KernelParams.h
deleted file mode 100644 (file)
index 6c0220c..0000000
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_CORE_KERNELPARAMS_H
-#define LUCI_INTERPRETER_CORE_KERNELPARAMS_H
-
-#include <luci/IR/AttrPadding.h>
-#include <luci/IR/AttrFusedActFunc.h>
-#include <luci/IR/AttrMirrorPadMode.h>
-#include <luci_interpreter/core/DataType.h>
-
-#include <cstdint>
-#include <vector>
-
-namespace luci_interpreter
-{
-
-// Inject commonly used types into `luci_interpreter` namespace for convenience.
-using Activation = luci::FusedActFunc;
-using Padding = luci::Padding;
-using MirrorPadMode = luci::MirrorPadMode;
-
-struct AddParams
-{
-  Activation activation;
-};
-
-struct ArgMaxParams
-{
-  DataType output_type;
-};
-
-struct BatchMatMulParams
-{
-  bool adj_x;
-  bool adj_y;
-};
-
-struct ConcatenationParams
-{
-  int axis;
-  Activation activation;
-};
-
-struct Conv2DParams
-{
-  Padding padding;
-  int32_t stride_height;
-  int32_t stride_width;
-  int32_t dilation_height_factor;
-  int32_t dilation_width_factor;
-  Activation activation;
-};
-
-struct DepthToSpaceParams
-{
-  int block_size;
-};
-
-struct DepthwiseConv2DParams
-{
-  Padding padding;
-  int32_t depth_multiplier; // TODO Remove, as it can be calculated.
-  int32_t stride_height;
-  int32_t stride_width;
-  int32_t dilation_height_factor;
-  int32_t dilation_width_factor;
-  Activation activation;
-};
-
-struct DivParams
-{
-  Activation activation;
-};
-
-struct FullyConnectedParams
-{
-  Activation activation;
-  bool keep_num_dims = false;
-};
-
-struct GatherParams
-{
-  int32_t axis;
-  int32_t batch_dims;
-};
-
-struct InstanceNormParams
-{
-  float epsilon;
-  Activation activation;
-};
-
-struct L2NormParams
-{
-  Activation activation;
-};
-
-struct LeakyReluParams
-{
-  float alpha;
-};
-
-struct LocalResponseNormalizationParams
-{
-  int32_t radius;
-  float bias;
-  float alpha;
-  float beta;
-};
-
-struct MirrorPadParams
-{
-  MirrorPadMode mode;
-};
-
-struct MulParams
-{
-  Activation activation;
-};
-
-struct OneHotParams
-{
-  int32_t axis;
-};
-
-struct PackParams
-{
-  int32_t values_count;
-  int32_t axis;
-};
-
-struct Pool2DParams
-{
-  Padding padding;
-  int32_t filter_height;
-  int32_t filter_width;
-  int32_t stride_height;
-  int32_t stride_width;
-  Activation activation;
-};
-
-struct ReducerParams
-{
-  bool keep_dims;
-};
-
-struct ResizeBilinearParams
-{
-  bool align_corners;
-  bool half_pixel_centers;
-};
-
-struct ResizeNearestNeighborParams
-{
-  bool align_corners;
-  bool half_pixel_centers;
-};
-
-struct ShapeParams
-{
-  loco::DataType out_type;
-};
-
-struct SubParams
-{
-  Activation activation;
-};
-
-struct SVDFParams
-{
-  bool asymmetric_quantize_inputs;
-  int32_t svdf_rank;
-  Activation activation;
-};
-
-struct SpaceToDepthParams
-{
-  int block_size;
-};
-
-struct SoftmaxParams
-{
-  float beta;
-};
-
-struct StridedSliceParams
-{
-  int32_t begin_mask;
-  int32_t end_mask;
-  int32_t ellipsis_mask;
-  int32_t new_axis_mask;
-  int32_t shrink_axis_mask;
-};
-
-struct SqueezeParams
-{
-  std::vector<int32_t> squeeze_dims;
-};
-
-struct TransposeConvParams
-{
-  Padding padding;
-  int32_t stride_height;
-  int32_t stride_width;
-};
-
-struct UnpackParams
-{
-  int axis;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_CORE_KERNELPARAMS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp
deleted file mode 100644 (file)
index c2f8d2e..0000000
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "core/RuntimeGraph.h"
-
-#include "core/RuntimeModule.h"
-
-#include <algorithm>
-#include <unordered_map>
-
-namespace luci_interpreter
-{
-
-class RuntimeGraph::TensorAllocPlan
-{
-  std::vector<std::vector<Tensor *>> _alloc_plan;
-  std::vector<std::vector<Tensor *>> _dealloc_plan;
-  bool _valid = false;
-  IMemoryManager *_memory_manager;
-
-public:
-  explicit TensorAllocPlan(IMemoryManager *memory_manager);
-  void invalidate() { _valid = false; }
-  bool isValid() const { return _valid; }
-  void build(const RuntimeGraph &graph);
-  void allocate(size_t kernel_index) const;
-  void deallocate(size_t kernel_index) const;
-};
-
-RuntimeGraph::TensorAllocPlan::TensorAllocPlan(IMemoryManager *memory_manager)
-  : _memory_manager(memory_manager)
-{
-}
-
-void RuntimeGraph::TensorAllocPlan::build(const RuntimeGraph &graph)
-{
-  invalidate();
-  using Lifetime = std::pair<size_t, size_t>;
-  std::unordered_map<Tensor *, Lifetime> lifetimes;
-  const size_t num_kernels = graph._kernels.size();
-  for (size_t index = 0; index < num_kernels; ++index)
-  {
-    const auto &kernel = graph._kernels[index];
-    for (const Tensor *tensor : kernel->getInputTensors())
-    {
-      auto nc_tensor = const_cast<Tensor *>(tensor);
-      if (lifetimes.count(nc_tensor) > 0)
-        lifetimes.at(nc_tensor).second = index;
-    }
-    for (Tensor *tensor : kernel->getOutputTensors())
-    {
-      assert(lifetimes.count(tensor) == 0);
-      lifetimes[tensor] = Lifetime(index, index);
-    }
-  }
-  for (const Tensor *tensor : graph.getOutputTensors())
-  {
-    auto nc_tensor = const_cast<Tensor *>(tensor);
-    if (lifetimes.count(nc_tensor) > 0)
-      lifetimes.at(nc_tensor).second = num_kernels;
-  }
-  _alloc_plan.assign(num_kernels, std::vector<Tensor *>());
-  _dealloc_plan.assign(num_kernels + 1, std::vector<Tensor *>());
-  for (const auto &item : lifetimes)
-  {
-    _alloc_plan[item.second.first].push_back(item.first);
-    _dealloc_plan[item.second.second].push_back(item.first);
-  }
-  _valid = true;
-}
-
-void RuntimeGraph::TensorAllocPlan::allocate(size_t kernel_index) const
-{
-  assert(_valid && kernel_index < _alloc_plan.size());
-  for (Tensor *tensor : _alloc_plan[kernel_index])
-  {
-    _memory_manager->allocate_memory(*tensor);
-  }
-}
-
-void RuntimeGraph::TensorAllocPlan::deallocate(size_t kernel_index) const
-{
-  assert(_valid && kernel_index < _dealloc_plan.size());
-  for (Tensor *tensor : _dealloc_plan[kernel_index])
-  {
-    _memory_manager->release_memory(*tensor);
-  }
-}
-
-RuntimeGraph::RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager)
-  : _owning_module(owning_module), _memory_manager(memory_manager),
-    _tensor_alloc_plan(std::make_unique<TensorAllocPlan>(memory_manager))
-{
-}
-
-RuntimeGraph::~RuntimeGraph()
-{
-  for (auto &tensor : _tensors)
-  {
-    if (tensor->is_data_allocated())
-      _memory_manager->release_memory(*tensor);
-  }
-}
-
-Tensor *RuntimeGraph::addTensor(std::unique_ptr<Tensor> &&tensor)
-{
-  assert(tensor != nullptr);
-  _tensors.push_back(std::move(tensor));
-  return _tensors.back().get();
-}
-
-void RuntimeGraph::setInputTensors(const std::vector<Tensor *> &input_tensors)
-{
-  assert(std::all_of(input_tensors.cbegin(), input_tensors.cend(),
-                     [](Tensor *tensor) { return tensor != nullptr; }));
-  _input_tensors = input_tensors;
-}
-
-void RuntimeGraph::setOutputTensors(const std::vector<Tensor *> &output_tensors)
-{
-  assert(std::all_of(output_tensors.cbegin(), output_tensors.cend(),
-                     [](Tensor *tensor) { return tensor != nullptr; }));
-  _output_tensors = output_tensors;
-}
-
-void RuntimeGraph::configureAllocations(Tensor *tensor)
-{
-  _memory_manager->allocate_memory(*tensor);
-}
-
-void RuntimeGraph::addKernel(std::unique_ptr<Kernel> &&kernel)
-{
-  assert(kernel != nullptr);
-  _kernels.push_back(std::move(kernel));
-  _tensor_alloc_plan->invalidate();
-}
-
-void RuntimeGraph::execute() const
-{
-  if (!_tensor_alloc_plan->isValid())
-    _tensor_alloc_plan->build(*this);
-
-  EventNotifier *event_notifier = _owning_module->getEventNotifier();
-
-  // Notify the observers that the input tensors have changed.
-  if (event_notifier != nullptr)
-  {
-    for (const Tensor *input_tensor : getInputTensors())
-    {
-      if (input_tensor->is_observable())
-        event_notifier->postTensorWrite(input_tensor);
-    }
-  }
-
-  for (size_t index = 0; index < _kernels.size(); ++index)
-  {
-    const auto &kernel = _kernels[index];
-    if (event_notifier != nullptr)
-    {
-      event_notifier->preOperatorExecute(kernel.get());
-    }
-
-    // TODO The `configure` method should only be called if the outputs of an operator need to be
-    //  resized.
-    kernel->configure();
-
-    // Preallocate outputs instead of relying on automatic allocation
-    _tensor_alloc_plan->allocate(index);
-
-    kernel->execute();
-
-    if (event_notifier != nullptr)
-    {
-      event_notifier->postOperatorExecute(kernel.get());
-    }
-
-    for (const Tensor *tensor : kernel->getOutputTensors())
-    {
-      if (event_notifier != nullptr && tensor->is_observable())
-      {
-        event_notifier->postTensorWrite(tensor);
-      }
-    }
-    _tensor_alloc_plan->deallocate(index);
-  }
-}
-
-} // namespace luci_interpreter
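The deleted TensorAllocPlan derives, for every tensor, the index of the kernel that produces it and the index of the last kernel that consumes it, then groups allocations and deallocations per kernel index, with one extra deallocation slot so graph outputs survive the last kernel. A minimal sketch of that lifetime pass, assuming simplified TensorId/KernelIO/Plan types that merely stand in for the real Tensor and Kernel classes:

#include <cstddef>
#include <unordered_map>
#include <utility>
#include <vector>

using TensorId = int;

struct KernelIO
{
  std::vector<TensorId> inputs;
  std::vector<TensorId> outputs;
};

struct Plan
{
  std::vector<std::vector<TensorId>> alloc;   // alloc[i]: allocate before kernel i runs
  std::vector<std::vector<TensorId>> dealloc; // dealloc[i]: release after kernel i runs
};

Plan build_plan(const std::vector<KernelIO> &kernels, const std::vector<TensorId> &graph_outputs)
{
  // First and last kernel index at which each tensor is live.
  std::unordered_map<TensorId, std::pair<size_t, size_t>> lifetimes;
  for (size_t index = 0; index < kernels.size(); ++index)
  {
    for (TensorId t : kernels[index].inputs)
      if (lifetimes.count(t) > 0)
        lifetimes.at(t).second = index;
    for (TensorId t : kernels[index].outputs)
      lifetimes[t] = {index, index};
  }
  // Graph outputs must stay alive past the last kernel, hence the extra dealloc slot.
  for (TensorId t : graph_outputs)
    if (lifetimes.count(t) > 0)
      lifetimes.at(t).second = kernels.size();

  Plan plan;
  plan.alloc.assign(kernels.size(), std::vector<TensorId>());
  plan.dealloc.assign(kernels.size() + 1, std::vector<TensorId>());
  for (const auto &item : lifetimes)
  {
    plan.alloc[item.second.first].push_back(item.first);
    plan.dealloc[item.second.second].push_back(item.first);
  }
  return plan;
}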
diff --git a/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h
deleted file mode 100644 (file)
index 8184e24..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
-#define LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
-
-#include "luci_interpreter/core/Tensor.h"
-#include "luci_interpreter/MemoryManager.h"
-#include "core/Kernel.h"
-
-#include <memory>
-#include <vector>
-
-namespace luci_interpreter
-{
-
-class RuntimeModule;
-
-class RuntimeGraph
-{
-private:
-  class TensorAllocPlan;
-  friend class TensorAllocPlan;
-
-public:
-  explicit RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager);
-  ~RuntimeGraph();
-
-  Tensor *addTensor(std::unique_ptr<Tensor> &&tensor);
-
-  void setInputTensors(const std::vector<Tensor *> &input_tensors);
-  void setOutputTensors(const std::vector<Tensor *> &output_tensors);
-
-  void configureAllocations(Tensor *tensor);
-
-  const std::vector<Tensor *> &getInputTensors() const { return _input_tensors; }
-  const std::vector<Tensor *> &getOutputTensors() const { return _output_tensors; }
-
-  void addKernel(std::unique_ptr<Kernel> &&kernel);
-
-  void execute() const;
-
-private:
-  IMemoryManager *_memory_manager;
-  RuntimeModule *_owning_module;
-  std::vector<std::unique_ptr<Tensor>> _tensors;
-  std::vector<Tensor *> _input_tensors;
-  std::vector<Tensor *> _output_tensors;
-
-  // Kernels in execution order.
-  std::vector<std::unique_ptr<Kernel>> _kernels;
-  // Tensors that are no longer used after a given op
-  std::unique_ptr<TensorAllocPlan> _tensor_alloc_plan;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
diff --git a/compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h b/compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h
deleted file mode 100644 (file)
index 78873b0..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
-#define LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
-
-#include "core/RuntimeGraph.h"
-#include "core/EventNotifier.h"
-#include "luci_interpreter/MemoryManager.h"
-
-#include <memory>
-#include <vector>
-
-namespace luci_interpreter
-{
-
-class RuntimeModule
-{
-public:
-  explicit RuntimeModule(EventNotifier *event_notifier) : _event_notifier(event_notifier) {}
-
-  EventNotifier *getEventNotifier() const { return _event_notifier; }
-
-  RuntimeGraph *addGraph(IMemoryManager *memory_manager)
-  {
-    _graphs.push_back(std::make_unique<RuntimeGraph>(this, memory_manager));
-    return _graphs.back().get();
-  }
-
-  const std::vector<Tensor *> &getInputTensors() const { return getMainGraph()->getInputTensors(); }
-  const std::vector<Tensor *> &getOutputTensors() const
-  {
-    return getMainGraph()->getOutputTensors();
-  }
-
-  void execute() const { getMainGraph()->execute(); }
-
-private:
-  RuntimeGraph *getMainGraph() const { return _graphs[0].get(); }
-
-  EventNotifier *const _event_notifier;
-  std::vector<std::unique_ptr<RuntimeGraph>> _graphs;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp b/compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp
deleted file mode 100644 (file)
index 3c3c5ff..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci_interpreter/core/Tensor.h"
-
-#include <cstring>
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-Tensor::Tensor(DataType element_type, Shape shape, AffineQuantization quantization,
-               std::string name)
-  : _element_type(element_type), _shape(std::move(shape)), _quantization(std::move(quantization)),
-    _name(std::move(name)), _data_allocated(false)
-{
-}
-
-void Tensor::readData(void *data_ptr, size_t data_size) const
-{
-  const size_t element_size = getDataTypeSize(element_type());
-  const int32_t num_elements = shape().num_elements();
-  if (data_size != num_elements * element_size)
-  {
-    throw std::invalid_argument("Invalid data size.");
-  }
-  assert(data_ptr != nullptr);
-  std::memcpy(data_ptr, data<void>(), data_size);
-}
-
-void Tensor::writeData(const void *data_ptr, size_t data_size)
-{
-  const size_t element_size = getDataTypeSize(element_type());
-  const int32_t num_elements = shape().num_elements();
-  if (data_size != num_elements * element_size)
-  {
-    throw std::invalid_argument("Invalid data size.");
-  }
-  assert(data_ptr != nullptr);
-  std::memcpy(data<void>(), data_ptr, data_size);
-}
-
-void Tensor::resize(const Shape &new_shape) { _shape = new_shape; }
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt
deleted file mode 100644 (file)
index dd9733f..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-set(SOURCES
-    "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/GraphBuilderRegistry.h"
-    GraphBuilderRegistry.cpp)
-
-# include specific builders
-file(GLOB_RECURSE NODES "Nodes/*")
-list(APPEND SOURCES ${NODES})
-
-add_library(${LUCI_INTERPRETER_IMPORT} STATIC ${SOURCES})
-if (NOT NNCC_LIBRARY_NO_PIC)
-  set_target_properties(${LUCI_INTERPRETER_IMPORT} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-endif(NOT NNCC_LIBRARY_NO_PIC)
-
-target_include_directories(${LUCI_INTERPRETER_IMPORT} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
-target_link_libraries(${LUCI_INTERPRETER_IMPORT} PUBLIC luci_import)
diff --git a/compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp b/compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp
deleted file mode 100644 (file)
index a33bca6..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "luci_interpreter/GraphBuilderRegistry.h"
-#include "Nodes/CircleReferencingConst.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying()
-{
-  auto builder = std::make_unique<luci::GraphBuilderRegistry>();
-  {
-    // redefine NodeBuilder of BUFFER type
-    builder->add(std::make_unique<CircleReferencingConstNodeBuilder>());
-  }
-
-  return builder;
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp
deleted file mode 100644 (file)
index 14e90f2..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "CircleReferencingConst.h"
-
-#include <vector>
-
-namespace
-{
-
-// Helper struct describing the data stored in the custom_options of a CircleReferencingConst node
-struct ConstDataReference
-{
-  const uint8_t *data = nullptr;
-  uint32_t size = 0;
-};
-
-} // namespace
-
-namespace luci_interpreter
-{
-using namespace luci;
-
-CircleNode *CircleReferencingConstNodeBuilder::build(TensorIndex tensor_index,
-                                                     GraphBuilderContext *context) const
-{
-  assert(tensor_index >= 0);
-
-  const auto graph = context->graph();
-  const auto reader = context->reader();
-  const auto tensors = reader->tensors();
-  auto const const_tensor = tensors[tensor_index];
-  assert(const_tensor != nullptr);
-  if (const_tensor->is_variable())
-  {
-    // Create CircleVariable for variable
-    return nullptr;
-  }
-
-  auto const buffer = wrap(reader->buffers()[const_tensor->buffer()]->data());
-  auto const const_dims = wrap(const_tensor->shape()); // in NHWC
-  if (const_dims.empty() && buffer.empty())
-  {
-    // unknown shape tensor and scalar tensor
-    return nullptr;
-  }
-
-  // If tensor_index is used as an output of some other operator, this is not a constant
-  auto tensoroutputs = context->tensoroutputs();
-  if (tensoroutputs->find(tensor_index))
-  {
-    // other operator output tensor
-    return nullptr;
-  }
-
-  uint32_t num_elements = 1;
-  for (uint32_t r = 0; r < const_dims.size(); ++r)
-  {
-    num_elements = num_elements * const_dims[r];
-  }
-
-  if (buffer.empty() && num_elements > 0)
-  {
-    // normal empty tensor
-    return nullptr;
-  }
-
-  // create CircleReferencingConst
-  auto custom_node = graph->nodes()->create<CircleCustom>(0, 1);
-  {
-    custom_node->custom_code("CircleReferencingConst");
-
-    copy_tensor_attributes(const_tensor, custom_node);
-    custom_node->shape_status(luci::ShapeStatus::VALID);
-
-    // custom_options stores the size of the buffer and the pointer to the buffer's data
-    {
-      std::vector<uint8_t> custom_options(sizeof(ConstDataReference));
-      {
-        auto &const_data_ref = *reinterpret_cast<ConstDataReference *>(custom_options.data());
-        const_data_ref = {buffer.data(), buffer.size()};
-      }
-      custom_node->custom_options(custom_options);
-    }
-  }
-
-  // The output of the CircleCustom node is exposed via a CircleCustomOut node
-  auto out_node = graph->nodes()->create<CircleCustomOut>();
-  {
-    out_node->index(0);
-    out_node->input(custom_node);
-
-    copy_tensor_attributes(const_tensor, out_node);
-    out_node->shape_status(luci::ShapeStatus::VALID);
-  }
-
-  return out_node;
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h
deleted file mode 100644 (file)
index ed8f951..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
-#define __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
-
-#include <luci/Import/NodeBuilder.h>
-
-#include <luci/IR/Nodes/CircleConst.h>
-
-namespace luci_interpreter
-{
-using namespace luci;
-
-/**
- * @brief Builder that creates a CircleCustom node referencing the constant data of a tensor with a buffer.
- */
-class CircleReferencingConstNodeBuilder : public TypedNodeBuilder<NodeBuilderType::BUFFER>
-{
-public:
-  CircleNode *build(TensorIndex tensor_index, GraphBuilderContext *ctx) const final;
-};
-
-} // namespace luci_interpreter
-
-#endif // __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
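The deleted CircleReferencingConst builder avoids copying constant buffers by packing a {pointer, size} reference into the node's custom_options. A small sketch of that packing follows; it reuses the ConstDataReference name from the deleted .cpp file but is otherwise illustrative (memcpy is used here instead of the reinterpret_cast in the original).

#include <cstdint>
#include <cstring>
#include <vector>

// Same layout as the helper struct in the deleted source file.
struct ConstDataReference
{
  const uint8_t *data = nullptr;
  uint32_t size = 0;
};

// Pack a non-owning reference to constant data into a custom_options-style byte vector.
std::vector<uint8_t> pack_reference(const uint8_t *data, uint32_t size)
{
  std::vector<uint8_t> options(sizeof(ConstDataReference));
  const ConstDataReference ref{data, size};
  std::memcpy(options.data(), &ref, sizeof(ref));
  return options;
}

// Recover the reference on the consumer side (the code that reads custom_options).
ConstDataReference unpack_reference(const std::vector<uint8_t> &options)
{
  ConstDataReference ref;
  std::memcpy(&ref, options.data(), sizeof(ref));
  return ref;
}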
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp
deleted file mode 100644 (file)
index d7bf308..0000000
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Add.h"
-
-#include "kernels/BinaryOpCommon.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/add.h>
-#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params)
-  : KernelWithParams<AddParams>({input1, input2}, {output}, params)
-{
-}
-
-void Add::configure()
-{
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
-  if (input1()->element_type() == DataType::S16)
-  {
-    LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
-                           input2()->zero_points().size() == 1);
-    LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
-                           output()->zero_point() == 0);
-  }
-
-  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
-}
-
-void Add::execute() const
-{
-  switch (input1()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::S64:
-      evalInteger<int64_t>();
-      break;
-    case DataType::S32:
-      evalInteger<int32_t>();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    case DataType::S16:
-      evalQuantizedS16();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Add::evalFloat() const
-{
-  tflite::ArithmeticParams params{};
-  fillArithmeticActivationRange<float>(params, _params.activation);
-
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    tflite::reference_ops::BroadcastAdd4DSlow(
-      params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
-      getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
-  }
-  else
-  {
-    tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<float>(input1()),
-                               getTensorShape(input2()), getTensorData<float>(input2()),
-                               getTensorShape(output()), getTensorData<float>(output()));
-  }
-}
-
-template <typename T> void Add::evalInteger() const
-{
-  tflite::ArithmeticParams params{};
-  fillArithmeticActivationRange<T>(params, _params.activation);
-
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    tflite::reference_ops::BroadcastAdd4DSlow(
-      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
-      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
-  }
-  else
-  {
-    tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<T>(input1()),
-                               getTensorShape(input2()), getTensorData<T>(input2()),
-                               getTensorShape(output()), getTensorData<T>(output()));
-  }
-}
-
-void Add::evalQuantized() const
-{
-  const auto input1_scale = static_cast<double>(input1()->scale());
-  const auto input2_scale = static_cast<double>(input2()->scale());
-  const auto output_scale = static_cast<double>(output()->scale());
-
-  const int left_shift = 20;
-  const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
-  const double real_input1_multiplier = input1_scale / twice_max_input_scale;
-  const double real_input2_multiplier = input2_scale / twice_max_input_scale;
-  const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
-
-  int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
-  int input1_shift{}, input2_shift{}, output_shift{};
-  quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
-  quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
-  quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
-
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  tflite::ArithmeticParams params{};
-  params.left_shift = left_shift;
-  // The kernel expects inputs' zero points to be negated.
-  params.input1_offset = -input1()->zero_point(); // Note the '-'.
-  params.input1_multiplier = input1_multiplier;
-  params.input1_shift = input1_shift;
-  params.input2_offset = -input2()->zero_point(); // Note the '-'.
-  params.input2_multiplier = input2_multiplier;
-  params.input2_shift = input2_shift;
-  params.output_offset = output()->zero_point();
-  params.output_multiplier = output_multiplier;
-  params.output_shift = output_shift;
-  params.quantized_activation_min = activation_min;
-  params.quantized_activation_max = activation_max;
-
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    tflite::reference_ops::BroadcastAdd4DSlow(
-      params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
-      getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
-  }
-  else
-  {
-    tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
-                               getTensorShape(input2()), getTensorData<uint8_t>(input2()),
-                               getTensorShape(output()), getTensorData<uint8_t>(output()));
-  }
-}
-
-void Add::evalQuantizedS16() const
-{
-  const auto input1_scale = static_cast<double>(input1()->scale());
-  const auto input2_scale = static_cast<double>(input2()->scale());
-  const auto output_scale = static_cast<double>(output()->scale());
-
-  constexpr int left_shift = 12;
-  const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
-  const double real_input1_multiplier = input1_scale / twice_max_input_scale;
-  const double real_input2_multiplier = input2_scale / twice_max_input_scale;
-  const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
-
-  int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
-  int input1_shift{}, input2_shift{}, output_shift{};
-  quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
-  quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
-  quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
-
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  auto fn = [input1_multiplier, input1_shift, //
-             input2_multiplier, input2_shift, //
-             output_multiplier, output_shift, //
-             activation_min, activation_max](int16_t input1_val, int16_t input2_val) {
-    const int32_t shifted_input1_val = static_cast<int32_t>(input1_val) << left_shift;
-    const int32_t shifted_input2_val = static_cast<int32_t>(input2_val) << left_shift;
-    const int32_t scaled_input1_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
-      shifted_input1_val, input1_multiplier, input1_shift);
-    const int32_t scaled_input2_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
-      shifted_input2_val, input2_multiplier, input2_shift);
-    const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
-    const int32_t raw_output = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
-      raw_sum, output_multiplier, output_shift);
-    const int32_t clamped_output = std::min(activation_max, std::max(activation_min, raw_output));
-    return static_cast<int16_t>(clamped_output);
-  };
-
-  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
-                        getTensorShape(input2()), getTensorData<int16_t>(input2()),
-                        getTensorShape(output()), getTensorData<int16_t>(output()), fn);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
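In the deleted quantized (uint8) Add path, both inputs are first rescaled into a shared fixed-point domain derived from twice the larger input scale, and the sum is rescaled to the output scale with a 20-bit left shift. A worked illustration of the real-valued multipliers for assumed example scales; the values below are invented for the example and not taken from any model.

#include <algorithm>
#include <cstdio>

int main()
{
  // Assumed example scales; real values come from the model's quantization parameters.
  const double input1_scale = 0.5;
  const double input2_scale = 0.25;
  const double output_scale = 0.75;

  const int left_shift = 20; // as in the deleted uint8 path (the int16 path uses 12)
  const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
  const double real_input1_multiplier = input1_scale / twice_max_input_scale; // 0.5
  const double real_input2_multiplier = input2_scale / twice_max_input_scale; // 0.25
  const double real_output_multiplier =
    twice_max_input_scale / ((1 << left_shift) * output_scale);

  std::printf("in1=%f in2=%f out=%e\n", real_input1_multiplier, real_input2_multiplier,
              real_output_multiplier);
  return 0;
}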
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp
deleted file mode 100644 (file)
index 6561a17..0000000
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/ArgMax.h"
-#include "kernels/Utils.h"
-#include "PALArgMax.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-ArgMax::ArgMax(const Tensor *input, const Tensor *axis, Tensor *output, const ArgMaxParams &params)
-  : KernelWithParams<ArgMaxParams>({input, axis}, {output}, params)
-{
-}
-
-void ArgMax::configure()
-{
-  assert(axis()->element_type() == DataType::S32 || axis()->element_type() == DataType::S64);
-  assert(input()->shape().num_dims() >= 1);
-  const Shape &input_shape = input()->shape();
-  const int num_dims = input_shape.num_dims();
-  Shape output_shape(num_dims - 1);
-
-  // If the axis value is negative, update it by adding input_shape's num_dims.
-  // If the updated value is still negative, the assert below fails.
-  assert(axis()->shape().num_elements() == 1);
-  int axis_value = getTensorData<int32_t>(axis())[0];
-  if (axis_value < 0)
-    axis_value = axis_value + num_dims;
-  assert(axis_value >= 0);
-
-  int j = 0;
-  for (int i = 0; i < num_dims; i++)
-  {
-    if (i == axis_value)
-      continue;
-    output_shape.dim(j++) = input_shape.dim(i);
-  }
-
-  assert(output()->element_type() == _params.output_type);
-
-  output()->resize(output_shape);
-}
-
-void ArgMax::execute() const
-{
-
-#define TF_LITE_ARG_MAX(data_type, axis_type, output_type)                                    \
-  luci_interpreter_pal::ArgMinMax(getTensorShape(input()), getTensorData<data_type>(input()), \
-                                  getTensorData<axis_type>(axis()), getTensorShape(output()), \
-                                  getTensorData<output_type>(output()), std::greater<data_type>())
-  if (axis()->element_type() == DataType::S32)
-  {
-    switch (_params.output_type)
-    {
-      case DataType::S32:
-        switch (input()->element_type())
-        {
-          case DataType::FLOAT32:
-            TF_LITE_ARG_MAX(float, int32_t, int32_t);
-            break;
-          case DataType::U8:
-            TF_LITE_ARG_MAX(uint8_t, int32_t, int32_t);
-            break;
-          default:
-            throw std::runtime_error("Unsupported input type.");
-        }
-        break;
-      case DataType::S64:
-        switch (input()->element_type())
-        {
-          case DataType::FLOAT32:
-            TF_LITE_ARG_MAX(float, int32_t, int64_t);
-            break;
-          case DataType::U8:
-            TF_LITE_ARG_MAX(uint8_t, int32_t, int64_t);
-            break;
-          default:
-            throw std::runtime_error("Unsupported input type.");
-        }
-        break;
-      default:
-        throw std::runtime_error("Unsupported output type.");
-    }
-  }
-  else
-  {
-    switch (_params.output_type)
-    {
-      case DataType::S32:
-        switch (input()->element_type())
-        {
-          case DataType::FLOAT32:
-            TF_LITE_ARG_MAX(float, int64_t, int32_t);
-            break;
-          case DataType::U8:
-            TF_LITE_ARG_MAX(uint8_t, int64_t, int32_t);
-            break;
-          default:
-            throw std::runtime_error("Unsupported input type.");
-        }
-        break;
-      case DataType::S64:
-        switch (input()->element_type())
-        {
-          case DataType::FLOAT32:
-            TF_LITE_ARG_MAX(float, int64_t, int64_t);
-            break;
-          case DataType::U8:
-            TF_LITE_ARG_MAX(uint8_t, int64_t, int64_t);
-            break;
-          default:
-            throw std::runtime_error("Unsupported input type.");
-        }
-        break;
-      default:
-        throw std::runtime_error("Unsupported output type.");
-    }
-  }
-#undef TF_LITE_ARG_MAX
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
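The deleted ArgMax kernel dispatches on the input, axis and output types and delegates to luci_interpreter_pal::ArgMinMax with std::greater. A minimal, type-fixed sketch of the underlying argmax-along-an-axis idea; argmax_rows and its row-major layout assumption are illustrative, not the PAL routine.

#include <cstdio>
#include <vector>

// For each row of a rows x cols matrix (row-major), return the column index of its maximum.
std::vector<int> argmax_rows(const std::vector<float> &data, int rows, int cols)
{
  std::vector<int> result(rows, 0);
  for (int r = 0; r < rows; ++r)
    for (int c = 1; c < cols; ++c)
      if (data[r * cols + c] > data[r * cols + result[r]])
        result[r] = c;
  return result;
}

int main()
{
  const std::vector<float> data = {0.1f, 0.9f, 0.3f, 0.7f, 0.2f, 0.5f}; // 2 x 3 example
  for (int idx : argmax_rows(data, 2, 3))
    std::printf("%d\n", idx); // prints 1 then 0
  return 0;
}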
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp
deleted file mode 100644 (file)
index d3bade9..0000000
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/AveragePool2D.h"
-
-#include "kernels/Utils.h"
-
-#include "PALAveragePool2d.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad,
-                             const Pool2DParams &params)
-  : KernelWithParams<Pool2DParams>({input}, {output, scratchpad}, params)
-{
-}
-
-void AveragePool2D::configure()
-{
-  if (input()->element_type() != output()->element_type())
-  {
-    throw std::runtime_error("Input Tensor and Output Tensor Type must be same");
-  }
-  if (input()->shape().num_dims() != 4)
-  {
-    throw std::runtime_error("Input Tensor Shape must be 4-D");
-  }
-  const Shape &input_shape = input()->shape();
-
-  const int32_t batches = input_shape.dim(0);
-  const int32_t input_height = input_shape.dim(1);
-  const int32_t input_width = input_shape.dim(2);
-  const int32_t depth = input_shape.dim(3);
-
-  const int32_t output_height =
-    computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height);
-  const int32_t output_width =
-    computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
-
-  _padding_height =
-    computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
-  _padding_width =
-    computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
-  if (input()->element_type() == DataType::U8)
-  {
-    LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
-    LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
-  }
-  else if (input()->element_type() == DataType::S16)
-  {
-    LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
-    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
-  }
-  else if (input()->element_type() == DataType::S8)
-  {
-    LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
-    LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
-  }
-  output()->resize({batches, output_height, output_width, depth});
-
-  auto scratchpad = getOutputTensors()[1];
-  luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(),
-                                              getTensorShape(input()), getTensorShape(output()));
-}
-
-void AveragePool2D::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    case DataType::S16:
-      evalSInt16();
-      break;
-    case DataType::S8:
-      evalSInt8();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void AveragePool2D::evalFloat() const
-{
-  float activation_min{};
-  float activation_max{};
-  calculateActivationRange(_params.activation, &activation_min, &activation_max);
-
-  tflite::PoolParams params{};
-  params.padding_values.height = _padding_height;
-  params.padding_values.width = _padding_width;
-  params.stride_height = _params.stride_height;
-  params.stride_width = _params.stride_width;
-  params.filter_height = _params.filter_height;
-  params.filter_width = _params.filter_width;
-  params.float_activation_min = activation_min;
-  params.float_activation_max = activation_max;
-
-  tflite::reference_ops::AveragePool(params, getTensorShape(input()), getTensorData<float>(input()),
-                                     getTensorShape(output()), getTensorData<float>(output()));
-}
-
-void AveragePool2D::evalQuantized() const
-{
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  tflite::PoolParams params{};
-  params.padding_values.height = _padding_height;
-  params.padding_values.width = _padding_width;
-  params.stride_height = _params.stride_height;
-  params.stride_width = _params.stride_width;
-  params.filter_height = _params.filter_height;
-  params.filter_width = _params.filter_width;
-  params.quantized_activation_min = activation_min;
-  params.quantized_activation_max = activation_max;
-
-  tflite::reference_ops::AveragePool(params, getTensorShape(input()),
-                                     getTensorData<uint8_t>(input()), getTensorShape(output()),
-                                     getTensorData<uint8_t>(output()));
-}
-
-void AveragePool2D::evalSInt8() const
-{
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-  tflite::PoolParams params{};
-  params.padding_values.height = _padding_height;
-  params.padding_values.width = _padding_width;
-  params.stride_height = _params.stride_height;
-  params.stride_width = _params.stride_width;
-  params.filter_height = _params.filter_height;
-  params.filter_width = _params.filter_width;
-  params.quantized_activation_min = activation_min;
-  params.quantized_activation_max = activation_max;
-
-  auto scratchpad = getOutputTensors()[1];
-  int8_t *scratchpad_data = nullptr;
-  if (scratchpad->is_allocatable())
-    scratchpad_data = scratchpad->data<int8_t>();
-
-  luci_interpreter_pal::AveragePool<int8_t>(
-    params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(output()),
-    getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
-}
-
-void AveragePool2D::evalSInt16() const
-{
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  tflite::PoolParams params{};
-  params.padding_values.height = _padding_height;
-  params.padding_values.width = _padding_width;
-  params.stride_height = _params.stride_height;
-  params.stride_width = _params.stride_width;
-  params.filter_height = _params.filter_height;
-  params.filter_width = _params.filter_width;
-  params.quantized_activation_min = activation_min;
-  params.quantized_activation_max = activation_max;
-
-  tflite::reference_integer_ops::AveragePool(
-    params, getTensorShape(input()), getTensorData<int16_t>(input()), //
-    getTensorShape(output()), getTensorData<int16_t>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
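configure() above relies on computeOutputSize and computePadding from kernels/Utils.h, which are not part of this diff. The sketch below mirrors the usual TFLite SAME/VALID output-size convention for dilation 1; it is an assumption about those helpers, not a copy of them.

#include <cstdio>

enum class Padding
{
  SAME,
  VALID
};

// Assumed semantics, matching the common TFLite convention for dilation 1.
int compute_output_size(Padding padding, int image_size, int filter_size, int stride)
{
  switch (padding)
  {
    case Padding::SAME:
      return (image_size + stride - 1) / stride;
    case Padding::VALID:
      return (image_size - filter_size + stride) / stride;
  }
  return 0;
}

int main()
{
  // 32x32 input, 2x2 filter, stride 2: both conventions yield a 16x16 output here.
  std::printf("SAME: %d, VALID: %d\n", compute_output_size(Padding::SAME, 32, 2, 2),
              compute_output_size(Padding::VALID, 32, 2, 2));
  return 0;
}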
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp
deleted file mode 100644 (file)
index 24ca229..0000000
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/BatchMatMul.h"
-#include "kernels/Utils.h"
-
-#include "PALBatchMatMul.h"
-
-#include <tensorflow/lite/kernels/internal/reference/transpose.h>
-
-#include <stdexcept>
-
-namespace
-{
-
-tflite::RuntimeShape SwapRowColumnDims(const tflite::RuntimeShape &shape)
-{
-  tflite::RuntimeShape swapped_shape(shape);
-  const int32_t dims = shape.DimensionsCount();
-  swapped_shape.SetDim(dims - 2, shape.Dims(dims - 1));
-  swapped_shape.SetDim(dims - 1, shape.Dims(dims - 2));
-  return swapped_shape;
-}
-
-} // namespace
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-BatchMatMul::BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp,
-                         Tensor *y_tmp, const BatchMatMulParams &params)
-  : KernelWithParams({x, y}, {output, x_tmp, y_tmp}, params)
-{
-}
-
-void BatchMatMul::configure()
-{
-  auto lhs = x();
-  auto rhs = y();
-  auto adj_x = params().adj_x;
-  auto adj_y = params().adj_y;
-
-  // TODO Support non-float types
-  if (lhs->element_type() != DataType::FLOAT32 || rhs->element_type() != DataType::FLOAT32)
-    throw std::runtime_error("Unsupported type.");
-
-  LUCI_INTERPRETER_CHECK(lhs->element_type() == rhs->element_type());
-
-  auto lhs_rank = lhs->shape().num_dims();
-  auto rhs_rank = rhs->shape().num_dims();
-  LUCI_INTERPRETER_CHECK(lhs_rank >= 2 && lhs_rank <= 4);
-  LUCI_INTERPRETER_CHECK(rhs_rank >= 2 && rhs_rank <= 4);
-
-  auto lhs_scratchpad = temp_lhs();
-  auto rhs_scratchpad = temp_rhs();
-  luci_interpreter_pal::SetupScratchpadTensor(lhs_scratchpad, rhs_scratchpad, getTensorShape(lhs),
-                                              getTensorShape(rhs));
-
-  auto output_rank = std::max(lhs_rank, rhs_rank);
-
-  auto extended_lhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(lhs));
-  auto extended_rhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(rhs));
-
-  // Ensure any batch dimensions obey broadcasting rules.
-  for (int i = 0; i < output_rank - 2; ++i)
-  {
-    const int lhs_dim = extended_lhs_shape.Dims(i);
-    const int rhs_dim = extended_rhs_shape.Dims(i);
-    if (lhs_dim != rhs_dim)
-    {
-      if (lhs_dim != 1)
-      {
-        LUCI_INTERPRETER_CHECK(rhs_dim == 1);
-      }
-    }
-  }
-
-  // Ensure other dimensions work for matrix multiplication.
-  int accum_dim_lhs =
-    adj_x ? extended_lhs_shape.Dims(output_rank - 2) : extended_lhs_shape.Dims(output_rank - 1);
-  int accum_dim_rhs =
-    adj_y ? extended_rhs_shape.Dims(output_rank - 1) : extended_rhs_shape.Dims(output_rank - 2);
-  LUCI_INTERPRETER_CHECK(accum_dim_lhs == accum_dim_rhs);
-
-  Shape output_shape(output_rank);
-  // Fill in any broadcast dimensions.
-  for (int i = 0; i < output_rank - 2; ++i)
-  {
-    const int lhs_dim = extended_lhs_shape.Dims(i);
-    const int rhs_dim = extended_rhs_shape.Dims(i);
-    int broadcast_dim = lhs_dim;
-    if ((lhs_dim != rhs_dim) && (lhs_dim == 1))
-    {
-      broadcast_dim = rhs_dim;
-    }
-    output_shape.dim(i) = broadcast_dim;
-  }
-  // Fill in the matmul dimensions.
-  int lhs_rows_index = adj_x ? output_rank - 1 : output_rank - 2;
-  int rhs_cols_index = adj_y ? output_rank - 2 : output_rank - 1;
-
-  output_shape.dim(output_rank - 2) = extended_lhs_shape.Dims(lhs_rows_index);
-  output_shape.dim(output_rank - 1) = extended_rhs_shape.Dims(rhs_cols_index);
-
-  output()->resize(output_shape);
-}
-
-void TransposeRowsColumns(const Tensor *tensor_in, Tensor *tensor_out)
-{
-  tflite::RuntimeShape transposed_shape(getTensorShape(tensor_in));
-  tflite::RuntimeShape shape(getTensorShape(tensor_in));
-  tflite::TransposeParams params;
-  int rank = shape.DimensionsCount();
-  params.perm_count = rank;
-  for (int i = 0; i < rank - 2; ++i)
-  {
-    params.perm[i] = i;
-  }
-  // Transpose the last two dimensions.
-  params.perm[rank - 2] = rank - 1;
-  params.perm[rank - 1] = rank - 2;
-  transposed_shape.SetDim(rank - 1, shape.Dims(rank - 2));
-  transposed_shape.SetDim(rank - 2, shape.Dims(rank - 1));
-  switch (tensor_in->element_type())
-  {
-    case DataType::FLOAT32:
-      tflite::reference_ops::Transpose(params, shape, getTensorData<float>(tensor_in),
-                                       transposed_shape, getTensorData<float>(tensor_out));
-      break;
-    default:
-      throw std::runtime_error("Only suppport fp32 BatchMatMul for now.");
-  }
-}
-
-void BatchMatMul::execute() const
-{
-  auto lhs = x();
-  auto rhs = y();
-
-  bool adj_x = params().adj_x;
-  bool adj_y = params().adj_y;
-
-  auto orig_lhs_shape = getTensorShape(lhs);
-  auto orig_rhs_shape = getTensorShape(rhs);
-
-  auto rhs_tensor = adj_y ? rhs : temp_rhs();
-  auto lhs_tensor = adj_x ? temp_lhs() : lhs;
-  if (not adj_y)
-  {
-    TransposeRowsColumns(rhs, temp_rhs());
-  }
-  if (adj_x)
-  {
-    TransposeRowsColumns(lhs, temp_lhs());
-  }
-  tflite::RuntimeShape rhs_shape = adj_y ? orig_rhs_shape : SwapRowColumnDims(orig_rhs_shape);
-  tflite::RuntimeShape lhs_shape = adj_x ? orig_lhs_shape : SwapRowColumnDims(orig_lhs_shape);
-
-  switch (x()->element_type())
-  {
-    case DataType::FLOAT32:
-      luci_interpreter_pal::BatchMatMul(rhs_shape, getTensorData<float>(rhs_tensor), lhs_shape,
-                                        getTensorData<float>(lhs_tensor), getTensorShape(output()),
-                                        getTensorData<float>(output()));
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
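
Editor's note: the deleted BatchMatMul kernel above derives its output shape by broadcasting all but the last two dimensions and then taking the row count from the (possibly adjointed) LHS and the column count from the RHS. Below is a minimal standalone sketch of that rule using plain std::vector shapes instead of the interpreter's Shape/RuntimeShape types; the helper name batchMatMulOutputShape is illustrative only and is not part of the deleted code.

#include <cassert>
#include <cstdint>
#include <vector>

// Sketch: compute the BatchMatMul output shape from already rank-extended
// LHS/RHS shapes (same rank, >= 2). Batch dims broadcast 1 -> N; the last two
// dims follow matrix-multiplication rules, honoring adj_x / adj_y.
std::vector<int32_t> batchMatMulOutputShape(const std::vector<int32_t> &lhs,
                                            const std::vector<int32_t> &rhs,
                                            bool adj_x, bool adj_y)
{
  assert(lhs.size() == rhs.size() && lhs.size() >= 2);
  const int rank = static_cast<int>(lhs.size());
  std::vector<int32_t> out(rank);

  // Broadcast the batch dimensions (all but the last two).
  for (int i = 0; i < rank - 2; ++i)
  {
    assert(lhs[i] == rhs[i] || lhs[i] == 1 || rhs[i] == 1);
    out[i] = (lhs[i] == 1) ? rhs[i] : lhs[i];
  }

  // Contraction dimensions must match.
  const int32_t accum_lhs = adj_x ? lhs[rank - 2] : lhs[rank - 1];
  const int32_t accum_rhs = adj_y ? rhs[rank - 1] : rhs[rank - 2];
  assert(accum_lhs == accum_rhs);
  (void)accum_lhs; (void)accum_rhs;

  // Rows come from the LHS, columns from the RHS (swapped if adjointed).
  out[rank - 2] = adj_x ? lhs[rank - 1] : lhs[rank - 2];
  out[rank - 1] = adj_y ? rhs[rank - 2] : rhs[rank - 1];
  return out;
}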
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp
deleted file mode 100644 (file)
index bd315ff..0000000
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/BatchToSpaceND.h"
-#include "kernels/Utils.h"
-
-#include "PALBatchToSpaceND.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-namespace
-{
-const int kInputMinDimensionNum = 3;
-const int kInputMaxDimensionNum = 4;
-} // namespace
-
-BatchToSpaceND::BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
-                               Tensor *output)
-  : Kernel({input, block_shape, crops}, {output})
-{
-}
-
-void BatchToSpaceND::configure()
-{
-
-  const auto *block_shape_data = block_shape()->data<int32_t>();
-  const auto *crops_data = crops()->data<int32_t>();
-  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
-  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-
-  int spatial_dims_num = input()->shape().num_dims() - 2;
-
-  LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
-  LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
-
-  LUCI_INTERPRETER_CHECK(crops()->shape().num_dims() == 2);
-  LUCI_INTERPRETER_CHECK(crops()->shape().dim(0) == spatial_dims_num);
-  LUCI_INTERPRETER_CHECK(crops()->shape().dim(1) == 2);
-  for (int i = 0; i < spatial_dims_num * 2; ++i)
-  {
-    LUCI_INTERPRETER_CHECK(crops_data[i] >= 0);
-  }
-
-  Shape output_shape = Shape(input()->shape().num_dims());
-  int output_batch_size = input()->shape().dim(0);
-  for (int i = 0; i < spatial_dims_num; ++i)
-  {
-    LUCI_INTERPRETER_CHECK(output_batch_size % block_shape_data[i] == 0);
-    output_batch_size = output_batch_size / block_shape_data[i];
-    output_shape.dim(i + 1) =
-      input()->shape().dim(i + 1) * block_shape_data[i] - crops_data[i * 2] - crops_data[i * 2 + 1];
-  }
-
-  output_shape.dim(0) = output_batch_size;
-  output_shape.dim(input()->shape().num_dims() - 1) =
-    input()->shape().dim(input()->shape().num_dims() - 1);
-  output()->resize(output_shape);
-}
-
-void BatchToSpaceND::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      luci_interpreter_pal::BatchToSpaceND(
-        getTensorShape(input()), getTensorData<float>(input()), getTensorShape(block_shape()),
-        getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
-        getTensorData<int32_t>(crops()), getTensorShape(output()), getTensorData<float>(output()));
-      break;
-    case DataType::U8:
-      luci_interpreter_pal::BatchToSpaceND(
-        getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(block_shape()),
-        getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
-        getTensorData<int32_t>(crops()), getTensorShape(output()),
-        getTensorData<uint8_t>(output()));
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
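
Editor's note: the shape rule implemented by the deleted BatchToSpaceND::configure() can be stated compactly: the batch dimension is divided by the product of the block sizes, each spatial dimension is multiplied by its block size and then reduced by the corresponding crops, and the channel dimension is unchanged. A standalone sketch follows (hypothetical helper name, plain vectors, not the interpreter API).

#include <cassert>
#include <cstdint>
#include <vector>

// Sketch of the BatchToSpaceND output-shape rule for NHWC-style inputs of
// rank 3 or 4: out_batch = batch / prod(block),
// out_spatial[i] = in_spatial[i] * block[i] - crop_begin[i] - crop_end[i],
// channels unchanged. Crops are packed as [begin0, end0, begin1, end1, ...].
std::vector<int32_t> batchToSpaceNDShape(const std::vector<int32_t> &input,
                                         const std::vector<int32_t> &block,
                                         const std::vector<int32_t> &crops)
{
  const int spatial = static_cast<int>(input.size()) - 2;
  assert(static_cast<int>(block.size()) == spatial);
  assert(static_cast<int>(crops.size()) == 2 * spatial);

  std::vector<int32_t> out(input.size());
  int32_t batch = input[0];
  for (int i = 0; i < spatial; ++i)
  {
    assert(batch % block[i] == 0);
    batch /= block[i];
    out[i + 1] = input[i + 1] * block[i] - crops[2 * i] - crops[2 * i + 1];
  }
  out[0] = batch;
  out.back() = input.back(); // channels pass through unchanged
  return out;
}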
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt
deleted file mode 100644 (file)
index 9f4ba0e..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-set(SOURCES
-        BinaryOpCommon.h
-        Utils.h
-        Utils.cpp
-        "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h"
-        ${LUCI_INTERPRETER_SOURCE_DIR}/TestMemoryManager.cpp
-        "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h"
-        ${LUCI_INTERPRETER_SOURCE_DIR}/SimpleMemoryManager.cpp)
-
-macro(REGISTER_KERNEL NODE)
-  list(APPEND SOURCES "${NODE}.h")
-  list(APPEND SOURCES "${NODE}.cpp")
-endmacro(REGISTER_KERNEL)
-
-include(${KERNEL_REGISTER_FILE})
-
-add_library(${LUCI_INTERPRETER_KERNELS} STATIC ${SOURCES})
-if (NOT NNCC_LIBRARY_NO_PIC)
-  set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-endif(NOT NNCC_LIBRARY_NO_PIC)
-target_include_directories(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
-
-target_link_libraries(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_CORE})
-target_link_libraries(${LUCI_INTERPRETER_KERNELS} PRIVATE nncc_common)
-
-add_pal_to_target(${LUCI_INTERPRETER_KERNELS})
-
-if(NOT ENABLE_TEST)
-  return()
-endif(NOT ENABLE_TEST)
-
-nnas_find_package(GTest REQUIRED)
-
-macro(REGISTER_KERNEL NODE)
-  list(APPEND TEST_SOURCES "${NODE}.test.cpp")
-endmacro(REGISTER_KERNEL)
-
-include(${KERNEL_REGISTER_FILE})
-
-list(APPEND TEST_SOURCES TestUtils.h TestUtils.cpp)
-
-GTest_AddTest(${LUCI_INTERPRETER_KERNELS}_test ${TEST_SOURCES})
-target_link_libraries(${LUCI_INTERPRETER_KERNELS}_test ${LUCI_INTERPRETER_KERNELS})
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp
deleted file mode 100644 (file)
index 39ee725..0000000
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Cast.h"
-#include "kernels/Utils.h"
-
-namespace
-{
-
-using namespace luci_interpreter;
-using namespace luci_interpreter::kernels;
-
-template <typename InT, typename OutT>
-void cast_data(const InT *in_data, OutT *out_data, uint32_t elements_count)
-{
-  std::transform(in_data, in_data + elements_count, out_data,
-                 [](InT a) { return static_cast<OutT>(a); });
-}
-
-template <typename InT> void cast_from_pointer_to_tensor(const InT *in_data, Tensor *out_tensor)
-{
-  auto const out_type = out_tensor->element_type();
-  auto const elements_count = out_tensor->shape().num_elements();
-
-  switch (out_type)
-  {
-    case loco::DataType::U8:
-      cast_data(in_data, getTensorData<uint8_t>(out_tensor), elements_count);
-      break;
-    case loco::DataType::U16:
-      cast_data(in_data, getTensorData<uint16_t>(out_tensor), elements_count);
-      break;
-    case loco::DataType::U32:
-      cast_data(in_data, getTensorData<uint32_t>(out_tensor), elements_count);
-      break;
-    case loco::DataType::U64:
-      cast_data(in_data, getTensorData<uint64_t>(out_tensor), elements_count);
-      break;
-    case loco::DataType::S8:
-      cast_data(in_data, getTensorData<int8_t>(out_tensor), elements_count);
-      break;
-    case loco::DataType::S16:
-      cast_data(in_data, getTensorData<int16_t>(out_tensor), elements_count);
-      break;
-    case loco::DataType::S32:
-      cast_data(in_data, getTensorData<int32_t>(out_tensor), elements_count);
-      break;
-    case loco::DataType::S64:
-      cast_data(in_data, getTensorData<int64_t>(out_tensor), elements_count);
-      break;
-    case loco::DataType::FLOAT32:
-      cast_data(in_data, getTensorData<float>(out_tensor), elements_count);
-      break;
-    case loco::DataType::BOOL:
-      cast_data(in_data, getTensorData<bool>(out_tensor), elements_count);
-      break;
-    default:
-      throw std::runtime_error("Unsupported output type.");
-  }
-}
-
-void cast_from_tensor_to_tensor(const Tensor *in_tensor, Tensor *out_tensor)
-{
-  auto in_type = in_tensor->element_type();
-
-  switch (in_type)
-  {
-    case loco::DataType::U8:
-      cast_from_pointer_to_tensor(getTensorData<uint8_t>(in_tensor), out_tensor);
-      break;
-    case loco::DataType::U16:
-      cast_from_pointer_to_tensor(getTensorData<uint16_t>(in_tensor), out_tensor);
-      break;
-    case loco::DataType::U32:
-      cast_from_pointer_to_tensor(getTensorData<uint32_t>(in_tensor), out_tensor);
-      break;
-    case loco::DataType::U64:
-      cast_from_pointer_to_tensor(getTensorData<uint64_t>(in_tensor), out_tensor);
-      break;
-    case loco::DataType::S8:
-      cast_from_pointer_to_tensor(getTensorData<int8_t>(in_tensor), out_tensor);
-      break;
-    case loco::DataType::S16:
-      cast_from_pointer_to_tensor(getTensorData<int16_t>(in_tensor), out_tensor);
-      break;
-    case loco::DataType::S32:
-      cast_from_pointer_to_tensor(getTensorData<int32_t>(in_tensor), out_tensor);
-      break;
-    case loco::DataType::S64:
-      cast_from_pointer_to_tensor(getTensorData<int64_t>(in_tensor), out_tensor);
-      break;
-    case loco::DataType::FLOAT32:
-      cast_from_pointer_to_tensor(getTensorData<float>(in_tensor), out_tensor);
-      break;
-    case loco::DataType::BOOL:
-      cast_from_pointer_to_tensor(getTensorData<bool>(in_tensor), out_tensor);
-      break;
-    default:
-      throw std::runtime_error("Unsupported input type.");
-  }
-}
-
-} // namespace
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Cast::Cast(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-void Cast::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() != loco::DataType::Unknown);
-  LUCI_INTERPRETER_CHECK(output()->element_type() != loco::DataType::Unknown);
-
-  const Shape &shape = input()->shape();
-  output()->resize(shape);
-}
-
-void Cast::execute() const
-{
-  assert(input()->shape().num_elements() == output()->shape().num_elements());
-
-  cast_from_tensor_to_tensor(input(), output());
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
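
Editor's note: the deleted Cast kernel dispatches twice on runtime element types (once on the input, once on the output) and funnels every combination into a single elementwise static_cast loop. A minimal sketch of that core loop, independent of the Tensor/DataType machinery (names are illustrative):

#include <algorithm>
#include <cstdint>
#include <vector>

// Sketch: elementwise cast between two typed buffers, as done by cast_data()
// in the deleted kernel. The double dispatch over runtime DataTypes simply
// selects InT/OutT before running a loop like this.
template <typename InT, typename OutT>
void castBuffer(const std::vector<InT> &in, std::vector<OutT> &out)
{
  out.resize(in.size());
  std::transform(in.begin(), in.end(), out.begin(),
                 [](InT v) { return static_cast<OutT>(v); });
}

// Example: float -> int32 truncates toward zero, matching static_cast semantics.
//   std::vector<float> f{1.9f, -2.7f};
//   std::vector<int32_t> i;
//   castBuffer(f, i); // i == {1, -2}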
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp
deleted file mode 100644 (file)
index 46ee594..0000000
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Concatenation.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/concatenation.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Concatenation::Concatenation(std::vector<const Tensor *> inputs, Tensor *output,
-                             const ConcatenationParams &params)
-  : KernelWithParams<ConcatenationParams>(std::move(inputs), {output}, params)
-{
-}
-
-void Concatenation::configure()
-{
-  const int num_inputs = _inputs.size();
-  LUCI_INTERPRETER_CHECK(num_inputs > 0);
-  const Tensor *t0 = _inputs[0];
-
-  // TODO: Support concat with fused activation function
-  LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::NONE);
-
-  int axis = _params.axis;
-  if (axis < 0)
-    axis += t0->shape().num_dims();
-  LUCI_INTERPRETER_CHECK(axis >= 0 && axis < t0->shape().num_dims());
-
-  int32_t sum_axis = t0->shape().dim(axis);
-  for (int i = 1; i < num_inputs; ++i)
-  {
-    const Tensor *tensor = _inputs[i];
-    LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type());
-    LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims());
-    for (int d = 0; d < t0->shape().num_dims(); ++d)
-    {
-      if (d == axis)
-      {
-        sum_axis += tensor->shape().dim(axis);
-      }
-      else
-      {
-        LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d));
-      }
-    }
-  }
-
-  Shape output_shape = t0->shape();
-  output_shape.dim(axis) = sum_axis;
-
-  // If the input tensors are of INT8 type, then the quantization parameters of all input tensors
-  // and of the output should be the same.
-  for (auto current_tensor : _inputs)
-  {
-    if (current_tensor->element_type() == DataType::S8)
-    {
-      LUCI_INTERPRETER_CHECK(current_tensor->quantized_dimension() ==
-                             output()->quantized_dimension());
-
-      LUCI_INTERPRETER_CHECK(current_tensor->zero_points().size() ==
-                             current_tensor->scales().size());
-      LUCI_INTERPRETER_CHECK(current_tensor->zero_points() == output()->zero_points());
-      LUCI_INTERPRETER_CHECK(current_tensor->scales() == output()->scales());
-    }
-  }
-  output()->resize(output_shape);
-}
-
-void Concatenation::execute() const
-{
-  switch (_inputs[0]->element_type())
-  {
-    case DataType::FLOAT32:
-      evalGeneric<float>();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    case DataType::S8:
-      evalGeneric<int8_t>();
-      break;
-    case DataType::S32:
-      evalGeneric<int32_t>();
-      break;
-    case DataType::S64:
-      evalGeneric<int64_t>();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-template <typename T> void Concatenation::evalGeneric() const
-{
-  int axis = _params.axis;
-  if (axis < 0)
-    axis += output()->shape().num_dims();
-
-  VectorOfTensors<T, true> inputs(_inputs);
-  tflite::ConcatenationParams params{};
-  params.axis = axis;
-  params.inputs_count = _inputs.size();
-  tflite::reference_ops::Concatenation(params, inputs.shapes(), inputs.data(),
-                                       getTensorShape(output()), getTensorData<T>(output()));
-}
-
-void Concatenation::evalQuantized() const
-{
-  int axis = _params.axis;
-  if (axis < 0)
-    axis += output()->shape().num_dims();
-
-  VectorOfQuantizedTensors<true> inputs(_inputs);
-  tflite::ConcatenationParams params{};
-  params.axis = axis;
-  params.input_zeropoint = inputs.zero_point();
-  params.input_scale = inputs.scale();
-  params.inputs_count = _inputs.size();
-  params.output_zeropoint = output()->zero_point();
-  params.output_scale = output()->scale();
-
-  tflite::reference_ops::ConcatenationWithScaling(params, inputs.shapes(), inputs.data(),
-                                                  getTensorShape(output()),
-                                                  getTensorData<uint8_t>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
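
Editor's note: the deleted Concatenation::configure() normalizes a possibly negative axis, checks that every non-axis dimension matches the first input, and sums the axis dimension across inputs. A standalone sketch of that shape computation (plain vectors, illustrative helper name):

#include <cassert>
#include <cstdint>
#include <vector>

// Sketch: output shape of concatenating tensors along `axis`; negative axes
// count from the end, as in the deleted kernel.
std::vector<int32_t> concatOutputShape(const std::vector<std::vector<int32_t>> &shapes, int axis)
{
  assert(!shapes.empty());
  const int rank = static_cast<int>(shapes[0].size());
  if (axis < 0)
    axis += rank;
  assert(axis >= 0 && axis < rank);

  std::vector<int32_t> out = shapes[0];
  for (size_t i = 1; i < shapes.size(); ++i)
  {
    assert(static_cast<int>(shapes[i].size()) == rank);
    for (int d = 0; d < rank; ++d)
    {
      if (d == axis)
        out[d] += shapes[i][d]; // axis dimension accumulates
      else
        assert(shapes[i][d] == out[d]); // all other dimensions must match
    }
  }
  return out;
}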
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h
deleted file mode 100644 (file)
index b48c8ed..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_KERNELS_CONCATENATION_H
-#define LUCI_INTERPRETER_KERNELS_CONCATENATION_H
-
-#include "core/Kernel.h"
-#include "core/KernelParams.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-class Concatenation : public KernelWithParams<ConcatenationParams>
-{
-public:
-  Concatenation(std::vector<const Tensor *> inputs, Tensor *output,
-                const ConcatenationParams &params);
-
-  const Tensor *input(int index) const { return _inputs[index]; }
-  Tensor *output() const { return _outputs[0]; }
-
-  void configure() override;
-  void execute() const override;
-
-private:
-  template <typename T> void evalGeneric() const;
-  void evalQuantized() const;
-};
-
-} // namespace kernels
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_KERNELS_CONCATENATION_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp
deleted file mode 100644 (file)
index f893b38..0000000
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Concatenation.h"
-#include "kernels/TestUtils.h"
-#include "luci_interpreter/TestMemoryManager.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace
-{
-
-using namespace testing;
-
-class ConcatenationTest : public ::testing::Test
-{
-protected:
-  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
-
-  std::unique_ptr<IMemoryManager> _memory_manager;
-};
-
-TEST_F(ConcatenationTest, Float)
-{
-  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
-  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
-  Tensor input1_tensor =
-    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
-  Tensor input2_tensor =
-    makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-  ConcatenationParams params{};
-
-  // Try different 'axis' and expect different results.
-  {
-    params.axis = 0;
-    params.activation = luci::FusedActFunc::NONE;
-
-    Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
-    kernel.configure();
-    for (auto t : kernel.getOutputTensors())
-    {
-      _memory_manager->allocate_memory(*t);
-    }
-    kernel.execute();
-
-    EXPECT_THAT(extractTensorData<float>(output_tensor),
-                FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}));
-  }
-  {
-    params.axis = -2; // Same as '0'.
-    params.activation = luci::FusedActFunc::NONE;
-
-    Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
-    kernel.configure();
-    _memory_manager->allocate_memory(output_tensor);
-    kernel.execute();
-
-    EXPECT_THAT(extractTensorData<float>(output_tensor),
-                FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}));
-  }
-  {
-    params.axis = 1;
-    params.activation = luci::FusedActFunc::NONE;
-
-    Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
-    kernel.configure();
-    _memory_manager->allocate_memory(output_tensor);
-    kernel.execute();
-
-    EXPECT_THAT(extractTensorData<float>(output_tensor),
-                FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}));
-  }
-  {
-    params.axis = -1; // Same as '1'.
-    params.activation = luci::FusedActFunc::NONE;
-
-    Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
-    kernel.configure();
-    _memory_manager->allocate_memory(output_tensor);
-    kernel.execute();
-
-    EXPECT_THAT(extractTensorData<float>(output_tensor),
-                FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}));
-  }
-}
-
-TEST_F(ConcatenationTest, Input_Number_Check_NEG)
-{
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-  ConcatenationParams params{};
-
-  params.axis = -1;
-  params.activation = luci::FusedActFunc::NONE;
-
-  Concatenation kernel({}, &output_tensor, params);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST_F(ConcatenationTest, Invalid_Axis_NEG)
-{
-  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
-  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
-  Tensor input1_tensor =
-    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
-  Tensor input2_tensor =
-    makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-  ConcatenationParams params{};
-
-  params.axis = -3;
-  params.activation = luci::FusedActFunc::NONE;
-
-  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST_F(ConcatenationTest, Mismatching_Input_Type_NEG)
-{
-  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
-  std::vector<uint8_t> input2_data{7, 8, 9, 10, 11, 12};
-  Tensor input1_tensor =
-    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
-  Tensor input2_tensor = makeInputTensor<DataType::U8>({2, 3}, input2_data, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-  ConcatenationParams params{};
-
-  params.axis = -1;
-  params.activation = luci::FusedActFunc::NONE;
-
-  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST_F(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG)
-{
-  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
-  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
-  Tensor input1_tensor =
-    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
-  Tensor input2_tensor =
-    makeInputTensor<DataType::FLOAT32>({1, 2, 3}, input2_data, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-  ConcatenationParams params{};
-
-  params.axis = -1;
-  params.activation = luci::FusedActFunc::NONE;
-
-  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST_F(ConcatenationTest, Mismatching_Input_Dimension_NEG)
-{
-  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
-  std::vector<float> input2_data{7, 8, 9, 10, 11, 12, 13, 14, 15};
-  Tensor input1_tensor =
-    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
-  Tensor input2_tensor =
-    makeInputTensor<DataType::FLOAT32>({3, 3}, input2_data, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-  ConcatenationParams params{};
-
-  params.axis = -1;
-  params.activation = luci::FusedActFunc::NONE;
-
-  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST_F(ConcatenationTest, Int8_Mismatching_Input_Type_NEG)
-{
-  std::vector<uint8_t> input1_data{1, 2, 3, 4};
-  std::vector<int8_t> input2_data{5, 6, 7, 8};
-  Tensor input1_tensor = makeInputTensor<DataType::U8>({2, 2}, input1_data, _memory_manager.get());
-  Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 2}, input2_data, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::S8);
-  ConcatenationParams params{};
-
-  params.axis = -1;
-  params.activation = luci::FusedActFunc::NONE;
-
-  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST_F(ConcatenationTest, Int8_Mismatching_Input_Output_Quant_Params_NEG)
-{
-  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
-  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
-  int quantized_dimension = 3;
-  std::vector<float> scales{0.1, 0.2, 0.3};
-  std::vector<int32_t> zero_points{1, -1, 1};
-
-  Tensor input1_tensor = makeInputTensor<DataType::S8>(
-    {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input1_data, _memory_manager.get());
-  Tensor input2_tensor = makeInputTensor<DataType::S8>(
-    {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input2_data, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::S8, scales.at(0), zero_points.at(0));
-  ConcatenationParams params{};
-
-  params.axis = -1;
-  params.activation = luci::FusedActFunc::NONE;
-
-  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST_F(ConcatenationTest, Int8_Mismatching_Zero_Point_NEG)
-{
-  std::vector<float> input1_data{1, 2, 3, 4};
-  std::vector<float> input2_data{5, 6, 7, 8};
-  float scale = 0.1;
-  int32_t zero_point_1 = 1;
-  int32_t zero_point_2 = -1;
-
-  Tensor input1_tensor =
-    makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_1, input1_data, _memory_manager.get());
-  Tensor input2_tensor =
-    makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_2, input2_data, _memory_manager.get());
-
-  Tensor output_tensor = makeOutputTensor(DataType::S8, scale, zero_point_1);
-  ConcatenationParams params{};
-
-  params.axis = -1;
-  params.activation = luci::FusedActFunc::NONE;
-
-  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-// TODO: Remove this test when concat w/ fused_activation is supported
-TEST_F(ConcatenationTest, With_Fused_Activation_NEG)
-{
-  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
-  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
-  Tensor input1_tensor =
-    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
-  Tensor input2_tensor =
-    makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-  ConcatenationParams params{};
-
-  params.axis = 1;
-  params.activation = luci::FusedActFunc::RELU;
-
-  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-} // namespace
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp
deleted file mode 100644 (file)
index 234f954..0000000
+++ /dev/null
@@ -1,456 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Conv2D.h"
-
-#include "kernels/Utils.h"
-
-#include "PALConv2d.h"
-
-#include <stdexcept>
-#include <thread>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Conv2D::Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
-               Tensor *scratchpad, const Conv2DParams &params)
-  : KernelWithParams<Conv2DParams>({input, filter, bias}, {output, scratchpad}, params)
-{
-}
-
-void Conv2D::configure()
-{
-  // TensorFlow Lite (as of v2.2.0) supports the following combinations of types:
-  //     | input filter bias  output |
-  // ----+---------------------------+
-  // (1) | float float  float float  |
-  // (2) | float int8   float float  | hybrid
-  // (3) | uint8 uint8  int32 uint8  | quantized
-  // (4) | int8  int8   int32 int8   | quantized per channel
-  //
-  // We only support (1), (3) and (4) for now, and additionally the following:
-  //     | input filter bias  output |
-  // ----+---------------------------+
-  // (5) | int16 int16  int64 int16  |
-  //
-  if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
-  {
-    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
-  }
-  else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
-  {
-    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
-  }
-  else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8)
-  {
-    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
-    LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
-    LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
-                           static_cast<size_t>(filter()->shape().dim(0)));
-    for (auto zerop : filter()->zero_points())
-    {
-      LUCI_INTERPRETER_CHECK(zerop == 0);
-    }
-  }
-  else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
-  {
-    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-  LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());
-
-  const Shape &input_shape = input()->shape();
-  const Shape &filter_shape = filter()->shape();
-  LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
-
-  const int32_t batches = input_shape.dim(0);
-  const int32_t input_height = input_shape.dim(1);
-  const int32_t input_width = input_shape.dim(2);
-  const int32_t output_depth = filter_shape.dim(0);
-  const int32_t filter_height = filter_shape.dim(1);
-  const int32_t filter_width = filter_shape.dim(2);
-  LUCI_INTERPRETER_CHECK(filter_shape.dim(3) == input_shape.dim(3));
-
-  LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
-                                               bias()->shape().dim(0) == output_depth));
-
-  const int32_t output_height =
-    computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
-                      _params.dilation_height_factor);
-  const int32_t output_width =
-    computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
-                      _params.dilation_width_factor);
-
-  _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
-                                   input_height, filter_height, output_height);
-  _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width,
-                                  filter_width, output_width);
-
-  output()->resize({batches, output_height, output_width, output_depth});
-
-  // Allocate tensor for scratchpad, if needed.
-  tflite::ConvParams params{};
-  params.padding_values.height = _padding_height;
-  params.padding_values.width = _padding_width;
-  params.stride_height = _params.stride_height;
-  params.stride_width = _params.stride_width;
-  params.dilation_height_factor = _params.dilation_height_factor;
-  params.dilation_width_factor = _params.dilation_width_factor;
-  auto scratchpad = getOutputTensors()[1];
-  luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(), params,
-                                              getTensorShape(input()), getTensorShape(filter()),
-                                              getTensorShape(output()));
-
-  switch (_params.activation)
-  {
-    case Activation::NONE:
-    case Activation::RELU:
-    case Activation::RELU6:
-    case Activation::RELU_N1_TO_1:
-      break;
-    default:
-      throw std::runtime_error("Unsupported fused activation");
-  }
-}
-
-void Conv2D::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      if (filter()->element_type() == DataType::FLOAT32)
-      {
-        evalFloat();
-        break;
-      }
-      throw std::runtime_error("Unsupported type.");
-    case DataType::U8:
-      if (filter()->scales().size() == 1)
-      {
-        evalQuantized();
-      }
-      else if (filter()->scales().size() > 1)
-      {
-        LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
-        LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
-                               static_cast<size_t>(filter()->shape().dim(0)));
-        evalQuantizedPerChannel();
-      }
-      break;
-    case DataType::S8:
-      evalQuantizedS8PerChannel();
-      break;
-    case DataType::S16:
-      evalQuantizedS16();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Conv2D::evalFloat() const
-{
-  float activation_min{};
-  float activation_max{};
-  calculateActivationRange(_params.activation, &activation_min, &activation_max);
-
-  tflite::ConvParams params{};
-  params.padding_values.height = _padding_height;
-  params.padding_values.width = _padding_width;
-  params.stride_height = _params.stride_height;
-  params.stride_width = _params.stride_width;
-  params.dilation_height_factor = _params.dilation_height_factor;
-  params.dilation_width_factor = _params.dilation_width_factor;
-  params.float_activation_min = activation_min;
-  params.float_activation_max = activation_max;
-
-  auto scratchpad = getOutputTensors()[1];
-  float *scratchpad_data = nullptr;
-  if (scratchpad->is_allocatable())
-    scratchpad_data = scratchpad->data<float>();
-
-  luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<float>(input()),
-                             getTensorShape(filter()), getTensorData<float>(filter()),
-                             getTensorShape(bias()), getTensorData<float>(bias()),
-                             getTensorShape(output()), getTensorData<float>(output()),
-                             getTensorShape(scratchpad), scratchpad_data);
-}
-
-void Conv2D::evalQuantized() const
-{
-  const auto input_scale = static_cast<double>(input()->scale());
-  const auto filter_scale = static_cast<double>(filter()->scale());
-  const auto output_scale = static_cast<double>(output()->scale());
-
-  const double real_multiplier = input_scale * filter_scale / output_scale;
-  int32_t output_multiplier{};
-  int output_shift{};
-  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
-
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  tflite::ConvParams params{};
-  params.padding_values.height = _padding_height;
-  params.padding_values.width = _padding_width;
-  params.stride_height = _params.stride_height;
-  params.stride_width = _params.stride_width;
-  params.dilation_height_factor = _params.dilation_height_factor;
-  params.dilation_width_factor = _params.dilation_width_factor;
-  // The kernel expects input and filter zero points to be negated.
-  params.input_offset = -input()->zero_point();    // Note the '-'.
-  params.weights_offset = -filter()->zero_point(); // Note the '-'.
-  params.output_offset = output()->zero_point();
-  params.output_multiplier = output_multiplier;
-  params.output_shift = output_shift;
-  params.quantized_activation_min = activation_min;
-  params.quantized_activation_max = activation_max;
-
-  auto scratchpad = getOutputTensors()[1];
-  luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
-                             getTensorShape(filter()), getTensorData<uint8_t>(filter()),
-                             getTensorShape(bias()), getTensorData<int32_t>(bias()),
-                             getTensorShape(output()), getTensorData<uint8_t>(output()),
-                             getTensorShape(scratchpad), getTensorData<uint8_t>(scratchpad));
-}
-
-void Conv2D::evalQuantizedPerChannel() const
-{
-  const auto *input_data = getTensorData<uint8_t>(input());
-  const auto *filter_data = getTensorData<uint8_t>(filter());
-  const auto *bias_data = getTensorData<int32_t>(bias());
-  auto *output_data = getTensorData<uint8_t>(output());
-
-  const Shape &input_shape = input()->shape();
-  const Shape &filter_shape = filter()->shape();
-  const Shape &output_shape = output()->shape();
-
-  const int32_t batches = input_shape.dim(0);
-  const int32_t input_height = input_shape.dim(1);
-  const int32_t input_width = input_shape.dim(2);
-  const int32_t input_depth = input_shape.dim(3);
-  const int32_t output_depth = filter_shape.dim(0);
-  const int32_t filter_height = filter_shape.dim(1);
-  const int32_t filter_width = filter_shape.dim(2);
-  const int32_t output_height = output_shape.dim(1);
-  const int32_t output_width = output_shape.dim(2);
-
-  const int32_t stride_height = _params.stride_height;
-  const int32_t stride_width = _params.stride_width;
-  const int32_t dilation_height_factor = _params.dilation_height_factor;
-  const int32_t dilation_width_factor = _params.dilation_width_factor;
-
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  const std::vector<double> effective_output_scale =
-    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
-
-  const std::vector<ChannelQuantMultipliers> multipliers_raw =
-    quantizeMultipliers(effective_output_scale);
-  BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(multipliers_raw);
-
-  for (int32_t batch = 0; batch < batches; ++batch)
-  {
-    for (int32_t out_y = 0; out_y < output_height; ++out_y)
-    {
-      for (int32_t out_x = 0; out_x < output_width; ++out_x)
-      {
-        for (int32_t out_c = 0; out_c < output_depth; ++out_c)
-        {
-          const int32_t in_y_origin = out_y * stride_height - _padding_height;
-          const int32_t in_x_origin = out_x * stride_width - _padding_width;
-          int32_t acc = 0;
-          for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
-          {
-            for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
-            {
-              const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
-              const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
-              if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
-              {
-                for (int32_t in_c = 0; in_c < input_depth; ++in_c)
-                {
-                  const uint8_t input_val =
-                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
-                  const uint8_t filter_val =
-                    filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
-                  acc += static_cast<int32_t>(input_val - input()->zero_point()) *
-                         static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
-                }
-              }
-            }
-          }
-          if (bias_data)
-          {
-            acc += bias_data[out_c];
-          }
-
-          int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
-            acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift);
-
-          scaled_acc += output()->zero_point();
-          scaled_acc = std::max(scaled_acc, activation_min);
-          scaled_acc = std::min(scaled_acc, activation_max);
-          output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
-        }
-      }
-    }
-  }
-}
-
-void Conv2D::evalQuantizedS8PerChannel() const
-{
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  tflite::ConvParams params{};
-  params.padding_values.height = _padding_height;
-  params.padding_values.width = _padding_width;
-  params.stride_height = _params.stride_height;
-  params.stride_width = _params.stride_width;
-  params.dilation_height_factor = _params.dilation_height_factor;
-  params.dilation_width_factor = _params.dilation_width_factor;
-  // The kernel expects filter zero points to be negated.
-  params.input_offset = -input()->zero_point(); // Note the '-'.
-  params.weights_offset = 0;                    // Unused in tflite code
-  params.output_offset = output()->zero_point();
-  params.quantized_activation_min = activation_min;
-  params.quantized_activation_max = activation_max;
-
-  const std::vector<double> effective_output_scales =
-    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
-
-  std::vector<ChannelQuantMultipliers> quant_multipliers =
-    quantizeMultipliers(effective_output_scales);
-
-  std::vector<int32_t> shifts;
-  std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
-                 [](ChannelQuantMultipliers cm) { return cm.shift; });
-  std::vector<int32_t> multipliers;
-  std::transform(quant_multipliers.begin(), quant_multipliers.end(),
-                 std::back_inserter(multipliers),
-                 [](ChannelQuantMultipliers cm) { return cm.multiplier; });
-
-  auto scratchpad = getOutputTensors()[1];
-  int8_t *scratchpad_data = nullptr;
-  if (scratchpad->is_allocatable())
-    scratchpad_data = scratchpad->data<int8_t>();
-
-  luci_interpreter_pal::ConvPerChannel(
-    params, multipliers.data(), shifts.data(), getTensorShape(input()),
-    getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
-    getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
-    getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
-}
-
-void Conv2D::evalQuantizedS16() const
-{
-  const auto *input_data = getTensorData<int16_t>(input());
-  const auto *filter_data = getTensorData<int16_t>(filter());
-  const auto *bias_data = getTensorData<int64_t>(bias());
-  auto *output_data = getTensorData<int16_t>(output());
-
-  const Shape &input_shape = input()->shape();
-  const Shape &filter_shape = filter()->shape();
-  const Shape &output_shape = output()->shape();
-
-  const int32_t batches = input_shape.dim(0);
-  const int32_t input_height = input_shape.dim(1);
-  const int32_t input_width = input_shape.dim(2);
-  const int32_t input_depth = input_shape.dim(3);
-  const int32_t output_depth = filter_shape.dim(0);
-  const int32_t filter_height = filter_shape.dim(1);
-  const int32_t filter_width = filter_shape.dim(2);
-  const int32_t output_height = output_shape.dim(1);
-  const int32_t output_width = output_shape.dim(2);
-
-  const int32_t stride_height = _params.stride_height;
-  const int32_t stride_width = _params.stride_width;
-  const int32_t dilation_height_factor = _params.dilation_height_factor;
-  const int32_t dilation_width_factor = _params.dilation_width_factor;
-
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  const std::vector<double> effective_output_scale =
-    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
-
-  const std::vector<ChannelQuantMultipliers> multipliers_raw =
-    quantizeMultipliers(effective_output_scale);
-  BroadcastableWrapper<ChannelQuantMultipliers> multipliers(multipliers_raw);
-
-  for (int32_t batch = 0; batch < batches; ++batch)
-  {
-    for (int32_t out_y = 0; out_y < output_height; ++out_y)
-    {
-      for (int32_t out_x = 0; out_x < output_width; ++out_x)
-      {
-        for (int32_t out_c = 0; out_c < output_depth; ++out_c)
-        {
-          const int32_t in_y_origin = out_y * stride_height - _padding_height;
-          const int32_t in_x_origin = out_x * stride_width - _padding_width;
-          int64_t acc = 0;
-          for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
-          {
-            for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
-            {
-              const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
-              const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
-              if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
-              {
-                for (int32_t in_c = 0; in_c < input_depth; ++in_c)
-                {
-                  const int16_t input_val =
-                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
-                  const int16_t filter_val =
-                    filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
-                  acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
-                }
-              }
-            }
-          }
-          if (bias_data)
-          {
-            acc += bias_data[out_c];
-          }
-
-          int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
-            acc, multipliers[out_c].multiplier, multipliers[out_c].shift);
-
-          scaled_acc = std::max(scaled_acc, activation_min);
-          scaled_acc = std::min(scaled_acc, activation_max);
-
-          output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
-        }
-      }
-    }
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
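
Editor's note: Conv2D::configure() above relies on two small helpers, computeOutputSize() and computePadding(), which follow the usual SAME/VALID convolution arithmetic. The sketch below restates that arithmetic under the assumption that the helpers behave like the standard TFLite versions; it is not the exact code from kernels/Utils.h.

#include <algorithm>
#include <cstdint>

enum class PaddingKind { SAME, VALID };

// Effective filter extent once dilation is applied.
inline int32_t effectiveFilterSize(int32_t filter, int32_t dilation)
{
  return (filter - 1) * dilation + 1;
}

// Standard SAME/VALID output-size rule (integer ceil-division via "+ stride - 1").
inline int32_t outputSize(PaddingKind padding, int32_t in, int32_t filter, int32_t stride,
                          int32_t dilation)
{
  const int32_t eff = effectiveFilterSize(filter, dilation);
  return padding == PaddingKind::SAME ? (in + stride - 1) / stride
                                      : (in - eff + stride) / stride;
}

// Leading padding so that `out` output positions fit; total padding is split in half.
inline int32_t paddingBefore(int32_t stride, int32_t dilation, int32_t in, int32_t filter,
                             int32_t out)
{
  const int32_t eff = effectiveFilterSize(filter, dilation);
  const int32_t total = std::max<int32_t>(0, (out - 1) * stride + eff - in);
  return total / 2;
}

// Example matching the deleted FloatCheck test: input width 4, 2x2 filter,
// stride 2, VALID padding -> outputSize(...) == 2 and paddingBefore(...) == 0.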
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h
deleted file mode 100644 (file)
index 330bf3a..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_KERNELS_CONV2D_H
-#define LUCI_INTERPRETER_KERNELS_CONV2D_H
-
-#include "core/Kernel.h"
-#include "core/KernelParams.h"
-
-#include <memory>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-class Conv2D : public KernelWithParams<Conv2DParams>
-{
-public:
-  Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
-         Tensor *scratchpad, const Conv2DParams &params);
-
-  const Tensor *input() const { return _inputs[0]; }
-  const Tensor *filter() const { return _inputs[1]; }
-  const Tensor *bias() const { return _inputs[2]; }
-  Tensor *output() const { return _outputs[0]; }
-
-  void configure() override;
-  void execute() const override;
-
-private:
-  void evalFloat() const;
-  void evalQuantized() const;
-  void evalQuantizedPerChannel() const;
-  void evalQuantizedS8PerChannel() const;
-  void evalQuantizedS16() const;
-
-private:
-  int32_t _padding_height{};
-  int32_t _padding_width{};
-};
-
-} // namespace kernels
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_KERNELS_CONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp
deleted file mode 100644 (file)
index 0fe6ef7..0000000
+++ /dev/null
@@ -1,707 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Conv2D.h"
-#include "kernels/TestUtils.h"
-#include "luci_interpreter/TestMemoryManager.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace
-{
-
-using namespace testing;
-
-class Conv2DTest : public ::testing::Test
-{
-protected:
-  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
-
-  std::unique_ptr<IMemoryManager> _memory_manager;
-};
-
-TEST_F(Conv2DTest, Float)
-{
-  Shape input_shape{1, 4, 3, 2};
-  Shape filter_shape{2, 2, 2, 2};
-  Shape bias_shape{2};
-  std::vector<float> input_data{
-    1,  2,  3,  4,  5,  6,  // row = 0
-    7,  8,  9,  10, 11, 12, // row = 1
-    13, 14, 15, 16, 17, 18, // row = 2
-    19, 20, 21, 22, 23, 24, // row = 3
-  };
-  std::vector<float> filter_data{
-    1,  2,  -3, -4, // out = 0, row = 0
-    -5, 6,  -7, 8,  // out = 1, row = 0
-    4,  -2, 3,  -1, // out = 0, row = 1
-    -8, -6, 7,  5,  // out = 1, row = 1
-  };
-  std::vector<float> bias_data{1, 2};
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
-  Tensor filter_tensor =
-    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
-  Tensor bias_tensor =
-    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
-  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
-  Conv2DParams params{};
-  params.padding = Padding::VALID;
-  params.stride_height = 2;
-  params.stride_width = 1;
-  params.dilation_height_factor = 1;
-  params.dilation_width_factor = 1;
-  params.activation = Activation::RELU;
-
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
-  kernel.configure();
-  _memory_manager->allocate_memory(im2col);
-  _memory_manager->allocate_memory(output_tensor);
-  kernel.execute();
-
-  std::vector<float> ref_output_data{
-    11, 16, 7, 20, // row = 0
-    0,  40, 0, 44, // row = 1
-  };
-  std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
-  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
-}
-
-TEST_F(Conv2DTest, FloatPointwise)
-{
-  Shape input_shape{1, 2, 2, 2};
-  Shape filter_shape{2, 1, 1, 2};
-  Shape bias_shape{2};
-  std::vector<float> input_data{
-    1, 2, // row = 0, col = 0
-    3, 4, // row = 0, col = 1
-    5, 6, // row = 1, col = 0
-    7, 8, // row = 1, col = 1
-  };
-  std::vector<float> filter_data{
-    -1, 2, // out = 0
-    -3, 4, // out = 1
-  };
-  std::vector<float> bias_data{1, 2};
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
-  Tensor filter_tensor =
-    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
-  Tensor bias_tensor =
-    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
-
-  Conv2DParams params{};
-  params.padding = Padding::VALID;
-  params.stride_height = 1;
-  params.stride_width = 1;
-  params.dilation_height_factor = 1;
-  params.dilation_width_factor = 1;
-  params.activation = Activation::RELU;
-
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
-  kernel.configure();
-  _memory_manager->allocate_memory(im2col);
-  _memory_manager->allocate_memory(output_tensor);
-  kernel.execute();
-
-  std::vector<float> ref_output_data{
-    4, 7,  6,  9,  // row = 0
-    8, 11, 10, 13, // row = 1
-  };
-  std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
-  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
-}
-
-TEST_F(Conv2DTest, FloatCheck)
-{
-  Shape input_shape{2, 2, 4, 1};
-  Shape filter_shape{3, 2, 2, 1};
-  Shape bias_shape{3};
-  std::vector<float> input_data{
-    // First batch
-    1, 1, 1, 1, // row = 1
-    2, 2, 2, 2, // row = 2
-    // Second batch
-    1, 2, 3, 4, // row = 1
-    1, 2, 3, 4, // row = 2
-  };
-  std::vector<float> filter_data{
-    1,  2,  3,  4, // first 2x2 filter
-    -1, 1,  -1, 1, // second 2x2 filter
-    -1, -1, 1,  1, // third 2x2 filter
-  };
-  std::vector<float> bias_data{1, 2, 3};
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
-  Tensor filter_tensor =
-    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
-  Tensor bias_tensor =
-    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
-  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
-  Conv2DParams params{};
-  params.padding = Padding::VALID;
-  params.stride_height = 2;
-  params.stride_width = 2;
-  params.dilation_height_factor = 1;
-  params.dilation_width_factor = 1;
-  params.activation = Activation::NONE;
-
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
-  kernel.configure();
-  _memory_manager->allocate_memory(output_tensor);
-  _memory_manager->allocate_memory(im2col);
-  kernel.execute();
-
-  std::vector<float> ref_output_data{
-    18, 2, 5, // first batch, left
-    18, 2, 5, // first batch, right
-    17, 4, 3, // second batch, left
-    37, 4, 3, // second batch, right
-  };
-  std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
-  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
-}
-
-TEST_F(Conv2DTest, Uint8)
-{
-  std::vector<float> input_data{
-    // First batch
-    1, 1, 1, 1, // row = 1
-    2, 2, 2, 2, // row = 2
-                // Second batch
-    1, 2, 3, 4, // row = 1
-    1, 2, 3, 4, // row = 2
-  };
-  std::vector<float> filter_data{
-    1,  2,  3,  4, // first 2x2 filter
-    -1, 1,  -1, 1, // second 2x2 filter
-    -1, -1, 1,  1, // third 2x2 filter
-  };
-  std::vector<float> bias_data{1, 2, 3};
-
-  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
-  std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
-
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
-                                  input_data, _memory_manager.get());
-  Tensor filter_tensor =
-    makeInputTensor<DataType::U8>({3, 2, 2, 1}, input_quant_param.first, input_quant_param.second,
-                                  filter_data, _memory_manager.get());
-  Tensor bias_tensor = makeInputTensor<DataType::S32>(
-    {3}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get());
-  Tensor im2col(DataType::U8, Shape({}), {}, "");
-  Tensor output_tensor =
-    makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
-
-  Conv2DParams params{};
-  params.padding = Padding::VALID;
-  params.stride_height = 2;
-  params.stride_width = 2;
-  params.dilation_height_factor = 1;
-  params.dilation_width_factor = 1;
-  params.activation = Activation::NONE;
-
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
-  kernel.configure();
-  _memory_manager->allocate_memory(output_tensor);
-  _memory_manager->allocate_memory(im2col);
-  kernel.execute();
-
-  std::vector<float> ref_output_data{
-    18, 2, 5, // first batch, left
-    18, 2, 5, // first batch, right
-    17, 4, 3, // second batch, left
-    37, 4, 3, // second batch, right
-  };
-  std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
-  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
-}
-
-TEST_F(Conv2DTest, Uint8_CWQ)
-{
-  const int output_channels = 3;
-  std::vector<float> input_data{
-    // First batch
-    1, 1, 1, 1, // row = 1
-    2, 2, 2, 2, // row = 2
-                // Second batch
-    1, 2, 3, 4, // row = 1
-    1, 2, 3, 4, // row = 2
-  };
-  std::vector<float> filter_data{
-    1,  2,  3,  4, // first 2x2 filter
-    -1, 1,  -1, 1, // second 2x2 filter
-    -1, -1, 1,  1, // third 2x2 filter
-  };
-  std::vector<float> bias_data{1, 2, 3};
-  Shape filter_shape{output_channels, 2, 2, 1};
-
-  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 4);
-  std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
-
-  std::vector<std::pair<float, int32_t>> filter_quant_params;
-  filter_quant_params.push_back(quantizationParams<uint8_t>(0, 4));
-  filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1));
-  filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1));
-
-  std::vector<float> filter_scales;
-  std::vector<int32_t> filter_zerops;
-  for (auto iter : filter_quant_params)
-  {
-    filter_scales.push_back(iter.first);
-    filter_zerops.push_back(iter.second);
-  }
-
-  std::vector<float> bias_scales;
-  for (int i = 0; i < output_channels; ++i)
-    bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
-  std::vector<int32_t> zerop(output_channels, 0);
-
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
-                                  input_data, _memory_manager.get());
-  Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops,
-                                                       0, filter_data, _memory_manager.get());
-  Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
-                                                      bias_data, _memory_manager.get());
-  Tensor im2col(DataType::U8, Shape({}), {}, "");
-  Tensor output_tensor =
-    makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
-
-  Conv2DParams params{};
-  params.padding = Padding::VALID;
-  params.stride_height = 2;
-  params.stride_width = 2;
-  params.dilation_height_factor = 1;
-  params.dilation_width_factor = 1;
-  params.activation = Activation::NONE;
-
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
-  kernel.configure();
-  _memory_manager->allocate_memory(output_tensor);
-  _memory_manager->allocate_memory(im2col);
-  kernel.execute();
-
-  std::vector<float> ref_output_data{
-    18, 2, 5, // first batch, left
-    18, 2, 5, // first batch, right
-    17, 4, 3, // second batch, left
-    37, 4, 3, // second batch, right
-  };
-  std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
-  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
-}
-
-TEST_F(Conv2DTest, SInt8_CWQ)
-{
-  const int output_channels = 3;
-  std::vector<float> input_data{
-    // First batch
-    1, 1, 1, 1, // row = 1
-    2, 2, 2, 2, // row = 2
-                // Second batch
-    1, 2, 3, 4, // row = 1
-    1, 2, 3, 4, // row = 2
-  };
-  std::vector<float> filter_data{
-    1,  2,  3,  4, // first 2x2 filter
-    -1, 1,  -1, 1, // second 2x2 filter
-    -1, -1, 1,  1, // third 2x2 filter
-  };
-  std::vector<float> bias_data{1, 2, 3};
-  Shape filter_shape{output_channels, 2, 2, 1};
-
-  std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(0, 4);
-  std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
-
-  std::vector<std::pair<float, int32_t>> filter_quant_params;
-  filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0));
-  filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0));
-  filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0));
-
-  std::vector<float> filter_scales;
-  std::vector<int32_t> filter_zerops;
-  for (auto iter : filter_quant_params)
-  {
-    filter_scales.push_back(iter.first);
-    filter_zerops.push_back(iter.second);
-  }
-
-  std::vector<float> bias_scales;
-  for (int i = 0; i < output_channels; ++i)
-    bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
-  std::vector<int32_t> zerop(output_channels, 0);
-
-  Tensor input_tensor =
-    makeInputTensor<DataType::S8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
-                                  input_data, _memory_manager.get());
-  Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops,
-                                                       0, filter_data, _memory_manager.get());
-  Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
-                                                      bias_data, _memory_manager.get());
-  Tensor im2col(DataType::S8, Shape({}), {}, "");
-  Tensor output_tensor =
-    makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
-
-  Conv2DParams params{};
-  params.padding = Padding::VALID;
-  params.stride_height = 2;
-  params.stride_width = 2;
-  params.dilation_height_factor = 1;
-  params.dilation_width_factor = 1;
-  params.activation = Activation::NONE;
-
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
-  kernel.configure();
-  _memory_manager->allocate_memory(output_tensor);
-  _memory_manager->allocate_memory(im2col);
-  kernel.execute();
-
-  std::vector<float> ref_output_data{
-    18, 2, 5, // first batch, left
-    18, 2, 5, // first batch, right
-    17, 4, 3, // second batch, left
-    37, 4, 3, // second batch, right
-  };
-  std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
-  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
-}
-
-TEST_F(Conv2DTest, SInt16)
-{
-  Shape input_shape{1, 4, 3, 2};
-  Shape filter_shape{2, 2, 2, 2};
-  Shape bias_shape{2};
-  std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
-
-  std::vector<float> input_data{
-    1,  2,  3,  4,  5,  6,  // row = 0
-    7,  8,  9,  10, 11, 12, // row = 1
-    13, 14, 15, 16, 17, 18, // row = 2
-    19, 20, 21, 22, 23, 24, // row = 3
-  };
-  std::vector<float> filter_data{
-    1,  2,  -3, -4, // out = 0, row = 0
-    -5, 6,  -7, 8,  // out = 1, row = 0
-    4,  -2, 3,  -1, // out = 0, row = 1
-    -8, -6, 7,  5,  // out = 1, row = 1
-  };
-  std::vector<float> bias_data{1, 2};
-  std::vector<float> ref_output_data{
-    11, 16, 7, 20, // row = 0
-    0,  40, 0, 44, // row = 1
-  };
-
-  Tensor input_tensor =
-    makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get());
-  Tensor filter_tensor =
-    makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get());
-  Tensor bias_tensor =
-    makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get());
-  Tensor im2col(DataType::S16, Shape({}), {}, "");
-  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
-
-  Conv2DParams params{};
-  params.padding = Padding::VALID;
-  params.stride_height = 2;
-  params.stride_width = 1;
-  params.dilation_height_factor = 1;
-  params.dilation_width_factor = 1;
-  params.activation = Activation::RELU;
-
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
-  kernel.configure();
-  _memory_manager->allocate_memory(output_tensor);
-  _memory_manager->allocate_memory(im2col);
-  kernel.execute();
-
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
-  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
-}
-
-TEST_F(Conv2DTest, SInt16_CWQ_weights)
-{
-  Shape input_shape{1, 2, 2, 2};  // Batch x H x W x C
-  Shape filter_shape{3, 1, 1, 2}; // Out channels x H x W x In Channels
-  Shape bias_shape{3};
-  std::vector<int32_t> ref_output_shape{1, 2, 2, 3};
-
-  std::vector<float> input_data{
-    1, 2, // row = 0, col 0
-    3, 4, // row = 0, col 1
-    5, 6, // row = 1, col 0
-    7, 8, // row = 1, col 1
-  };
-  std::vector<float> filter_data{
-    4, -3, // out = 0
-    1, -3, // out = 1
-    5, -3, // out = 2
-  };
-  std::vector<float> bias_data{1, 10, 5};
-  std::vector<float> ref_output_data{
-    0, 5, 4,  // row 0, col 0
-    1, 1, 8,  // row 0, col 1
-    3, 0, 12, // row 1, col 0
-    5, 0, 16, // row 1, col 1
-  };
-
-  float input_scale = 0.25f;
-  float output_scale = 0.05f;
-  std::vector<float> filter_scales = {0.25f, 0.2f, 0.1f};
-  std::vector<float> bias_scales;
-  for (int i = 0; i < filter_scales.size(); ++i)
-    bias_scales.push_back(filter_scales[i] * input_scale);
-  std::vector<int32_t> zerop = {0, 0, 0};
-
-  Tensor input_tensor =
-    makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get());
-  Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0,
-                                                        filter_data, _memory_manager.get());
-  Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
-                                                      _memory_manager.get());
-  Tensor im2col(DataType::S16, Shape({}), {}, "");
-  Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
-
-  Conv2DParams params{};
-  params.padding = Padding::VALID;
-  params.stride_height = 1;
-  params.stride_width = 1;
-  params.dilation_height_factor = 1;
-  params.dilation_width_factor = 1;
-  params.activation = Activation::RELU;
-
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
-  kernel.configure();
-  _memory_manager->allocate_memory(output_tensor);
-  _memory_manager->allocate_memory(im2col);
-  kernel.execute();
-
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
-  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
-}
-
-TEST_F(Conv2DTest, Unsupported_Type_Configure_NEG)
-{
-  Shape input_shape{1, 4, 3, 2};
-  Shape filter_shape{2, 2, 2, 2};
-  Shape bias_shape{2};
-  std::vector<int32_t> input_data{
-    1,  2,  3,  4,  5,  6,  // row = 0
-    7,  8,  9,  10, 11, 12, // row = 1
-    13, 14, 15, 16, 17, 18, // row = 2
-    19, 20, 21, 22, 23, 24, // row = 3
-  };
-  std::vector<float> filter_data{
-    1,  2,  -3, -4, // out = 0, row = 0
-    -5, 6,  -7, 8,  // out = 1, row = 0
-    4,  -2, 3,  -1, // out = 0, row = 1
-    -8, -6, 7,  5,  // out = 1, row = 1
-  };
-  std::vector<float> bias_data{1, 2};
-  Tensor input_tensor =
-    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
-  Tensor filter_tensor =
-    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
-  Tensor bias_tensor =
-    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
-  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
-  Conv2DParams params{};
-  params.padding = Padding::VALID;
-  params.stride_height = 2;
-  params.stride_width = 1;
-  params.dilation_height_factor = 1;
-  params.dilation_width_factor = 1;
-  params.activation = Activation::RELU;
-
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST_F(Conv2DTest, Invalid_Bias_Type_NEG)
-{
-  Shape input_shape{1, 4, 3, 2};
-  Shape filter_shape{2, 2, 2, 2};
-  Shape bias_shape{2};
-  std::vector<float> input_data{
-    1,  2,  3,  4,  5,  6,  // row = 0
-    7,  8,  9,  10, 11, 12, // row = 1
-    13, 14, 15, 16, 17, 18, // row = 2
-    19, 20, 21, 22, 23, 24, // row = 3
-  };
-  std::vector<float> filter_data{
-    1,  2,  -3, -4, // out = 0, row = 0
-    -5, 6,  -7, 8,  // out = 1, row = 0
-    4,  -2, 3,  -1, // out = 0, row = 1
-    -8, -6, 7,  5,  // out = 1, row = 1
-  };
-  std::vector<uint8_t> bias_data{1, 2};
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
-  Tensor filter_tensor =
-    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
-  Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data, _memory_manager.get());
-  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
-  Conv2DParams params{};
-  params.padding = Padding::VALID;
-  params.stride_height = 2;
-  params.stride_width = 1;
-  params.dilation_height_factor = 1;
-  params.dilation_width_factor = 1;
-  params.activation = Activation::RELU;
-
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST_F(Conv2DTest, Invalid_Bias_Data_NEG)
-{
-  Shape input_shape{1, 4, 3, 2};
-  Shape filter_shape{2, 2, 2, 2};
-  Shape bias_shape{3};
-  std::vector<float> input_data{
-    1,  2,  3,  4,  5,  6,  // row = 0
-    7,  8,  9,  10, 11, 12, // row = 1
-    13, 14, 15, 16, 17, 18, // row = 2
-    19, 20, 21, 22, 23, 24, // row = 3
-  };
-  std::vector<float> filter_data{
-    1,  2,  -3, -4, // out = 0, row = 0
-    -5, 6,  -7, 8,  // out = 1, row = 0
-    4,  -2, 3,  -1, // out = 0, row = 1
-    -8, -6, 7,  5,  // out = 1, row = 1
-  };
-  std::vector<float> bias_data{1, 2, 3};
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
-  Tensor filter_tensor =
-    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
-  Tensor bias_tensor =
-    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
-  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
-  Conv2DParams params{};
-  params.padding = Padding::VALID;
-  params.stride_height = 2;
-  params.stride_width = 1;
-  params.dilation_height_factor = 1;
-  params.dilation_width_factor = 1;
-  params.activation = Activation::RELU;
-
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST_F(Conv2DTest, Invalid_Input_Shape_NEG)
-{
-  Shape input_shape{1, 4, 6, 1};
-  Shape filter_shape{2, 2, 2, 2};
-  Shape bias_shape{2};
-  std::vector<float> input_data{
-    1,  2,  3,  4,  5,  6,  // row = 0
-    7,  8,  9,  10, 11, 12, // row = 1
-    13, 14, 15, 16, 17, 18, // row = 2
-    19, 20, 21, 22, 23, 24, // row = 3
-  };
-  std::vector<float> filter_data{
-    1,  2,  -3, -4, // out = 0, row = 0
-    -5, 6,  -7, 8,  // out = 1, row = 0
-    4,  -2, 3,  -1, // out = 0, row = 1
-    -8, -6, 7,  5,  // out = 1, row = 1
-  };
-  std::vector<float> bias_data{1, 2};
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
-  Tensor filter_tensor =
-    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
-  Tensor bias_tensor =
-    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
-  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
-  Conv2DParams params{};
-  params.padding = Padding::VALID;
-  params.stride_height = 2;
-  params.stride_width = 1;
-  params.dilation_height_factor = 1;
-  params.dilation_width_factor = 1;
-  params.activation = Activation::RELU;
-
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST_F(Conv2DTest, Invalid_fused_act_tanh_NEG)
-{
-  Shape input_shape{1, 4, 3, 2};
-  Shape filter_shape{2, 2, 2, 2};
-  Shape bias_shape{2};
-  std::vector<float> input_data{
-    1,  2,  3,  4,  5,  6,  // row = 0
-    7,  8,  9,  10, 11, 12, // row = 1
-    13, 14, 15, 16, 17, 18, // row = 2
-    19, 20, 21, 22, 23, 24, // row = 3
-  };
-  std::vector<float> filter_data{
-    1,  2,  -3, -4, // out = 0, row = 0
-    -5, 6,  -7, 8,  // out = 1, row = 0
-    4,  -2, 3,  -1, // out = 0, row = 1
-    -8, -6, 7,  5,  // out = 1, row = 1
-  };
-  std::vector<float> bias_data{1, 2};
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
-  Tensor filter_tensor =
-    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
-  Tensor bias_tensor =
-    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
-  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
-  Conv2DParams params{};
-  params.padding = Padding::VALID;
-  params.stride_height = 2;
-  params.stride_width = 1;
-  params.dilation_height_factor = 1;
-  params.dilation_width_factor = 1;
-  params.activation = Activation::TANH;
-
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-} // namespace
-} // namespace kernels
-} // namespace luci_interpreter
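The tests above build quantized tensors with quantizationParams<T>(min, max), e.g. quantizationParams<uint8_t>(-63.5, 64) in the Uint8 case. As a standalone sketch (not the test framework's helper), the usual TFLite-style asymmetric derivation of such a (scale, zero point) pair for uint8 looks like this:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <utility>

// Illustrative only: derive (scale, zero_point) for a uint8 tensor from a float range.
static std::pair<float, int32_t> asymmetricQuantParams(float rmin, float rmax)
{
  // The range must contain 0 so that real 0.0 is exactly representable.
  rmin = std::min(rmin, 0.0f);
  rmax = std::max(rmax, 0.0f);
  const float qmin = 0.0f, qmax = 255.0f;
  const float scale = (rmax - rmin) / (qmax - qmin);
  int32_t zero_point = static_cast<int32_t>(std::round(qmin - rmin / scale));
  zero_point = std::max<int32_t>(0, std::min<int32_t>(255, zero_point));
  return {scale, zero_point};
}

int main()
{
  const auto qp = asymmetricQuantParams(-63.5f, 64.0f);
  std::printf("scale=%f zero_point=%d\n", qp.first, qp.second); // scale=0.5 zero_point=127
  return 0;
}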
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp
deleted file mode 100644 (file)
index 3a9acd1..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DepthToSpace.h"
-#include "Utils.h"
-#include "PALDepthToSpace.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-DepthToSpace::DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params)
-  : KernelWithParams<DepthToSpaceParams>({input}, {output}, params)
-{
-}
-
-void DepthToSpace::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
-  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 ||
-                         output()->element_type() == DataType::U8);
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  const int block_size = params().block_size;
-  const int32_t input_height = input()->shape().dim(1);
-  const int32_t input_width = input()->shape().dim(2);
-  const int32_t input_channels = input()->shape().dim(3);
-  int32_t output_height = input_height * block_size;
-  int32_t output_width = input_width * block_size;
-  int32_t output_channels = input_channels / block_size / block_size;
-
-  LUCI_INTERPRETER_CHECK(input_height == output_height / block_size);
-  LUCI_INTERPRETER_CHECK(input_width == output_width / block_size);
-  LUCI_INTERPRETER_CHECK(input_channels == output_channels * block_size * block_size);
-
-  Shape output_shape(4);
-  output_shape.dim(0) = input()->shape().dim(0);
-  output_shape.dim(1) = output_height;
-  output_shape.dim(2) = output_width;
-  output_shape.dim(3) = output_channels;
-
-  output()->resize(output_shape);
-}
-
-void DepthToSpace::execute() const
-{
-  tflite::DepthToSpaceParams op_params;
-  op_params.block_size = params().block_size;
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
-                                         getTensorData<float>(input()), getTensorShape(output()),
-                                         getTensorData<float>(output()));
-      break;
-    case DataType::U8:
-      luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
-                                         getTensorData<uint8_t>(input()), getTensorShape(output()),
-                                         getTensorData<uint8_t>(output()));
-      break;
-    default:
-      throw std::runtime_error("Unsupported Type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
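For reference, the shape relation enforced by DepthToSpace::configure() above reduces to: an NHWC tensor {N, H, W, C} with block size B becomes {N, H*B, W*B, C/(B*B)}. A minimal shape-only sketch:

#include <array>
#include <cstdio>

int main()
{
  const std::array<int, 4> in{1, 2, 2, 4}; // N, H, W, C
  const int block = 2;
  const std::array<int, 4> out{in[0], in[1] * block, in[2] * block, in[3] / (block * block)};
  // {1, 2, 2, 4} -> {1, 4, 4, 1}
  std::printf("{%d, %d, %d, %d} -> {%d, %d, %d, %d}\n", in[0], in[1], in[2], in[3], out[0],
              out[1], out[2], out[3]);
  return 0;
}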
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
deleted file mode 100644 (file)
index c554c30..0000000
+++ /dev/null
@@ -1,451 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/DepthwiseConv2D.h"
-
-#include "kernels/Utils.h"
-
-#include "PALDepthwiseConv2d.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias,
-                                 Tensor *output, Tensor *scratchpad,
-                                 const DepthwiseConv2DParams &params)
-  : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output, scratchpad}, params)
-{
-}
-
-void DepthwiseConv2D::configure()
-{
-  // TensorFlow Lite (as of v2.2.0) supports the following combinations of types:
-  //     | input filter bias  output |
-  // ----+---------------------------+
-  // (1) | float float  float float  |
-  // (2) | float int8   float float  | hybrid
-  // (3) | uint8 uint8  int32 uint8  | quantized
-  // (4) | int8  int8   int32 int8   | quantized per channel
-  // (5) | int16 int8   int64 int16  | quantized per channel 16x8
-  //
-  // We only support (1), (3) and (4) for now, and additionally the following:
-  //     | input filter bias  output |
-  // ----+---------------------------+
-  // (6) | int16 int16  int64 int16  |
-  //
-  if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
-  {
-    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
-  }
-  else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
-  {
-    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
-  }
-  else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8)
-  {
-    LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
-    LUCI_INTERPRETER_CHECK(static_cast<uint32_t>(filter()->shape().dim(3)) ==
-                           filter()->scales().size());
-    for (auto zerop : filter()->zero_points())
-    {
-      LUCI_INTERPRETER_CHECK(zerop == 0);
-    }
-    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
-  }
-  else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
-  {
-    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-  LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());
-
-  const Shape &input_shape = input()->shape();
-  const Shape &filter_shape = filter()->shape();
-  LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
-
-  const int32_t batches = input_shape.dim(0);
-  const int32_t input_height = input_shape.dim(1);
-  const int32_t input_width = input_shape.dim(2);
-  // Filter format: [1, H, W, O].
-  LUCI_INTERPRETER_CHECK(filter_shape.dim(0) == 1);
-  const int32_t filter_height = filter_shape.dim(1);
-  const int32_t filter_width = filter_shape.dim(2);
-  const int32_t channels_out = filter_shape.dim(3);
-
-  LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
-                                               bias()->shape().dim(0) == channels_out));
-
-  const int32_t output_height =
-    computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
-                      _params.dilation_height_factor);
-  const int32_t output_width =
-    computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
-                      _params.dilation_width_factor);
-
-  _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
-                                   input_height, filter_height, output_height);
-  _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width,
-                                  filter_width, output_width);
-
-  output()->resize({batches, output_height, output_width, channels_out});
-
-  tflite::DepthwiseParams params{};
-
-  params.dilation_height_factor = _params.dilation_height_factor;
-  params.dilation_width_factor = _params.dilation_width_factor;
-
-  auto scratchpad = getOutputTensors()[1];
-  luci_interpreter_pal::SetupScratchpadTensor(scratchpad, params, input()->element_type(),
-                                              getTensorShape(input()), getTensorShape(filter()),
-                                              getTensorShape(output()));
-}
-
-void DepthwiseConv2D::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      if (filter()->element_type() == DataType::FLOAT32)
-      {
-        evalFloat();
-        break;
-      }
-      throw std::runtime_error("Unsupported type.");
-    case DataType::U8:
-      if (filter()->scales().size() == 1)
-      {
-        evalQuantized();
-      }
-      else if (filter()->scales().size() > 1)
-      {
-        LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
-        LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
-                               static_cast<size_t>(filter()->shape().dim(3)));
-        evalQuantizedPerChannel();
-      }
-      break;
-    case DataType::S8:
-      evalQuantizedS8PerChannel();
-      break;
-    case DataType::S16:
-      evalQuantizedS16();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void DepthwiseConv2D::evalFloat() const
-{
-  float activation_min{};
-  float activation_max{};
-  calculateActivationRange(_params.activation, &activation_min, &activation_max);
-
-  tflite::DepthwiseParams params{};
-  params.padding_values.height = _padding_height;
-  params.padding_values.width = _padding_width;
-  params.stride_height = _params.stride_height;
-  params.stride_width = _params.stride_width;
-  params.dilation_height_factor = _params.dilation_height_factor;
-  params.dilation_width_factor = _params.dilation_width_factor;
-  params.depth_multiplier = _params.depth_multiplier;
-  params.float_activation_min = activation_min;
-  params.float_activation_max = activation_max;
-
-  tflite::reference_ops::DepthwiseConv(
-    params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
-    getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
-    getTensorShape(output()), getTensorData<float>(output()));
-}
-
-void DepthwiseConv2D::evalQuantizedPerChannel() const
-{
-  const auto *input_data = getTensorData<uint8_t>(input());
-  const auto *filter_data = getTensorData<uint8_t>(filter());
-  const auto *bias_data = getTensorData<int32_t>(bias());
-  auto *output_data = getTensorData<uint8_t>(output());
-
-  const Shape &input_shape = input()->shape();
-  const Shape &filter_shape = filter()->shape();
-  const Shape &output_shape = output()->shape();
-
-  const int32_t batches = input_shape.dim(0);
-  const int32_t input_height = input_shape.dim(1);
-  const int32_t input_width = input_shape.dim(2);
-  const int32_t input_depth = input_shape.dim(3);
-  const int32_t filter_height = filter_shape.dim(1);
-  const int32_t filter_width = filter_shape.dim(2);
-  const int32_t output_height = output_shape.dim(1);
-  const int32_t output_width = output_shape.dim(2);
-
-  const int32_t stride_height = _params.stride_height;
-  const int32_t stride_width = _params.stride_width;
-  const int32_t dilation_height_factor = _params.dilation_height_factor;
-  const int32_t dilation_width_factor = _params.dilation_width_factor;
-  const int32_t depth_multiplier = _params.depth_multiplier;
-
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  const std::vector<double> effective_output_scales =
-    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
-
-  std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
-    quantizeMultipliers(effective_output_scales);
-  BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
-
-  for (int batch = 0; batch < batches; ++batch)
-  {
-    for (int out_y = 0; out_y < output_height; ++out_y)
-    {
-      for (int out_x = 0; out_x < output_width; ++out_x)
-      {
-        for (int in_channel = 0; in_channel < input_depth; ++in_channel)
-        {
-          for (int m = 0; m < depth_multiplier; ++m)
-          {
-            const int output_channel = m + in_channel * depth_multiplier;
-            const int in_x_origin = (out_x * stride_width) - _padding_width;
-            const int in_y_origin = (out_y * stride_height) - _padding_height;
-            int32 acc = 0;
-            for (int filter_y = 0; filter_y < filter_height; ++filter_y)
-            {
-              for (int filter_x = 0; filter_x < filter_width; ++filter_x)
-              {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y = in_y_origin + dilation_height_factor * filter_y;
-                // Zero padding by omitting the areas outside the image.
-                const bool is_point_inside_image =
-                  (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
-                if (is_point_inside_image)
-                {
-                  int32 input_val =
-                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_channel)];
-                  int32 filter_val =
-                    filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)];
-                  acc += (filter_val - filter()->zero_points()[output_channel]) *
-                         (input_val - input()->zero_point());
-                }
-              }
-            }
-            if (bias_data)
-            {
-              acc += bias_data[output_channel];
-            }
-            int32_t output_multiplier = quant_multipliers[output_channel].multiplier;
-            int output_shift = quant_multipliers[output_channel].shift;
-            int32_t scaled_acc =
-              tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
-            scaled_acc += output()->zero_point();
-            scaled_acc = std::max(scaled_acc, activation_min);
-            scaled_acc = std::min(scaled_acc, activation_max);
-            output_data[calcOffset(output_shape, batch, out_y, out_x, output_channel)] =
-              static_cast<uint8_t>(scaled_acc);
-          }
-        }
-      }
-    }
-  }
-}
-
-void DepthwiseConv2D::evalQuantized() const
-{
-  const auto input_scale = static_cast<double>(input()->scale());
-  const auto filter_scale = static_cast<double>(filter()->scale());
-  const auto output_scale = static_cast<double>(output()->scale());
-
-  const double real_multiplier = input_scale * filter_scale / output_scale;
-  int32_t output_multiplier{};
-  int output_shift{};
-  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
-
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  tflite::DepthwiseParams params{};
-  params.padding_values.height = _padding_height;
-  params.padding_values.width = _padding_width;
-  params.stride_height = _params.stride_height;
-  params.stride_width = _params.stride_width;
-  params.dilation_height_factor = _params.dilation_height_factor;
-  params.dilation_width_factor = _params.dilation_width_factor;
-  params.depth_multiplier = _params.depth_multiplier;
-  // The kernel expects input and filter zero points to be negated.
-  params.input_offset = -input()->zero_point();    // Note the '-'.
-  params.weights_offset = -filter()->zero_point(); // Note the '-'.
-  params.output_offset = output()->zero_point();
-  params.output_multiplier = output_multiplier;
-  params.output_shift = output_shift;
-  params.quantized_activation_min = activation_min;
-  params.quantized_activation_max = activation_max;
-
-  tflite::reference_ops::DepthwiseConv(
-    params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
-    getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
-    getTensorShape(output()), getTensorData<uint8_t>(output()));
-}
-
-void DepthwiseConv2D::evalQuantizedS8PerChannel() const
-{
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  tflite::DepthwiseParams params{};
-
-  params.padding_type = tflite::PaddingType::kSame;
-  params.padding_values.height = _padding_height;
-  params.padding_values.width = _padding_width;
-  params.stride_height = _params.stride_height;
-  params.stride_width = _params.stride_width;
-  params.dilation_height_factor = _params.dilation_height_factor;
-  params.dilation_width_factor = _params.dilation_width_factor;
-  params.depth_multiplier = _params.depth_multiplier;
-  // The kernel expects input and filter zero points to be negated.
-  params.input_offset = -input()->zero_point(); // Note the '-'.
-  params.weights_offset = 0;
-  params.output_offset = output()->zero_point();
-  params.output_multiplier = 1; // unused in tflite code
-  params.output_shift = 0;      // unused in tflite code
-  params.quantized_activation_min = activation_min;
-  params.quantized_activation_max = activation_max;
-
-  const std::vector<double> effective_output_scales =
-    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
-
-  std::vector<ChannelQuantMultipliers> quant_multipliers =
-    quantizeMultipliers(effective_output_scales);
-
-  std::vector<int32_t> shifts;
-  std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
-                 [](ChannelQuantMultipliers cm) { return cm.shift; });
-  std::vector<int32_t> multipliers;
-  std::transform(quant_multipliers.begin(), quant_multipliers.end(),
-                 std::back_inserter(multipliers),
-                 [](ChannelQuantMultipliers cm) { return cm.multiplier; });
-
-  auto scratchpad = getOutputTensors()[1];
-  int8_t *scratchpad_data = nullptr;
-  if (scratchpad->is_allocatable())
-    scratchpad_data = scratchpad->data<int8_t>();
-
-  luci_interpreter_pal::DepthwiseConvPerChannel<int8_t>(
-    params, multipliers.data(), shifts.data(), getTensorShape(input()),
-    getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
-    getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
-    getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
-}
-
-void DepthwiseConv2D::evalQuantizedS16() const
-{
-  const auto *input_data = getTensorData<int16_t>(input());
-  const auto *filter_data = getTensorData<int16_t>(filter());
-  const auto *bias_data = getTensorData<int64_t>(bias());
-  auto *output_data = getTensorData<int16_t>(output());
-
-  const Shape &input_shape = input()->shape();
-  const Shape &filter_shape = filter()->shape();
-  const Shape &output_shape = output()->shape();
-
-  const int32_t batches = input_shape.dim(0);
-  const int32_t input_height = input_shape.dim(1);
-  const int32_t input_width = input_shape.dim(2);
-  const int32_t input_depth = input_shape.dim(3);
-  const int32_t filter_height = filter_shape.dim(1);
-  const int32_t filter_width = filter_shape.dim(2);
-  const int32_t output_height = output_shape.dim(1);
-  const int32_t output_width = output_shape.dim(2);
-
-  const int32_t stride_height = _params.stride_height;
-  const int32_t stride_width = _params.stride_width;
-  const int32_t dilation_height_factor = _params.dilation_height_factor;
-  const int32_t dilation_width_factor = _params.dilation_width_factor;
-  const int32_t depth_multiplier = _params.depth_multiplier;
-
-  const std::vector<double> effective_output_scales =
-    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
-
-  std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
-    quantizeMultipliers(effective_output_scales);
-
-  BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
-
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  for (int32_t batch = 0; batch < batches; ++batch)
-  {
-    for (int32_t out_y = 0; out_y < output_height; ++out_y)
-    {
-      for (int32_t out_x = 0; out_x < output_width; ++out_x)
-      {
-        for (int32_t in_c = 0; in_c < input_depth; ++in_c)
-        {
-          for (int32_t m = 0; m < depth_multiplier; ++m)
-          {
-            const int32_t out_c = m + in_c * depth_multiplier;
-            const int32_t in_y_origin = out_y * stride_height - _padding_height;
-            const int32_t in_x_origin = out_x * stride_width - _padding_width;
-            int64_t acc = 0;
-            for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
-            {
-              for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
-              {
-                const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
-                const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
-                if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
-                {
-                  const int16_t input_val =
-                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
-                  const int16_t filter_val =
-                    filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, out_c)];
-                  acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
-                }
-              }
-            }
-            if (bias_data != nullptr)
-            {
-              acc += bias_data[out_c];
-            }
-
-            int32_t output_multiplier = quant_multipliers[out_c].multiplier;
-            int output_shift = quant_multipliers[out_c].shift;
-            int32_t scaled_acc =
-              tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
-
-            scaled_acc = std::max(scaled_acc, activation_min);
-            scaled_acc = std::min(scaled_acc, activation_max);
-
-            output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
-          }
-        }
-      }
-    }
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
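The inner loops of evalQuantizedPerChannel() and evalQuantizedS16() above each produce one integer accumulator per output element, which is then rescaled with a per-channel multiplier and clamped. The kernel does this in fixed point via quantizeMultipliers() and tflite::MultiplyByQuantizedMultiplier(); the floating-point equivalent of that requantization step, shown only for illustration with made-up scales, is:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main()
{
  // Hypothetical scales for one output channel.
  const double input_scale = 0.5;
  const double filter_scale = 0.25;
  const double output_scale = 0.125;
  const int32_t output_zero_point = 0;
  const int32_t activation_min = 0, activation_max = 255;

  // acc = sum over the receptive field of (input - input_zp) * (filter - filter_zp) + bias
  const int32_t acc = 40;
  const double effective_scale = input_scale * filter_scale / output_scale; // 1.0 here

  int32_t out = static_cast<int32_t>(std::round(acc * effective_scale)) + output_zero_point;
  out = std::max(activation_min, std::min(activation_max, out));
  std::printf("requantized output = %d\n", out); // 40
  return 0;
}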
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp
deleted file mode 100644 (file)
index 96399e5..0000000
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Dequantize.h"
-#include "kernels/Utils.h"
-#include "PALDequantize.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Dequantize::Dequantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-void Dequantize::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::S8 ||
-                         input()->element_type() == loco::DataType::U8 ||
-                         input()->element_type() == loco::DataType::S16);
-
-  LUCI_INTERPRETER_CHECK(input()->scales().size() == 1);
-
-  if (input()->element_type() == loco::DataType::S16)
-    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
-
-  LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32);
-
-  output()->resize(input()->shape());
-}
-
-void Dequantize::execute() const
-{
-  tflite::DequantizationParams op_params;
-  op_params.zero_point = input()->zero_point();
-  op_params.scale = input()->scale();
-
-  switch (input()->element_type())
-  {
-    case loco::DataType::U8:
-    {
-      luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
-                                       getTensorData<uint8_t>(input()), getTensorShape(output()),
-                                       getTensorData<float>(output()));
-      break;
-    }
-    case loco::DataType::S8:
-    {
-      luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
-                                       getTensorData<int8_t>(input()), getTensorShape(output()),
-                                       getTensorData<float>(output()));
-      break;
-    }
-    case loco::DataType::S16:
-    {
-      luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
-                                       getTensorData<int16_t>(input()), getTensorShape(output()),
-                                       getTensorData<float>(output()));
-      break;
-    }
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
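The per-element transform applied above is the usual affine dequantization, real = scale * (q - zero_point). A minimal sketch with made-up parameters:

#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
  const float scale = 0.5f;
  const int32_t zero_point = 128;
  const std::vector<uint8_t> q{0, 128, 130, 255};
  for (uint8_t v : q)
    std::printf("%u -> %f\n", static_cast<unsigned>(v),
                scale * (static_cast<int32_t>(v) - zero_point));
  // 0 -> -64.0, 128 -> 0.0, 130 -> 1.0, 255 -> 63.5
  return 0;
}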
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp
deleted file mode 100644 (file)
index dd15322..0000000
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Div.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/div.h>
-#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Div::Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams &params)
-  : KernelWithParams<DivParams>({input1, input2}, {output}, params)
-{
-}
-
-void Div::configure()
-{
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
-
-  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
-}
-
-void Div::execute() const
-{
-  switch (input1()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::S64:
-      evalInteger<int64_t>();
-      break;
-    case DataType::S32:
-      evalInteger<int32_t>();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Div::evalFloat() const
-{
-  tflite::ArithmeticParams params{};
-  fillArithmeticActivationRange<float>(params, _params.activation);
-
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    tflite::reference_ops::BroadcastDivSlow(
-      params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
-      getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
-  }
-  else
-  {
-    tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<float>(input1()),
-                               getTensorShape(input2()), getTensorData<float>(input2()),
-                               getTensorShape(output()), getTensorData<float>(output()));
-  }
-}
-
-template <typename T> void Div::evalInteger() const
-{
-  tflite::ArithmeticParams params{};
-  fillArithmeticActivationRange<T>(params, _params.activation);
-
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    tflite::reference_ops::BroadcastDivSlow(
-      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
-      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
-  }
-  else
-  {
-    tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<T>(input1()),
-                               getTensorShape(input2()), getTensorData<T>(input2()),
-                               getTensorShape(output()), getTensorData<T>(output()));
-  }
-}
-
-void Div::evalQuantized() const
-{
-  const auto input1_scale = static_cast<double>(input1()->scale());
-  const auto input2_scale = static_cast<double>(input2()->scale());
-  const auto output_scale = static_cast<double>(output()->scale());
-
-  const double real_output_multiplier = input1_scale / (input2_scale * output_scale);
-
-  int32_t output_multiplier{};
-  int output_shift{};
-
-  quantizeMultiplier(real_output_multiplier, &output_multiplier, &output_shift);
-
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  tflite::ArithmeticParams params{};
-
-  params.input1_offset = -input1()->zero_point(); // Note the '-'.
-  params.input2_offset = -input2()->zero_point(); // Note the '-'.
-  params.output_offset = output()->zero_point();
-  params.output_multiplier = output_multiplier;
-  params.output_shift = output_shift;
-  params.quantized_activation_min = activation_min;
-  params.quantized_activation_max = activation_max;
-
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    tflite::reference_ops::BroadcastDivSlow(
-      params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
-      getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
-  }
-  else
-  {
-    tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
-                               getTensorShape(input2()), getTensorData<uint8_t>(input2()),
-                               getTensorShape(output()), getTensorData<uint8_t>(output()));
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
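Div::evalQuantized() above folds the three tensor scales into one real multiplier, input1_scale / (input2_scale * output_scale), which quantizeMultiplier() then splits into a Q31 fixed-point multiplier and a power-of-two shift. A sketch of that decomposition in the same spirit (not the framework's exact routine):

#include <cmath>
#include <cstdint>
#include <cstdio>

int main()
{
  const double input1_scale = 0.5, input2_scale = 0.5, output_scale = 1.0;
  const double real_multiplier = input1_scale / (input2_scale * output_scale); // 1.0

  int shift = 0;
  const double q = std::frexp(real_multiplier, &shift); // real_multiplier = q * 2^shift, q in [0.5, 1)
  int64_t quantized = static_cast<int64_t>(std::round(q * (1ll << 31)));
  if (quantized == (1ll << 31)) // rounding can push q up to exactly 1.0
  {
    quantized /= 2;
    ++shift;
  }
  // The multiplier is later applied roughly as (x * quantized) >> (31 - shift) in integer math.
  std::printf("multiplier=%lld shift=%d\n", static_cast<long long>(quantized), shift);
  return 0;
}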
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp
deleted file mode 100644 (file)
index 697d63b..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Elu.h"
-#include "kernels/Utils.h"
-
-#include "PALElu.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Elu::Elu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-void Elu::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  output()->resize(input()->shape());
-}
-
-void Elu::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      luci_interpreter_pal::Elu(getTensorShape(input()), getTensorData<float>(input()),
-                                getTensorShape(output()), getTensorData<float>(output()));
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
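For reference, luci_interpreter_pal::Elu above evaluates the element-wise ELU nonlinearity; assuming the alpha = 1 variant (which is what TFLite's ELU op computes), a scalar sketch is:

#include <cmath>
#include <cstdio>

// elu(x) = x for x >= 0, and e^x - 1 otherwise (alpha = 1).
static float elu(float x) { return x >= 0.0f ? x : std::expm1(x); }

int main()
{
  for (float x : {-2.0f, -0.5f, 0.0f, 1.5f})
    std::printf("elu(%g) = %g\n", x, elu(x));
  return 0;
}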
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp
deleted file mode 100644 (file)
index a57e127..0000000
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Equal.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Equal::Equal(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
-
-void Equal::configure()
-{
-  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
-  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
-
-  if (x()->element_type() == DataType::U8)
-  {
-    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
-    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
-  }
-  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
-}
-
-void Equal::execute() const
-{
-  switch (x()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::S64:
-      evalInteger<int64_t>();
-      break;
-    case DataType::S32:
-      evalInteger<int32_t>();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Equal::evalFloat() const
-{
-  const auto x_data = getTensorData<float>(x());
-  const auto y_data = getTensorData<float>(y());
-  auto output_data = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (op_params.is_broadcast)
-  {
-    tflite::reference_ops::Broadcast4DSlowEqual(op_params, getTensorShape(x()), x_data,
-                                                getTensorShape(y()), y_data,
-                                                getTensorShape(output()), output_data);
-  }
-  else
-  {
-    tflite::reference_ops::Equal(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
-                                 y_data, getTensorShape(output()), output_data);
-  }
-}
-
-template <typename T> void Equal::evalInteger() const
-{
-  const auto x_data = getTensorData<T>(x());
-  const auto y_data = getTensorData<T>(y());
-  auto output_data = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (op_params.is_broadcast)
-  {
-    tflite::reference_ops::Broadcast4DSlowEqualNoScaling(op_params, getTensorShape(x()), x_data,
-                                                         getTensorShape(y()), y_data,
-                                                         getTensorShape(output()), output_data);
-  }
-  else
-  {
-    tflite::reference_ops::EqualNoScaling(op_params, getTensorShape(x()), x_data,
-                                          getTensorShape(y()), y_data, getTensorShape(output()),
-                                          output_data);
-  }
-}
-
-void Equal::evalQuantized() const
-{
-  const auto x_data = getTensorData<uint8_t>(x());
-  const auto y_data = getTensorData<uint8_t>(y());
-  auto output_data = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.left_shift = 8;
-  op_params.input1_offset = -x()->zero_point(); // Note the '-'
-  op_params.input1_shift = _x_shift;
-  op_params.input1_multiplier = _x_multiplier;
-  op_params.input2_offset = -y()->zero_point(); // Note the '-'
-  op_params.input2_shift = _y_shift;
-  op_params.input2_multiplier = _y_multiplier;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (op_params.is_broadcast)
-  {
-    tflite::reference_ops::Broadcast4DSlowEqualWithScaling(op_params, getTensorShape(x()), x_data,
-                                                           getTensorShape(y()), y_data,
-                                                           getTensorShape(output()), output_data);
-  }
-  else
-  {
-    tflite::reference_ops::EqualWithScaling(op_params, getTensorShape(x()), x_data,
-                                            getTensorShape(y()), y_data, getTensorShape(output()),
-                                            output_data);
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
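Both Div and Equal above resize their output with calculateShapeForBroadcast(). A standalone sketch of the right-aligned broadcasting rule such a helper typically implements (illustrative; not the library's code): dimensions are compared from the trailing axis, and each pair must either match or contain a 1.

#include <algorithm>
#include <cstdio>
#include <stdexcept>
#include <vector>

static std::vector<int> broadcastShape(std::vector<int> a, std::vector<int> b)
{
  const size_t n = std::max(a.size(), b.size());
  a.insert(a.begin(), n - a.size(), 1); // left-pad the shorter shape with 1s
  b.insert(b.begin(), n - b.size(), 1);
  std::vector<int> out(n);
  for (size_t i = 0; i < n; ++i)
  {
    if (a[i] != b[i] && a[i] != 1 && b[i] != 1)
      throw std::runtime_error("shapes are not broadcastable");
    out[i] = std::max(a[i], b[i]);
  }
  return out;
}

int main()
{
  const auto s = broadcastShape({2, 1, 4}, {3, 1}); // -> {2, 3, 4}
  std::printf("{%d, %d, %d}\n", s[0], s[1], s[2]);
  return 0;
}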
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp
deleted file mode 100644 (file)
index e7c560a..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Exp.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/exp.h>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Exp::Exp(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-void Exp::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  output()->resize(input()->shape());
-}
-
-void Exp::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Exp::evalFloat() const
-{
-  const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
-  tflite::reference_ops::Exp(getTensorData<float>(input()), size, getTensorData<float>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp
deleted file mode 100644 (file)
index ba35c99..0000000
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/ExpandDims.h"
-#include "kernels/Utils.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-ExpandDims::ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output)
-  : Kernel({input, axis}, {output})
-{
-}
-
-void ExpandDims::configure()
-{
-  int32_t axis_value;
-
-  switch (axis()->element_type())
-  {
-    case loco::DataType::S32:
-      axis_value = *getTensorData<int32_t>(axis());
-      break;
-    case loco::DataType::S64:
-      axis_value = static_cast<int32_t>(*getTensorData<int64_t>(axis()));
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-
-  const auto input_shape = input()->shape();
-
-  if (axis_value < 0)
-  {
-    axis_value += input_shape.num_dims() + 1;
-  }
-
-  LUCI_INTERPRETER_CHECK(axis_value <= input_shape.num_dims() and axis_value >= 0);
-
-  Shape output_shape(input_shape.num_dims() + 1);
-  for (int32_t i = 0; i < output_shape.num_dims(); ++i)
-  {
-    if (i < axis_value)
-    {
-      output_shape.dim(i) = input_shape.dim(i);
-    }
-    else if (i == axis_value)
-    {
-      output_shape.dim(i) = 1;
-    }
-    else
-    {
-      LUCI_INTERPRETER_CHECK(i >= 1);
-      output_shape.dim(i) = input_shape.dim(i - 1);
-    }
-  }
-
-  output()->resize(output_shape);
-}
-
-void ExpandDims::execute() const
-{
-  // Just copy input to output
-  const auto *input_data = input()->data<void>();
-  auto *output_data = output()->data<void>();
-
-  const size_t element_size = getDataTypeSize(input()->element_type());
-  const int32_t num_elements = input()->shape().num_elements();
-  std::memcpy(output_data, input_data, num_elements * element_size);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
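
A standalone sketch of the shape rule implemented in configure() above: a negative axis is first shifted by rank + 1, then a dimension of size 1 is inserted at that position, so axis = -1 on a {2, 2} input yields {2, 2, 1}, matching the NegAxis test below. Illustrative code, not part of the project:

  #include <cstdint>
  #include <vector>

  std::vector<int32_t> expand_dims_shape(std::vector<int32_t> shape, int32_t axis)
  {
    const int32_t rank = static_cast<int32_t>(shape.size());
    if (axis < 0)
      axis += rank + 1;                     // e.g. -1 -> rank, i.e. append a trailing dim
    // valid range after normalization: [0, rank]
    shape.insert(shape.begin() + axis, 1);
    return shape;
  }
  // expand_dims_shape({2, 2}, 0)  -> {1, 2, 2}
  // expand_dims_shape({2, 2}, -1) -> {2, 2, 1}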
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h
deleted file mode 100644 (file)
index e510b11..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
-#define LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
-
-#include "core/Kernel.h"
-#include "core/KernelParams.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-class ExpandDims : public Kernel
-{
-public:
-  ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output);
-
-  const Tensor *input() const { return _inputs[0]; }
-  const Tensor *axis() const { return _inputs[1]; }
-  Tensor *output() const { return _outputs[0]; }
-
-  void configure() override;
-  void execute() const override;
-};
-
-} // namespace kernels
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp
deleted file mode 100644 (file)
index df9eacc..0000000
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/ExpandDims.h"
-#include "kernels/TestUtils.h"
-#include "luci_interpreter/TestMemoryManager.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace
-{
-
-using namespace testing;
-
-class ExpandDimsTest : public ::testing::Test
-{
-protected:
-  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
-
-  std::unique_ptr<IMemoryManager> _memory_manager;
-};
-
-TEST_F(ExpandDimsTest, PositiveAxis)
-{
-  std::vector<int32_t> input_data{-1, 1, -2, 2};
-  std::initializer_list<int32_t> input_shape = {2, 2};
-
-  std::initializer_list<int32_t> axis_value = {0};
-
-  Tensor input_tensor =
-    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
-  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::S32);
-
-  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
-  kernel.configure();
-  _memory_manager->allocate_memory(output_tensor);
-  kernel.execute();
-
-  EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data));
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2}));
-}
-
-TEST_F(ExpandDimsTest, NegAxis)
-{
-  std::vector<int32_t> input_data{-1, 1, -2, 2};
-  std::initializer_list<int32_t> input_shape = {2, 2};
-
-  std::initializer_list<int32_t> axis_value = {-1};
-
-  Tensor input_tensor =
-    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
-  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::S32);
-
-  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
-  kernel.configure();
-  _memory_manager->allocate_memory(output_tensor);
-  kernel.execute();
-
-  EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data));
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 1}));
-}
-
-TEST_F(ExpandDimsTest, InvalidAxisType_NEG)
-{
-  std::vector<int32_t> input_data{-1, 1, -2, 2};
-  std::initializer_list<int32_t> input_shape = {2, 2};
-
-  std::initializer_list<float> axis_value = {1.0};
-
-  Tensor input_tensor =
-    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
-  Tensor axis_tensor = makeInputTensor<DataType::FLOAT32>({1}, axis_value, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::S32);
-
-  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST_F(ExpandDimsTest, InvalidAxisValue_NEG)
-{
-  std::vector<int32_t> input_data{-1, 1, -2, 2};
-  std::initializer_list<int32_t> input_shape = {2, 2};
-
-  std::initializer_list<int32_t> axis_value = {3};
-
-  Tensor input_tensor =
-    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
-  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::S32);
-
-  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-} // namespace
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp
deleted file mode 100644 (file)
index e09d633..0000000
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Fill.h"
-#include "kernels/Utils.h"
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Fill::Fill(const Tensor *dims, const Tensor *value, Tensor *output)
-  : Kernel({dims, value}, {output})
-{
-}
-
-template <typename T> void Fill::configureShape()
-{
-  const auto dims_data = getTensorData<T>(dims());
-  Shape output_shape(dims()->shape().dim(0));
-
-  for (int i = 0; i < output_shape.num_dims(); ++i)
-  {
-    T data = dims_data[i];
-    if (data < 0)
-      throw std::runtime_error("Fill dimensions must be >= 0");
-
-    output_shape.dim(i) = data;
-  }
-
-  output()->resize(output_shape);
-}
-
-void Fill::configure()
-{
-  const auto dims_shape = dims()->shape();
-  const auto value_shape = value()->shape();
-
-  // Make sure the 1st input tensor is 1-D
-  LUCI_INTERPRETER_CHECK(dims_shape.num_dims() == 1);
-
-  // Make sure the 1st input tensor is int32 or int64
-  LUCI_INTERPRETER_CHECK(dims()->element_type() == DataType::S32 or
-                         dims()->element_type() == DataType::S64);
-
-  // Make sure the 2nd input tensor is a scalar
-  LUCI_INTERPRETER_CHECK(value_shape.num_dims() == 0)
-
-  // Check zero point and scale for S16 and S8
-  if (value()->element_type() == loco::DataType::S16 or
-      value()->element_type() == loco::DataType::S8)
-  {
-    LUCI_INTERPRETER_CHECK(value()->scale() == output()->scale());
-    LUCI_INTERPRETER_CHECK(value()->zero_point() == output()->zero_point());
-
-    if (value()->element_type() == loco::DataType::S16)
-      LUCI_INTERPRETER_CHECK(value()->zero_point() == 0);
-  }
-  // Resize output
-  switch (dims()->element_type())
-  {
-    case DataType::S32:
-      configureShape<int32_t>();
-      break;
-    case DataType::S64:
-      configureShape<int64_t>();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Fill::execute() const
-{
-  switch (output()->element_type())
-  {
-    case DataType::S8:
-      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int8_t>(value()),
-                                  getTensorShape(output()), getTensorData<int8_t>(output()));
-      break;
-    case DataType::S16:
-      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int16_t>(value()),
-                                  getTensorShape(output()), getTensorData<int16_t>(output()));
-      break;
-    case DataType::S32:
-      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int32_t>(value()),
-                                  getTensorShape(output()), getTensorData<int32_t>(output()));
-      break;
-    case DataType::S64:
-      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int64_t>(value()),
-                                  getTensorShape(output()), getTensorData<int64_t>(output()));
-      break;
-    case DataType::FLOAT32:
-      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<float>(value()),
-                                  getTensorShape(output()), getTensorData<float>(output()));
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
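
A minimal standalone illustration of the semantics above: configure() sizes the output from the 1-D dims tensor (all entries must be non-negative), and execute() replicates the scalar value into every output element. Illustrative only:

  #include <cstdint>
  #include <stdexcept>
  #include <vector>

  std::vector<float> fill(const std::vector<int32_t> &dims, float value)
  {
    int64_t num_elements = 1;
    for (int32_t d : dims)
    {
      if (d < 0)
        throw std::runtime_error("Fill dimensions must be >= 0");
      num_elements *= d;
    }
    return std::vector<float>(static_cast<size_t>(num_elements), value);
  }
  // fill({2, 3}, 1.5f) -> six elements, all equal to 1.5f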
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp
deleted file mode 100644 (file)
index e3c4246..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Floor.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/floor.h>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Floor::Floor(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-void Floor::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  output()->resize(input()->shape());
-}
-
-void Floor::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Floor::evalFloat() const
-{
-  tflite::reference_ops::Floor(getTensorShape(input()), getTensorData<float>(input()),
-                               getTensorShape(output()), getTensorData<float>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp
deleted file mode 100644 (file)
index a7a10a3..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/FloorDiv.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
-#include <cmath>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-FloorDiv::FloorDiv(const Tensor *input, const Tensor *alpha, Tensor *output)
-  : Kernel({input, alpha}, {output})
-{
-}
-
-void FloorDiv::configure()
-{
-  LUCI_INTERPRETER_CHECK(x()->element_type() == output()->element_type());
-  LUCI_INTERPRETER_CHECK(y()->element_type() == output()->element_type());
-
-  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
-}
-
-void FloorDiv::execute() const
-{
-  switch (x()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void FloorDiv::evalFloat() const
-{
-  auto FloorDivFunc = [](float x, float y) -> float {
-    return std::floor(static_cast<double>(x) / static_cast<double>(y));
-  };
-
-  const auto x_data = getTensorData<float>(x());
-  const auto y_data = getTensorData<float>(y());
-
-  // Check the denominator
-  for (int i = 0; i < getTensorShape(y()).FlatSize(); ++i)
-  {
-    LUCI_INTERPRETER_CHECK(y_data[i] != 0);
-  }
-
-  if (x()->shape() != y()->shape())
-  {
-    tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
-      getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
-      getTensorData<float>(output()), FloorDivFunc);
-  }
-  else
-  {
-    tflite::reference_ops::BinaryFunction<float, float, float>(
-      getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
-      getTensorData<float>(output()), FloorDivFunc);
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
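
The FloorDivFunc lambda above rounds toward negative infinity, which differs from C++ integer division (truncation toward zero) for mixed-sign operands. A standalone check, illustrative only:

  #include <cassert>
  #include <cmath>

  int main()
  {
    auto floor_div = [](float x, float y) -> float {
      return std::floor(static_cast<double>(x) / static_cast<double>(y));
    };
    assert(floor_div(7.0f, 2.0f) == 3.0f);
    assert(floor_div(-7.0f, 2.0f) == -4.0f); // truncating division would give -3
    assert(floor_div(7.0f, -2.0f) == -4.0f);
    return 0;
  }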
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp
deleted file mode 100644 (file)
index bd2bb2f..0000000
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/FullyConnected.h"
-
-#include "kernels/Utils.h"
-
-#include "PALFullyConnected.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-FullyConnected::FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias,
-                               Tensor *output, const FullyConnectedParams &params)
-  : KernelWithParams<FullyConnectedParams>({input, weights, bias}, {output}, params)
-{
-}
-
-void FullyConnected::configure()
-{
-  if (weights()->element_type() == DataType::U8)
-  {
-    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8);
-    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::U8);
-    LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32)
-  }
-  else if (weights()->element_type() == DataType::FLOAT32)
-  {
-    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32);
-    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
-    LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::FLOAT32)
-  }
-  else if (weights()->element_type() == DataType::S8)
-  {
-    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8);
-    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S8);
-    LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32)
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-
-  const Shape &input_shape = input()->shape();
-  const Shape &weights_shape = weights()->shape();
-
-  LUCI_INTERPRETER_CHECK(weights_shape.num_dims() == 2);
-  LUCI_INTERPRETER_CHECK(bias() == nullptr ||
-                         bias()->shape().num_elements() == weights_shape.dim(0));
-
-  LUCI_INTERPRETER_CHECK(input_shape.num_elements() % weights_shape.dim(1) == 0);
-  const int32_t batch_size = input_shape.num_elements() / weights_shape.dim(1);
-  const int32_t num_units = weights_shape.dim(0);
-
-  if (bias())
-    LUCI_INTERPRETER_CHECK(bias()->shape().num_elements() == weights()->shape().dim(0));
-
-  if (params().keep_num_dims == false)
-  {
-    output()->resize({batch_size, num_units});
-  }
-  else
-  {
-    luci_interpreter::Shape output_shape(input_shape.num_dims());
-    for (int i = 0; i < input_shape.num_dims(); ++i)
-      output_shape.dim(i) = input_shape.dim(i);
-    output_shape.dim(input_shape.num_dims() - 1) = num_units;
-    output()->resize(output_shape);
-  }
-}
-
-void FullyConnected::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::U8:
-      evalQuantized();
-      break;
-    case DataType::S8:
-      evalQuantizedS8();
-      break;
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void FullyConnected::evalFloat() const
-{
-  float activation_min{};
-  float activation_max{};
-  calculateActivationRange(_params.activation, &activation_min, &activation_max);
-
-  tflite::FullyConnectedParams params{};
-  params.float_activation_min = activation_min;
-  params.float_activation_max = activation_max;
-  params.weights_format = tflite::FullyConnectedWeightsFormat::kDefault;
-
-  tflite::reference_ops::FullyConnected(
-    params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(weights()),
-    getTensorData<float>(weights()), getTensorShape(bias()), getTensorData<float>(bias()),
-    getTensorShape(output()), getTensorData<float>(output()));
-}
-
-void FullyConnected::evalQuantized() const
-{
-  double real_multiplier = 0.0;
-  int output_shift;
-  int32_t output_activation_min;
-  int32_t output_activation_max;
-  int32_t output_multiplier;
-  real_multiplier =
-    getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale());
-  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
-  calculateActivationRangeQuantized(params().activation, output(), &output_activation_min,
-                                    &output_activation_max);
-
-  int32_t input_offset = -input()->zero_point();
-  int32_t filter_offset = -weights()->zero_point();
-  int32_t output_offset = output()->zero_point();
-
-  tflite::FullyConnectedParams op_params{};
-  op_params.input_offset = input_offset;
-  op_params.weights_offset = filter_offset;
-  op_params.output_offset = output_offset;
-  op_params.output_multiplier = output_multiplier;
-  op_params.output_shift = output_shift;
-  op_params.quantized_activation_min = output_activation_min;
-  op_params.quantized_activation_max = output_activation_max;
-  op_params.lhs_cacheable = false;
-  op_params.rhs_cacheable = false;
-  tflite::reference_ops::FullyConnected(
-    op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(weights()),
-    getTensorData<uint8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
-    getTensorShape(output()), getTensorData<uint8_t>(output()));
-}
-
-void FullyConnected::evalQuantizedS8() const
-{
-  double real_multiplier = 0.0;
-  int output_shift;
-  int32_t output_activation_min;
-  int32_t output_activation_max;
-  int32_t output_multiplier;
-  real_multiplier =
-    getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale());
-  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
-  calculateActivationRangeQuantized(params().activation, output(), &output_activation_min,
-                                    &output_activation_max);
-
-  int32_t input_offset = -input()->zero_point();
-  int32_t filter_offset = -weights()->zero_point();
-  int32_t output_offset = output()->zero_point();
-
-  tflite::FullyConnectedParams op_params{};
-  op_params.input_offset = input_offset;
-  op_params.weights_offset = filter_offset;
-  op_params.output_offset = output_offset;
-  op_params.output_multiplier = output_multiplier;
-  op_params.output_shift = output_shift;
-  op_params.quantized_activation_min = output_activation_min;
-  op_params.quantized_activation_max = output_activation_max;
-  op_params.lhs_cacheable = false;
-  op_params.rhs_cacheable = false;
-  luci_interpreter_pal::FullyConnected<int8_t>(
-    op_params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(weights()),
-    getTensorData<int8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
-    getTensorShape(output()), getTensorData<int8_t>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
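
In the quantized paths above, the combined scale input_scale * weights_scale / output_scale is folded into an int32 multiplier plus a shift. A standalone sketch of that decomposition (real_multiplier is approximately multiplier * 2^(shift - 31)); it mirrors the general TFLite scheme rather than the project's exact Utils implementation:

  #include <cmath>
  #include <cstdint>
  #include <cstdio>

  // real_multiplier ~= multiplier * 2^(shift - 31), with multiplier in [2^30, 2^31)
  void decompose_multiplier(double real_multiplier, int32_t *multiplier, int *shift)
  {
    double q = std::frexp(real_multiplier, shift); // q in [0.5, 1), real = q * 2^shift
    int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
    if (q_fixed == (1ll << 31))
    {
      q_fixed /= 2;
      ++*shift;
    }
    *multiplier = static_cast<int32_t>(q_fixed);
  }

  int main()
  {
    int32_t multiplier = 0;
    int shift = 0;
    decompose_multiplier(0.5 * 0.25 / 0.1, &multiplier, &shift); // input * weights / output scales
    std::printf("multiplier=%d shift=%d\n", multiplier, shift);
    return 0;
  }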
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h
deleted file mode 100644 (file)
index 2a7c068..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
-#define LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
-
-#include "core/Kernel.h"
-#include "core/KernelParams.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-class FullyConnected : public KernelWithParams<FullyConnectedParams>
-{
-public:
-  FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias, Tensor *output,
-                 const FullyConnectedParams &params);
-
-  const Tensor *input() const { return _inputs[0]; }
-  const Tensor *weights() const { return _inputs[1]; }
-  const Tensor *bias() const { return _inputs[2]; }
-  Tensor *output() const { return _outputs[0]; }
-
-  void configure() override;
-  void execute() const override;
-
-private:
-  void evalFloat() const;
-  void evalQuantized() const;
-  void evalQuantizedS8() const;
-};
-
-} // namespace kernels
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp
deleted file mode 100644 (file)
index 4474cc4..0000000
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/FullyConnected.h"
-#include "kernels/TestUtils.h"
-#include "luci_interpreter/TestMemoryManager.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace
-{
-
-using namespace testing;
-
-template <typename T>
-void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
-           std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
-           std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
-           std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
-{
-  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
-  Tensor weights_tensor =
-    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
-  Tensor bias_tensor =
-    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
-  FullyConnectedParams params{};
-  params.activation = Activation::RELU;
-
-  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
-  kernel.configure();
-  memory_manager->allocate_memory(output_tensor);
-  kernel.execute();
-
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
-  EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
-}
-
-template <>
-void Check<int8_t>(std::initializer_list<int32_t> input_shape,
-                   std::initializer_list<int32_t> weights_shape,
-                   std::initializer_list<int32_t> bias_shape,
-                   std::initializer_list<int32_t> output_shape,
-                   std::initializer_list<float> input_data,
-                   std::initializer_list<float> weights_data,
-                   std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
-{
-  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
-  const float quantized_tolerance = getTolerance(-127, 128, 255);
-  std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-63.5, 64);
-  std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
-  Tensor input_tensor =
-    makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
-                                  input_data, memory_manager.get());
-  Tensor weights_tensor =
-    makeInputTensor<DataType::S8>(weights_shape, input_quant_param.first, input_quant_param.second,
-                                  weights_data, memory_manager.get());
-  Tensor bias_tensor =
-    makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
-                                   bias_data, memory_manager.get());
-  Tensor output_tensor =
-    makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
-
-  FullyConnectedParams params{};
-  params.activation = Activation::RELU;
-
-  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
-  kernel.configure();
-  memory_manager->allocate_memory(output_tensor);
-  kernel.execute();
-
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
-  EXPECT_THAT(dequantizeTensorData(output_tensor),
-              FloatArrayNear(output_data, quantized_tolerance));
-}
-
-template <>
-void Check<uint8_t>(
-  std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
-  std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
-  std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
-  std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
-{
-  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
-  const float quantized_tolerance = getTolerance(-127, 128, 255);
-  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
-  std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
-                                  input_data, memory_manager.get());
-  Tensor weights_tensor =
-    makeInputTensor<DataType::U8>(weights_shape, input_quant_param.first, input_quant_param.second,
-                                  weights_data, memory_manager.get());
-  Tensor bias_tensor =
-    makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
-                                   bias_data, memory_manager.get());
-  Tensor output_tensor =
-    makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
-
-  FullyConnectedParams params{};
-  params.activation = Activation::RELU;
-
-  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
-  kernel.configure();
-  memory_manager->allocate_memory(output_tensor);
-  kernel.execute();
-
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
-  EXPECT_THAT(dequantizeTensorData(output_tensor),
-              FloatArrayNear(output_data, quantized_tolerance));
-}
-
-template <typename T> class FullyConnectedTest : public ::testing::Test
-{
-};
-
-using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
-TYPED_TEST_SUITE(FullyConnectedTest, DataTypes);
-
-TYPED_TEST(FullyConnectedTest, Simple)
-{
-  Check<TypeParam>({3, 2, 2, 1}, {3, 6}, {3}, {2, 3},
-                   {
-                     -3, -5, 5, 4, 9, -2,  // batch = 0
-                     -3, -2, -4, 9, -8, 1, // batch = 1
-                   },
-                   {
-                     -3, -7, 4, -4, -6, 4, // unit = 0
-                     3, 5, 2, 3, -3, -8,   // unit = 1
-                     -3, 7, 4, 9, 0, -5,   // unit = 2
-                   },
-                   {-1, -5, -8},
-                   {
-                     0, 0, 32,   // batch = 0
-                     22, 11, 47, // batch = 1
-                   });
-}
-
-TEST(FullyConnectedTest, InvalidBiasType_NEG)
-{
-  Shape input_shape{3, 2, 2, 1};
-  std::vector<float> input_data{
-    -3, -5, 5,  4, 9,  -2, // batch = 0
-    -3, -2, -4, 9, -8, 1,  // batch = 1
-  };
-  Shape weights_shape{3, 6};
-  std::vector<float> weights_data{
-    -3, -7, 4, -4, -6, 4,  // unit = 0
-    3,  5,  2, 3,  -3, -8, // unit = 1
-    -3, 7,  4, 9,  0,  -5, // unit = 2
-  };
-  Shape bias_shape{3};
-  std::vector<int32_t> bias_data{-1, -5, -8};
-
-  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
-
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
-  Tensor weights_tensor =
-    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
-  Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
-  FullyConnectedParams params{};
-  params.activation = Activation::RELU;
-
-  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST(FullyConnectedTest, InvalidWeightShapeDim_NEG)
-{
-  Shape input_shape{3, 2, 2, 1};
-  std::vector<float> input_data{
-    -3, -5, 5,  4, 9,  -2, // batch = 0
-    -3, -2, -4, 9, -8, 1,  // batch = 1
-  };
-  Shape weights_shape{1, 3, 6};
-  std::vector<float> weights_data{
-    -3, -7, 4, -4, -6, 4,  // unit = 0
-    3,  5,  2, 3,  -3, -8, // unit = 1
-    -3, 7,  4, 9,  0,  -5, // unit = 2
-  };
-  Shape bias_shape{3};
-  std::vector<float> bias_data{-1, -5, -8};
-
-  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
-
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
-  Tensor weights_tensor =
-    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
-  Tensor bias_tensor =
-    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
-  FullyConnectedParams params{};
-  params.activation = Activation::RELU;
-
-  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST(FullyConnectedTest, BiasElementNumWeightDimMismatch_NEG)
-{
-  Shape input_shape{3, 2, 2, 1};
-  std::vector<float> input_data{
-    -3, -5, 5,  4, 9,  -2, // batch = 0
-    -3, -2, -4, 9, -8, 1,  // batch = 1
-  };
-  Shape weights_shape{6, 3};
-  std::vector<float> weights_data{
-    -3, -7, 4,  // unit = 0
-    -4, -6, 4,  // unit = 1
-    3,  5,  2,  // unit = 2
-    3,  -3, -8, // unit = 3
-    -3, 7,  4,  // unit = 4
-    9,  0,  -5, // unit = 5
-  };
-  Shape bias_shape{3};
-  std::vector<float> bias_data{-1, -5, -8};
-
-  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
-
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
-  Tensor weights_tensor =
-    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
-  Tensor bias_tensor =
-    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
-  FullyConnectedParams params{};
-  params.activation = Activation::RELU;
-
-  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-} // namespace
-} // namespace kernels
-} // namespace luci_interpreter
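
A quick standalone check of the Simple test data above for batch 0: each output unit is relu(dot(weights_row, input_row) + bias), which gives {0, 0, 32}. Illustrative only:

  #include <algorithm>
  #include <cassert>

  int main()
  {
    const float input[6] = {-3, -5, 5, 4, 9, -2};          // batch = 0
    const float weights[3][6] = {{-3, -7, 4, -4, -6, 4},   // unit = 0
                                 {3, 5, 2, 3, -3, -8},     // unit = 1
                                 {-3, 7, 4, 9, 0, -5}};    // unit = 2
    const float bias[3] = {-1, -5, -8};
    float out[3];
    for (int u = 0; u < 3; ++u)
    {
      float acc = bias[u];
      for (int i = 0; i < 6; ++i)
        acc += weights[u][i] * input[i];
      out[u] = std::max(acc, 0.0f); // RELU activation
    }
    assert(out[0] == 0.0f && out[1] == 0.0f && out[2] == 32.0f);
    return 0;
  }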
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Gather.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.cpp
deleted file mode 100644 (file)
index f125666..0000000
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2021 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Gather.h"
-#include "kernels/Utils.h"
-#include "PALGather.h"
-
-#include <stdexcept>
-#include <cassert>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Gather::Gather(const Tensor *params, const Tensor *indices, Tensor *output,
-               const GatherParams &gparams)
-  : KernelWithParams<GatherParams>({params, indices}, {output}, gparams)
-{
-}
-
-void Gather::configure()
-{
-  if (params()->element_type() == DataType::FLOAT32)
-  {
-    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-
-  LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32 ||
-                         indices()->element_type() == DataType::S64);
-
-  // refer tensorflow/lite/kernels/gather.cc
-
-  const Shape &params_shape = params()->shape();
-  const Shape &indices_shape = indices()->shape();
-
-  int axis = _params.axis;
-  if (axis < 0)
-  {
-    axis += params_shape.num_dims();
-  }
-  LUCI_INTERPRETER_CHECK(0 <= axis && axis < params_shape.num_dims());
-
-  int batch_dims = _params.batch_dims;
-  // batch_dims should be in range: [-rank(indices), rank(indices)].
-  // Negative batch_dims is added with rank of positions.
-  if (batch_dims < 0)
-  {
-    batch_dims += indices_shape.num_dims();
-  }
-  LUCI_INTERPRETER_CHECK(batch_dims <= axis);
-  LUCI_INTERPRETER_CHECK(0 <= batch_dims && batch_dims < params_shape.num_dims());
-  LUCI_INTERPRETER_CHECK(batch_dims <= indices_shape.num_dims());
-  for (int i = 0; i < batch_dims; ++i)
-  {
-    LUCI_INTERPRETER_CHECK(params_shape.dim(i) == indices_shape.dim(i));
-  }
-
-  const int num_dimensions = params_shape.num_dims() + indices_shape.num_dims() - 1 - batch_dims;
-
-  Shape output_shape(num_dimensions);
-  int output_index = 0;
-  for (int i = 0; i < axis; ++i)
-  {
-    output_shape.dim(output_index++) = params_shape.dim(i);
-  }
-  for (int i = batch_dims; i < indices_shape.num_dims(); ++i)
-  {
-    output_shape.dim(output_index++) = indices_shape.dim(i);
-  }
-  for (int i = axis + 1; i < params_shape.num_dims(); ++i)
-  {
-    output_shape.dim(output_index++) = params_shape.dim(i);
-  }
-  output()->resize(output_shape);
-}
-
-void Gather::execute() const
-{
-  switch (params()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Gather::evalFloat() const
-{
-  assert(indices()->element_type() == DataType::S32 || indices()->element_type() == DataType::S64);
-
-  const auto params_data = getTensorData<float>(params());
-  auto output_data = getTensorData<float>(output());
-
-  tflite::GatherParams tparams;
-  tparams.axis = _params.axis;
-  tparams.batch_dims = _params.batch_dims;
-
-  if (indices()->element_type() == DataType::S32)
-  {
-    const auto indices_data = getTensorData<int32_t>(indices());
-
-    luci_interpreter_pal::Gather<float, int32_t>(tparams, getTensorShape(params()), params_data,
-                                                 getTensorShape(indices()), indices_data,
-                                                 getTensorShape(output()), output_data);
-  }
-  else
-  {
-    const auto indices_data = getTensorData<int64_t>(indices());
-
-    luci_interpreter_pal::Gather<float, int64_t>(tparams, getTensorShape(params()), params_data,
-                                                 getTensorShape(indices()), indices_data,
-                                                 getTensorShape(output()), output_data);
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
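
The output shape computed in configure() above concatenates the params dims before axis, the indices dims after batch_dims, and the params dims after axis, so the output rank is params_rank + indices_rank - 1 - batch_dims. A standalone sketch, illustrative only:

  #include <cstdint>
  #include <vector>

  std::vector<int32_t> gather_output_shape(const std::vector<int32_t> &params_shape,
                                           const std::vector<int32_t> &indices_shape, int axis,
                                           int batch_dims)
  {
    std::vector<int32_t> out;
    for (int i = 0; i < axis; ++i)
      out.push_back(params_shape[i]);
    for (size_t i = batch_dims; i < indices_shape.size(); ++i)
      out.push_back(indices_shape[i]);
    for (size_t i = axis + 1; i < params_shape.size(); ++i)
      out.push_back(params_shape[i]);
    return out;
  }
  // gather_output_shape({4, 3, 2}, {5}, /*axis=*/1, /*batch_dims=*/0) -> {4, 5, 2}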
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp
deleted file mode 100644 (file)
index 5ccae3c..0000000
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Greater.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Greater::Greater(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
-
-void Greater::configure()
-{
-  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
-  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
-
-  if (x()->element_type() == DataType::U8)
-  {
-    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
-    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
-  }
-  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
-}
-
-void Greater::execute() const
-{
-  switch (x()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::S64:
-      evalInteger<int64_t>();
-      break;
-    case DataType::S32:
-      evalInteger<int32_t>();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Greater::evalFloat() const
-{
-  const auto x_data = getTensorData<float>(x());
-  const auto y_data = getTensorData<float>(y());
-  auto output_data = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (op_params.is_broadcast)
-  {
-    tflite::reference_ops::Broadcast4DSlowGreater(op_params, getTensorShape(x()), x_data,
-                                                  getTensorShape(y()), y_data,
-                                                  getTensorShape(output()), output_data);
-  }
-  else
-  {
-    tflite::reference_ops::Greater(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
-                                   y_data, getTensorShape(output()), output_data);
-  }
-}
-
-template <typename T> void Greater::evalInteger() const
-{
-  const auto x_data = getTensorData<T>(x());
-  const auto y_data = getTensorData<T>(y());
-  auto output_data = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (op_params.is_broadcast)
-  {
-    tflite::reference_ops::Broadcast4DSlowGreaterNoScaling(op_params, getTensorShape(x()), x_data,
-                                                           getTensorShape(y()), y_data,
-                                                           getTensorShape(output()), output_data);
-  }
-  else
-  {
-    tflite::reference_ops::GreaterNoScaling(op_params, getTensorShape(x()), x_data,
-                                            getTensorShape(y()), y_data, getTensorShape(output()),
-                                            output_data);
-  }
-}
-
-void Greater::evalQuantized() const
-{
-  const auto x_data = getTensorData<uint8_t>(x());
-  const auto y_data = getTensorData<uint8_t>(y());
-  auto output_data = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.left_shift = 8;
-  op_params.input1_offset = -x()->zero_point(); // Note the '-'
-  op_params.input1_shift = _x_shift;
-  op_params.input1_multiplier = _x_multiplier;
-  op_params.input2_offset = -y()->zero_point(); // Note the '-'
-  op_params.input2_shift = _y_shift;
-  op_params.input2_multiplier = _y_multiplier;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (op_params.is_broadcast)
-  {
-    tflite::reference_ops::Broadcast4DSlowGreaterWithScaling(op_params, getTensorShape(x()), x_data,
-                                                             getTensorShape(y()), y_data,
-                                                             getTensorShape(output()), output_data);
-  }
-  else
-  {
-    tflite::reference_ops::GreaterWithScaling(op_params, getTensorShape(x()), x_data,
-                                              getTensorShape(y()), y_data, getTensorShape(output()),
-                                              output_data);
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp
deleted file mode 100644 (file)
index 27e42c9..0000000
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/GreaterEqual.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-GreaterEqual::GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output)
-  : Kernel({x, y}, {output})
-{
-}
-
-void GreaterEqual::configure()
-{
-  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
-  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
-
-  if (x()->element_type() == DataType::U8)
-  {
-    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
-    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
-  }
-  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
-}
-
-void GreaterEqual::execute() const
-{
-  switch (x()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::S64:
-      evalInteger<int64_t>();
-      break;
-    case DataType::S32:
-      evalInteger<int32_t>();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void GreaterEqual::evalFloat() const
-{
-  const auto x_data = getTensorData<float>(x());
-  const auto y_data = getTensorData<float>(y());
-  auto output_data = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (op_params.is_broadcast)
-  {
-    tflite::reference_ops::Broadcast4DSlowGreaterEqual(op_params, getTensorShape(x()), x_data,
-                                                       getTensorShape(y()), y_data,
-                                                       getTensorShape(output()), output_data);
-  }
-  else
-  {
-    tflite::reference_ops::GreaterEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
-                                        y_data, getTensorShape(output()), output_data);
-  }
-}
-
-template <typename T> void GreaterEqual::evalInteger() const
-{
-  const auto x_data = getTensorData<T>(x());
-  const auto y_data = getTensorData<T>(y());
-  auto output_data = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (op_params.is_broadcast)
-  {
-    tflite::reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
-      op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
-      output_data);
-  }
-  else
-  {
-    tflite::reference_ops::GreaterEqualNoScaling(op_params, getTensorShape(x()), x_data,
-                                                 getTensorShape(y()), y_data,
-                                                 getTensorShape(output()), output_data);
-  }
-}
-
-void GreaterEqual::evalQuantized() const
-{
-  const auto x_data = getTensorData<uint8_t>(x());
-  const auto y_data = getTensorData<uint8_t>(y());
-  auto output_data = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.left_shift = 8;
-  op_params.input1_offset = -x()->zero_point(); // Note the '-'
-  op_params.input1_shift = _x_shift;
-  op_params.input1_multiplier = _x_multiplier;
-  op_params.input2_offset = -y()->zero_point(); // Note the '-'
-  op_params.input2_shift = _y_shift;
-  op_params.input2_multiplier = _y_multiplier;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (op_params.is_broadcast)
-  {
-    tflite::reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
-      op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
-      output_data);
-  }
-  else
-  {
-    tflite::reference_ops::GreaterEqualWithScaling(op_params, getTensorShape(x()), x_data,
-                                                   getTensorShape(y()), y_data,
-                                                   getTensorShape(output()), output_data);
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp
deleted file mode 100644 (file)
index 22a329b..0000000
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/InstanceNorm.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/common.h>
-#include <cmath>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-InstanceNorm::InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta,
-                           Tensor *output, const InstanceNormParams &params)
-  : KernelWithParams<InstanceNormParams>({input, gamma, beta}, {output}, params)
-{
-}
-
-void InstanceNorm::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  LUCI_INTERPRETER_CHECK(gamma()->element_type() == input()->element_type());
-  LUCI_INTERPRETER_CHECK(gamma()->shape().num_dims() == 1);
-  LUCI_INTERPRETER_CHECK(gamma()->shape().dim(0) == input()->shape().dim(3) ||
-                         gamma()->shape().dim(0) == 1);
-  LUCI_INTERPRETER_CHECK(beta()->element_type() == input()->element_type());
-  LUCI_INTERPRETER_CHECK(beta()->shape().num_dims() == 1);
-  LUCI_INTERPRETER_CHECK(beta()->shape().dim(0) == input()->shape().dim(3) ||
-                         beta()->shape().dim(0) == 1);
-  output()->resize(input()->shape());
-}
-
-void InstanceNorm::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void InstanceNorm::evalFloat() const
-{
-  float activation_min, activation_max;
-  calculateActivationRange(params().activation, &activation_min, &activation_max);
-  auto input_shape = getTensorShape(input());
-  auto output_shape = getTensorShape(output());
-  const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
-  const int32_t heights = tflite::MatchingDim(input_shape, 1, output_shape, 1);
-  const int32_t widths = tflite::MatchingDim(input_shape, 2, output_shape, 2);
-  const int32_t channels = tflite::MatchingDim(input_shape, 3, output_shape, 3);
-  const float *input_data = getTensorData<float>(input());
-  const float *gamma_data = getTensorData<float>(gamma());
-  auto gamma_shape = getTensorShape(gamma());
-  bool single_gamma = gamma_shape.DimensionsCount() == 1 && gamma_shape.Dims(0) == 1;
-  const float *beta_data = getTensorData<float>(beta());
-  auto beta_shape = getTensorShape(beta());
-  bool single_beta = beta_shape.DimensionsCount() == 1 && beta_shape.Dims(0) == 1;
-  float *output_data = getTensorData<float>(output());
-  for (int32_t batch = 0; batch < batches; batch++)
-  {
-    for (int32_t channel = 0; channel < channels; channel++)
-    {
-      double sum = 0.0f;
-      double square_sum = 0.0f;
-      int32_t size = heights * widths;
-      for (int32_t height = 0; height < heights; height++)
-      {
-        for (int32_t width = 0; width < widths; width++)
-        {
-          double input_val = input_data[tflite::Offset(input_shape, batch, height, width, channel)];
-          sum += input_val;
-          square_sum += (input_val * input_val);
-        }
-      }
-      double mean = sum / size;
-      double var = square_sum / size - mean * mean;
-
-      double gamma = single_gamma ? gamma_data[0] : gamma_data[channel];
-      double beta = single_beta ? beta_data[0] : beta_data[channel];
-      double a = gamma / (std::sqrt(var + params().epsilon));
-      double b = -mean * a + beta;
-
-      for (int32_t height = 0; height < heights; height++)
-      {
-        for (int32_t width = 0; width < widths; width++)
-        {
-          double input_value =
-            input_data[tflite::Offset(output_shape, batch, height, width, channel)];
-          double output_value = input_value * a + b;
-          output_data[tflite::Offset(output_shape, batch, height, width, channel)] =
-            tflite::ActivationFunctionWithMinMax((float)output_value, activation_min,
-                                                 activation_max);
-        }
-      }
-    }
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
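
For reference, the removed evalFloat above folds instance normalization into a per-channel affine transform: over each (batch, channel) plane it takes mean and variance, then applies y = gamma * (x - mean) / sqrt(var + eps) + beta, precomputed as y = a*x + b. A minimal standalone sketch of that arithmetic, assuming a single H*W float plane; the names below are illustrative and not part of the luci-interpreter API:

#include <cmath>
#include <cstddef>
#include <vector>

// Instance-norm one (batch, channel) plane: y = a*x + b with
// a = gamma / sqrt(var + eps) and b = -mean * a + beta.
void instance_norm_plane(const std::vector<float> &x, float gamma, float beta,
                         float eps, std::vector<float> &y)
{
  double sum = 0.0, sq_sum = 0.0;
  for (float v : x)
  {
    sum += v;
    sq_sum += static_cast<double>(v) * v;
  }
  const double mean = sum / x.size();
  const double var = sq_sum / x.size() - mean * mean; // E[x^2] - E[x]^2
  const double a = gamma / std::sqrt(var + eps);
  const double b = -mean * a + beta;
  y.resize(x.size());
  for (std::size_t i = 0; i < x.size(); ++i)
    y[i] = static_cast<float>(x[i] * a + b);
}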
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp
deleted file mode 100644 (file)
index 6422295..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/L2Normalize.h"
-#include "kernels/Utils.h"
-
-#include "PALL2Normalize.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-L2Normalize::L2Normalize(const Tensor *input, Tensor *output, const L2NormParams &params)
-  : KernelWithParams<L2NormParams>({input}, {output}, params)
-{
-}
-
-void L2Normalize::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= 4);
-  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 ||
-                         output()->element_type() == DataType::U8);
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  if (output()->element_type() == DataType::U8)
-  {
-    LUCI_INTERPRETER_CHECK(output()->scale() == (1. / 128.));
-    LUCI_INTERPRETER_CHECK(output()->zero_point() == 128);
-  }
-  LUCI_INTERPRETER_CHECK(params().activation == Activation::NONE);
-  output()->resize(input()->shape());
-}
-
-void L2Normalize::execute() const
-{
-  switch (output()->element_type())
-  {
-    case DataType::FLOAT32:
-      eval<float>(0);
-      break;
-    case DataType::U8:
-      eval<uint8_t>(input()->zero_point());
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-template <typename T> void L2Normalize::eval(int32_t zero_point) const
-{
-  tflite::L2NormalizationParams op_params{};
-  op_params.input_zero_point = zero_point;
-  luci_interpreter_pal::L2Normalization(op_params, getTensorShape(input()),
-                                        getTensorData<T>(input()), getTensorShape(output()),
-                                        getTensorData<T>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
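
The removed kernel above delegates to luci_interpreter_pal::L2Normalization; the underlying math divides each innermost-axis slice by its L2 norm. A sketch of that per-row computation, assuming float data and no zero-norm guard (illustrative only, not the PAL routine):

#include <cmath>
#include <vector>

// L2-normalize each row of length `depth`: y_i = x_i / sqrt(sum_j x_j^2).
void l2_normalize_rows(const std::vector<float> &x, std::vector<float> &y,
                       int outer, int depth)
{
  y.resize(x.size());
  for (int o = 0; o < outer; ++o)
  {
    double sq_sum = 0.0;
    for (int d = 0; d < depth; ++d)
      sq_sum += static_cast<double>(x[o * depth + d]) * x[o * depth + d];
    const double inv_norm = 1.0 / std::sqrt(sq_sum); // real kernels may guard sq_sum == 0
    for (int d = 0; d < depth; ++d)
      y[o * depth + d] = static_cast<float>(x[o * depth + d] * inv_norm);
  }
}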
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp
deleted file mode 100644 (file)
index 5a88808..0000000
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/L2Pool2D.h"
-
-#include "kernels/Utils.h"
-
-#include "PALL2Pool2D.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-L2Pool2D::L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
-  : KernelWithParams<Pool2DParams>({input}, {output}, params)
-{
-}
-
-void L2Pool2D::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-
-  int batches = input()->shape().dim(0);
-  int height = input()->shape().dim(1);
-  int width = input()->shape().dim(2);
-  int channels_out = input()->shape().dim(3);
-
-  // Matching GetWindowedOutputSize in TensorFlow.
-  auto padding = params().padding;
-  int out_width, out_height;
-  out_width = computeOutputSize(padding, width, params().filter_width, params().stride_width, 1);
-  out_height =
-    computeOutputSize(padding, height, params().filter_height, params().stride_height, 1);
-  _padding_width =
-    computePadding(params().stride_width, 1, width, params().filter_width, out_width);
-  _padding_height =
-    computePadding(params().stride_height, 1, height, params().filter_height, out_height);
-
-  LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32);
-  output()->resize({batches, out_height, out_width, channels_out});
-}
-
-void L2Pool2D::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      float activation_min, activation_max;
-      calculateActivationRange(params().activation, &activation_min, &activation_max);
-      tflite::PoolParams op_params;
-      op_params.stride_height = params().stride_height;
-      op_params.stride_width = params().stride_width;
-      op_params.filter_height = params().filter_height;
-      op_params.filter_width = params().filter_width;
-      op_params.padding_values.height = _padding_height;
-      op_params.padding_values.width = _padding_width;
-      op_params.float_activation_min = activation_min;
-      op_params.float_activation_max = activation_max;
-      luci_interpreter_pal::L2Pool(op_params, getTensorShape(input()),
-                                   getTensorData<float>(input()), getTensorShape(output()),
-                                   getTensorData<float>(output()));
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
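
The configure() of the removed L2Pool2D relies on the computeOutputSize and computePadding helpers. Their exact bodies are not part of this hunk; the conventional TFLite-style arithmetic they are expected to follow (dilation 1) looks roughly like the sketch below, given here only as an assumption for orientation:

// SAME/VALID output size and leading padding, dilation = 1; a sketch, not the exact helpers.
int output_size(bool same_padding, int in, int filter, int stride)
{
  return same_padding ? (in + stride - 1) / stride          // ceil(in / stride)
                      : (in - filter + stride) / stride;    // floor((in - filter) / stride) + 1
}

int padding_before(int stride, int in, int filter, int out)
{
  const int total = (out - 1) * stride + filter - in;
  return total > 0 ? total / 2 : 0;
}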
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp
deleted file mode 100644 (file)
index 3833a55..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/LeakyRelu.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
-
-#include "PALLeakyRelu.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-LeakyRelu::LeakyRelu(const Tensor *input, Tensor *output, const LeakyReluParams &params)
-  : KernelWithParams<LeakyReluParams>({input}, {output}, params)
-{
-}
-
-void LeakyRelu::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  if (input()->element_type() == DataType::U8)
-  {
-    double alpha_multiplier = input()->scale() * params().alpha / output()->scale();
-    quantizeMultiplier(alpha_multiplier, &_output_multiplier_alpha, &_output_shift_alpha);
-    double identity_multiplier = input()->scale() / output()->scale();
-    quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
-  }
-  output()->resize(input()->shape());
-}
-
-void LeakyRelu::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void LeakyRelu::evalFloat() const
-{
-  tflite::LeakyReluParams op_params{};
-  op_params.alpha = params().alpha;
-  luci_interpreter_pal::LeakyRelu(op_params, getTensorShape(input()), getTensorData<float>(input()),
-                                  getTensorShape(output()), getTensorData<float>(output()));
-}
-
-void LeakyRelu::evalQuantized() const
-{
-  tflite::LeakyReluParams op_params{};
-  op_params.input_offset = input()->zero_point();
-  op_params.output_offset = output()->zero_point();
-  op_params.output_multiplier_alpha = _output_multiplier_alpha;
-  op_params.output_shift_alpha = _output_shift_alpha;
-  op_params.output_multiplier_identity = _output_multiplier_identity;
-  op_params.output_shift_identity = _output_shift_identity;
-
-  tflite::reference_ops::QuantizeLeakyRelu(
-    op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(output()),
-    getTensorData<uint8_t>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
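
The quantized path of the removed LeakyRelu precomputes fixed-point multipliers with quantizeMultiplier. The usual decomposition behind such a helper represents a real multiplier as a Q0.31 mantissa times a power of two; the sketch below shows that idea for a non-negative multiplier and is an assumption about the shape of the helper, not the luci-interpreter implementation:

#include <cmath>
#include <cstdint>

// Decompose real ~= quantized * 2^(shift - 31), with quantized a Q0.31 value.
void quantize_multiplier_sketch(double real, int32_t *quantized, int *shift)
{
  if (real == 0.0)
  {
    *quantized = 0;
    *shift = 0;
    return;
  }
  const double q = std::frexp(real, shift); // real = q * 2^shift, q in [0.5, 1)
  int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) // rounding pushed q to 1.0; renormalize
  {
    q_fixed /= 2;
    ++*shift;
  }
  *quantized = static_cast<int32_t>(q_fixed);
}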
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp
deleted file mode 100644 (file)
index 8d26ff2..0000000
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Less.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Less::Less(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
-
-void Less::configure()
-{
-  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
-  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
-
-  if (x()->element_type() == DataType::U8)
-  {
-    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
-    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
-  }
-  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
-}
-
-void Less::execute() const
-{
-  switch (x()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::S64:
-      evalInteger<int64_t>();
-      break;
-    case DataType::S32:
-      evalInteger<int32_t>();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Less::evalFloat() const
-{
-  const auto x_data = getTensorData<float>(x());
-  const auto y_data = getTensorData<float>(y());
-  auto output_data = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (op_params.is_broadcast)
-  {
-    tflite::reference_ops::Broadcast4DSlowLess(op_params, getTensorShape(x()), x_data,
-                                               getTensorShape(y()), y_data,
-                                               getTensorShape(output()), output_data);
-  }
-  else
-  {
-    tflite::reference_ops::Less(op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data,
-                                getTensorShape(output()), output_data);
-  }
-}
-
-template <typename T> void Less::evalInteger() const
-{
-  const auto x_data = getTensorData<T>(x());
-  const auto y_data = getTensorData<T>(y());
-  auto output_data = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (op_params.is_broadcast)
-  {
-    tflite::reference_ops::Broadcast4DSlowLessNoScaling(op_params, getTensorShape(x()), x_data,
-                                                        getTensorShape(y()), y_data,
-                                                        getTensorShape(output()), output_data);
-  }
-  else
-  {
-    tflite::reference_ops::LessNoScaling(op_params, getTensorShape(x()), x_data,
-                                         getTensorShape(y()), y_data, getTensorShape(output()),
-                                         output_data);
-  }
-}
-
-void Less::evalQuantized() const
-{
-  const auto x_data = getTensorData<uint8_t>(x());
-  const auto y_data = getTensorData<uint8_t>(y());
-  auto output_data = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.left_shift = 8;
-  op_params.input1_offset = -x()->zero_point(); // Note the '-'
-  op_params.input1_shift = _x_shift;
-  op_params.input1_multiplier = _x_multiplier;
-  op_params.input2_offset = -y()->zero_point(); // Note the '-'
-  op_params.input2_shift = _y_shift;
-  op_params.input2_multiplier = _y_multiplier;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (op_params.is_broadcast)
-  {
-    tflite::reference_ops::Broadcast4DSlowLessWithScaling(op_params, getTensorShape(x()), x_data,
-                                                          getTensorShape(y()), y_data,
-                                                          getTensorShape(output()), output_data);
-  }
-  else
-  {
-    tflite::reference_ops::LessWithScaling(op_params, getTensorShape(x()), x_data,
-                                           getTensorShape(y()), y_data, getTensorShape(output()),
-                                           output_data);
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
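
In the removed quantized Less path, the left_shift, offsets and per-input multipliers rescale both uint8 operands into a shared fixed-point domain before comparing. Semantically this is equivalent to dequantizing both sides and comparing the real values; a float-domain sketch of one element pair (illustrative names, not the fixed-point reference kernel):

#include <cstdint>

// Dequantize-and-compare view of the quantized Less kernel above.
bool less_quantized(uint8_t x, int32_t x_zero_point, float x_scale,
                    uint8_t y, int32_t y_zero_point, float y_scale)
{
  const float xf = (static_cast<int32_t>(x) - x_zero_point) * x_scale;
  const float yf = (static_cast<int32_t>(y) - y_zero_point) * y_scale;
  return xf < yf;
}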
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp
deleted file mode 100644 (file)
index b474bc4..0000000
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/LessEqual.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-LessEqual::LessEqual(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
-
-void LessEqual::configure()
-{
-  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
-  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
-
-  if (x()->element_type() == DataType::U8)
-  {
-    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
-    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
-  }
-  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
-}
-
-void LessEqual::execute() const
-{
-  switch (x()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::S64:
-      evalInteger<int64_t>();
-      break;
-    case DataType::S32:
-      evalInteger<int32_t>();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void LessEqual::evalFloat() const
-{
-  const auto x_data = getTensorData<float>(x());
-  const auto y_data = getTensorData<float>(y());
-  auto output_data = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (op_params.is_broadcast)
-  {
-    tflite::reference_ops::Broadcast4DSlowLessEqual(op_params, getTensorShape(x()), x_data,
-                                                    getTensorShape(y()), y_data,
-                                                    getTensorShape(output()), output_data);
-  }
-  else
-  {
-    tflite::reference_ops::LessEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
-                                     y_data, getTensorShape(output()), output_data);
-  }
-}
-
-template <typename T> void LessEqual::evalInteger() const
-{
-  const auto x_data = getTensorData<T>(x());
-  const auto y_data = getTensorData<T>(y());
-  auto output_data = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (op_params.is_broadcast)
-  {
-    tflite::reference_ops::Broadcast4DSlowLessEqualNoScaling(op_params, getTensorShape(x()), x_data,
-                                                             getTensorShape(y()), y_data,
-                                                             getTensorShape(output()), output_data);
-  }
-  else
-  {
-    tflite::reference_ops::LessEqualNoScaling(op_params, getTensorShape(x()), x_data,
-                                              getTensorShape(y()), y_data, getTensorShape(output()),
-                                              output_data);
-  }
-}
-
-void LessEqual::evalQuantized() const
-{
-  const auto x_data = getTensorData<uint8_t>(x());
-  const auto y_data = getTensorData<uint8_t>(y());
-  auto output_data = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.left_shift = 8;
-  op_params.input1_offset = -x()->zero_point(); // Note the '-'
-  op_params.input1_shift = _x_shift;
-  op_params.input1_multiplier = _x_multiplier;
-  op_params.input2_offset = -y()->zero_point(); // Note the '-'
-  op_params.input2_shift = _y_shift;
-  op_params.input2_multiplier = _y_multiplier;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (op_params.is_broadcast)
-  {
-    tflite::reference_ops::Broadcast4DSlowLessEqualWithScaling(
-      op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
-      output_data);
-  }
-  else
-  {
-    tflite::reference_ops::LessEqualWithScaling(op_params, getTensorShape(x()), x_data,
-                                                getTensorShape(y()), y_data,
-                                                getTensorShape(output()), output_data);
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp
deleted file mode 100644 (file)
index a2bf442..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/LocalResponseNormalization.h"
-
-#include "kernels/Utils.h"
-
-#include "PALLocalResponseNormalization.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-LocalResponseNormalization::LocalResponseNormalization(
-  const Tensor *input, Tensor *output, const LocalResponseNormalizationParams &params)
-  : KernelWithParams<LocalResponseNormalizationParams>({input}, {output}, params)
-{
-}
-
-void LocalResponseNormalization::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
-  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  output()->resize(input()->shape());
-}
-
-void LocalResponseNormalization::execute() const
-{
-  switch (output()->element_type())
-  {
-    case DataType::FLOAT32:
-      tflite::LocalResponseNormalizationParams op_params;
-      op_params.range = params().radius;
-      op_params.bias = params().bias;
-      op_params.alpha = params().alpha;
-      op_params.beta = params().beta;
-      luci_interpreter_pal::LocalResponseNormalization(
-        op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(output()),
-        getTensorData<float>(output()));
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
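
The removed kernel forwards radius, bias, alpha and beta to the PAL routine; the reference math it stands for normalizes each value by a power of the squared sum over a channel window. A per-pixel sketch across the channel axis, assuming alpha is already scaled as the op params expect:

#include <algorithm>
#include <cmath>
#include <vector>

// LRN across channels: out[c] = in[c] / (bias + alpha * sum_{|d-c| <= range} in[d]^2)^beta.
void lrn_channels(const std::vector<float> &in, std::vector<float> &out,
                  int range, float bias, float alpha, float beta)
{
  const int depth = static_cast<int>(in.size());
  out.resize(depth);
  for (int c = 0; c < depth; ++c)
  {
    const int begin = std::max(0, c - range);
    const int end = std::min(depth - 1, c + range);
    float accum = 0.f;
    for (int d = begin; d <= end; ++d)
      accum += in[d] * in[d];
    out[c] = in[c] / std::pow(bias + alpha * accum, beta);
  }
}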
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp
deleted file mode 100644 (file)
index 79c3153..0000000
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/LogSoftmax.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/log_softmax.h>
-
-#include "PALLogSoftmax.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-LogSoftmax::LogSoftmax(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-void LogSoftmax::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  if (input()->element_type() == DataType::U8)
-  {
-    LUCI_INTERPRETER_CHECK(output()->scale() == 16. / 256);
-    LUCI_INTERPRETER_CHECK(output()->zero_point() == 255);
-
-    tflite::SoftmaxParams params{};
-
-    params.table = _table;
-    params.beta = 1.0;
-    luci_interpreter_pal::PopulateSoftmaxLookupTable(&params, input()->scale(), params.beta);
-  }
-  output()->resize(input()->shape());
-}
-
-void LogSoftmax::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void LogSoftmax::evalFloat() const
-{
-  tflite::SoftmaxParams params{};
-  tflite::reference_ops::LogSoftmax(params, getTensorShape(input()), getTensorData<float>(input()),
-                                    getTensorShape(output()), getTensorData<float>(output()));
-}
-
-void LogSoftmax::evalQuantized() const
-{
-  const auto input_shape = getTensorShape(input());
-  const auto output_shape = getTensorShape(output());
-  const auto input_scale = input()->scale();
-  uint8_t *output_data = getTensorData<uint8_t>(output());
-  const uint8_t *input_data = getTensorData<uint8_t>(input());
-  const float beta = 1.0;
-
-  tflite::SoftmaxParams params{};
-
-  params.table = const_cast<float *>(_table);
-  params.zero_point = output()->zero_point();
-  params.scale = output()->scale();
-
-  luci_interpreter_pal::InitializeParams(&params, input_scale, beta);
-  luci_interpreter_pal::LogSoftmax(params, input_scale, input_shape, input_data, output_shape,
-                                   output_data);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
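
The float path of the removed LogSoftmax computes log-softmax over the innermost axis. The standard numerically stable form it corresponds to, as a one-row sketch:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Stable log-softmax: y_i = x_i - max - log(sum_j exp(x_j - max)).
void log_softmax_row(const std::vector<float> &x, std::vector<float> &y)
{
  const float max = *std::max_element(x.begin(), x.end());
  double sum = 0.0;
  for (float v : x)
    sum += std::exp(v - max);
  const float log_sum = static_cast<float>(std::log(sum));
  y.resize(x.size());
  for (std::size_t i = 0; i < x.size(); ++i)
    y[i] = x[i] - max - log_sum;
}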
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp
deleted file mode 100644 (file)
index 8e72632..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/LogicalAnd.h"
-
-#include "kernels/Utils.h"
-
-#include "kernels/BinaryOpCommon.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-LogicalAnd::LogicalAnd(const Tensor *input1, const Tensor *input2, Tensor *output)
-  : Kernel({input1, input2}, {output})
-{
-}
-
-void LogicalAnd::configure()
-{
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
-  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
-}
-
-void LogicalAnd::execute() const
-{
-  switch (input1()->element_type())
-  {
-    case DataType::BOOL:
-      evalLogicalAnd();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-inline void LogicalAnd::evalLogicalAnd() const
-{
-  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<bool>(input1()),
-                        getTensorShape(input2()), getTensorData<bool>(input2()),
-                        getTensorShape(output()), getTensorData<bool>(output()),
-                        [](bool x, bool y) { return x && y; });
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp
deleted file mode 100644 (file)
index 65ab961..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/LogicalNot.h"
-
-#include "kernels/Utils.h"
-
-#include "kernels/BinaryOpCommon.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-LogicalNot::LogicalNot(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-void LogicalNot::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  output()->resize(input()->shape());
-}
-
-void LogicalNot::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::BOOL:
-      evalLogicalNot();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-inline void LogicalNot::evalLogicalNot() const
-{
-  const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
-  bool *output_data = getTensorData<bool>(output());
-  const bool *input_data = getTensorData<bool>(input());
-  for (int i = 0; i < size; ++i)
-  {
-    output_data[i] = !input_data[i];
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp
deleted file mode 100644 (file)
index f289ca6..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/LogicalOr.h"
-
-#include "kernels/Utils.h"
-#include "kernels/BinaryOpCommon.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-LogicalOr::LogicalOr(const Tensor *input1, const Tensor *input2, Tensor *output)
-  : Kernel({input1, input2}, {output})
-{
-}
-
-void LogicalOr::configure()
-{
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == DataType::BOOL);
-  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
-}
-
-void LogicalOr::execute() const
-{
-  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<bool>(input1()),
-                        getTensorShape(input2()), getTensorData<bool>(input2()),
-                        getTensorShape(output()), getTensorData<bool>(output()),
-                        [](bool x, bool y) { return x || y; });
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp
deleted file mode 100644 (file)
index 58e4f18..0000000
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Logistic.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/logistic.h>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Logistic::Logistic(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-void Logistic::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  if (input()->element_type() == DataType::U8)
-  {
-    LUCI_INTERPRETER_CHECK(output()->scale() == 1. / 256);
-    populateLookupTable();
-  }
-  output()->resize(input()->shape());
-}
-
-void Logistic::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Logistic::evalFloat() const
-{
-  tflite::reference_ops::Logistic(getTensorShape(input()), getTensorData<float>(input()),
-                                  getTensorShape(output()), getTensorData<float>(output()));
-}
-
-void Logistic::evalQuantized() const
-{
-  const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
-  uint8_t *output_data = getTensorData<uint8_t>(output());
-  const uint8_t *input_data = getTensorData<uint8_t>(input());
-  for (int i = 0; i < size; ++i)
-  {
-    output_data[i] = getTableValue(input_data[i]);
-  }
-}
-
-void Logistic::populateLookupTable()
-{
-  const auto input_scale = static_cast<double>(input()->scale());
-  const auto input_zero_point = static_cast<int32_t>(input()->zero_point());
-  const auto output_scale = static_cast<double>(output()->scale());
-  const auto output_zero_point = static_cast<int32_t>(output()->zero_point());
-  const float inverse_scale = 1 / output_scale;
-  int32_t maxval = std::numeric_limits<uint8_t>::max();
-  int32_t minval = std::numeric_limits<uint8_t>::min();
-  for (int32_t val = minval; val <= maxval; ++val)
-  {
-    const float dequantized = input_scale * (val - input_zero_point);
-    const float transformed = 1.0f / (1.0f + std::exp(-dequantized));
-    const float rescaled = std::round(transformed * inverse_scale);
-    const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
-    setTableValue(static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)),
-                  static_cast<uint8_t>(val));
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
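
A worked example of one entry of the 256-entry table built by the removed populateLookupTable, using illustrative quantization parameters (not values taken from this repository): with input scale 0.1 and input zero point 128, the entry for val = 148 dequantizes to 0.1 * (148 - 128) = 2.0; sigmoid(2.0) is about 0.8808; with output scale 1/256 the inverse scale is 256, so the rescaled value is round(0.8808 * 256) = 225; taking the output zero point as 0 and clamping to [0, 255] gives _table[148] = 225.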
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h
deleted file mode 100644 (file)
index 31de6ad..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_KERNELS_LOGISTIC_H
-#define LUCI_INTERPRETER_KERNELS_LOGISTIC_H
-
-#include "core/Kernel.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-class Logistic : public Kernel
-{
-public:
-  Logistic(const Tensor *input, Tensor *output);
-
-  const Tensor *input() const { return _inputs[0]; }
-  Tensor *output() const { return _outputs[0]; }
-
-  void configure() override;
-  void execute() const override;
-
-private:
-  void evalFloat() const;
-  void evalQuantized() const;
-  void populateLookupTable();
-  void setTableValue(uint8_t value, uint8_t idx) { _table[idx] = value; };
-  uint8_t getTableValue(uint8_t idx) const { return _table[idx]; };
-
-private:
-  uint8_t _table[256]{};
-};
-
-} // namespace kernels
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_KERNELS_LOGISTIC_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp
deleted file mode 100644 (file)
index 5a1ea66..0000000
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Logistic.h"
-#include "kernels/TestUtils.h"
-#include "luci_interpreter/TestMemoryManager.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace
-{
-
-using namespace testing;
-
-template <typename T>
-void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
-           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
-{
-  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
-
-  Tensor input_tensor =
-    makeInputTensor<getElementType<T>()>(input_shape, input_data, memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(getElementType<T>());
-
-  Logistic kernel(&input_tensor, &output_tensor);
-  kernel.configure();
-  memory_manager->allocate_memory(output_tensor);
-  kernel.execute();
-
-  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
-}
-
-template <>
-void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
-                    std::initializer_list<int32_t> output_shape,
-                    std::initializer_list<float> input_data,
-                    std::initializer_list<float> output_data)
-{
-  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
-
-  std::pair<float, int32_t> input_quant_param =
-    quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
-                                  input_data, memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
-
-  Logistic kernel(&input_tensor, &output_tensor);
-  kernel.configure();
-  memory_manager->allocate_memory(output_tensor);
-  kernel.execute();
-
-  EXPECT_THAT(dequantizeTensorData(output_tensor),
-              FloatArrayNear(output_data, output_tensor.scale() * 2));
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
-}
-
-template <typename T> class LogisticTest : public ::testing::Test
-{
-};
-
-using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_SUITE(LogisticTest, DataTypes);
-
-TYPED_TEST(LogisticTest, Simple)
-{
-  Check<TypeParam>(
-    {89}, {89},
-    {-10.0000000000, -9.7727272727, -9.5454545455, -9.3181818182, -9.0909090909, -8.8636363636,
-     -8.6363636364,  -8.4090909091, -8.1818181818, -7.9545454545, -7.7272727273, -7.5000000000,
-     -7.2727272727,  -7.0454545455, -6.8181818182, -6.5909090909, -6.3636363636, -6.1363636364,
-     -5.9090909091,  -5.6818181818, -5.4545454545, -5.2272727273, -5.0000000000, -4.7727272727,
-     -4.5454545455,  -4.3181818182, -4.0909090909, -3.8636363636, -3.6363636364, -3.4090909091,
-     -3.1818181818,  -2.9545454545, -2.7272727273, -2.5000000000, -2.2727272727, -2.0454545455,
-     -1.8181818182,  -1.5909090909, -1.3636363636, -1.1363636364, -0.9090909091, -0.6818181818,
-     -0.4545454545,  -0.2272727273, 0.0000000000,  0.2272727273,  0.4545454545,  0.6818181818,
-     0.9090909091,   1.1363636364,  1.3636363636,  1.5909090909,  1.8181818182,  2.0454545455,
-     2.2727272727,   2.5000000000,  2.7272727273,  2.9545454545,  3.1818181818,  3.4090909091,
-     3.6363636364,   3.8636363636,  4.0909090909,  4.3181818182,  4.5454545455,  4.7727272727,
-     5.0000000000,   5.2272727273,  5.4545454545,  5.6818181818,  5.9090909091,  6.1363636364,
-     6.3636363636,   6.5909090909,  6.8181818182,  7.0454545455,  7.2727272727,  7.5000000000,
-     7.7272727273,   7.9545454545,  8.1818181818,  8.4090909091,  8.6363636364,  8.8636363636,
-     9.0909090909,   9.3181818182,  9.5454545455,  9.7727272727,  10.0000000000},
-    {0.0000453979, 0.0000569815, 0.0000715205, 0.0000897689, 0.0001126729, 0.0001414198,
-     0.0001774998, 0.0002227827, 0.0002796147, 0.0003509396, 0.0004404502, 0.0005527786,
-     0.0006937345, 0.0008706021, 0.0010925128, 0.0013709094, 0.0017201256, 0.0021581065,
-     0.0027073042, 0.0033957870, 0.0042586071, 0.0053394826, 0.0066928509, 0.0083863576,
-     0.0105038445, 0.0131488902, 0.0164489307, 0.0205599431, 0.0256715863, 0.0320125562,
-     0.0398556989, 0.0495221198, 0.0613831074, 0.0758581800, 0.0934070047, 0.1145124805,
-     0.1396521834, 0.1692560327, 0.2036499335, 0.2429886272, 0.2871859014, 0.3358556241,
-     0.3882805886, 0.4434251301, 0.5000000000, 0.5565748699, 0.6117194114, 0.6641443759,
-     0.7128140986, 0.7570113728, 0.7963500665, 0.8307439673, 0.8603478166, 0.8854875195,
-     0.9065929953, 0.9241418200, 0.9386168926, 0.9504778802, 0.9601443011, 0.9679874438,
-     0.9743284137, 0.9794400569, 0.9835510693, 0.9868511098, 0.9894961555, 0.9916136424,
-     0.9933071491, 0.9946605174, 0.9957413929, 0.9966042130, 0.9972926958, 0.9978418935,
-     0.9982798744, 0.9986290906, 0.9989074872, 0.9991293979, 0.9993062655, 0.9994472214,
-     0.9995595498, 0.9996490604, 0.9997203853, 0.9997772173, 0.9998225002, 0.9998585802,
-     0.9998873271, 0.9999102311, 0.9999284795, 0.9999430185, 0.9999546021});
-}
-
-TEST(LogisticTest, IvalidInputOutputType_NEG)
-{
-  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
-
-  Shape input_shape = {1};
-  std::vector<float> input_data{10};
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
-
-  Logistic kernel(&input_tensor, &output_tensor);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST(LogisticTest, IvalidQuantParam_NEG)
-{
-  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
-  Shape input_shape = {2};
-  std::vector<float> input_data{-10, 10};
-  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-10, 10);
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
-                                  input_data, memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 255, 0);
-
-  Logistic kernel(&input_tensor, &output_tensor);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-} // namespace
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp
deleted file mode 100644 (file)
index 8d9760f..0000000
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/MaxPool2D.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
-#include <tensorflow/lite/kernels/internal/reference/pooling.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-MaxPool2D::MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
-  : KernelWithParams<Pool2DParams>({input}, {output}, params)
-{
-}
-
-void MaxPool2D::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  assert(input()->shape().num_dims() == 4);
-  const Shape &input_shape = input()->shape();
-  const int32_t batches = input_shape.dim(0);
-  const int32_t input_height = input_shape.dim(1);
-  const int32_t input_width = input_shape.dim(2);
-  const int32_t depth = input_shape.dim(3);
-
-  const int32_t output_height =
-    computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height);
-  const int32_t output_width =
-    computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
-
-  _padding_height =
-    computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
-  _padding_width =
-    computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
-
-  output()->resize({batches, output_height, output_width, depth});
-  if (input()->element_type() == DataType::U8)
-  {
-    LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
-    LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
-  }
-  else if (input()->element_type() == DataType::S16)
-  {
-    LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
-    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
-  }
-}
-
-void MaxPool2D::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    case DataType::S16:
-      evalSInt16();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void MaxPool2D::evalFloat() const
-{
-  float activation_min{};
-  float activation_max{};
-  calculateActivationRange(_params.activation, &activation_min, &activation_max);
-
-  tflite::PoolParams params{};
-  params.padding_values.height = _padding_height;
-  params.padding_values.width = _padding_width;
-  params.stride_height = _params.stride_height;
-  params.stride_width = _params.stride_width;
-  params.filter_height = _params.filter_height;
-  params.filter_width = _params.filter_width;
-  params.float_activation_min = activation_min;
-  params.float_activation_max = activation_max;
-
-  tflite::reference_ops::MaxPool(params, getTensorShape(input()), getTensorData<float>(input()),
-                                 getTensorShape(output()), getTensorData<float>(output()));
-}
-
-void MaxPool2D::evalQuantized() const
-{
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  tflite::PoolParams params{};
-  params.padding_values.height = _padding_height;
-  params.padding_values.width = _padding_width;
-  params.stride_height = _params.stride_height;
-  params.stride_width = _params.stride_width;
-  params.filter_height = _params.filter_height;
-  params.filter_width = _params.filter_width;
-  params.quantized_activation_min = activation_min;
-  params.quantized_activation_max = activation_max;
-
-  tflite::reference_ops::MaxPool(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
-                                 getTensorShape(output()), getTensorData<uint8_t>(output()));
-}
-
-void MaxPool2D::evalSInt16() const
-{
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  tflite::PoolParams params{};
-  params.padding_values.height = _padding_height;
-  params.padding_values.width = _padding_width;
-  params.stride_height = _params.stride_height;
-  params.stride_width = _params.stride_width;
-  params.filter_height = _params.filter_height;
-  params.filter_width = _params.filter_width;
-  params.quantized_activation_min = activation_min;
-  params.quantized_activation_max = activation_max;
-
-  tflite::reference_integer_ops::MaxPool(
-    params, getTensorShape(input()), getTensorData<int16_t>(input()), //
-    getTensorShape(output()), getTensorData<int16_t>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h
deleted file mode 100644 (file)
index bb76663..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
-#define LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
-
-#include "core/Kernel.h"
-#include "core/KernelParams.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-class MaxPool2D : public KernelWithParams<Pool2DParams>
-{
-public:
-  MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams &params);
-
-  const Tensor *input() const { return _inputs[0]; }
-  Tensor *output() const { return _outputs[0]; }
-
-  void configure() override;
-  void execute() const override;
-
-private:
-  void evalFloat() const;
-  void evalQuantized() const;
-  void evalSInt16() const;
-
-private:
-  int32_t _padding_height{};
-  int32_t _padding_width{};
-};
-
-} // namespace kernels
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp
deleted file mode 100644 (file)
index 44f2a22..0000000
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/MaxPool2D.h"
-#include "kernels/TestUtils.h"
-#include "luci_interpreter/TestMemoryManager.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace
-{
-
-using namespace testing;
-
-class MaxPool2DTest : public ::testing::Test
-{
-protected:
-  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
-
-  std::unique_ptr<IMemoryManager> _memory_manager;
-};
-
-TEST_F(MaxPool2DTest, Float)
-{
-  Shape input_shape{1, 3, 5, 1};
-  std::vector<float> input_data{
-    1,  -1, 0,  -2, 2,  //
-    -7, -6, -5, -4, -3, //
-    5,  4,  3,  6,  7,  //
-  };
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
-  Pool2DParams params{};
-  params.padding = Padding::VALID;
-  params.filter_height = 2;
-  params.filter_width = 3;
-  params.stride_height = 1;
-  params.stride_width = 2;
-  params.activation = Activation::RELU6;
-
-  MaxPool2D kernel(&input_tensor, &output_tensor, params);
-  kernel.configure();
-  _memory_manager->allocate_memory(output_tensor);
-  kernel.execute();
-
-  std::vector<float> ref_output_data{
-    1, 2, //
-    5, 6, //
-  };
-  std::initializer_list<int32_t> ref_output_shape{1, 2, 2, 1};
-  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
-}
-
-TEST_F(MaxPool2DTest, Uint8)
-{
-  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375, 15.9375);
-  std::vector<float> input_data{
-    0,  -6, 12, 4, //
-    -3, -2, 10, 7, //
-  };
-  Tensor input_tensor = makeInputTensor<DataType::U8>(
-    {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
-
-  Pool2DParams params{};
-  params.padding = Padding::VALID;
-  params.filter_height = 2;
-  params.filter_width = 2;
-  params.stride_height = 2;
-  params.stride_width = 2;
-  params.activation = Activation::RELU6;
-
-  MaxPool2D kernel(&input_tensor, &output_tensor, params);
-  kernel.configure();
-  _memory_manager->allocate_memory(output_tensor);
-  kernel.execute();
-
-  std::vector<float> ref_output_data{0.0, 6.0};
-  std::initializer_list<int32_t> ref_output_shape{1, 1, 2, 1};
-  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
-}
-
-TEST_F(MaxPool2DTest, SInt16)
-{
-  Shape input_shape{1, 3, 5, 1};
-  std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
-  std::vector<float> input_data{
-    1,  -1, 0,  -2, 2,  //
-    -7, -6, -5, -4, -3, //
-    5,  4,  3,  6,  7,  //
-  };
-  std::vector<float> ref_output_data{
-    1, 2, //
-    5, 6, //
-  };
-
-  Tensor input_tensor =
-    makeInputTensor<DataType::S16>(input_shape, 0.2, 0, input_data, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0);
-
-  Pool2DParams params{};
-  params.padding = Padding::VALID;
-  params.filter_height = 2;
-  params.filter_width = 3;
-  params.stride_height = 1;
-  params.stride_width = 2;
-  params.activation = Activation::RELU6;
-
-  MaxPool2D kernel(&input_tensor, &output_tensor, params);
-  kernel.configure();
-  _memory_manager->allocate_memory(output_tensor);
-  kernel.execute();
-
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
-  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
-}
-
-} // namespace
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp
deleted file mode 100644 (file)
index b102b5e..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Maximum.h"
-
-#include "kernels/Utils.h"
-
-#include "kernels/BinaryOpCommon.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Maximum::Maximum(const Tensor *input1, const Tensor *input2, Tensor *output)
-  : Kernel({input1, input2}, {output})
-{
-}
-
-void Maximum::configure()
-{
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type())
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type())
-  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
-}
-
-void Maximum::execute() const
-{
-  switch (input1()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalMaximum<float>();
-      break;
-    case DataType::U8:
-      evalMaximum<uint8_t>();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-template <typename T> inline void Maximum::evalMaximum() const
-{
-  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
-                        getTensorShape(input2()), getTensorData<T>(input2()),
-                        getTensorShape(output()), getTensorData<T>(output()),
-                        [](T x, T y) { return std::max(x, y); });
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp
deleted file mode 100644 (file)
index 8e65e0d..0000000
+++ /dev/null
@@ -1,346 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Mean.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/reduce.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-static void resolveAxes(const int32_t *axes_data, int num_axes, tflite::MeanParams *params)
-{
-  params->axis_count = num_axes;
-  for (int i = 0; i < num_axes; ++i)
-  {
-    params->axis[i] = static_cast<int16>(axes_data[i]);
-  }
-  for (int i = num_axes; i < 4; ++i)
-  {
-    params->axis[i] = 1;
-  }
-}
-
-// Returns the number of axes that will be reduced. Removes duplicates.
-static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
-{
-  int reduction_count = num_axes;
-  for (int i = 0; i < num_axes; ++i)
-  {
-    int current = axes_data[i] >= 0 ? axes_data[i] : axes_data[i] + input_num_dims;
-    assert(current >= 0 && current < input_num_dims);
-    for (int j = 0; j < i; j++)
-    {
-      int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims;
-      // This checks for duplicate axes
-      if (current == previous)
-      {
-        --reduction_count;
-        break;
-      }
-    }
-  }
-  return reduction_count;
-}
-
-static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
-                            bool keep_dims)
-{
-  int input_num_dims = input_shape.num_dims();
-  if (input_num_dims == 0)
-  {
-    return Shape(0);
-  }
-
-  if (keep_dims)
-  {
-    Shape output_shape(input_num_dims);
-    for (int idx = 0; idx < input_num_dims; ++idx)
-    {
-      bool is_axis = false;
-      for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
-      {
-        if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
-        {
-          is_axis = true;
-          break;
-        }
-      }
-      if (is_axis)
-      {
-        output_shape.dim(idx) = 1;
-      }
-      else
-      {
-        output_shape.dim(idx) = input_shape.dim(idx);
-      }
-    }
-    return output_shape;
-  }
-  else
-  {
-    int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims);
-    Shape output_shape(input_num_dims - num_reduce_axes);
-    int num_skip_axes = 0;
-    for (int idx = 0; idx < input_num_dims; ++idx)
-    {
-      bool is_axis = false;
-      for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
-      {
-        if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
-        {
-          ++num_skip_axes;
-          is_axis = true;
-          break;
-        }
-      }
-      if (!is_axis)
-      {
-        output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx);
-      }
-    }
-    return output_shape;
-  }
-}
-
-Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
-           Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params)
-  : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes, temp_sum},
-                                    params)
-{
-}
-
-void Mean::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
-  if (input()->element_type() == DataType::S16)
-  {
-    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
-  }
-
-  const Shape &input_shape = input()->shape();
-  int input_num_dims = input_shape.num_dims();
-
-  const auto *axes_data = getTensorData<int32_t>(axes());
-  int num_axes = axes()->shape().num_elements();
-  assert(num_axes <= 4);
-
-  Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims);
-  output()->resize(output_shape);
-
-  tflite::MeanParams params{};
-  resolveAxes(axes_data, num_axes, &params);
-  _need_temporaries = !(
-    _params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
-    ((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1)));
-  if (_need_temporaries)
-  {
-    auto temp_index = getOutputTensors()[1];
-    auto resolved_axes = getOutputTensors()[2];
-    auto temp_sum = getOutputTensors()[3];
-
-    temp_index->resize(Shape(input_num_dims));
-    resolved_axes->resize(Shape(num_axes));
-    temp_sum->resize(output()->shape());
-  }
-  else
-  {
-    auto temp_index = getOutputTensors()[1];
-    auto resolved_axes = getOutputTensors()[2];
-    auto temp_sum = getOutputTensors()[3];
-
-    temp_index->set_allocatable(false);
-    resolved_axes->set_allocatable(false);
-    temp_sum->set_allocatable(false);
-  }
-}
-
-void Mean::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    case DataType::S16:
-      evalQuantizedS16();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Mean::evalFloat() const
-{
-  const Shape &input_shape = input()->shape();
-  int input_num_dims = input_shape.num_dims();
-  const auto *axes_data = getTensorData<int32_t>(axes());
-  int num_axes = axes()->shape().num_elements();
-
-  tflite::MeanParams params{};
-  resolveAxes(axes_data, num_axes, &params);
-
-  auto temp_index = getOutputTensors()[1];
-  auto resolved_axes = getOutputTensors()[2];
-  auto temp_sum = getOutputTensors()[3];
-
-  // Defer to specialized implementation for 4D Mean across axes 1 & 2.
-  if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
-      ((params.axis[0] == 1 && params.axis[1] == 2) ||
-       (params.axis[0] == 2 && params.axis[1] == 1)))
-  {
-    tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<float>(input()),
-                                getTensorShape(output()), getTensorData<float>(output()));
-  }
-  else
-  {
-    tflite::reference_ops::Mean(getTensorData<float>(input()), getTensorShape(input()).DimsData(),
-                                input()->shape().num_dims(), getTensorData<float>(output()),
-                                getTensorShape(output()).DimsData(), output()->shape().num_dims(),
-                                axes_data, num_axes, _params.keep_dims,
-                                getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
-                                getTensorData<float>(temp_sum));
-  }
-}
-
-void Mean::evalQuantized() const
-{
-  const Shape &input_shape = input()->shape();
-  int input_num_dims = input_shape.num_dims();
-  const auto *axes_data = getTensorData<int32_t>(axes());
-  int num_axes = axes()->shape().num_elements();
-
-  tflite::MeanParams params{};
-  resolveAxes(axes_data, num_axes, &params);
-
-  auto temp_index = getOutputTensors()[1];
-  auto resolved_axes = getOutputTensors()[2];
-  auto temp_sum = getOutputTensors()[3];
-
-  // Defer to specialized implementation for 4D Mean across axes 1 & 2.
-  if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
-      ((params.axis[0] == 1 && params.axis[1] == 2) ||
-       (params.axis[0] == 2 && params.axis[1] == 1)))
-  {
-    tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
-                                input()->zero_point(), input()->scale(), getTensorShape(output()),
-                                getTensorData<uint8_t>(output()), output()->zero_point(),
-                                output()->scale());
-  }
-  else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale())
-  {
-    tflite::reference_ops::Mean(getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
-                                input()->shape().num_dims(), getTensorData<uint8_t>(output()),
-                                getTensorShape(output()).DimsData(), output()->shape().num_dims(),
-                                axes_data, num_axes, _params.keep_dims,
-                                getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
-                                getTensorData<int>(temp_sum));
-  }
-  else
-  {
-    tflite::reference_ops::QuantizedMeanOrSum<>(
-      getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(),
-      getTensorShape(input()).DimsData(), input()->shape().num_dims(),
-      getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
-      getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
-      _params.keep_dims, getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
-      getTensorData<int>(temp_sum),
-      /*compute_sum=*/false);
-  }
-}
-
-void Mean::evalQuantizedS16() const
-{
-  const auto *input_data = getTensorData<int16_t>(input());
-  auto *output_data = getTensorData<int16_t>(output());
-
-  const Shape &input_shape = input()->shape();
-  const Shape &output_shape = output()->shape();
-
-  const auto *axes_data = getTensorData<int32_t>(axes());
-  const int num_axes = axes()->shape().num_elements();
-
-  constexpr int32_t output_min = -std::numeric_limits<int16_t>::max();
-  constexpr int32_t output_max = std::numeric_limits<int16_t>::max();
-
-  // Defer to specialized implementation for 4D Mean across axes 1 & 2.
-  if (_params.keep_dims && input_shape.num_dims() == 4 && num_axes == 2 &&
-      ((axes_data[0] == 1 && axes_data[1] == 2) || (axes_data[0] == 2 && axes_data[1] == 1)))
-  {
-    const int32_t batches = input_shape.dim(0);
-    const int32_t input_height = input_shape.dim(1);
-    const int32_t input_width = input_shape.dim(2);
-    const int32_t depth = input_shape.dim(3);
-    assert(output_shape.num_dims() == 4);
-    assert(output_shape.dim(0) == batches);
-    assert(output_shape.dim(1) == 1);
-    assert(output_shape.dim(2) == 1);
-    assert(output_shape.dim(3) == depth);
-
-    const double real_multiplier =
-      static_cast<double>(input()->scale()) / static_cast<double>(output()->scale());
-
-    int32_t output_multiplier{};
-    int output_shift{};
-    quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
-
-    const int32_t num_elements_in_axes = input_height * input_width;
-
-    for (int32_t batch = 0; batch < batches; ++batch)
-    {
-      for (int32_t c = 0; c < depth; ++c)
-      {
-        int32_t acc = 0;
-        for (int32_t in_y = 0; in_y < input_height; ++in_y)
-        {
-          for (int32_t in_x = 0; in_x < input_width; ++in_x)
-          {
-            acc += input_data[calcOffset(input_shape, batch, in_y, in_x, c)];
-          }
-        }
-        int32_t scaled_acc =
-          tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
-        // Divide by the number of elements rounding to the nearest integer.
-        scaled_acc = scaled_acc > 0
-                       ? (scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes
-                       : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes;
-
-        scaled_acc = std::max(scaled_acc, output_min);
-        scaled_acc = std::min(scaled_acc, output_max);
-
-        output_data[calcOffset(output_shape, batch, 0, 0, c)] = scaled_acc;
-      }
-    }
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported configuration.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
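
Note on the deleted Mean kernel above: in the S16 path, after the quantized-multiplier rescale, the accumulator is divided by the number of reduced elements with round-to-nearest semantics. A small standalone check of that rounding rule (plain C++ restatement with illustrative values, not interpreter code):

  #include <cassert>
  #include <cstdint>

  // Round-to-nearest integer division: add (or subtract) half the divisor
  // before truncating toward zero, as in Mean::evalQuantizedS16 above.
  static int32_t divRoundNearest(int32_t acc, int32_t count)
  {
    return acc > 0 ? (acc + count / 2) / count : (acc - count / 2) / count;
  }

  int main()
  {
    assert(divRoundNearest(7, 4) == 2);   // 1.75 rounds to 2
    assert(divRoundNearest(-7, 4) == -2); // -1.75 rounds to -2
    assert(divRoundNearest(6, 4) == 2);   // 1.5 rounds away from zero
    return 0;
  }
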
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp
deleted file mode 100644 (file)
index 5d3dcde..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Minimum.h"
-
-#include "kernels/Utils.h"
-
-#include "kernels/BinaryOpCommon.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Minimum::Minimum(const Tensor *input1, const Tensor *input2, Tensor *output)
-  : Kernel({input1, input2}, {output})
-{
-}
-
-void Minimum::configure()
-{
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type())
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type())
-  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
-}
-
-void Minimum::execute() const
-{
-  switch (input1()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalMinimum<float>();
-      break;
-    case DataType::U8:
-      evalMinimum<uint8_t>();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-template <typename T> inline void Minimum::evalMinimum() const
-{
-  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
-                        getTensorShape(input2()), getTensorData<T>(input2()),
-                        getTensorShape(output()), getTensorData<T>(output()),
-                        [](T x, T y) { return std::min(x, y); });
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp
deleted file mode 100644 (file)
index bae1eac..0000000
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/MirrorPad.h"
-
-#include "kernels/Utils.h"
-
-#include <limits>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-MirrorPad::MirrorPad(const Tensor *input, const Tensor *paddings, Tensor *output,
-                     const MirrorPadParams &params)
-  : KernelWithParams<MirrorPadParams>({input, paddings}, {output}, params)
-{
-}
-
-void MirrorPad::configure()
-{
-  const Shape &input_shape = input()->shape();
-  const int num_dims = input_shape.num_dims();
-
-  if (num_dims > 4)
-    throw std::runtime_error("Unsupported number of dimensions.");
-
-  assert(output()->element_type() == input()->element_type());
-  assert(paddings()->element_type() == DataType::S32);
-  // Paddings shape should be [N, 2].
-  assert(paddings()->shape().num_dims() == 2);
-  assert(paddings()->shape().dim(0) == num_dims);
-  assert(paddings()->shape().dim(1) == 2);
-
-  Shape output_shape(num_dims);
-  const auto *paddings_data = getTensorData<int32_t>(paddings());
-  for (int i = 0; i < num_dims; ++i)
-  {
-    const int32_t padding_before = paddings_data[i * 2];
-    const int32_t padding_after = paddings_data[i * 2 + 1];
-    assert(padding_before >= 0 && padding_after >= 0);
-    output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
-  }
-
-  output()->resize(output_shape);
-}
-
-template <typename T>
-inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
-                          Tensor &output);
-
-void MirrorPad::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-    {
-      MirrorPadImpl<float>(*input(), *paddings(), params().mode, *output());
-      break;
-    }
-    case DataType::U8:
-    {
-      assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
-      assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
-
-      MirrorPadImpl<uint8_t>(*input(), *paddings(), params().mode, *output());
-      break;
-    }
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-template <typename T>
-inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
-                          Tensor &output)
-{
-  auto const input_dims = input.shape().num_dims();
-  auto const input_data = input.data<T>();
-  auto const paddings_data = paddings.data<int32_t>();
-  auto const output_data = output.data<T>();
-
-  auto const input_b = input_dims > 3 ? input.shape().dim(input_dims - 4) : 1;
-  auto const input_h = input_dims > 2 ? input.shape().dim(input_dims - 3) : 1;
-  auto const input_w = input_dims > 1 ? input.shape().dim(input_dims - 2) : 1;
-  auto const input_d = input.shape().dim(input_dims - 1);
-
-  auto const input_h_offset = input_d * input_w;
-  auto const input_b_offset = input_h_offset * input_h;
-
-  auto const output_b = input_dims > 3 ? output.shape().dim(input_dims - 4) : 1;
-  auto const output_h = input_dims > 2 ? output.shape().dim(input_dims - 3) : 1;
-  auto const output_w = input_dims > 1 ? output.shape().dim(input_dims - 2) : 1;
-  auto const output_d = output.shape().dim(input_dims - 1);
-
-  auto const left_b_pad = paddings_data[2 * (input_dims - 4)];
-  auto const left_h_pad = paddings_data[2 * (input_dims - 3)];
-  auto const left_w_pad = paddings_data[2 * (input_dims - 2)];
-  auto const left_d_pad = paddings_data[2 * (input_dims - 1)];
-
-  auto const right_b_pad = paddings_data[2 * (input_dims - 4) + 1];
-  auto const right_h_pad = paddings_data[2 * (input_dims - 3) + 1];
-  auto const right_w_pad = paddings_data[2 * (input_dims - 2) + 1];
-  auto const right_d_pad = paddings_data[2 * (input_dims - 1) + 1];
-
-  const auto positive_mod = [](auto a, auto b) { return (a % b + b) % b; };
-  const auto offset_index = [input_d, input_h_offset, input_b_offset](auto d, auto w, auto h,
-                                                                      auto b) {
-    return d + w * input_d + h * input_h_offset + b * input_b_offset;
-  };
-
-  const auto symmetric_dim = [&positive_mod](auto i, auto left_pad, auto input) {
-    bool reflected = (((i < left_pad ? i + 1 - input : i) - left_pad) / input & 1) == 1;
-    return positive_mod(reflected ? input + left_pad - i - 1 : i - left_pad, input);
-  };
-
-  const T *in_ptr = input_data;
-  T *out_ptr = output_data;
-
-  for (int32_t b = 0; b < output_b; ++b)
-  {
-    for (int32_t h = 0; h < output_h; ++h)
-    {
-      for (int32_t w = 0; w < output_w; ++w)
-      {
-        for (int32_t d = 0; d < output_d; ++d)
-        {
-          if (b < left_b_pad || b >= output_b - right_b_pad || //
-              h < left_h_pad || h >= output_h - right_h_pad || //
-              w < left_w_pad || w >= output_w - right_w_pad || //
-              d < left_d_pad || d >= output_d - right_d_pad)
-          {
-            if (mode == MirrorPadMode::REFLECT)
-            {
-              *out_ptr++ = input_data[offset_index(
-                positive_mod(d - left_d_pad, input_d), positive_mod(w - left_w_pad, input_w),
-                positive_mod(h - left_h_pad, input_h), positive_mod(b - left_b_pad, input_b))];
-            }
-            else
-            {
-              *out_ptr++ = input_data[offset_index(
-                symmetric_dim(d, left_d_pad, input_d), symmetric_dim(w, left_w_pad, input_w),
-                symmetric_dim(h, left_h_pad, input_h), symmetric_dim(b, left_b_pad, input_b))];
-            }
-          }
-          else
-          {
-            *out_ptr++ = *in_ptr++;
-          }
-        }
-      }
-    }
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
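
Note on the deleted MirrorPad kernel above: its index math relies on positive_mod to force possibly negative offsets into [0, b). A tiny standalone restatement for illustration (helper name and values are ours):

  #include <cassert>

  // C++'s % can return a negative result for negative operands; adding b and
  // taking % again yields a value in [0, b), which the padding index math needs.
  static int positiveMod(int a, int b) { return (a % b + b) % b; }

  int main()
  {
    assert(positiveMod(-1, 3) == 2);
    assert(positiveMod(-3, 3) == 0);
    assert(positiveMod(4, 3) == 1);
    return 0;
  }
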
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp
deleted file mode 100644 (file)
index 531fb4f..0000000
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Mul.h"
-
-#include "kernels/BinaryOpCommon.h"
-#include "kernels/Utils.h"
-
-#include "PALMul.h"
-
-#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Mul::Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams &params)
-  : KernelWithParams<MulParams>({input1, input2}, {output}, params)
-{
-}
-
-void Mul::configure()
-{
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
-  LUCI_INTERPRETER_CHECK(output()->element_type() == input1()->element_type());
-  if (input1()->element_type() == DataType::S16)
-  {
-    LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
-                           input2()->zero_points().size() == 1)
-    LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
-                           output()->zero_point() == 0);
-  }
-
-  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
-}
-
-void Mul::execute() const
-{
-  switch (input1()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::S64:
-      evalInteger<int64_t>();
-      break;
-    case DataType::S32:
-      evalInteger<int32_t>();
-      break;
-    case DataType::S16:
-      evalQuantizedS16();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Mul::evalFloat() const
-{
-  tflite::ArithmeticParams params{};
-  fillArithmeticActivationRange<float>(params, _params.activation);
-
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    luci_interpreter_pal::BroadcastMul4DSlow(
-      params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
-      getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
-  }
-  else
-  {
-    luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
-                              getTensorShape(input2()), getTensorData<float>(input2()),
-                              getTensorShape(output()), getTensorData<float>(output()));
-  }
-}
-
-template <typename T> void Mul::evalInteger() const
-{
-  tflite::ArithmeticParams params{};
-  fillArithmeticActivationRange<T>(params, _params.activation);
-
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    luci_interpreter_pal::BroadcastMul4DSlow(
-      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
-      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
-  }
-  else
-  {
-    luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<T>(input1()),
-                              getTensorShape(input2()), getTensorData<T>(input2()),
-                              getTensorShape(output()), getTensorData<T>(output()));
-  }
-}
-
-void Mul::evalQuantizedS16() const
-{
-  const auto input1_scale = static_cast<double>(input1()->scale());
-  const auto input2_scale = static_cast<double>(input2()->scale());
-  const auto output_scale = static_cast<double>(output()->scale());
-
-  const double real_multiplier = input1_scale * input2_scale / output_scale;
-
-  int32_t output_multiplier;
-  int output_shift;
-  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
-
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  auto fn = [output_multiplier, output_shift, activation_min, activation_max](int16_t input1_val,
-                                                                              int16_t input2_val) {
-    int32_t output = static_cast<int32_t>(input1_val) * static_cast<int32_t>(input2_val);
-    output = tflite::MultiplyByQuantizedMultiplier(output, output_multiplier, output_shift);
-    output = std::max(output, activation_min);
-    output = std::min(output, activation_max);
-    return static_cast<int16_t>(output);
-  };
-
-  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
-                        getTensorShape(input2()), getTensorData<int16_t>(input2()),
-                        getTensorShape(output()), getTensorData<int16_t>(output()), fn);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
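
Note on the deleted Mul kernel above: the S16 path folds dequantize, multiply, and requantize into the single factor s1 * s2 / s_out (zero points are checked to be zero). A small check with assumed, illustrative scales:

  #include <cassert>
  #include <cmath>
  #include <cstdint>

  int main()
  {
    const double s1 = 0.25, s2 = 0.5, s_out = 0.125;
    const int16_t q1 = 12, q2 = -7;

    // Dequantize, multiply, requantize ...
    const double reference = (q1 * s1) * (q2 * s2) / s_out;
    // ... equals applying one fused real multiplier to the raw product.
    const double rescaled = q1 * q2 * (s1 * s2 / s_out);

    assert(std::abs(reference - rescaled) < 1e-9);
    return 0;
  }
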
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp
deleted file mode 100644 (file)
index c6fe08a..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Neg.h"
-#include "kernels/Utils.h"
-
-#include "PALNeg.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Neg::Neg(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-void Neg::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-
-  output()->resize(input()->shape());
-}
-
-void Neg::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Neg::evalFloat() const
-{
-  luci_interpreter_pal::Negate(getTensorShape(input()), getTensorData<float>(input()),
-                               getTensorShape(output()), getTensorData<float>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp
deleted file mode 100644 (file)
index 54e5eee..0000000
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/NotEqual.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-NotEqual::NotEqual(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
-
-void NotEqual::configure()
-{
-  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
-  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
-
-  if (x()->element_type() == DataType::U8)
-  {
-    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
-    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
-  }
-  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
-}
-
-void NotEqual::execute() const
-{
-  switch (x()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::S64:
-      evalInteger<int64_t>();
-      break;
-    case DataType::S32:
-      evalInteger<int32_t>();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void NotEqual::evalFloat() const
-{
-  const auto x_data = getTensorData<float>(x());
-  const auto y_data = getTensorData<float>(y());
-  auto output_data = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (op_params.is_broadcast)
-  {
-    tflite::reference_ops::Broadcast4DSlowNotEqual(op_params, getTensorShape(x()), x_data,
-                                                   getTensorShape(y()), y_data,
-                                                   getTensorShape(output()), output_data);
-  }
-  else
-  {
-    tflite::reference_ops::NotEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
-                                    y_data, getTensorShape(output()), output_data);
-  }
-}
-
-template <typename T> void NotEqual::evalInteger() const
-{
-  const auto x_data = getTensorData<T>(x());
-  const auto y_data = getTensorData<T>(y());
-  auto output_data = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (op_params.is_broadcast)
-  {
-    tflite::reference_ops::Broadcast4DSlowNotEqualNoScaling(op_params, getTensorShape(x()), x_data,
-                                                            getTensorShape(y()), y_data,
-                                                            getTensorShape(output()), output_data);
-  }
-  else
-  {
-    tflite::reference_ops::NotEqualNoScaling(op_params, getTensorShape(x()), x_data,
-                                             getTensorShape(y()), y_data, getTensorShape(output()),
-                                             output_data);
-  }
-}
-
-void NotEqual::evalQuantized() const
-{
-  const auto x_data = getTensorData<uint8_t>(x());
-  const auto y_data = getTensorData<uint8_t>(y());
-  auto output_data = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.left_shift = 8;
-  op_params.input1_offset = -x()->zero_point(); // Note the '-'
-  op_params.input1_shift = _x_shift;
-  op_params.input1_multiplier = _x_multiplier;
-  op_params.input2_offset = -y()->zero_point(); // Note the '-'
-  op_params.input2_shift = _y_shift;
-  op_params.input2_multiplier = _y_multiplier;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (op_params.is_broadcast)
-  {
-    tflite::reference_ops::Broadcast4DSlowNotEqualWithScaling(
-      op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
-      output_data);
-  }
-  else
-  {
-    tflite::reference_ops::NotEqualWithScaling(op_params, getTensorShape(x()), x_data,
-                                               getTensorShape(y()), y_data,
-                                               getTensorShape(output()), output_data);
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp
deleted file mode 100644 (file)
index 4d3e5f2..0000000
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/OneHot.h"
-#include "kernels/Utils.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-namespace
-{
-
-template <typename T>
-void OneHotComputeImpl(const Tensor *indices_tensor, const Tensor *on_value_tensor,
-                       const Tensor *off_value_tensor, int32_t depth, int32_t axis,
-                       Tensor *output_tensor)
-{
-  // define input shape and correct axis
-  auto const &input_shape = indices_tensor->shape();
-  axis = axis == -1 ? input_shape.num_dims() : axis;
-
-  // TODO support other integer input types
-  auto const *indices = getTensorData<int32_t>(indices_tensor);
-  auto const on_value = getTensorData<T>(on_value_tensor)[0];
-  auto const off_value = getTensorData<T>(off_value_tensor)[0];
-  auto *output = getTensorData<T>(output_tensor);
-
-  // prefix_dim_size == # of elements before the axis
-  // depth == # of elements per axis
-  // suffix_dim_size == # of elements after the axis
-  auto prefix_dim_size = 1;
-  for (int32_t i = 0; i < axis; ++i)
-  {
-    prefix_dim_size *= input_shape.dim(i);
-  }
-  assert(prefix_dim_size > 0);
-  auto const suffix_dim_size = input_shape.num_elements() / prefix_dim_size;
-
-  // View the indices as a matrix of size:
-  //     prefix_dim_size x suffix_dim_size
-  // View the output as a matrix of size:
-  //     prefix_dim_size x depth x suffix_dim_size
-  // Then the output is:
-  //     output(i, j, k) == (indices(i, k) == j) ? on : off
-  for (int32_t i = 0; i < prefix_dim_size; ++i)
-    for (int32_t j = 0; j < depth; ++j)
-      for (int32_t k = 0; k < suffix_dim_size; ++k, ++output)
-        *output = indices[i * suffix_dim_size + k] == j ? on_value : off_value;
-}
-
-} // namespace
-
-OneHot::OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
-               const Tensor *off_value, Tensor *output, const OneHotParams &params)
-  : KernelWithParams<OneHotParams>({indices, depth, on_value, off_value}, {output}, params)
-{
-  // Do nothing
-}
-
-void OneHot::configure()
-{
-  // check types
-  LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32);
-  LUCI_INTERPRETER_CHECK(depth()->element_type() == DataType::S32);
-  LUCI_INTERPRETER_CHECK(on_value()->element_type() == off_value()->element_type());
-  LUCI_INTERPRETER_CHECK(output()->element_type() == on_value()->element_type());
-
-  // check shape dependent parameters
-  LUCI_INTERPRETER_CHECK(on_value()->shape().num_elements() == 1);
-  LUCI_INTERPRETER_CHECK(off_value()->shape().num_elements() == 1);
-  LUCI_INTERPRETER_CHECK(depth()->shape().num_elements() == 1);
-  LUCI_INTERPRETER_CHECK(params().axis >= -1 && params().axis <= indices()->shape().num_dims());
-
-  // define parameters that affect the output shape
-  auto const depth_value = getTensorData<int32_t>(depth())[0];
-  auto const &input_shape = indices()->shape();
-  auto const input_dims = input_shape.num_dims();
-  auto const axis = params().axis == -1 ? input_dims : params().axis;
-
-  // define output shape
-  Shape output_shape(input_shape.num_dims() + 1);
-  {
-    for (int32_t d = 0; d < axis; ++d)
-      output_shape.dim(d) = input_shape.dim(d);
-
-    output_shape.dim(axis) = depth_value;
-
-    for (int32_t d = axis + 1; d < output_shape.num_dims(); ++d)
-      output_shape.dim(d) = input_shape.dim(d - 1);
-  }
-
-  // reshape output
-  output()->resize(output_shape);
-}
-
-void OneHot::execute() const
-{
-  auto const depth_value = getTensorData<int32_t>(depth())[0];
-  auto const axis = params().axis;
-
-  switch (output()->element_type())
-  {
-    case loco::DataType::FLOAT32:
-      OneHotComputeImpl<float>(indices(), on_value(), off_value(), depth_value, axis, output());
-      break;
-    case loco::DataType::U8:
-      OneHotComputeImpl<uint8_t>(indices(), on_value(), off_value(), depth_value, axis, output());
-      break;
-    case loco::DataType::S16:
-      OneHotComputeImpl<int16_t>(indices(), on_value(), off_value(), depth_value, axis, output());
-      break;
-    default:
-      // TODO Support other data types
-      throw std::runtime_error("Not supported, yet!");
-      break;
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
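
Note on the deleted OneHot kernel above: the comment block states the rule output(i, j, k) = (indices(i, k) == j) ? on : off. A standalone check with illustrative values (indices {0, 2}, depth 3, axis appended last, so prefix_dim_size = 2 and suffix_dim_size = 1):

  #include <cassert>
  #include <vector>

  int main()
  {
    const std::vector<int> indices{0, 2};
    const int depth = 3;
    const float on = 1.0f, off = 0.0f;

    std::vector<float> output;
    for (int i = 0; i < 2; ++i)        // prefix_dim_size
      for (int j = 0; j < depth; ++j)  // depth
        for (int k = 0; k < 1; ++k)    // suffix_dim_size
          output.push_back(indices[i * 1 + k] == j ? on : off);

    const std::vector<float> expected{1.f, 0.f, 0.f, 0.f, 0.f, 1.f};
    assert(output == expected);
    return 0;
  }
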
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp
deleted file mode 100644 (file)
index 5a6b05c..0000000
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/PRelu.h"
-
-#include "kernels/BinaryOpCommon.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
-#include <tensorflow/lite/kernels/internal/reference/prelu.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-PRelu::PRelu(const Tensor *input, const Tensor *alpha, Tensor *output)
-  : Kernel({input, alpha}, {output})
-{
-}
-
-PRelu::~PRelu()
-{
-  // Destructor declared to delete vector of alpha quantized data properly
-}
-
-void PRelu::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  LUCI_INTERPRETER_CHECK(alpha()->element_type() == output()->element_type());
-  LUCI_INTERPRETER_CHECK(input()->scales().size() <= 1);
-  LUCI_INTERPRETER_CHECK(output()->scales().size() <= 1);
-
-  if (input()->element_type() == DataType::U8)
-  {
-    LUCI_INTERPRETER_CHECK(alpha()->scales().size() <= 1); // remove when CWQ kernel arrives
-    _alpha_multipliers.resize(1);
-    double alpha_multiplier = input()->scale() * alpha()->scale() / output()->scale();
-    quantizeMultiplier(alpha_multiplier, &_alpha_multipliers[0].multiplier,
-                       &_alpha_multipliers[0].shift);
-    double identity_multiplier = input()->scale() / output()->scale();
-    quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
-  }
-  else if (input()->element_type() == DataType::S16)
-  {
-    // Common check for correctness of quant params
-    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
-    for (size_t channel = 0; channel < alpha()->zero_points().size(); ++channel)
-    {
-      LUCI_INTERPRETER_CHECK(alpha()->zero_points()[channel] == 0);
-    }
-    // PRelu specific checks for CWQ
-    LUCI_INTERPRETER_CHECK(alpha()->quantized_dimension() == alpha()->shape().num_dims() - 1);
-    LUCI_INTERPRETER_CHECK(static_cast<int32_t>(alpha()->scales().size()) ==
-                           alpha()->shape().dim(alpha()->quantized_dimension()));
-    LUCI_INTERPRETER_CHECK(alpha()->shape().num_elements() ==
-                           input()->shape().dim(input()->shape().num_dims() - 1));
-
-    // all dimensions of alpha except the last one should be size 1
-    for (int dim = 0; dim < alpha()->shape().num_dims() - 1; ++dim)
-    {
-      LUCI_INTERPRETER_CHECK(alpha()->shape().dim(dim) == 1);
-    }
-
-    std::vector<double> real_multipliers =
-      getQuantizedConvolutionMultiplers(input()->scale(), alpha()->scales(), output()->scale());
-
-    _alpha_multipliers = quantizeMultipliers(real_multipliers);
-
-    double identity_multiplier = input()->scale() / output()->scale();
-    quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
-  }
-  output()->resize(calculateShapeForBroadcast(input()->shape(), alpha()->shape()));
-}
-
-void PRelu::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    case DataType::S16:
-      evalQuantizedS16();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void PRelu::evalFloat() const
-{
-  const auto input_data = getTensorData<float>(input());
-  const auto alpha_data = getTensorData<float>(alpha());
-  const auto size = getTensorShape(input()).FlatSize();
-  auto output_data = getTensorData<float>(output());
-
-  auto PReluFunc = [](float input, float alpha) { return input >= 0.0 ? input : input * alpha; };
-
-  if (input()->shape() != alpha()->shape())
-  {
-    tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
-      getTensorShape(input()), getTensorData<float>(input()), getTensorShape(alpha()),
-      getTensorData<float>(alpha()), getTensorShape(output()), getTensorData<float>(output()),
-      PReluFunc);
-  }
-  else
-  {
-    for (auto i = decltype(size){0}; i < size; ++i)
-    {
-      if (input_data[i] >= 0)
-        output_data[i] = input_data[i];
-      else
-        output_data[i] = input_data[i] * alpha_data[i];
-    }
-  }
-}
-
-void PRelu::evalQuantized() const
-{
-  tflite::PreluParams op_params{};
-
-  op_params.input_offset = -input()->zero_point(); // Note the '-'.
-  op_params.alpha_offset = -alpha()->zero_point(); // Note the '-'.
-  op_params.output_offset = output()->zero_point();
-  op_params.output_shift_1 = _output_shift_identity;
-  op_params.output_multiplier_1 = _output_multiplier_identity;
-  op_params.output_shift_2 = _alpha_multipliers[0].shift;
-  op_params.output_multiplier_2 = _alpha_multipliers[0].multiplier;
-
-  if (input()->shape() != alpha()->shape())
-  {
-    tflite::reference_ops::BroadcastPrelu4DSlow(
-      op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
-      getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
-  }
-  else
-  {
-    tflite::reference_ops::Prelu<uint8_t>(
-      op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
-      getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
-  }
-}
-
-static inline int16_t evalElemS16PRelu(int16_t input_val, int16_t alpha_val,
-                                       const ChannelQuantMultipliers &identity_mult,
-                                       const ChannelQuantMultipliers &alpha_mult)
-{
-  constexpr int32_t quantized_min = std::numeric_limits<int16_t>::min();
-  constexpr int32_t quantized_max = std::numeric_limits<int16_t>::max();
-
-  const int32_t output_val =
-    input_val >= 0
-      ? tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val),
-                                              identity_mult.multiplier, identity_mult.shift)
-      : tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val * alpha_val),
-                                              alpha_mult.multiplier, alpha_mult.shift);
-  const int32_t clamped_output = std::min(quantized_max, std::max(quantized_min, output_val));
-  return clamped_output;
-}
-
-void PRelu::evalQuantizedS16() const
-{
-  // Note that this kernel assumes alpha is CWQ
-  tflite::RuntimeShape input_shape = getTensorShape(input());
-  const int16_t *input_data = input()->data<int16_t>();
-  const int16_t *alpha_data = alpha()->data<int16_t>();
-  int16_t *output_data = output()->data<int16_t>();
-
-  const ChannelQuantMultipliers pos_mult{_output_shift_identity, _output_multiplier_identity};
-
-  const int last_dim = input()->shape().num_dims() - 1;
-
-  int32_t outer_dims_size = 1;
-  for (int i = 0; i < last_dim; ++i)
-    outer_dims_size *= input_shape.Dims(i);
-  int32_t quant_dim_size = input_shape.Dims(last_dim);
-
-  for (int32_t outer_dims = 0; outer_dims < outer_dims_size; ++outer_dims)
-    for (int32_t quant_channel = 0; quant_channel < quant_dim_size; ++quant_channel)
-    {
-      const ChannelQuantMultipliers &neg_mult = _alpha_multipliers[quant_channel];
-      size_t offset = static_cast<size_t>(outer_dims) * static_cast<size_t>(quant_dim_size);
-      offset += quant_channel;
-
-      output_data[offset] =
-        evalElemS16PRelu(input_data[offset], alpha_data[quant_channel], pos_mult, neg_mult);
-    }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp
deleted file mode 100644 (file)
index 42aab33..0000000
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Pack.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Pack::Pack(std::vector<const Tensor *> inputs, Tensor *output, const PackParams &params)
-  : KernelWithParams<PackParams>(std::move(inputs), {output}, params)
-{
-}
-
-void Pack::configure()
-{
-  LUCI_INTERPRETER_CHECK(_inputs.size() == static_cast<uint32_t>(params().values_count));
-  const Tensor *t0 = _inputs[0];
-  const int dimension_size = t0->shape().num_dims() + 1;
-  int axis = params().axis;
-  if (axis < 0)
-  {
-    axis += dimension_size;
-  }
-  LUCI_INTERPRETER_CHECK(axis >= 0 && axis <= t0->shape().num_dims());
-
-  if (t0->element_type() != DataType::S32 && t0->element_type() != DataType::FLOAT32 &&
-      t0->element_type() != DataType::U8 && t0->element_type() != DataType::S8 &&
-      t0->element_type() != DataType::S16 && t0->element_type() != DataType::S64)
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-
-  for (uint32_t i = 1; i < _inputs.size(); ++i)
-  {
-    const Tensor *tensor = _inputs[i];
-    LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type());
-    LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims());
-    for (int d = 0; d < t0->shape().num_dims(); ++d)
-    {
-      LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d));
-    }
-  }
-
-  Shape output_shape(dimension_size);
-  int i = 0;
-  for (int index = 0; index < dimension_size; ++index)
-  {
-    if (index == axis)
-    {
-      output_shape.dim(index) = params().values_count;
-    }
-    else
-    {
-      output_shape.dim(index) = t0->shape().dim(i++);
-    }
-  }
-
-  if (t0->element_type() == DataType::U8 || t0->element_type() == DataType::S8 ||
-      t0->element_type() == DataType::S16)
-  {
-    LUCI_INTERPRETER_CHECK(output()->zero_point() == t0->zero_point());
-    LUCI_INTERPRETER_CHECK(output()->scale() == t0->scale());
-    // Guarantee input/output quantization params match as we do not support
-    // packing quantized tensors.
-    for (int i = 0; i < params().values_count; i++)
-    {
-      LUCI_INTERPRETER_CHECK(_inputs[i]->zero_point() == t0->zero_point());
-      LUCI_INTERPRETER_CHECK(_inputs[i]->scale() == t0->scale());
-    }
-  }
-
-  output()->resize(output_shape);
-}
-
-void Pack::execute() const
-{
-  switch (_inputs[0]->element_type())
-  {
-    case DataType::FLOAT32:
-      evalGeneric<float>();
-      break;
-    case DataType::U8:
-      evalGeneric<uint8_t>();
-      break;
-    case DataType::S8:
-      evalGeneric<int8_t>();
-      break;
-    case DataType::S16:
-      evalGeneric<int16_t>();
-      break;
-    case DataType::S32:
-      evalGeneric<int32_t>();
-      break;
-    case DataType::S64:
-      evalGeneric<int64_t>();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-template <typename T> void Pack::evalGeneric() const
-{
-  const Tensor *t0 = _inputs[0];
-  const int dimension_size = t0->shape().num_dims() + 1;
-  int axis = params().axis;
-  if (axis < 0)
-  {
-    axis += dimension_size;
-  }
-
-  VectorOfTensors<T, true> inputs(_inputs);
-  tflite::PackParams params{};
-  params.axis = axis;
-  params.inputs_count = _inputs.size();
-  tflite::reference_ops::Pack<T>(params, inputs.shapes(), inputs.data(), getTensorShape(output()),
-                                 getTensorData<T>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
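
Note on the deleted Pack kernel above: configure() builds the output shape by inserting values_count at the pack axis and copying the remaining input dims in order around it. A standalone check with illustrative shapes (three [2, 4] tensors packed along axis 1 give [2, 3, 4]):

  #include <cassert>
  #include <vector>

  int main()
  {
    const std::vector<int> input_shape{2, 4};
    const int values_count = 3, axis = 1;

    std::vector<int> output_shape;
    int i = 0;
    for (int index = 0; index < static_cast<int>(input_shape.size()) + 1; ++index)
      output_shape.push_back(index == axis ? values_count : input_shape[i++]);

    assert((output_shape == std::vector<int>{2, 3, 4}));
    return 0;
  }
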
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp
deleted file mode 100644 (file)
index c07f6e3..0000000
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Pad.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/pad.h>
-
-#include <limits>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Pad::Pad(const Tensor *input, const Tensor *paddings, Tensor *output)
-  : Kernel({input, paddings}, {output})
-{
-}
-
-void Pad::configure()
-{
-  const Shape &input_shape = input()->shape();
-  const int num_dims = input_shape.num_dims();
-
-  if (num_dims > 4)
-    throw std::runtime_error("Unsupported number of dimensions.");
-
-  assert(output()->element_type() == input()->element_type());
-  assert(paddings()->element_type() == DataType::S32);
-  // Paddings shape should be [N, 2].
-  assert(paddings()->shape().num_dims() == 2);
-  assert(paddings()->shape().dim(0) == num_dims);
-  assert(paddings()->shape().dim(1) == 2);
-
-  Shape output_shape(num_dims);
-  const auto *paddings_data = getTensorData<int32_t>(paddings());
-  for (int i = 0; i < num_dims; ++i)
-  {
-    const int32_t padding_before = paddings_data[i * 2];
-    const int32_t padding_after = paddings_data[i * 2 + 1];
-    assert(padding_before >= 0 && padding_after >= 0);
-    output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
-  }
-
-  output()->resize(output_shape);
-}
-
-void Pad::execute() const
-{
-  const int num_dims = input()->shape().num_dims();
-
-  tflite::PadParams params{};
-  params.left_padding_count = num_dims;
-  params.right_padding_count = num_dims;
-
-  const auto *paddings_data = getTensorData<int32_t>(paddings());
-  for (int i = num_dims - 1; i >= 0; --i)
-  {
-    params.left_padding[i] = paddings_data[i * 2];
-    params.right_padding[i] = paddings_data[i * 2 + 1];
-  }
-
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-    {
-      const float pad_value = 0.0f;
-      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()),
-                                 &pad_value, getTensorShape(output()),
-                                 getTensorData<float>(output()));
-      break;
-    }
-    case DataType::U8:
-    {
-      assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
-      assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
-      const auto pad_value = static_cast<uint8_t>(output()->zero_point());
-      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
-                                 &pad_value, getTensorShape(output()),
-                                 getTensorData<uint8_t>(output()));
-      break;
-    }
-    case DataType::S8:
-    {
-      assert(output()->zero_point() >= std::numeric_limits<int8_t>::min());
-      assert(output()->zero_point() <= std::numeric_limits<int8_t>::max());
-      const auto pad_value = static_cast<int8_t>(output()->zero_point());
-      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<int8_t>(input()),
-                                 &pad_value, getTensorShape(output()),
-                                 getTensorData<int8_t>(output()));
-      break;
-    }
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp
deleted file mode 100644 (file)
index 197cdaa..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/PadV2.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/pad.h>
-
-#include <limits>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-PadV2::PadV2(const Tensor *input, const Tensor *paddings, const Tensor *constant_values,
-             Tensor *output)
-  : Kernel({input, paddings, constant_values}, {output})
-{
-}
-
-void PadV2::configure()
-{
-  const Shape &input_shape = input()->shape();
-  const int num_dims = input_shape.num_dims();
-
-  if (num_dims > 4)
-    throw std::runtime_error("Unsupported number of dimensions.");
-
-  assert(output()->element_type() == input()->element_type());
-  assert(paddings()->element_type() == DataType::S32);
-  assert(constant_values()->element_type() == output()->element_type());
-  // Paddings shape should be [N, 2].
-  assert(paddings()->shape().num_dims() == 2);
-  assert(paddings()->shape().dim(0) == num_dims);
-  assert(paddings()->shape().dim(1) == 2);
-  // Constant values elements number should be 1.
-  assert(constant_values()->shape().num_elements() == 1);
-
-  Shape output_shape(num_dims);
-  const auto *paddings_data = getTensorData<int32_t>(paddings());
-  for (int i = 0; i < num_dims; ++i)
-  {
-    const int32_t padding_before = paddings_data[i * 2];
-    const int32_t padding_after = paddings_data[i * 2 + 1];
-    assert(padding_before >= 0 && padding_after >= 0);
-    output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
-  }
-
-  output()->resize(output_shape);
-}
-
-void PadV2::execute() const
-{
-  const int num_dims = input()->shape().num_dims();
-
-  tflite::PadParams params{};
-  params.left_padding_count = num_dims;
-  params.right_padding_count = num_dims;
-
-  const auto *paddings_data = getTensorData<int32_t>(paddings());
-  for (int i = num_dims - 1; i >= 0; --i)
-  {
-    params.left_padding[i] = paddings_data[i * 2];
-    params.right_padding[i] = paddings_data[i * 2 + 1];
-  }
-
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-    {
-      const auto pad_value = getTensorData<float>(constant_values())[0];
-      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()),
-                                 &pad_value, getTensorShape(output()),
-                                 getTensorData<float>(output()));
-      break;
-    }
-    case DataType::U8:
-    {
-      assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
-      assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
-      const auto pad_value = getTensorData<uint8_t>(constant_values())[0];
-      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
-                                 &pad_value, getTensorShape(output()),
-                                 getTensorData<uint8_t>(output()));
-      break;
-    }
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp
deleted file mode 100644 (file)
index 722c640..0000000
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Pow.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Pow::Pow(const Tensor *input1, const Tensor *input2, Tensor *output)
-  : Kernel({input1, input2}, {output})
-{
-}
-
-void Pow::configure()
-{
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
-
-  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
-}
-
-void Pow::execute() const
-{
-  switch (input1()->element_type())
-  {
-    case DataType::FLOAT32:
-      eval<float>();
-      break;
-    case DataType::S32:
-      eval<int32_t>();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-template <typename T> void Pow::eval() const
-{
-  tflite::ArithmeticParams params{};
-
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    tflite::reference_ops::BroadcastPow4DSlow(getTensorShape(input1()), getTensorData<T>(input1()),
-                                              getTensorShape(input2()), getTensorData<T>(input2()),
-                                              getTensorShape(output()), getTensorData<T>(output()));
-  }
-  else
-  {
-    tflite::reference_ops::Pow(getTensorShape(input1()), getTensorData<T>(input1()),
-                               getTensorShape(input2()), getTensorData<T>(input2()),
-                               getTensorShape(output()), getTensorData<T>(output()));
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp
deleted file mode 100644 (file)
index 0c8544a..0000000
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Quantize.h"
-#include "kernels/Utils.h"
-#include "PALQuantize.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-namespace
-{
-
-template <typename input_dtype> void call_requantize(const Tensor *input, Tensor *output)
-{
-  int32_t multiplier;
-  int shift;
-
-  const double effective_output_scale = input->scale() / output->scale();
-  quantizeMultiplier(effective_output_scale, &multiplier, &shift);
-
-  const auto input_shape = getTensorShape(input);
-  const auto output_shape = getTensorShape(output);
-  const auto size = tflite::MatchingFlatSize(input_shape, output_shape);
-
-  const auto input_data = getTensorData<input_dtype>(input);
-
-  switch (output->element_type())
-  {
-    case loco::DataType::S8:
-      luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
-                                       output->zero_point(), getTensorData<int8_t>(output));
-      break;
-    case loco::DataType::U8:
-      luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
-                                       output->zero_point(), getTensorData<uint8_t>(output));
-      break;
-    case loco::DataType::S16:
-      luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
-                                       output->zero_point(), getTensorData<int16_t>(output));
-      break;
-    default:
-      throw std::runtime_error("Unsupported quantized type, yet!");
-  }
-}
-
-} // namespace
-
-Quantize::Quantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-void Quantize::configure()
-{
-
-  if (input()->element_type() == loco::DataType::S16)
-    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
-
-  switch (input()->element_type())
-  {
-    case loco::DataType::FLOAT32:
-    {
-      LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::U8 ||
-                             output()->element_type() == loco::DataType::S8 ||
-                             output()->element_type() == loco::DataType::S16);
-      break;
-    }
-    case loco::DataType::S16:
-    case loco::DataType::S8:
-    case loco::DataType::U8:
-    {
-      LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8 ||
-                             output()->element_type() == loco::DataType::U8 ||
-                             output()->element_type() == loco::DataType::S16);
-      if (output()->element_type() == loco::DataType::S16)
-      {
-        LUCI_INTERPRETER_CHECK(output()->zero_point() == 0);
-      }
-      break;
-    }
-    default:
-      throw std::runtime_error("Unsupported type");
-  }
-
-  output()->resize(input()->shape());
-}
-
-void Quantize::execute() const
-{
-  switch (input()->element_type())
-  {
-    case loco::DataType::FLOAT32:
-    {
-      tflite::QuantizationParams op_params;
-      op_params.zero_point = output()->zero_point();
-      op_params.scale = output()->scale();
-      const auto input_data = getTensorData<float>(input());
-
-      switch (output()->element_type())
-      {
-        case loco::DataType::S8:
-        {
-          luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
-                                         getTensorShape(output()), getTensorData<int8_t>(output()));
-          break;
-        }
-        case loco::DataType::U8:
-        {
-          luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
-                                         getTensorShape(output()),
-                                         getTensorData<uint8_t>(output()));
-          break;
-        }
-        case loco::DataType::S16:
-        {
-          luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
-                                         getTensorShape(output()),
-                                         getTensorData<int16_t>(output()));
-          break;
-        }
-        default:
-          throw std::runtime_error("Unsupported type.");
-      }
-      break;
-    }
-    case loco::DataType::S16:
-    {
-      call_requantize<int16_t>(input(), output());
-      break;
-    }
-    case loco::DataType::S8:
-    {
-      call_requantize<int8_t>(input(), output());
-      break;
-    }
-    case loco::DataType::U8:
-    {
-      call_requantize<uint8_t>(input(), output());
-      break;
-    }
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp
deleted file mode 100644 (file)
index 747ec6c..0000000
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Relu.h"
-#include "kernels/Utils.h"
-
-#include "PALRelu.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Relu::Relu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-void Relu::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  if (input()->element_type() == DataType::S16)
-  {
-    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
-  }
-
-  if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
-  {
-    double multiplier = input()->scale() / output()->scale();
-    quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift);
-  }
-  output()->resize(input()->shape());
-}
-
-void Relu::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    case DataType::S16:
-      evalQuantizedS16();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Relu::evalFloat() const
-{
-  const auto input_data = getTensorData<float>(input());
-  const auto input_shape = getTensorShape(input());
-  auto output_data = getTensorData<float>(output());
-  auto output_shape = getTensorShape(output());
-
-  luci_interpreter_pal::Relu(input_shape, input_data, output_shape, output_data);
-}
-
-void Relu::evalQuantized() const
-{
-  tflite::ReluParams params;
-  params.input_offset = input()->zero_point();
-  params.output_offset = output()->zero_point();
-  params.output_multiplier = _output_multiplier;
-  params.output_shift = _output_shift;
-
-  params.quantized_activation_min =
-    std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
-  params.quantized_activation_max = static_cast<int32_t>(std::numeric_limits<uint8_t>::max());
-
-  luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
-                              getTensorShape(output()), getTensorData<uint8_t>(output()));
-}
-
-void Relu::evalQuantizedS16() const
-{
-  const auto *input_data = getTensorData<int16_t>(input());
-  auto *output_data = getTensorData<int16_t>(output());
-
-  constexpr int32_t output_min = 0;
-  constexpr int32_t output_max = std::numeric_limits<int16_t>::max();
-
-  const int32_t num_elements = input()->shape().num_elements();
-
-  for (int32_t i = 0; i < num_elements; ++i)
-  {
-    const int32_t input_val = input_data[i];
-    int32_t output_val =
-      tflite::MultiplyByQuantizedMultiplier(input_val, _output_multiplier, _output_shift);
-    output_val = std::max(output_val, output_min);
-    output_val = std::min(output_val, output_max);
-    output_data[i] = static_cast<int16_t>(output_val);
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp
deleted file mode 100644 (file)
index 07205ed..0000000
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Relu6.h"
-#include "kernels/Utils.h"
-
-#include "PALRelu6.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Relu6::Relu6(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-void Relu6::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-
-  if (input()->element_type() == DataType::U8)
-  {
-    double multiplier = input()->scale() / output()->scale();
-    quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift);
-  }
-  output()->resize(input()->shape());
-}
-
-void Relu6::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Relu6::evalFloat() const
-{
-  const auto input_data = getTensorData<float>(input());
-  const auto input_shape = getTensorShape(input());
-  auto output_data = getTensorData<float>(output());
-  auto output_shape = getTensorShape(output());
-
-  luci_interpreter_pal::Relu6(input_shape, input_data, output_shape, output_data);
-}
-
-void Relu6::evalQuantized() const
-{
-  tflite::ReluParams params;
-  params.input_offset = input()->zero_point();
-  params.output_offset = output()->zero_point();
-  params.output_multiplier = _output_multiplier;
-  params.output_shift = _output_shift;
-
-  params.quantized_activation_min =
-    std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
-  params.quantized_activation_max =
-    std::min(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()),
-             params.output_offset + static_cast<int32>(roundf(6.f / output()->scale())));
-
-  luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
-                              getTensorShape(output()), getTensorData<uint8_t>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp
deleted file mode 100644 (file)
index 61d3300..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Reshape.h"
-
-#include <cassert>
-#include <cstring>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-static Shape extractShapeFromTensor(const Tensor *tensor)
-{
-  assert(tensor->element_type() == DataType::S32);
-  Shape shape(tensor->shape().num_elements());
-  const auto *shape_data = tensor->data<int32_t>();
-  for (int i = 0; i < tensor->shape().num_elements(); ++i)
-  {
-    shape.dim(i) = shape_data[i];
-  }
-  return shape;
-}
-
-static void resolveUnknownDimension(const Shape &input_shape, Shape *output_shape)
-{
-  const int32_t num_input_elements = input_shape.num_elements();
-  int32_t num_output_elements = 1;
-  int unknown_dim_index = -1;
-  for (int i = 0; i < output_shape->num_dims(); ++i)
-  {
-    const int32_t value = output_shape->dim(i);
-    if (value == -1)
-    {
-      assert(unknown_dim_index == -1);
-      unknown_dim_index = i;
-    }
-    else
-    {
-      num_output_elements *= value;
-    }
-  }
-  if (unknown_dim_index != -1)
-  {
-    output_shape->dim(unknown_dim_index) = num_input_elements / num_output_elements;
-    num_output_elements *= output_shape->dim(unknown_dim_index);
-  }
-  assert(num_output_elements == num_input_elements);
-}
-
-Reshape::Reshape(const Tensor *input, const Tensor *shape, Tensor *output)
-  : Kernel({input, shape}, {output})
-{
-}
-
-void Reshape::configure()
-{
-  Shape output_shape = extractShapeFromTensor(shape());
-  resolveUnknownDimension(input()->shape(), &output_shape);
-  output()->resize(output_shape);
-}
-
-void Reshape::execute() const
-{
-  const auto *input_data = input()->data<void>();
-  auto *output_data = output()->data<void>();
-
-  const size_t element_size = getDataTypeSize(input()->element_type());
-  const int32_t num_elements = input()->shape().num_elements();
-  std::memcpy(output_data, input_data, num_elements * element_size);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h
deleted file mode 100644 (file)
index 99b947f..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_KERNELS_RESHAPE_H
-#define LUCI_INTERPRETER_KERNELS_RESHAPE_H
-
-#include "core/Kernel.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-class Reshape : public Kernel
-{
-public:
-  Reshape(const Tensor *input, const Tensor *shape, Tensor *output);
-
-  const Tensor *input() const { return _inputs[0]; }
-  const Tensor *shape() const { return _inputs[1]; }
-  Tensor *output() const { return _outputs[0]; }
-
-  void configure() override;
-  void execute() const override;
-};
-
-} // namespace kernels
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_KERNELS_RESHAPE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp
deleted file mode 100644 (file)
index c2ff3ea..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Reshape.h"
-#include "kernels/TestUtils.h"
-#include "luci_interpreter/TestMemoryManager.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace
-{
-
-using namespace testing;
-
-class ReshapeTest : public ::testing::Test
-{
-protected:
-  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
-
-  std::unique_ptr<IMemoryManager> _memory_manager;
-};
-
-// TODO Test types other than FLOAT32.
-
-TEST_F(ReshapeTest, Regular)
-{
-  Shape input_shape{1, 2, 2, 3};
-  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
-  Shape shape_shape{2};
-  std::vector<int32_t> shape_data{3, 4};
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
-  Tensor shape_tensor =
-    makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
-  Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
-  kernel.configure();
-  _memory_manager->allocate_memory(output_tensor);
-  kernel.execute();
-
-  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
-}
-
-TEST_F(ReshapeTest, UnknownDimension)
-{
-  Shape input_shape{2, 1, 2, 3};
-  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
-  Shape shape_shape{3};
-  std::vector<int32_t> shape_data{2, -1, 2};
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
-  Tensor shape_tensor =
-    makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
-  Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
-  kernel.configure();
-  _memory_manager->allocate_memory(output_tensor);
-  kernel.execute();
-
-  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
-}
-
-} // namespace
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp
deleted file mode 100644 (file)
index e2ddd6a..0000000
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/ResizeBilinear.h"
-
-#include "kernels/Utils.h"
-
-#include "PALResizeBilinear.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-ResizeBilinear::ResizeBilinear(const Tensor *input, const Tensor *size, Tensor *output,
-                               const ResizeBilinearParams &params)
-  : KernelWithParams<ResizeBilinearParams>({input, size}, {output}, params)
-{
-}
-
-void ResizeBilinear::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
-  LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1);
-  LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32);
-  if (params().half_pixel_centers && params().align_corners)
-    throw std::runtime_error("If half_pixel_centers is True, align_corners must be False.");
-  LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2);
-  Shape output_shape(4);
-  output_shape.dim(0) = input()->shape().dim(0);
-  output_shape.dim(1) = getTensorData<int32_t>(size())[0];
-  output_shape.dim(2) = getTensorData<int32_t>(size())[1];
-  output_shape.dim(3) = input()->shape().dim(3);
-  output()->resize(output_shape);
-}
-
-void ResizeBilinear::execute() const
-{
-  tflite::ResizeBilinearParams op_params{};
-  op_params.align_corners = params().align_corners;
-  op_params.half_pixel_centers = params().half_pixel_centers;
-  switch (output()->element_type())
-  {
-    case DataType::FLOAT32:
-      luci_interpreter_pal::ResizeBilinear(
-        op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(size()),
-        getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output()));
-      break;
-    case DataType::U8:
-      luci_interpreter_pal::ResizeBilinear(
-        op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
-        getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp
deleted file mode 100644 (file)
index 306cefb..0000000
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/ResizeNearestNeighbor.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
-#include "PALResizeNearestNeighbor.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-ResizeNearestNeighbor::ResizeNearestNeighbor(const Tensor *input, const Tensor *size,
-                                             Tensor *output,
-                                             const ResizeNearestNeighborParams &params)
-  : KernelWithParams<ResizeNearestNeighborParams>({input, size}, {output}, params)
-{
-}
-
-void ResizeNearestNeighbor::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
-  LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1);
-  LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32);
-  LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2);
-  Shape output_shape(4);
-  output_shape.dim(0) = input()->shape().dim(0);
-  output_shape.dim(1) = getTensorData<int32_t>(size())[0];
-  output_shape.dim(2) = getTensorData<int32_t>(size())[1];
-  output_shape.dim(3) = input()->shape().dim(3);
-  output()->resize(output_shape);
-}
-
-void ResizeNearestNeighbor::execute() const
-{
-  tflite::ResizeNearestNeighborParams op_params{};
-  op_params.align_corners = params().align_corners;
-  op_params.half_pixel_centers = params().half_pixel_centers;
-  switch (output()->element_type())
-  {
-    case DataType::FLOAT32:
-      tflite::reference_ops::ResizeNearestNeighbor(
-        op_params, getTensorShape(input()), getTensorData<int32_t>(input()), getTensorShape(size()),
-        getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<int32_t>(output()));
-      break;
-    case DataType::U8:
-      luci_interpreter_pal::ResizeNearestNeighbor(
-        op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
-        getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp
deleted file mode 100644 (file)
index 1b6a5cc..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/ReverseV2.h"
-#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-ReverseV2::ReverseV2(const Tensor *input, const Tensor *axes, Tensor *output)
-  : Kernel({input, axes}, {output})
-{
-}
-
-void ReverseV2::configure()
-{
-  assert(axes()->shape().num_dims() == 1);
-  assert(input()->shape().num_dims() >= axes()->shape().num_elements());
-  if (input()->element_type() != DataType::S32 && input()->element_type() != DataType::FLOAT32 &&
-      input()->element_type() != DataType::U8 && input()->element_type() != DataType::S16 &&
-      input()->element_type() != DataType::S64)
-  {
-    throw std::runtime_error("Unsupported input type.");
-  }
-  if (axes()->element_type() != DataType::S32)
-  {
-    throw std::runtime_error("Unsupported axes type.");
-  }
-  if (axes()->shape().num_elements() > 1)
-  {
-    throw std::runtime_error("Current implementation does not support more than 1 axis.");
-  }
-  int axis_value = getTensorData<int32_t>(axes())[0];
-  if (axis_value < 0 || axis_value >= input()->shape().num_dims())
-  {
-    throw std::runtime_error("Invalid axes value");
-  }
-  assert(input()->element_type() == output()->element_type());
-
-  output()->resize(input()->shape());
-}
-
-void ReverseV2::execute() const
-{
-  int axis_value = getTensorData<int32_t>(axes())[0];
-  switch (output()->element_type())
-  {
-    case DataType::FLOAT32:
-      tflite::reference_ops::Reverse<float>(axis_value, getTensorShape(input()),
-                                            getTensorData<float>(input()), getTensorShape(output()),
-                                            getTensorData<float>(output()));
-      break;
-    case DataType::U8:
-      tflite::reference_ops::Reverse<uint8_t>(
-        axis_value, getTensorShape(input()), getTensorData<uint8_t>(input()),
-        getTensorShape(output()), getTensorData<uint8_t>(output()));
-      break;
-    default:
-      throw std::runtime_error("Unsupported output type");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp
deleted file mode 100644 (file)
index 6dd92dc..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Rsqrt.h"
-#include "kernels/Utils.h"
-
-#include <stdexcept>
-#include <cmath>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Rsqrt::Rsqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-void Rsqrt::configure()
-{
-  if (input()->element_type() != output()->element_type())
-  {
-    throw std::runtime_error("Input/output tensor data type mismatch.");
-  }
-  output()->resize(input()->shape());
-}
-
-void Rsqrt::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Rsqrt::evalFloat() const
-{
-  auto in = getTensorData<float>(input());
-  auto out = getTensorData<float>(output());
-  auto size = getTensorShape(input()).FlatSize();
-  for (auto i = in; i != in + size; ++i)
-  {
-    *out = 1.f / std::sqrt(*i);
-    ++out;
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp
deleted file mode 100644 (file)
index 40d79aa..0000000
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/SVDF.h"
-#include "kernels/Utils.h"
-#include "PALSVDF.h"
-
-#include <tensorflow/lite/kernels/internal/quantization_util.h>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-namespace
-{
-TfLiteFusedActivation get_tflite_activation(Activation activation)
-{
-  switch (activation)
-  {
-    case luci::FusedActFunc::RELU:
-      return kTfLiteActRelu;
-    case luci::FusedActFunc::RELU6:
-      return kTfLiteActRelu6;
-    case luci::FusedActFunc::RELU_N1_TO_1:
-      return kTfLiteActReluN1To1;
-    case luci::FusedActFunc::TANH:
-      return kTfLiteActTanh;
-    case luci::FusedActFunc::SIGN_BIT:
-      return kTfLiteActSignBit;
-    case luci::FusedActFunc::NONE:
-      return kTfLiteActNone;
-    default:
-      throw std::runtime_error("Unsupported activation type");
-  }
-}
-} // namespace
-
-SVDF::SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
-           const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
-           Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
-           Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
-           const SVDFParams &params)
-  : KernelWithParams<SVDFParams>({input, weight_feature, weight_time, bias, input_activation_state},
-                                 {output, scratchpad_activation_state, scratchpad_1, scratchpad_2,
-                                  scratchpad_3, scratchpad_4, scratchpad_5, scratchpad_6},
-                                 params)
-{
-  // Do nothing
-}
-
-void SVDF::configure()
-{
-  const Shape &input_shape = input()->shape();
-  const Shape &weight_features_shape = weight_feature()->shape();
-  const Shape &weight_time_shape = weight_time()->shape();
-
-  // Validate Input Tensor:
-  LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::FLOAT32 ||
-                         input()->element_type() == loco::DataType::S8);
-  LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 2);
-
-  // Validate inputs and output types
-  if (input()->element_type() == loco::DataType::S8)
-  {
-    LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::S8);
-    LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::S16 ||
-                           weight_time()->element_type() == loco::DataType::S8);
-    if (bias())
-      LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::S32);
-
-    LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::S16 ||
-                           input_activation_state()->element_type() == loco::DataType::S8);
-    LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8);
-
-    // Note: now tflite support only ReLU activation for integer SVDF
-    LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::RELU);
-  }
-  else if (weight_feature()->element_type() == loco::DataType::FLOAT32)
-  {
-    LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::FLOAT32);
-    LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::FLOAT32);
-    LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::FLOAT32);
-    if (bias())
-      LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::FLOAT32);
-    LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32);
-  }
-  else if ((weight_feature()->element_type() == loco::DataType::U8 ||
-            weight_feature()->element_type() == loco::DataType::S8) &&
-           input()->element_type() == loco::DataType::FLOAT32)
-  {
-    // TODO:: support hybrid SVDF op
-    throw std::runtime_error("Hybrid type is not currently supported");
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-
-  // Check all the parameters of tensor match within themselves and match the
-  // input configuration.
-  const int rank = params().svdf_rank;
-  const int batch_size = input_shape.dim(0);
-  const int num_filters = weight_features_shape.dim(0);
-  LUCI_INTERPRETER_CHECK(rank != 0);
-  LUCI_INTERPRETER_CHECK(num_filters % rank == 0);
-
-  const int num_units = num_filters / rank;
-  const int memory_size = weight_time_shape.dim(1);
-
-  // Validate Weight_Feature Input Tensor:
-  LUCI_INTERPRETER_CHECK(weight_features_shape.num_dims() == 2);
-  LUCI_INTERPRETER_CHECK(weight_features_shape.dim(1) == input_shape.dim(1));
-
-  // Validate Weight_Time Input Tensor:
-  LUCI_INTERPRETER_CHECK(weight_time_shape.num_dims() == 2);
-  LUCI_INTERPRETER_CHECK(weight_time_shape.dim(0) == num_filters);
-
-  // Validate Bias
-  if (bias())
-    LUCI_INTERPRETER_CHECK(bias()->shape().dim(0) == num_units);
-
-  // Validate Input Activation State
-  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().num_dims() == 2);
-  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(0) == batch_size);
-  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(1) == memory_size * num_filters);
-
-  // Resize scratchpad_state to input_activation_state
-  auto scratchpad_activation_state = getOutputTensors()[1];
-  scratchpad_activation_state->resize({batch_size, memory_size * num_filters});
-
-  // Resize output tensor
-  output()->resize({batch_size, num_units});
-
-  luci_interpreter_pal::SetupScratchpadTensor(
-    input()->element_type(), weight_feature()->element_type(), getOutputTensors()[2],
-    getOutputTensors()[3], getOutputTensors()[4], getOutputTensors()[5], getOutputTensors()[6],
-    getOutputTensors()[7], input_shape, weight_time_shape, batch_size, num_filters, num_units);
-}
-
-void SVDF::execute() const
-{
-  switch (weight_feature()->element_type())
-  {
-    case loco::DataType::FLOAT32:
-      evalFloat();
-      break;
-    case loco::DataType::S8:
-    {
-      if (input()->element_type() == loco::DataType::S8)
-        evalInteger();
-      else
-        // TODO:: support hybrid SVDF op
-        throw std::runtime_error("Hybrid type is not currently supported");
-      break;
-    }
-    default:
-      throw std::runtime_error("Unsupported type");
-  }
-}
-
-void SVDF::evalInteger() const
-{
-  const auto effective_scale_1 = static_cast<double>(input()->scale() * weight_feature()->scale() /
-                                                     input_activation_state()->scale());
-  const auto effective_scale_2 = static_cast<double>(input_activation_state()->scale() *
-                                                     weight_time()->scale() / output()->scale());
-
-  int32_t effective_scale_1_a;
-  int effective_scale_1_b;
-  int32_t effective_scale_2_a;
-  int effective_scale_2_b;
-
-  tflite::QuantizeMultiplier(effective_scale_1, &effective_scale_1_a, &effective_scale_1_b);
-  tflite::QuantizeMultiplier(effective_scale_2, &effective_scale_2_a, &effective_scale_2_b);
-
-  TfLiteSVDFParams params_svdf{};
-  params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
-  params_svdf.rank = params().svdf_rank;
-  params_svdf.activation = get_tflite_activation(params().activation);
-
-  auto scratchpad_activation_state = getOutputTensors()[1];
-  // Note: it is expected that activation_state input variable tensor reset to zero,
-  // also expected that this variable tensor doesn't have buffer
-  auto scratchpad_data = getTensorData<int16_t>(scratchpad_activation_state);
-  std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
-
-  auto scratchpad = getOutputTensors()[2];
-  auto output_temp = getOutputTensors()[3];
-
-  int32_t input_zp = input()->zero_point();
-  int32_t output_zp = output()->zero_point();
-  luci_interpreter_pal::IntegerSVDF(
-    params_svdf, getTensorShape(input()), getTensorData<int8_t>(input()),
-    getTensorShape(weight_feature()), getTensorData<int8_t>(weight_feature()),
-    getTensorShape(weight_time()), getTensorData<int16_t>(weight_time()), getTensorShape(bias()),
-    getTensorData<int32_t>(bias()), scratchpad_data, getTensorShape(output()),
-    getTensorData<int8_t>(output()), getTensorData<int32_t>(scratchpad),
-    getTensorData<int32_t>(output_temp), effective_scale_1_a, effective_scale_1_b,
-    effective_scale_2_a, effective_scale_2_b, input_zp, output_zp);
-}
-
-void SVDF::evalFloat() const
-{
-  TfLiteSVDFParams params_svdf{};
-  params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
-  params_svdf.rank = params().svdf_rank;
-  params_svdf.activation = get_tflite_activation(params().activation);
-
-  auto scratchpad_activation_state = getOutputTensors()[1];
-  // Note: it is expected that activation_state input variable tensor reset to zero,
-  // also expected that this variable tensor doesn't have buffer
-  auto scratchpad_data = getTensorData<float>(scratchpad_activation_state);
-  std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
-
-  auto scratchpad_1 = getOutputTensors()[2];
-
-  luci_interpreter_pal::FloatSVDF(
-    params_svdf, getTensorShape(input()), getTensorData<float>(input()),
-    getTensorShape(weight_feature()), getTensorData<float>(weight_feature()),
-    getTensorShape(weight_time()), getTensorData<float>(weight_time()), getTensorShape(bias()),
-    getTensorData<float>(bias()), getTensorData<float>(scratchpad_1), scratchpad_data,
-    getTensorShape(output()), getTensorData<float>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp
deleted file mode 100644 (file)
index 0429fe1..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Shape.h"
-#include "kernels/Utils.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-ShapeKernel::ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params)
-  : KernelWithParams<ShapeParams>({input}, {output}, params)
-{
-}
-
-void ShapeKernel::configure()
-{
-  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S32 or
-                         output()->element_type() == DataType::S64);
-  const auto input_shape = input()->shape();
-
-  Shape output_shape(1);
-  output_shape.dim(0) = input_shape.num_dims();
-
-  output()->resize(output_shape);
-}
-
-void ShapeKernel::execute() const
-{
-  switch (params().out_type)
-  {
-    case DataType::S32:
-      evalInt<int32_t>();
-      break;
-    case DataType::S64:
-      evalInt<int64_t>();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-template <typename T> void ShapeKernel::evalInt() const
-{
-  const auto input_shape = input()->shape();
-
-  auto output_data = getTensorData<T>(output());
-
-  for (int i = 0; i < input_shape.num_dims(); ++i)
-  {
-    output_data[i] = input_shape.dim(i);
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp
deleted file mode 100644 (file)
index 2fe2c54..0000000
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Slice.h"
-#include "Utils.h"
-#include "PALSlice.h"
-
-#include <cassert>
-#include <cstring>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-const int max_dim = 4;
-
-Slice::Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output)
-  : Kernel({input, begin, size}, {output})
-{
-}
-
-template <typename T>
-Shape calculateOutputShape(const Tensor *input, const Tensor *begin, const Tensor *size)
-{
-  Shape output_shape = Shape(input->shape().num_dims());
-  for (int idx = 0; idx < input->shape().num_dims(); idx++)
-  {
-    T size_value = getTensorData<T>(size)[idx];
-    if (size_value < 0)
-    {
-      if (size_value != -1)
-      {
-        throw std::runtime_error("Invalid size.");
-      }
-      size_value = input->shape().dim(idx) - getTensorData<T>(begin)[idx];
-    }
-    else
-    {
-      if (input->shape().dim(idx) < getTensorData<T>(begin)[idx] + size_value)
-      {
-        throw std::runtime_error("Invalid begin and size.");
-      }
-    }
-    output_shape.dim(idx) = static_cast<int>(size_value);
-  }
-  return output_shape;
-}
-
-template <typename T>
-void getBeginAndSizeVectors(int dimensions, const Tensor *begin, const Tensor *size,
-                            std::vector<int> *begins, std::vector<int> *sizes)
-{
-  for (int idx = dimensions - 1; idx >= 0; --idx)
-  {
-    begins->push_back(getTensorData<T>(begin)[idx]);
-    sizes->push_back(getTensorData<T>(size)[idx]);
-  }
-}
-
-void Slice::configure()
-{
-  assert(input()->element_type() == output()->element_type());
-  assert(begin()->element_type() == DataType::S32 || begin()->element_type() == DataType::S64);
-  assert(size()->element_type() == DataType::S32 || size()->element_type() == DataType::S64);
-  assert(begin()->shape().num_dims() == 1);
-  assert(size()->shape().num_dims() == 1);
-  assert(input()->shape().num_dims() <= max_dim);
-
-  if (begin()->element_type() == DataType::S32)
-  {
-    output()->resize(calculateOutputShape<int32_t>(input(), begin(), size()));
-  }
-  else if (begin()->element_type() == DataType::S64)
-  {
-    output()->resize(calculateOutputShape<int64_t>(input(), begin(), size()));
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Slice::execute() const
-{
-  std::vector<int> begins;
-  begins.reserve(max_dim);
-  std::vector<int> sizes;
-  sizes.reserve(max_dim);
-  if (begin()->element_type() == DataType::S32)
-  {
-    getBeginAndSizeVectors<int32_t>(input()->shape().num_dims(), begin(), size(), &begins, &sizes);
-  }
-  else if (begin()->element_type() == DataType::S64)
-  {
-    getBeginAndSizeVectors<int64_t>(input()->shape().num_dims(), begin(), size(), &begins, &sizes);
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported begin type.");
-  }
-  for (int i = input()->shape().num_dims(); i < max_dim; ++i)
-  {
-    begins.push_back(0);
-    sizes.push_back(1);
-  }
-
-  assert(begins.size() == 4);
-  assert(sizes.size() == 4);
-  tflite::SliceParams op_params{};
-  op_params.begin_count = 4;
-  op_params.size_count = 4;
-  for (int i = 0; i < 4; i++)
-  {
-    op_params.begin[i] = begins[3 - i];
-    op_params.size[i] = sizes[3 - i];
-  }
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      luci_interpreter_pal::Slice(op_params, getTensorShape(input()), getTensorData<float>(input()),
-                                  getTensorShape(output()), getTensorData<float>(output()));
-      break;
-    case DataType::U8:
-      luci_interpreter_pal::Slice(op_params, getTensorShape(input()),
-                                  getTensorData<uint8_t>(input()), getTensorShape(output()),
-                                  getTensorData<uint8_t>(output()));
-      break;
-    case DataType::S8:
-      luci_interpreter_pal::Slice(op_params, getTensorShape(input()),
-                                  getTensorData<int8_t>(input()), getTensorShape(output()),
-                                  getTensorData<int8_t>(output()));
-      break;
-    default:
-      throw std::runtime_error("Unsupported input type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp
deleted file mode 100644 (file)
index c230aaa..0000000
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Softmax.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/softmax.h>
-#include "PALSoftmax.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Softmax::Softmax(const Tensor *input, Tensor *output, const SoftmaxParams &params)
-  : KernelWithParams<SoftmaxParams>({input}, {output}, params)
-{
-}
-
-void Softmax::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= 1);
-  if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S8)
-  {
-    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8 || output()->zero_point() == 0);
-    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8 ||
-                           output()->zero_point() == std::numeric_limits<int8_t>::min());
-    tflite::SoftmaxParams op_params{};
-    op_params.table = _table;
-    luci_interpreter_pal::PopulateSoftmaxLookupTable(&op_params, input()->scale(), params().beta);
-  }
-  output()->resize(input()->shape());
-}
-
-void Softmax::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::S8:
-      evalQuantized<int8_t>();
-      break;
-    case DataType::U8:
-      evalQuantized<uint8_t>();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Softmax::evalFloat() const
-{
-  tflite::SoftmaxParams op_params{};
-  op_params.beta = params().beta;
-
-  tflite::reference_ops::Softmax(op_params, getTensorShape(input()), getTensorData<float>(input()),
-                                 getTensorShape(output()), getTensorData<float>(output()));
-}
-
-template <typename T> void Softmax::evalQuantized() const
-{
-  tflite::SoftmaxParams op_params{};
-  op_params.table = const_cast<float *>(_table);
-  op_params.zero_point = output()->zero_point();
-  op_params.scale = output()->scale();
-  luci_interpreter_pal::InitializeParams(&op_params, input()->scale(), params().beta);
-  luci_interpreter_pal::Softmax(op_params, getTensorShape(input()), getTensorData<T>(input()),
-                                getTensorShape(output()), getTensorData<T>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h
deleted file mode 100644 (file)
index 1f281df..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_KERNELS_SOFTMAX_H
-#define LUCI_INTERPRETER_KERNELS_SOFTMAX_H
-
-#include "core/Kernel.h"
-#include "core/KernelParams.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-class Softmax : public KernelWithParams<SoftmaxParams>
-{
-public:
-  Softmax(const Tensor *input, Tensor *output, const SoftmaxParams &params);
-
-  const Tensor *input() const { return _inputs[0]; }
-  Tensor *output() const { return _outputs[0]; }
-
-  void configure() override;
-  void execute() const override;
-
-private:
-  void evalFloat() const;
-  template <typename T> void evalQuantized() const;
-
-  float _table[256];
-};
-
-} // namespace kernels
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_KERNELS_SOFTMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp
deleted file mode 100644 (file)
index 08e7067..0000000
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Softmax.h"
-#include "kernels/TestUtils.h"
-#include "luci_interpreter/TestMemoryManager.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace
-{
-
-using namespace testing;
-
-template <typename T> constexpr loco::DataType toLocoDataType();
-
-template <> constexpr loco::DataType toLocoDataType<float>() { return loco::DataType::FLOAT32; }
-
-template <> constexpr loco::DataType toLocoDataType<uint8_t>() { return loco::DataType::U8; }
-
-template <> constexpr loco::DataType toLocoDataType<int8_t>() { return loco::DataType::S8; }
-
-template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
-void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
-           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
-{
-  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
-
-  Tensor input_tensor =
-    makeInputTensor<toLocoDataType<T>()>(input_shape, input_data, memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(toLocoDataType<T>());
-
-  SoftmaxParams params{};
-  params.beta = 0.1;
-
-  Softmax kernel(&input_tensor, &output_tensor, params);
-  kernel.configure();
-  memory_manager->allocate_memory(output_tensor);
-  kernel.execute();
-
-  EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
-  EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
-}
-
-template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
-void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
-           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
-{
-  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
-
-  std::pair<float, int32_t> input_quant_param =
-    quantizationParams<T>(std::min<float>(std::min<float>(input_data), 0.f),
-                          std::max<float>(std::max<float>(input_data), 0.f));
-  std::pair<float, int32_t> output_quant_param =
-    quantizationParams<T>(std::min<float>(std::min<float>(output_data), 0.f),
-                          std::max<float>(std::max<float>(output_data), 0.f));
-  Tensor input_tensor = makeInputTensor<toLocoDataType<T>()>(input_shape, input_quant_param.first,
-                                                             input_quant_param.second, input_data,
-                                                             memory_manager.get());
-  Tensor output_tensor =
-    makeOutputTensor(toLocoDataType<T>(), output_quant_param.first, output_quant_param.second);
-
-  SoftmaxParams params{};
-  params.beta = 0.1;
-
-  Softmax kernel(&input_tensor, &output_tensor, params);
-  kernel.configure();
-  memory_manager->allocate_memory(output_tensor);
-  kernel.execute();
-
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
-  EXPECT_THAT(dequantizeTensorData(output_tensor),
-              FloatArrayNear(output_data, output_tensor.scale()));
-}
-
-template <typename T> class SoftmaxTest : public ::testing::Test
-{
-};
-
-using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
-TYPED_TEST_SUITE(SoftmaxTest, DataTypes);
-
-TYPED_TEST(SoftmaxTest, Simple)
-{
-  Check<TypeParam>({2, 1, 2, 3}, {2, 1, 2, 3},
-                   {
-                     5, -9, 8,  //
-                     -7, 2, -4, //
-                     1, -2, 9,  //
-                     3, -6, -1, //
-                   },
-                   {
-                     0.38514, 0.09497, 0.51989, //
-                     0.20792, 0.51141, 0.28067, //
-                     0.25212, 0.18678, 0.56110, //
-                     0.48149, 0.19576, 0.32275, //
-                   });
-}
-
-} // namespace
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp
deleted file mode 100644 (file)
index 630cd38..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/SpaceToBatchND.h"
-#include "kernels/Utils.h"
-
-#include "PALSpaceToBatchND.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace
-{
-
-const int kInputMinDimensionNum = 3;
-const int kInputMaxDimensionNum = 4;
-
-} // namespace
-
-SpaceToBatchND::SpaceToBatchND(const Tensor *input, const Tensor *block_shape,
-                               const Tensor *paddings, Tensor *output)
-  : Kernel({input, block_shape, paddings}, {output})
-{
-}
-
-void SpaceToBatchND::configure()
-{
-  const auto *block_shape_data = block_shape()->data<int32_t>();
-  const auto *paddings_data = paddings()->data<int32_t>();
-  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
-  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-
-  int spatial_dims_num = input()->shape().num_dims() - 2;
-
-  LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
-  LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
-
-  LUCI_INTERPRETER_CHECK(paddings()->shape().num_dims() == 2);
-  LUCI_INTERPRETER_CHECK(paddings()->shape().dim(0) == spatial_dims_num);
-  LUCI_INTERPRETER_CHECK(paddings()->shape().dim(1) == 2);
-
-  Shape output_shape = Shape(input()->shape().num_dims());
-  int output_batch_size = input()->shape().dim(0);
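-  // Each spatial dimension is padded and then divided by its block size; the batch
-  // dimension is multiplied by the product of all block sizes.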
-  for (int i = 0; i < spatial_dims_num; ++i)
-  {
-    int final_dim_size =
-      (input()->shape().dim(i + 1) + paddings_data[i * 2] + paddings_data[i * 2 + 1]);
-    LUCI_INTERPRETER_CHECK(final_dim_size % block_shape_data[i] == 0);
-    output_shape.dim(i + 1) = final_dim_size / block_shape_data[i];
-    output_batch_size = output_batch_size * block_shape_data[i];
-  }
-  output_shape.dim(0) = output_batch_size;
-  output_shape.dim(input()->shape().num_dims() - 1) =
-    input()->shape().dim(input()->shape().num_dims() - 1);
-  output()->resize(output_shape);
-}
-
-void SpaceToBatchND::execute() const
-{
-  switch (input()->element_type())
-  {
-    tflite::SpaceToBatchParams op_params;
-    case DataType::FLOAT32:
-      op_params.output_offset = 0;
-      luci_interpreter_pal::SpaceToBatchND(
-        op_params, getTensorShape(input()), getTensorData<float>(input()),
-        getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
-        getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
-        getTensorData<float>(output()));
-      break;
-    case DataType::U8:
-      op_params.output_offset = output()->zero_point();
-      luci_interpreter_pal::SpaceToBatchND(
-        op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
-        getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
-        getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
-        getTensorData<uint8_t>(output()));
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp
deleted file mode 100644 (file)
index 7c29e8c..0000000
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "SpaceToDepth.h"
-#include "Utils.h"
-#include "PALSpaceToDepth.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-SpaceToDepth::SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams &params)
-  : KernelWithParams<SpaceToDepthParams>({input}, {output}, params)
-{
-}
-
-void SpaceToDepth::configure()
-{
-  assert(input()->shape().num_dims() == 4);
-  assert(output()->element_type() == DataType::FLOAT32 ||
-         output()->element_type() == DataType::U8 || output()->element_type() == DataType::S8 ||
-         output()->element_type() == DataType::S32 || output()->element_type() == DataType::S64);
-  assert(input()->element_type() == output()->element_type());
-
-  const int block_size = params().block_size;
-  const int32_t input_height = input()->shape().dim(1);
-  const int32_t input_width = input()->shape().dim(2);
-  int32_t output_height = input_height / block_size;
-  int32_t output_width = input_width / block_size;
-
-  assert(input_height == output_height * block_size);
-  assert(input_width == output_width * block_size);
-
-  Shape output_shape(4);
-  output_shape.dim(0) = input()->shape().dim(0);
-  output_shape.dim(1) = output_height;
-  output_shape.dim(2) = output_width;
-  output_shape.dim(3) = input()->shape().dim(3) * block_size * block_size;
-
-  output()->resize(output_shape);
-}
-
-void SpaceToDepth::execute() const
-{
-  tflite::SpaceToDepthParams op_params{};
-  op_params.block_size = params().block_size;
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()),
-                                         getTensorData<float>(input()), getTensorShape(output()),
-                                         getTensorData<float>(output()));
-      break;
-    case DataType::U8:
-      luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()),
-                                         getTensorData<uint8_t>(input()), getTensorShape(output()),
-                                         getTensorData<uint8_t>(output()));
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp
deleted file mode 100644 (file)
index 1a563f3..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Split.h"
-
-#include "Utils.h"
-
-#include "PALSplit.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Split::Split(const Tensor *axis, const Tensor *input, std::vector<Tensor *> outputs)
-  : Kernel({axis, input}, std::move(outputs))
-{
-}
-
-void Split::configure()
-{
-  assert(axis()->shape().num_elements() == 1);
-  _axis_value = getTensorData<int32_t>(axis())[0];
-  if (_axis_value < 0)
-    _axis_value += input()->shape().num_dims();
-  assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims());
-
-  const int32_t input_size = input()->shape().dim(_axis_value);
-  assert(input_size % _outputs.size() == 0);
-  const int32_t slice_size = input_size / _outputs.size();
-
-  Shape output_shape = input()->shape();
-  output_shape.dim(_axis_value) = slice_size;
-  for (Tensor *output : _outputs)
-  {
-    output->resize(output_shape);
-  }
-}
-
-void Split::execute() const
-{
-  tflite::SplitParams params{};
-  params.num_split = _outputs.size();
-  params.axis = _axis_value;
-
-#define TF_LITE_SPLIT(scalar)                                                                    \
-  {                                                                                              \
-    VectorOfTensors<scalar, false> all_outputs(_outputs);                                        \
-    luci_interpreter_pal::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \
-                                all_outputs.shapes(), all_outputs.data());                       \
-  }
-
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      TF_LITE_SPLIT(float);
-      break;
-    case DataType::U8:
-      TF_LITE_SPLIT(uint8_t);
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-#undef TF_LITE_SPLIT
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp
deleted file mode 100644 (file)
index aa68208..0000000
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "SplitV.h"
-
-#include "Utils.h"
-
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-SplitV::SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis,
-               std::vector<Tensor *> outputs)
-  : Kernel({input, size_splits, axis}, std::move(outputs))
-{
-}
-
-void SplitV::configure()
-{
-  assert(axis()->shape().num_elements() == 1);
-  _axis_value = getTensorData<int32_t>(axis())[0];
-  if (_axis_value < 0)
-    _axis_value += input()->shape().num_dims();
-  assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims());
-
-  auto num_split = static_cast<int32_t>(_outputs.size());
-  auto sizes_data = getTensorData<int32_t>(size_splits());
-
-  assert(size_splits()->shape().num_dims() == 1);
-
-  int32_t sum = 0;
-  const auto num_dims_size_splits = size_splits()->shape().dim(0);
-  int32_t count_neg_dim = 0;
-
-  for (int32_t i = 0; i < num_dims_size_splits - 1; ++i)
-  {
-    if (sizes_data[i] != -1)
-    {
-      sum += sizes_data[i];
-    }
-    else
-    {
-      count_neg_dim++;
-    }
-  }
-  assert(count_neg_dim < 2);
-  assert(size_splits()->shape().num_elements() == num_split);
-
-  auto output_shape = input()->shape();
-  for (int32_t i = 0; i < num_split; ++i)
-  {
-    if (sizes_data[i] == -1)
-    {
-      output_shape.dim(_axis_value) = input()->shape().dim(_axis_value) - sum;
-    }
-    else
-    {
-      output_shape.dim(_axis_value) = sizes_data[i];
-    }
-    _outputs[i]->resize(output_shape);
-  }
-}
-
-void SplitV::execute() const
-{
-  tflite::SplitParams params{};
-  params.num_split = _outputs.size();
-  params.axis = _axis_value;
-
-#define TF_LITE_SPLIT(scalar)                                                                     \
-  {                                                                                               \
-    VectorOfTensors<scalar, false> all_outputs(_outputs);                                         \
-    tflite::optimized_ops::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \
-                                 all_outputs.shapes(), all_outputs.data());                       \
-  }
-
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      TF_LITE_SPLIT(float);
-      break;
-    case DataType::U8:
-      TF_LITE_SPLIT(uint8_t);
-      break;
-    case DataType::S16:
-      TF_LITE_SPLIT(int16_t);
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-#undef TF_LITE_SPLIT
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp
deleted file mode 100644 (file)
index 46e9fc9..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Sqrt.h"
-#include "kernels/Utils.h"
-
-#include <stdexcept>
-#include <cmath>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Sqrt::Sqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-void Sqrt::configure()
-{
-  if (input()->element_type() != output()->element_type())
-  {
-    throw std::runtime_error("Input/output tensor data type mismatch.");
-  }
-  output()->resize(input()->shape());
-}
-
-void Sqrt::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Sqrt::evalFloat() const
-{
-  auto in = getTensorData<float>(input());
-  auto out = getTensorData<float>(output());
-  auto size = getTensorShape(input()).FlatSize();
-  for (auto i = in; i != in + size; ++i)
-  {
-    *out = std::sqrt(*i);
-    ++out;
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp
deleted file mode 100644 (file)
index bc71905..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Square.h"
-#include "kernels/Utils.h"
-
-#include <stdexcept>
-#include <cmath>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Square::Square(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-void Square::configure()
-{
-  if (input()->element_type() != output()->element_type())
-  {
-    throw std::runtime_error("Input/output tensor data type mismatch.");
-  }
-  output()->resize(input()->shape());
-}
-
-void Square::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Square::evalFloat() const
-{
-  auto in = getTensorData<float>(input());
-  auto out = getTensorData<float>(output());
-  auto size = getTensorShape(input()).FlatSize();
-  for (auto i = in; i != in + size; ++i)
-  {
-    *out = (*i) * (*i);
-    ++out;
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp
deleted file mode 100644 (file)
index 3bafeba..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/SquaredDifference.h"
-
-#include "kernels/Utils.h"
-
-#include "kernels/BinaryOpCommon.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-SquaredDifference::SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output)
-  : Kernel({input1, input2}, {output})
-{
-}
-
-void SquaredDifference::configure()
-{
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type())
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type())
-  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
-}
-
-void SquaredDifference::execute() const
-{
-  switch (input1()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalSquaredDifference<float>();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-template <typename T> inline void SquaredDifference::evalSquaredDifference() const
-{
-  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
-                        getTensorShape(input2()), getTensorData<T>(input2()),
-                        getTensorShape(output()), getTensorData<T>(output()), [](T x, T y) {
-                          const T difference = x - y;
-                          return difference * difference;
-                        });
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp
deleted file mode 100644 (file)
index 4a75518..0000000
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Squeeze.h"
-
-#include "kernels/Utils.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Squeeze::Squeeze(const Tensor *input, Tensor *output, const SqueezeParams &params)
-  : KernelWithParams<SqueezeParams>({input}, {output}, params)
-{
-}
-
-void Squeeze::configure()
-{
-  int input_num_dims = input()->shape().num_dims();
-  int num_squeeze_dims = params().squeeze_dims.size();
-  assert(input_num_dims <= 8);
-  bool should_squeeze[8] = {false};
-  int num_squeezed_dims = 0;
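-  // With an empty squeeze_dims list, drop every dimension of size 1; otherwise drop
-  // only the listed dimensions, each of which must have size 1.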
-  if (num_squeeze_dims == 0)
-  {
-    for (int idx = 0; idx < input_num_dims; ++idx)
-    {
-      if (input()->shape().dim(idx) == 1)
-      {
-        should_squeeze[idx] = true;
-        ++num_squeezed_dims;
-      }
-    }
-  }
-  else
-  {
-    for (int idx = 0; idx < num_squeeze_dims; ++idx)
-    {
-      int current = params().squeeze_dims[idx] < 0 ? params().squeeze_dims[idx] + input_num_dims
-                                                   : params().squeeze_dims[idx];
-      assert(current >= 0 && current < input_num_dims && input()->shape().dim(current) == 1);
-      if (!should_squeeze[current])
-        ++num_squeezed_dims;
-      should_squeeze[current] = true;
-    }
-  }
-  Shape output_shape(input_num_dims - num_squeezed_dims);
-  for (int in_idx = 0, out_idx = 0; in_idx < input_num_dims; ++in_idx)
-  {
-    if (!should_squeeze[in_idx])
-    {
-      output_shape.dim(out_idx++) = input()->shape().dim(in_idx);
-    }
-  }
-  output()->resize(output_shape);
-}
-
-void Squeeze::execute() const
-{
-  assert(input()->shape().num_elements() == output()->shape().num_elements());
-
-  const auto *input_data = input()->data<void>();
-  auto *output_data = output()->data<void>();
-  std::memcpy(output_data, input_data,
-              getDataTypeSize(input()->element_type()) * input()->shape().num_elements());
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp
deleted file mode 100644 (file)
index a8730d8..0000000
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/StridedSlice.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/strided_slice.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-StridedSlice::StridedSlice(const Tensor *input, const Tensor *begin, const Tensor *end,
-                           const Tensor *strides, Tensor *output, const StridedSliceParams &params)
-  : KernelWithParams<StridedSliceParams>({input, begin, end, strides}, {output}, params)
-{
-}
-
-void StridedSlice::configure()
-{
-  assert(begin()->shape().num_dims() == 1);
-  assert(end()->shape().num_dims() == 1);
-  assert(strides()->shape().num_dims() == 1);
-  assert(input()->element_type() == output()->element_type());
-  assert(begin()->element_type() == DataType::S32);
-  assert(end()->element_type() == DataType::S32);
-  assert(strides()->element_type() == DataType::S32);
-  assert(input()->shape().num_dims() <= 4);
-  if (params().ellipsis_mask != 0)
-  {
-    throw std::runtime_error("ellipsis_mask is not implemented yet.");
-  }
-  if (params().new_axis_mask != 0)
-  {
-    throw std::runtime_error("new_axis_mask is not implemented yet.");
-  }
-  if (input()->element_type() == DataType::U8)
-  {
-    assert(input()->scale() == output()->scale());
-    assert(input()->zero_point() == output()->zero_point());
-  }
-  tflite::StridedSliceParams op_params{};
-  op_params.start_indices_count = input()->shape().num_dims();
-  op_params.stop_indices_count = input()->shape().num_dims();
-  op_params.strides_count = input()->shape().num_dims();
-
-  for (int i = 0; i < input()->shape().num_dims(); i++)
-  {
-    op_params.start_indices[i] = getTensorData<int32_t>(begin())[i];
-    op_params.stop_indices[i] = getTensorData<int32_t>(end())[i];
-    op_params.strides[i] = getTensorData<int32_t>(strides())[i];
-  }
-  op_params.begin_mask = params().begin_mask;
-  op_params.ellipsis_mask = 0;
-  op_params.end_mask = params().end_mask;
-  op_params.new_axis_mask = 0;
-  op_params.shrink_axis_mask = params().shrink_axis_mask;
-  std::vector<int32_t> output_shape_vector;
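-  // Walk the axes from innermost to outermost, computing each sliced extent with the
-  // TFLite strided-slice helpers; axes in shrink_axis_mask collapse to a single
-  // element and are dropped from the output shape.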
-  for (int i = 0; i < input()->shape().num_dims(); i++)
-  {
-    int idx = input()->shape().num_dims() - i - 1;
-    int32_t stride = getTensorData<int32_t>(strides())[idx];
-    assert(stride != 0);
-    int32_t begin = ::tflite::strided_slice::StartForAxis(op_params, getTensorShape(input()), idx);
-    int32_t end =
-      ::tflite::strided_slice::StopForAxis(op_params, getTensorShape(input()), idx, begin);
-
-    const bool shrink_axis = params().shrink_axis_mask & (1 << idx);
-    if (shrink_axis)
-    {
-      end = begin + 1;
-    }
-
-    int32_t dim_shape = std::ceil((end - begin) / static_cast<float>(stride));
-    dim_shape = dim_shape < 0 ? 0 : dim_shape;
-    if (!shrink_axis)
-    {
-      output_shape_vector.push_back(dim_shape);
-    }
-  }
-  Shape output_shape = Shape(output_shape_vector.size());
-  for (size_t i = 0; i < output_shape_vector.size(); i++)
-  {
-    output_shape.dim(i) = output_shape_vector[output_shape_vector.size() - i - 1];
-  }
-  output()->resize(output_shape);
-}
-
-void StridedSlice::execute() const
-{
-  tflite::StridedSliceParams op_params{};
-  op_params.start_indices_count = input()->shape().num_dims();
-  op_params.stop_indices_count = input()->shape().num_dims();
-  op_params.strides_count = input()->shape().num_dims();
-
-  for (int i = 0; i < input()->shape().num_dims(); i++)
-  {
-    op_params.start_indices[i] = getTensorData<int32_t>(begin())[i];
-    op_params.stop_indices[i] = getTensorData<int32_t>(end())[i];
-    op_params.strides[i] = getTensorData<int32_t>(strides())[i];
-  }
-  op_params.begin_mask = params().begin_mask;
-  op_params.ellipsis_mask = 0;
-  op_params.end_mask = params().end_mask;
-  op_params.new_axis_mask = 0;
-  op_params.shrink_axis_mask = params().shrink_axis_mask;
-
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
-                                          getTensorData<float>(input()), getTensorShape(output()),
-                                          getTensorData<float>(output()));
-      break;
-    case DataType::U8:
-      tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
-                                          getTensorData<uint8_t>(input()), getTensorShape(output()),
-                                          getTensorData<uint8_t>(output()));
-      break;
-    case DataType::S32:
-      tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
-                                          getTensorData<int32_t>(input()), getTensorShape(output()),
-                                          getTensorData<int32_t>(output()));
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp
deleted file mode 100644 (file)
index 24b6a72..0000000
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Sub.h"
-#include "kernels/Utils.h"
-
-#include "PALSub.h"
-
-#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams &params)
-  : KernelWithParams<SubParams>({input1, input2}, {output}, params)
-{
-}
-
-void Sub::configure()
-{
-  LUCI_INTERPRETER_CHECK(!(input1()->element_type() != input2()->element_type()))
-  LUCI_INTERPRETER_CHECK(!(input1()->element_type() != output()->element_type()))
-  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
-}
-
-void Sub::execute() const
-{
-  switch (input1()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::S64:
-      evalInteger<int64_t>();
-      break;
-    case DataType::S32:
-      evalInteger<int32_t>();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Sub::evalFloat() const
-{
-  tflite::ArithmeticParams params{};
-  fillArithmeticActivationRange<float>(params, _params.activation);
-
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    tflite::reference_ops::BroadcastSubSlow(
-      params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
-      getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
-  }
-  else
-  {
-    luci_interpreter_pal::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()),
-                              getTensorShape(input2()), getTensorData<float>(input2()),
-                              getTensorShape(output()), getTensorData<float>(output()));
-  }
-}
-
-template <typename T> void Sub::evalInteger() const
-{
-  tflite::ArithmeticParams params{};
-  fillArithmeticActivationRange<T>(params, _params.activation);
-
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    tflite::reference_ops::BroadcastSubSlow(
-      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
-      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
-  }
-  else
-  {
-    tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<T>(input1()),
-                               getTensorShape(input2()), getTensorData<T>(input2()),
-                               getTensorShape(output()), getTensorData<T>(output()));
-  }
-}
-
-void Sub::evalQuantized() const
-{
-  const auto input1_scale = static_cast<double>(input1()->scale());
-  const auto input2_scale = static_cast<double>(input2()->scale());
-  const auto output_scale = static_cast<double>(output()->scale());
-
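-  // Rescale both inputs to a common scale (twice the larger input scale, with 20 bits
-  // of extra headroom), subtract in that domain, then rescale the result to the
-  // output scale using quantized multipliers.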
-  const int left_shift = 20;
-  const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
-  const double real_input1_multiplier = input1_scale / twice_max_input_scale;
-  const double real_input2_multiplier = input2_scale / twice_max_input_scale;
-  const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
-
-  int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
-  int input1_shift{}, input2_shift{}, output_shift{};
-  quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
-  quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
-  quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
-
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  tflite::ArithmeticParams params{};
-  params.left_shift = left_shift;
-  // The kernel expects inputs' zero points to be negated.
-  params.input1_offset = -input1()->zero_point(); // Note the '-'.
-  params.input1_multiplier = input1_multiplier;
-  params.input1_shift = input1_shift;
-  params.input2_offset = -input2()->zero_point(); // Note the '-'.
-  params.input2_multiplier = input2_multiplier;
-  params.input2_shift = input2_shift;
-  params.output_offset = output()->zero_point();
-  params.output_multiplier = output_multiplier;
-  params.output_shift = output_shift;
-  params.quantized_activation_min = activation_min;
-  params.quantized_activation_max = activation_max;
-
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    tflite::reference_ops::BroadcastSubSlow(
-      params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
-      getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
-  }
-  else
-  {
-    tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
-                               getTensorShape(input2()), getTensorData<uint8_t>(input2()),
-                               getTensorShape(output()), getTensorData<uint8_t>(output()));
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp
deleted file mode 100644 (file)
index c4fa169..0000000
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Tanh.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/tanh.h>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Tanh::Tanh(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-void Tanh::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  if (input()->element_type() == DataType::U8)
-  {
-    populateLookupTable();
-  }
-  output()->resize(input()->shape());
-}
-
-void Tanh::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void Tanh::evalFloat() const
-{
-  tflite::reference_ops::Tanh(getTensorShape(input()), getTensorData<float>(input()),
-                              getTensorShape(output()), getTensorData<float>(output()));
-}
-
-void Tanh::evalQuantized() const
-{
-  const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
-  uint8_t *output_data = getTensorData<uint8_t>(output());
-  const uint8_t *input_data = getTensorData<uint8_t>(input());
-  for (int i = 0; i < size; ++i)
-  {
-    output_data[i] = getTableValue(input_data[i]);
-  }
-}
-
-void Tanh::populateLookupTable()
-{
-  const auto input_scale = static_cast<double>(input()->scale());
-  const auto input_zero_point = static_cast<int32_t>(input()->zero_point());
-  const auto output_scale = static_cast<double>(output()->scale());
-  const auto output_zero_point = static_cast<int32_t>(output()->zero_point());
-  const float inverse_scale = 1 / output_scale;
-  int32_t maxval = std::numeric_limits<uint8_t>::max();
-  int32_t minval = std::numeric_limits<uint8_t>::min();
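-  // For every representable uint8 input value, dequantize it, apply tanh, requantize
-  // with the output scale and zero point, and store the clamped result in the table.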
-  for (int32_t val = minval; val <= maxval; ++val)
-  {
-    const float dequantized = input_scale * (val - input_zero_point);
-    const float transformed = std::tanh(dequantized);
-    const float rescaled = std::round(transformed * inverse_scale);
-    const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
-    setTableValue(static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)),
-                  static_cast<uint8_t>(val));
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp
deleted file mode 100644 (file)
index 4d983ad..0000000
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/TestUtils.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace testing
-{
-
-using ::testing::FloatNear;
-using ::testing::Matcher;
-
-Tensor makeOutputTensor(DataType element_type) { return Tensor(element_type, {}, {}, ""); }
-
-Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point)
-{
-  return Tensor(element_type, {}, {{scale}, {zero_point}}, "");
-}
-
-std::vector<float> dequantizeTensorData(const Tensor &tensor)
-{
-  if (tensor.element_type() == DataType::U8)
-  {
-    std::vector<uint8_t> data = extractTensorData<uint8_t>(tensor);
-    return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point());
-  }
-  if (tensor.element_type() == DataType::S8)
-  {
-    std::vector<int8_t> data = extractTensorData<int8_t>(tensor);
-    return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point());
-  }
-  else if (tensor.element_type() == DataType::S16)
-  {
-    // S16 quantization is symmetric, so zero point should be zero.
-    for (auto zp : tensor.zero_points())
-    {
-      (void)zp;
-      assert(zp == 0);
-    }
-
-    std::vector<int16_t> data = extractTensorData<int16_t>(tensor);
-    if (tensor.scales().size() == 1)
-    {
-      return dequantize(data.data(), data.size(), tensor.scale(), 0);
-    }
-
-    // quantized_dimension splits the shape into two parts:
-    // inner dimensions, which hold contiguous data sharing one quantization scale
-    // outer dimensions, which cover the remaining axes
-    const Shape shape = tensor.shape();
-    const int32_t quantized_dimension = tensor.quantized_dimension();
-    assert(quantized_dimension < shape.num_dims());
-    size_t outer_dims_size = 1;
-    int32_t quant_dim_size = shape.dim(quantized_dimension);
-    size_t inner_dims_size = 1;
-    assert(quant_dim_size == tensor.scales().size());
-
-    for (int i = 0; i < quantized_dimension; ++i)
-      outer_dims_size *= shape.dim(i);
-    for (int i = quantized_dimension + 1; i < shape.num_dims(); ++i)
-      inner_dims_size *= shape.dim(i);
-
-    assert(shape.num_elements() == outer_dims_size * quant_dim_size * inner_dims_size);
-
-    std::vector<float> dequantized_data;
-    dequantized_data.reserve(shape.num_elements());
-    for (size_t outer_it = 0; outer_it < outer_dims_size; ++outer_it)
-      for (int32_t channel = 0; channel < quant_dim_size; ++channel)
-      {
-        float scale = tensor.scales()[channel];
-        size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel);
-        std::vector<float> part_dequantized_data =
-          dequantize(data.data() + offset, inner_dims_size, scale, 0);
-        dequantized_data.insert(dequantized_data.end(), part_dequantized_data.begin(),
-                                part_dequantized_data.end());
-      }
-    return dequantized_data;
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values, float max_abs_error)
-{
-  std::vector<Matcher<float>> matchers;
-  matchers.reserve(values.size());
-  for (const float v : values)
-  {
-    matchers.emplace_back(FloatNear(v, max_abs_error));
-  }
-  return ElementsAreArray(matchers);
-}
-
-std::vector<int32_t> extractTensorShape(const Tensor &tensor)
-{
-  std::vector<int32_t> result;
-  int dims = tensor.shape().num_dims();
-  for (int i = 0; i < dims; i++)
-  {
-    result.push_back(tensor.shape().dim(i));
-  }
-  return result;
-}
-
-} // namespace testing
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.cpp
deleted file mode 100644 (file)
index 802d872..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Transpose.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/transpose.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Transpose::Transpose(const Tensor *input, const Tensor *perm, Tensor *output)
-  : Kernel({input, perm}, {output})
-{
-}
-
-void Transpose::configure()
-{
-  // Transpose op only supports 1D-4D input arrays.
-  int dims = input()->shape().num_dims();
-  const int32_t *perm_data = getTensorData<int32_t>(perm());
-
-  assert(input()->shape().num_dims() <= 4);
-  assert(input()->element_type() == output()->element_type());
-
-  assert(perm()->shape().num_dims() == 1);
-  assert(perm()->shape().dim(0) == dims);
-
-  Shape output_shape(dims);
-  for (int i = 0; i < dims; i++)
-  {
-    assert(perm_data[i] < dims && perm_data[i] >= 0);
-    output_shape.dim(i) = input()->shape().dim(perm_data[i]);
-  }
-
-  output()->resize(output_shape);
-}
-
-void Transpose::execute() const
-{
-  tflite::TransposeParams params{};
-  const int32_t *perm_data = getTensorData<int32_t>(perm());
-  const int32_t size = perm()->shape().dim(0);
-  params.perm_count = size;
-  for (int i = 0; i < size; i++)
-    params.perm[i] = perm_data[i];
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      tflite::reference_ops::Transpose(params, getTensorShape(input()),
-                                       getTensorData<float>(input()), getTensorShape(output()),
-                                       getTensorData<float>(output()));
-      break;
-    case DataType::U8:
-      tflite::reference_ops::Transpose(params, getTensorShape(input()),
-                                       getTensorData<uint8_t>(input()), getTensorShape(output()),
-                                       getTensorData<uint8_t>(output()));
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp
deleted file mode 100644 (file)
index 1b5f9d9..0000000
+++ /dev/null
@@ -1,351 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/TransposeConv.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/transpose_conv.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
-                             const Tensor *bias, Tensor *output, Tensor *scratch_tensor,
-                             const TransposeConvParams &params)
-  : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias},
-                                          {output, scratch_tensor}, params)
-{
-}
-
-TransposeConv::~TransposeConv()
-{
-  // Define destructor here, to delete the vector of quantized multipliers properly
-}
-
-void TransposeConv::configure()
-{
-  assert(output_shape()->shape().num_dims() == 1);
-  assert(input()->shape().num_dims() == 4);
-  assert(filter()->shape().num_dims() == 4);
-  assert(input()->element_type() == DataType::FLOAT32 || input()->element_type() == DataType::U8 ||
-         input()->element_type() == DataType::S16);
-  assert(input()->element_type() == output()->element_type());
-  assert(input()->shape().dim(3) == filter()->shape().dim(3));
-
-  const int num_dims = output_shape()->shape().dim(0);
-  Shape out_shape(num_dims);
-  const auto *shape_data = getTensorData<int32_t>(output_shape());
-  for (int i = 0; i < num_dims; i++)
-    out_shape.dim(i) = shape_data[i];
-  output()->resize(out_shape);
-
-  const int32_t filter_height = filter()->shape().dim(1);
-  const int32_t filter_width = filter()->shape().dim(2);
-  const int32_t output_height = out_shape.dim(1);
-  const int32_t output_width = out_shape.dim(2);
-
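-  // Recover the implicit padding by applying the forward-convolution output-size
-  // formula to the requested output dimensions and solving for the padding that
-  // keeps the shapes consistent.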
-  const int32_t unused_output_height =
-    computeOutputSize(params().padding, output_height, filter_height, params().stride_height, 1);
-  const int32_t unused_output_width =
-    computeOutputSize(params().padding, output_width, filter_width, params().stride_width, 1);
-
-  _padding_height =
-    computePadding(params().stride_height, 1, output_height, filter_height, unused_output_height);
-  _padding_width =
-    computePadding(params().stride_width, 1, output_width, filter_width, unused_output_width);
-
-  if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
-  {
-    auto scratch_tensor = getOutputTensors()[1];
-    scratch_tensor->resize(output()->shape());
-    const std::vector<double> real_multipliers =
-      getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
-
-    _quant_multipliers = quantizeMultipliers(real_multipliers);
-  }
-  else
-  {
-    auto scratch_tensor = getOutputTensors()[1];
-    scratch_tensor->set_allocatable(false);
-  }
-}
-
-void TransposeConv::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::U8:
-      if (filter()->scales().size() == 1)
-      {
-        evalQuantized();
-      }
-      else if (filter()->scales().size() > 1)
-      {
-        LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
-        LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
-                               static_cast<size_t>(filter()->shape().dim(0)));
-        evalQuantizedPerChannel();
-      }
-      break;
-    case DataType::S16:
-      evalQuantizedS16();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-void TransposeConv::evalFloat() const
-{
-  tflite::ConvParams op_params{};
-  op_params.padding_type = tflite::PaddingType::kSame;
-  op_params.padding_values.height = _padding_height;
-  op_params.padding_values.width = _padding_width;
-  op_params.stride_height = params().stride_height;
-  op_params.stride_width = params().stride_width;
-  tflite::reference_ops::TransposeConv(op_params,                                                //
-                                       getTensorShape(input()), getTensorData<float>(input()),   //
-                                       getTensorShape(filter()), getTensorData<float>(filter()), //
-                                       getTensorShape(bias()), getTensorData<float>(bias()),     //
-                                       getTensorShape(output()), getTensorData<float>(output()), //
-                                       tflite::RuntimeShape(), nullptr);
-}
-
-void TransposeConv::evalQuantized() const
-{
-  tflite::ConvParams op_params{};
-  op_params.padding_type = tflite::PaddingType::kSame;
-  op_params.padding_values.height = _padding_height;
-  op_params.padding_values.width = _padding_width;
-  op_params.stride_height = params().stride_height;
-  op_params.stride_width = params().stride_width;
-  // The kernel expects input and filter zero points to be negated.
-  op_params.input_offset = -input()->zero_point();    // Note the '-'.
-  op_params.weights_offset = -filter()->zero_point(); // Note the '-'.
-  op_params.output_offset = output()->zero_point();
-  op_params.output_multiplier = _quant_multipliers[0].multiplier;
-  op_params.output_shift = _quant_multipliers[0].shift;
-  op_params.quantized_activation_min = std::numeric_limits<uint8_t>::min();
-  op_params.quantized_activation_max = std::numeric_limits<uint8_t>::max();
-
-  auto scratch_tensor = getOutputTensors()[1];
-
-  tflite::reference_ops::TransposeConv(op_params,                                                //
-                                       getTensorShape(input()), getTensorData<uint8>(input()),   //
-                                       getTensorShape(filter()), getTensorData<uint8>(filter()), //
-                                       getTensorShape(bias()), getTensorData<int32_t>(bias()),   //
-                                       getTensorShape(output()), getTensorData<uint8>(output()), //
-                                       tflite::RuntimeShape(), nullptr,                          //
-                                       getTensorData<int32_t>(scratch_tensor));
-}
-
-void TransposeConv::evalQuantizedPerChannel() const
-{
-  const auto *input_data = getTensorData<uint8_t>(input());
-  const auto *filter_data = getTensorData<uint8_t>(filter());
-  const auto *bias_data = getTensorData<int32_t>(bias());
-  auto *output_data = getTensorData<uint8_t>(output());
-
-  auto scratch_tensor = getOutputTensors()[1];
-  auto *scratch_data = getTensorData<int32_t>(scratch_tensor);
-
-  const Shape &input_shape = input()->shape();
-  const Shape &filter_shape = filter()->shape();
-  const Shape &output_shape = output()->shape();
-
-  const int32_t batches = input_shape.dim(0);
-  const int32_t input_height = input_shape.dim(1);
-  const int32_t input_width = input_shape.dim(2);
-  const int32_t input_depth = input_shape.dim(3);
-  const int32_t output_depth = filter_shape.dim(0);
-  const int32_t filter_height = filter_shape.dim(1);
-  const int32_t filter_width = filter_shape.dim(2);
-  const int32_t output_height = output_shape.dim(1);
-  const int32_t output_width = output_shape.dim(2);
-
-  const int32_t stride_height = _params.stride_height;
-  const int32_t stride_width = _params.stride_width;
-
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
-
-  std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int32_t));
-
-  BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
-  for (int32_t batch = 0; batch < batches; ++batch)
-  {
-    for (int32_t in_y = 0; in_y < input_height; ++in_y)
-    {
-      for (int32_t in_x = 0; in_x < input_width; ++in_x)
-      {
-        for (int32_t in_c = 0; in_c < input_depth; ++in_c)
-        {
-          const int32_t out_y_origin = in_y * stride_height - _padding_height;
-          const int32_t out_x_origin = in_x * stride_width - _padding_width;
-          for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
-          {
-            for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
-            {
-              const int32_t out_x = out_x_origin + filter_x;
-              const int32_t out_y = out_y_origin + filter_y;
-              if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
-              {
-                for (int32_t out_c = 0; out_c < output_depth; ++out_c)
-                {
-                  const uint8_t input_val =
-                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
-                  const uint8_t filter_val =
-                    filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
-                  scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
-                    static_cast<int32_t>(input_val - input()->zero_point()) *
-                    static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
-                }
-              }
-            }
-          }
-        }
-      }
-    }
-    for (int32_t out_y = 0; out_y < output_height; ++out_y)
-    {
-      for (int32_t out_x = 0; out_x < output_width; ++out_x)
-      {
-        for (int32_t out_c = 0; out_c < output_depth; ++out_c)
-        {
-          int32_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
-          if (bias_data)
-          {
-            acc += bias_data[out_c];
-          }
-
-          int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
-            acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
-
-          scaled_acc += output()->zero_point();
-          scaled_acc = std::max(scaled_acc, activation_min);
-          scaled_acc = std::min(scaled_acc, activation_max);
-
-          output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
-        }
-      }
-    }
-  }
-}
-
-void TransposeConv::evalQuantizedS16() const
-{
-  const auto *input_data = getTensorData<int16_t>(input());
-  const auto *filter_data = getTensorData<int16_t>(filter());
-  const auto *bias_data = getTensorData<int64_t>(bias());
-  auto *output_data = getTensorData<int16_t>(output());
-
-  auto scratch_tensor = getOutputTensors()[1];
-  auto *scratch_data = getTensorData<int64_t>(scratch_tensor);
-
-  const Shape &input_shape = input()->shape();
-  const Shape &filter_shape = filter()->shape();
-  const Shape &output_shape = output()->shape();
-
-  const int32_t batches = input_shape.dim(0);
-  const int32_t input_height = input_shape.dim(1);
-  const int32_t input_width = input_shape.dim(2);
-  const int32_t input_depth = input_shape.dim(3);
-  const int32_t output_depth = filter_shape.dim(0);
-  const int32_t filter_height = filter_shape.dim(1);
-  const int32_t filter_width = filter_shape.dim(2);
-  const int32_t output_height = output_shape.dim(1);
-  const int32_t output_width = output_shape.dim(2);
-
-  const int32_t stride_height = _params.stride_height;
-  const int32_t stride_width = _params.stride_width;
-
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
-
-  std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int64_t));
-
-  BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
-  for (int32_t batch = 0; batch < batches; ++batch)
-  {
-    for (int32_t in_y = 0; in_y < input_height; ++in_y)
-    {
-      for (int32_t in_x = 0; in_x < input_width; ++in_x)
-      {
-        for (int32_t in_c = 0; in_c < input_depth; ++in_c)
-        {
-          const int32_t out_y_origin = in_y * stride_height - _padding_height;
-          const int32_t out_x_origin = in_x * stride_width - _padding_width;
-          for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
-          {
-            for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
-            {
-              const int32_t out_x = out_x_origin + filter_x;
-              const int32_t out_y = out_y_origin + filter_y;
-              if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
-              {
-                for (int32_t out_c = 0; out_c < output_depth; ++out_c)
-                {
-                  const int16_t input_val =
-                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
-                  const int16_t filter_val =
-                    filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
-                  scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
-                    static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
-                }
-              }
-            }
-          }
-        }
-      }
-    }
-    for (int32_t out_y = 0; out_y < output_height; ++out_y)
-    {
-      for (int32_t out_x = 0; out_x < output_width; ++out_x)
-      {
-        for (int32_t out_c = 0; out_c < output_depth; ++out_c)
-        {
-          int64_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
-          if (bias_data)
-          {
-            acc += bias_data[out_c];
-          }
-          int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
-            acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
-
-          scaled_acc = std::max(scaled_acc, activation_min);
-          scaled_acc = std::min(scaled_acc, activation_max);
-
-          output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
-        }
-      }
-    }
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
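
The padding computation removed above follows TFLite's SAME/VALID conventions, with the requested transposed-convolution output acting as the forward convolution's input. A minimal standalone sketch of that arithmetic (simplified helpers and illustrative values only, not part of this patch):

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Simplified versions of the kernels/Utils.h helpers (dilation fixed to 1).
int32_t computeOutputSizeSame(int32_t image_size, int32_t stride)
{
  return (image_size + stride - 1) / stride; // Padding::SAME
}

int32_t computePadding(int32_t stride, int32_t in_size, int32_t filter_size, int32_t out_size)
{
  const int32_t padding = ((out_size - 1) * stride + filter_size - in_size) / 2;
  return std::max<int32_t>(padding, 0);
}

int main()
{
  // 2x-upsampling transposed convolution: stride 2, 5x5 filter, requested 8x8 output.
  const int32_t output_h = 8, filter_h = 5, stride_h = 2;
  const int32_t unused_h = computeOutputSizeSame(output_h, stride_h); // 4
  std::printf("padding_height = %d\n",
              computePadding(stride_h, output_h, filter_h, unused_h)); // prints 1
}
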
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp
deleted file mode 100644 (file)
index 9127241..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Unpack.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Unpack::Unpack(const Tensor *input, std::vector<Tensor *> outputs, const UnpackParams &params)
-  : KernelWithParams<UnpackParams>({input}, std::move(outputs), params)
-{
-}
-
-void Unpack::configure()
-{
-  const Shape &input_shape = input()->shape();
-
-  int axis = _params.axis;
-  if (axis < 0)
-    axis += input()->shape().num_dims();
-  assert(axis >= 0 && axis < input_shape.num_dims());
-
-  Shape output_shape(input_shape.num_dims() - 1);
-  int out_index = 0;
-  for (int in_index = 0; in_index < input_shape.num_dims(); ++in_index)
-  {
-    if (in_index != axis)
-      output_shape.dim(out_index++) = input_shape.dim(in_index);
-  }
-
-  for (Tensor *output : _outputs)
-  {
-    assert(output->element_type() == input()->element_type());
-    output->resize(output_shape);
-  }
-}
-
-template <typename T> void Unpack::executeImpl() const
-{
-  tflite::UnpackParams params{};
-  params.axis = _params.axis;
-  params.num_split = _outputs.size();
-  VectorOfTensors<T, false> all_outputs(_outputs);
-  tflite::reference_ops::Unpack<T>(params, getTensorShape(input()), getTensorData<T>(input()),
-                                   **all_outputs.shapes(), all_outputs.data());
-}
-
-void Unpack::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      return executeImpl<float>();
-    case DataType::U8:
-      return executeImpl<uint8_t>();
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
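
The Unpack kernel removed above derives each output shape by dropping the unpack axis from the input shape. A rough standalone sketch of that rule (hypothetical helper, not part of this patch):

#include <cstdio>
#include <vector>

// Unpacking a [2, 3, 4] tensor along axis 1 yields 3 outputs of shape [2, 4].
std::vector<int> unpackOutputShape(const std::vector<int> &input_shape, int axis)
{
  if (axis < 0)
    axis += static_cast<int>(input_shape.size()); // negative axes count from the end
  std::vector<int> out;
  for (int i = 0; i < static_cast<int>(input_shape.size()); ++i)
    if (i != axis)
      out.push_back(input_shape[i]);
  return out;
}

int main()
{
  for (int d : unpackOutputShape({2, 3, 4}, 1))
    std::printf("%d ", d); // prints: 2 4
}
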
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp
deleted file mode 100644 (file)
index 5d8e5db..0000000
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Utils.h"
-
-#include <cassert>
-#include <cmath>
-#include <limits>
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-template <typename T>
-void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
-{
-  switch (activation)
-  {
-    case Activation::NONE:
-      *activation_min = std::numeric_limits<T>::lowest();
-      *activation_max = std::numeric_limits<T>::max();
-      break;
-    case Activation::RELU:
-      *activation_min = 0;
-      *activation_max = std::numeric_limits<T>::max();
-      break;
-    case Activation::RELU_N1_TO_1:
-      *activation_min = -1;
-      *activation_max = 1;
-      break;
-    case Activation::RELU6:
-      *activation_min = 0;
-      *activation_max = 6;
-      break;
-    default:
-      throw std::runtime_error("Unsupported activation.");
-  }
-}
-
-template void calculateActivationRange(Activation activation, float *activation_min,
-                                       float *activation_max);
-template void calculateActivationRange(Activation activation, int32_t *activation_min,
-                                       int32_t *activation_max);
-template void calculateActivationRange(Activation activation, int64_t *activation_min,
-                                       int64_t *activation_max);
-
-static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
-                                                  const Tensor *output, int32_t *activation_min,
-                                                  int32_t *activation_max)
-{
-  const float scale = output->scale();
-  const int32_t zero_point = output->zero_point();
-
-  auto quantize = [scale, zero_point](float x) {
-    return zero_point + static_cast<int32_t>(std::round(x / scale));
-  };
-
-  switch (activation)
-  {
-    case Activation::NONE:
-    case Activation::TANH:
-      *activation_min = qmin;
-      *activation_max = qmax;
-      break;
-    case Activation::RELU:
-      *activation_min = std::max(qmin, quantize(0.0f));
-      *activation_max = qmax;
-      break;
-    case Activation::RELU_N1_TO_1:
-      *activation_min = std::max(qmin, quantize(-1.0f));
-      *activation_max = std::min(qmax, quantize(1.0f));
-      break;
-    case Activation::RELU6:
-      *activation_min = std::max(qmin, quantize(0.0f));
-      *activation_max = std::min(qmax, quantize(6.0f));
-      break;
-    default:
-      throw std::runtime_error("Unsupported activation.");
-  }
-}
-
-void calculateActivationRangeQuantized(Activation activation, const Tensor *output,
-                                       int32_t *activation_min, int32_t *activation_max)
-{
-  assert(output->zero_points().size() == 1);
-  int32_t qmin{};
-  int32_t qmax{};
-  switch (output->element_type())
-  {
-    case DataType::U8:
-      qmin = 0;
-      qmax = std::numeric_limits<uint8_t>::max();
-      break;
-    case DataType::S8:
-      qmin = -std::numeric_limits<int8_t>::max();
-      qmax = std::numeric_limits<int8_t>::max();
-      break;
-    case DataType::S16:
-      // For now, assume that signed int16 type implies signed symmetric quantization.
-      assert(output->zero_point() == 0);
-      qmin = -std::numeric_limits<int16_t>::max();
-      qmax = std::numeric_limits<int16_t>::max();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-
-  calculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, activation_min,
-                                        activation_max);
-}
-
-void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
-{
-  if (double_multiplier == 0.0)
-  {
-    *quantized_multiplier = 0;
-    *shift = 0;
-    return;
-  }
-
-  const double q = std::frexp(double_multiplier, shift);
-  auto q_fixed = static_cast<int64_t>(std::round(q * (INT64_C(1) << 31)));
-
-  if (q_fixed == (INT64_C(1) << 31))
-  {
-    q_fixed /= 2;
-    ++*shift;
-  }
-  assert(q_fixed <= std::numeric_limits<int32_t>::max());
-  // A shift amount smaller than -31 would cause all bits to be shifted out
-  // and thus all results would be zero. We implement that instead with
-  // q_fixed==0, so as to avoid hitting issues with right-shift
-  // operations with shift amounts greater than 31. Note that this happens
-  // roughly when abs(double_multiplier) < 2^-31 and the present handling means
-  // that we're effectively flushing tiny double_multiplier's to zero.
-  // We could conceivably handle values in the range (roughly) [32, 63]
-// as 'denormals' i.e. (shift==0, q_fixed < 2^30). From that point of view
-  // the present handling is just doing 'flush denormals to zero'. We could
-  // reconsider and actually generate nonzero denormals if a need arises.
-  if (*shift < -31)
-  {
-    *shift = 0;
-    q_fixed = 0;
-  }
-  *quantized_multiplier = static_cast<int32_t>(q_fixed);
-}
-
-void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
-                                         int *left_shift)
-{
-  assert(double_multiplier < 1.0);
-  assert(double_multiplier > 0.0);
-  int shift;
-  quantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
-  assert(shift <= 0);
-  *left_shift = shift;
-}
-
-Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape)
-{
-  const int num_input1_dims = input1_shape.num_dims();
-  const int num_input2_dims = input2_shape.num_dims();
-  const int num_out_dims = std::max(num_input1_dims, num_input2_dims);
-  Shape output_shape(num_out_dims);
-
-  for (int i = 0; i < num_out_dims; ++i)
-  {
-    const int32_t input1_dim = i < num_input1_dims ? input1_shape.dim(num_input1_dims - i - 1) : 1;
-    const int32_t input2_dim = i < num_input2_dims ? input2_shape.dim(num_input2_dims - i - 1) : 1;
-
-    bool need_broadcast = input1_dim != input2_dim;
-    bool can_broadcast = input1_dim == 1 || input2_dim == 1;
-    LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast);
-
-    output_shape.dim(num_out_dims - i - 1) = std::max(input1_dim, input2_dim);
-  }
-
-  return output_shape;
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
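
quantizeMultiplier() above is the standard TFLite-style decomposition of a floating-point multiplier into a Q0.31 fixed-point significand plus a power-of-two shift. A small standalone sketch of that decomposition (illustrative value only, not part of this patch):

#include <cmath>
#include <cstdint>
#include <cstdio>

int main()
{
  const double multiplier = 0.75;
  int shift = 0;
  // frexp() gives q in [0.5, 1) with multiplier == q * 2^shift; here q = 0.75, shift = 0.
  const double q = std::frexp(multiplier, &shift);
  const auto q_fixed = static_cast<int64_t>(std::round(q * (INT64_C(1) << 31)));
  std::printf("quantized_multiplier=%d shift=%d\n", static_cast<int32_t>(q_fixed), shift);
  // Reconstruction: multiplier ~= q_fixed / 2^31 * 2^shift
  std::printf("reconstructed=%f\n", q_fixed / std::pow(2.0, 31) * std::pow(2.0, shift));
}
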
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Utils.h b/compiler/luci-micro/luci-interpreter/src/kernels/Utils.h
deleted file mode 100644 (file)
index ebeb20e..0000000
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_KERNELS_UTILS_H
-#define LUCI_INTERPRETER_KERNELS_UTILS_H
-
-#include "core/KernelParams.h"
-#include "luci_interpreter/core/Tensor.h"
-
-#include <tensorflow/lite/kernels/internal/types.h>
-
-#include <cassert>
-#include <cstdint>
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-#define LUCI_INTERPRETER_CHECK(cond)                                                         \
-  if (!(cond))                                                                               \
-    throw std::runtime_error(std::string(__FILE__) + ":" + std::to_string(__LINE__) + "(" +   \
-                             std::string(#cond) + ") was not true.");
-
-inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
-                              int32_t filter_size, int32_t out_size)
-{
-  const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
-  const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
-  return padding > 0 ? padding : 0;
-}
-
-inline int32_t computePaddingWithOffset(int32_t stride, int32_t dilation_rate, int32_t in_size,
-                                        int32_t filter_size, int32_t out_size, int32_t *offset)
-{
-  int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
-  int32_t total_padding = ((out_size - 1) * stride + effective_filter_size - in_size);
-  total_padding = total_padding > 0 ? total_padding : 0;
-  *offset = total_padding % 2;
-  return total_padding / 2;
-}
-
-inline int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size,
-                                 int32_t stride, int32_t dilation_rate = 1)
-{
-  const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
-  switch (padding)
-  {
-    case Padding::SAME:
-      return (image_size + stride - 1) / stride;
-    case Padding::VALID:
-      return (image_size + stride - effective_filter_size) / stride;
-    default:
-      assert(false);
-      return 0;
-  }
-}
-
-inline int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
-{
-  return ((d0 * shape.dim(1) + d1) * shape.dim(2) + d2) * shape.dim(3) + d3;
-}
-
-template <typename T>
-void calculateActivationRange(Activation activation, T *activation_min, T *activation_max);
-
-void calculateActivationRangeQuantized(Activation activation, const Tensor *output,
-                                       int32_t *activation_min, int32_t *activation_max);
-
-template <typename T> constexpr bool one_of_types() { return false; }
-
-// Checks if T is equal to one of {U,Other} types
-template <typename T, typename U, typename... Other> constexpr bool one_of_types()
-{
-  return std::is_same<T, U>::value || one_of_types<T, Other...>();
-}
-
-/**
- * Fills activation min and max parameters depending on given data type and activation
- *
- * T is a template parameter, so after optimization this code is left with only the required if case
- *
- * @tparam T data type of arithmetic operation output tensor
- * @param p tflite params to fill
- * @param act luci_interpreter::Activation of the arithmetic operation
- */
-template <typename T>
-void fillArithmeticActivationRange(tflite::ArithmeticParams &p, Activation act)
-{
-  static_assert(one_of_types<T, float, int32_t, int64_t>(), "Unsupported dtype");
-
-  if (std::is_same<T, float>::value)
-    calculateActivationRange(act, &p.float_activation_min, &p.float_activation_max);
-  else if (std::is_same<T, int32_t>::value)
-    calculateActivationRange(act, &p.quantized_activation_min, &p.quantized_activation_max);
-  else
-    calculateActivationRange(act, &p.int64_activation_min, &p.int64_activation_max);
-}
-
-// Decompose a double multiplier into a Q0.31 int32 representation of its
-// significand, and shift representation of its exponent.
-//
-// Handles an arbitrary positive multiplier. The 'shift' output-value is
-// basically the 'floating-point exponent' of the multiplier:
-// Negative for a right-shift (when the multiplier is <1), positive for a
-// left-shift (when the multiplier is >1)
-void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
-
-// Decompose a double multiplier into a Q0.31 int32 representation of its
-// significand, and a shift representation of the negative of its exponent ---
-// this is intended as a RIGHT-shift.
-//
-// Restricted to the case where the multiplier < 1 (and non-negative).
-void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
-                                         int *left_shift);
-
-Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape);
-
-inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale,
-                                               float output_scale)
-{
-  const double input_product_scale = static_cast<double>(input_scale * filter_scale);
-  LUCI_INTERPRETER_CHECK(input_product_scale >= 0);
-  return input_product_scale / static_cast<double>(output_scale);
-}
-
-// TODO rename getQuantizedConvolutionMultiplers to something more general
-// it is used for non-conv operators too
-inline std::vector<double> getQuantizedConvolutionMultiplers(float input_scale,
-                                                             const std::vector<float> &filter_scale,
-                                                             float output_scale)
-{
-  std::vector<double> effective_output_scales;
-  size_t n = filter_scale.size();
-  effective_output_scales.reserve(n);
-  for (size_t i = 0; i < n; ++i)
-  {
-    effective_output_scales.push_back(
-      getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale));
-  }
-  return effective_output_scales;
-}
-
-struct ChannelQuantMultipliers
-{
-  int shift;
-  int32_t multiplier;
-  ChannelQuantMultipliers() = default;
-};
-
-inline std::vector<ChannelQuantMultipliers>
-quantizeMultipliers(const std::vector<double> &effective_scale)
-{
-  size_t n = effective_scale.size();
-  std::vector<ChannelQuantMultipliers> params(n);
-  for (size_t i = 0; i < n; ++i)
-  {
-    quantizeMultiplier(effective_scale[i], &params[i].multiplier, &params[i].shift);
-  }
-  return params;
-}
-
-// Helper wrapper to hide broadcast logic
-template <typename T> class BroadcastableWrapper
-{
-public:
-  BroadcastableWrapper(const std::vector<T> &v) : _v(v), _stride(v.size() == 1 ? 0 : 1) {}
-
-  T operator[](int idx) { return _v[idx * _stride]; }
-
-private:
-  const std::vector<T> &_v;
-  int _stride;
-};
-
-inline tflite::RuntimeShape getTensorShape(const Tensor *tensor)
-{
-  if (tensor == nullptr)
-    return tflite::RuntimeShape();
-
-  const Shape &shape = tensor->shape();
-  tflite::RuntimeShape runtime_shape(shape.num_dims());
-  for (int i = 0; i < shape.num_dims(); ++i)
-  {
-    runtime_shape.SetDim(i, shape.dim(i));
-  }
-  return runtime_shape;
-}
-
-template <typename T> const T *getTensorData(const Tensor *tensor)
-{
-  return tensor != nullptr ? tensor->data<T>() : nullptr;
-}
-
-template <typename T> T *getTensorData(Tensor *tensor)
-{
-  return tensor != nullptr ? tensor->data<T>() : nullptr;
-}
-
-// A list of tensors in a format that can be used by kernels like split and
-// concatenation.
-template <typename T, bool is_const> class VectorOfTensors
-{
-public:
-  using ElementT = typename std::conditional<is_const, const T, T>::type;
-  using TensorT = typename std::conditional<is_const, const Tensor, Tensor>::type;
-
-  // Build with the tensors in 'tensor_list'.
-  explicit VectorOfTensors(const std::vector<TensorT *> &tensor_list)
-  {
-    const int num_tensors = tensor_list.size();
-
-    all_data_.reserve(num_tensors);
-    all_shape_.reserve(num_tensors);
-    all_shape_ptr_.reserve(num_tensors);
-
-    for (TensorT *tensor : tensor_list)
-    {
-      all_data_.push_back(getTensorData<T>(tensor));
-      all_shape_.push_back(getTensorShape(tensor));
-    }
-
-    // Taking the pointer from inside a std::vector is only OK if the vector is
-    // never modified, so we populate all_shape in the previous loop and then we
-    // are free to grab iterators here.
-    for (tflite::RuntimeShape &shape : all_shape_)
-    {
-      all_shape_ptr_.push_back(&shape);
-    }
-  }
-  // Return a pointer to the data pointers of all tensors in the list. For
-  // example:
-  //   float* const* f = v.data();
-  //   f[0][1] is the second element of the first tensor.
-  ElementT *const *data() const { return all_data_.data(); }
-
-  // Return a pointer to the shape pointers of all tensors in the list. For
-  // example:
-  //   const RuntimeShape* const* s = v.shapes();
-  //   s[1] are the dimensions of the second tensor in the list.
-  const tflite::RuntimeShape *const *shapes() const { return all_shape_ptr_.data(); }
-
-private:
-  std::vector<ElementT *> all_data_;
-  std::vector<tflite::RuntimeShape> all_shape_;
-  std::vector<tflite::RuntimeShape *> all_shape_ptr_;
-};
-
-// A list of quantized tensors in a format that can be used by kernels like
-// split and concatenation.
-template <bool is_const> class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t, is_const>
-{
-public:
-  using typename VectorOfTensors<uint8_t, is_const>::TensorT;
-
-  // Build with the tensors in 'tensor_list'.
-  explicit VectorOfQuantizedTensors(const std::vector<TensorT *> &tensor_list)
-    : VectorOfTensors<uint8_t, is_const>(tensor_list)
-  {
-    for (TensorT *tensor : tensor_list)
-    {
-      zero_point_.push_back(tensor->zero_point());
-      scale_.push_back(tensor->scale());
-    }
-  }
-
-  const float *scale() const { return scale_.data(); }
-  const int32_t *zero_point() const { return zero_point_.data(); }
-
-private:
-  std::vector<int32_t> zero_point_;
-  std::vector<float> scale_;
-};
-
-} // namespace kernels
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_KERNELS_UTILS_H
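
calculateShapeForBroadcast() above applies the usual right-aligned broadcasting rule: trailing dimensions are compared pairwise and a dimension of 1 stretches to match the other operand. A standalone sketch of the same rule (hypothetical helper, not part of this patch):

#include <algorithm>
#include <cstdio>
#include <stdexcept>
#include <vector>

// [2, 1, 4] and [3, 1] broadcast to [2, 3, 4].
std::vector<int> broadcastShape(const std::vector<int> &a, const std::vector<int> &b)
{
  const int n = static_cast<int>(std::max(a.size(), b.size()));
  std::vector<int> out(n);
  for (int i = 0; i < n; ++i)
  {
    // Compare dimensions from the back; missing dimensions behave like 1.
    const int da = i < static_cast<int>(a.size()) ? a[a.size() - i - 1] : 1;
    const int db = i < static_cast<int>(b.size()) ? b[b.size() - i - 1] : 1;
    if (da != db && da != 1 && db != 1)
      throw std::runtime_error("Shapes are not broadcastable.");
    out[n - i - 1] = std::max(da, db);
  }
  return out;
}

int main()
{
  for (int d : broadcastShape({2, 1, 4}, {3, 1}))
    std::printf("%d ", d); // prints: 2 3 4
}
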
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt
deleted file mode 100644 (file)
index 2927715..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-set(SOURCES
-    GraphLoader.h
-    GraphLoader.cpp
-    KernelBuilderHelper.h
-    KernelBuilderHelper.cpp
-    KernelBuilder.h
-    KernelBuilder.cpp
-    ModuleLoader.h
-    ModuleLoader.cpp
-    RuntimeToIR.h
-    nodes/Builders.h)
-
-# include kernel specific builders
-macro(REGISTER_KERNEL NODE)
-  list(APPEND SOURCES "nodes/${NODE}.cpp")
-endmacro(REGISTER_KERNEL)
-include(${KERNEL_REGISTER_FILE})
-
-add_library(${LUCI_INTERPRETER_LOADER} STATIC ${SOURCES})
-if (NOT NNCC_LIBRARY_NO_PIC)
-  set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-endif(NOT NNCC_LIBRARY_NO_PIC)
-target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_PAL_DIR}")
-target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
-
-target_link_libraries(${LUCI_INTERPRETER_LOADER}
-        PUBLIC luci_lang ${LUCI_INTERPRETER_CORE}
-        PRIVATE ${LUCI_INTERPRETER_KERNELS} nncc_common luci_plan)
-
-if(NOT ENABLE_TEST)
-  return()
-endif(NOT ENABLE_TEST)
-
-nnas_find_package(GTest REQUIRED)
-
-set(TEST_SOURCES KernelBuilder.test.cpp)
-
-GTest_AddTest(${LUCI_INTERPRETER_LOADER}_test ${TEST_SOURCES})
-target_link_libraries(${LUCI_INTERPRETER_LOADER}_test ${LUCI_INTERPRETER_LOADER})
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp
deleted file mode 100644 (file)
index 4020709..0000000
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "loader/GraphLoader.h"
-
-#include "loader/KernelBuilder.h"
-
-#include <luci/Plan/CircleNodeExecutionPlan.h>
-#include <loco/IR/Algorithm.h>
-
-namespace luci_interpreter
-{
-namespace
-{
-
-template <typename NodeT> Shape getNodeShape(const NodeT *node)
-{
-  Shape shape(node->rank());
-  for (uint32_t i = 0; i < node->rank(); ++i)
-  {
-    shape.dim(i) = node->dim(i).value();
-  }
-  return shape;
-}
-
-template <DataType DT> const void *getNodeDataImpl(const luci::CircleConst *node, size_t *data_size)
-{
-  const size_t element_size = getDataTypeSize(DT);
-  const int32_t num_elements = node->size<DT>();
-
-  *data_size = num_elements * element_size;
-  if (*data_size > 0)
-  {
-    // FIXME There is no good way to get the pointer to the data currently.
-    return &node->at<DT>(0);
-  }
-  return nullptr;
-}
-
-const void *getNodeData(const luci::CircleConst *node, size_t *data_size)
-{
-  switch (node->dtype())
-  {
-    case DataType::U8:
-      return getNodeDataImpl<DataType::U8>(node, data_size);
-    case DataType::FLOAT32:
-      return getNodeDataImpl<DataType::FLOAT32>(node, data_size);
-    case DataType::S8:
-      return getNodeDataImpl<DataType::S8>(node, data_size);
-    case DataType::S16:
-      return getNodeDataImpl<DataType::S16>(node, data_size);
-    case DataType::S32:
-      return getNodeDataImpl<DataType::S32>(node, data_size);
-    case DataType::S64:
-      return getNodeDataImpl<DataType::S64>(node, data_size);
-    case DataType::BOOL:
-      return getNodeDataImpl<DataType::BOOL>(node, data_size);
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-const void *getNodeData(const luci::CircleCustom *node, size_t *data_size)
-{
-  if (node->custom_code() != "CircleReferencingConst")
-    return nullptr;
-
-  // Helper struct that describes the data loaded into custom_options of a CircleReferencingConst node
-  // TODO move this struct to header
-  struct ConstDataReference
-  {
-    const uint8_t *data = nullptr;
-    uint32_t size = 0;
-  };
-
-  const auto &custom_options = node->custom_options();
-  const auto &const_data_ref = *reinterpret_cast<const ConstDataReference *>(custom_options.data());
-
-  *data_size = const_data_ref.size;
-  return const_data_ref.data;
-}
-
-bool isExecutableNode(const luci::CircleNode *node)
-{
-  switch (node->opcode())
-  {
-    // These nodes denote inputs / outputs of a graph.
-    case luci::CircleOpcode::CIRCLECONST:
-    case luci::CircleOpcode::CIRCLEINPUT:
-    case luci::CircleOpcode::CIRCLEOUTPUT:
-    case luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE:
-    // The following nodes denote outputs of multiple-output nodes.
-    case luci::CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT:
-    case luci::CircleOpcode::CIRCLECUSTOMOUT:
-    case luci::CircleOpcode::CIRCLEIFOUT:
-    case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT:
-    case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT:
-    case luci::CircleOpcode::CIRCLESPLITOUT:
-    case luci::CircleOpcode::CIRCLESPLITVOUT:
-    case luci::CircleOpcode::CIRCLETOPKV2OUT:
-    case luci::CircleOpcode::CIRCLEUNIQUEOUT:
-    case luci::CircleOpcode::CIRCLEUNPACKOUT:
-    case luci::CircleOpcode::CIRCLEVARIABLE:
-    case luci::CircleOpcode::CIRCLEWHILEOUT:
-      return false;
-    // Custom nodes may be either executable or non-executable
-    case luci::CircleOpcode::CUSTOM:
-    {
-      auto const custom_node = loco::must_cast<const luci::CircleCustom *>(node);
-
-      // TODO handle more non-executable Custom ops here
-      if (custom_node->custom_code() == "CircleReferencingConst")
-        return false;
-
-      return true;
-    }
-    default:
-      return true;
-  }
-}
-
-bool isTensorProducingNode(const luci::CircleNode *node)
-{
-  switch (node->opcode())
-  {
-    // Output nodes do not produce tensors.
-    case luci::CircleOpcode::CIRCLEOUTPUT:
-    // The following nodes are multiple-output nodes. They do not produce tensors; the tensors
-    // are produced by the corresponding *Out nodes instead.
-    case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM:
-    case luci::CircleOpcode::CUSTOM:
-    case luci::CircleOpcode::IF:
-    case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4:
-    case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5:
-    case luci::CircleOpcode::SPLIT:
-    case luci::CircleOpcode::SPLIT_V:
-    case luci::CircleOpcode::TOPK_V2:
-    case luci::CircleOpcode::UNIQUE:
-    case luci::CircleOpcode::UNPACK:
-    case luci::CircleOpcode::WHILE:
-      return false;
-    default:
-      return true;
-  }
-}
-
-bool isSupportedCustomNode(const luci::CircleNode *node)
-{
-  const auto custom_node = loco::must_cast<const luci::CircleCustom *>(node);
-
-  // TODO handle more Custom ops here
-  if (custom_node->custom_code() == "CircleReferencingConst")
-    return true;
-
-  return false;
-}
-
-} // namespace
-
-GraphLoader::GraphLoader(
-  const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
-  const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
-  std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, IMemoryManager *memory_manager)
-  : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir),
-    _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor),
-    _memory_manager(memory_manager)
-{
-}
-
-void GraphLoader::loadTensors()
-{
-  for (uint32_t i = 0; i < _graph->nodes()->size(); ++i)
-  {
-    const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i));
-
-    if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node))
-      throw std::runtime_error("Unsupported Custom operator. " + node->name());
-
-    if (!isTensorProducingNode(node))
-      continue;
-
-    // Only Input, Const, Custom and Variable nodes have shapes. Shapes of intermediate tensors will
-    // be inferred.
-    Shape shape{};
-    switch (node->opcode())
-    {
-      case luci::CircleOpcode::CIRCLECONST:
-      case luci::CircleOpcode::CIRCLECUSTOMOUT:
-      case luci::CircleOpcode::CIRCLEINPUT:
-      case luci::CircleOpcode::CIRCLEVARIABLE:
-        shape = getNodeShape(node);
-        break;
-      default:
-        break;
-    }
-
-    AffineQuantization quantization;
-    if (node->quantparam() != nullptr)
-    {
-      const luci::CircleQuantParam *params = node->quantparam();
-      assert(params->scale.size() == params->zerop.size());
-      quantization.scale.assign(params->scale.cbegin(), params->scale.cend());
-      quantization.zero_point.assign(params->zerop.cbegin(), params->zerop.cend());
-      quantization.quantized_dimension = params->quantized_dimension;
-    }
-
-    auto tensor = std::make_unique<Tensor>(node->dtype(), std::move(shape), std::move(quantization),
-                                           node->name());
-
-    // If the node has an execution plan, read its memory offset (relative to the start of
-    // the shared memory buffer). Used by the Static Memory Manager.
-    if (luci::has_execution_plan(node))
-    {
-      auto execution_plan = luci::get_execution_plan(node);
-      assert(!execution_plan.offsets().empty());
-      tensor->set_offset(execution_plan.offsets().front());
-    }
-
-    if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
-    {
-      size_t data_size{};
-      const void *const_data = getNodeData(const_node, &data_size);
-      if (const_data != nullptr)
-      {
-        _memory_manager->allocate_memory(*tensor);
-        tensor->writeData(const_data, data_size);
-      }
-    }
-    else if (const auto *custom_out_node = dynamic_cast<const luci::CircleCustomOut *>(node))
-    {
-      const auto *custom_node =
-        loco::must_cast<const luci::CircleCustom *>(custom_out_node->input());
-
-      if (custom_node->custom_code() == "CircleReferencingConst")
-      {
-        size_t data_size{};
-        const void *const_data = getNodeData(custom_node, &data_size);
-        if (const_data != nullptr)
-        {
-          _memory_manager->allocate_memory(*tensor);
-          tensor->writeData(const_data, data_size);
-        }
-      }
-    }
-
-    _node_to_tensor.emplace(node, tensor.get());
-    _runtime_to_ir.tensor_to_node.emplace(tensor.get(), node);
-
-    _runtime_graph->addTensor(std::move(tensor));
-  }
-}
-
-void GraphLoader::initInputOutputTensors() const
-{
-  auto input_nodes = loco::input_nodes(_graph);
-  std::vector<Tensor *> input_tensors(input_nodes.size());
-  for (size_t i = 0; i < input_nodes.size(); ++i)
-  {
-    input_tensors[i] = _node_to_tensor.at(input_nodes[i]);
-    _memory_manager->allocate_memory(*input_tensors[i]);
-  }
-  _runtime_graph->setInputTensors(input_tensors);
-
-  auto output_nodes = loco::output_nodes(const_cast<loco::Graph *>(_graph));
-  std::vector<Tensor *> output_tensors(output_nodes.size());
-  for (size_t i = 0; i < output_nodes.size(); ++i)
-  {
-    const auto *node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
-    output_tensors[i] = _node_to_tensor.at(node->from());
-  }
-  _runtime_graph->setOutputTensors(output_tensors);
-}
-
-void GraphLoader::loadOperators()
-{
-  KernelBuilder kernel_builder(_graph_to_runtime_graph, _node_to_tensor);
-
-  // Create kernels for executable nodes. This has to be done in execution order.
-  auto graph = const_cast<loco::Graph *>(_graph);
-
-  auto const graph_nodes = loco::all_nodes(graph);
-
-  // Check whether every node carries an execution plan annotation.
-  bool has_execution_annotation = true;
-  auto const checking_exec_plan = [&has_execution_annotation](auto const node) {
-    const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node);
-    if (!luci::has_execution_plan(circle_node))
-      has_execution_annotation = false;
-  };
-  std::for_each(begin(graph_nodes), end(graph_nodes), checking_exec_plan);
-
-  if (has_execution_annotation)
-  {
-    // Build ordered_nodes vector that stores the order of execution of graph nodes.
-    std::vector<const luci::CircleNode *> ordered_nodes(graph_nodes.size());
-
-    auto const filler = [&ordered_nodes](auto const node) {
-      const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node);
-      auto const position = luci::get_execution_plan(circle_node).order_in_plan();
-      ordered_nodes.at(position) = circle_node;
-    };
-    std::for_each(begin(graph_nodes), end(graph_nodes), filler);
-
-    for (auto node : ordered_nodes)
-    {
-      if (isExecutableNode(node))
-      {
-        std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
-        _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
-        _runtime_graph->addKernel(std::move(kernel));
-      }
-    }
-  }
-  else
-  {
-    // If it is impossible to build the execution order plan,
-    // then we use the default postorder_traversal approach.
-    for (const loco::Node *loco_node : loco::postorder_traversal(loco::output_nodes(graph)))
-    {
-      const auto *node = loco::must_cast<const luci::CircleNode *>(loco_node);
-      if (isExecutableNode(node))
-      {
-        std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
-        _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
-        _runtime_graph->addKernel(std::move(kernel));
-      }
-    }
-  }
-}
-
-} // namespace luci_interpreter
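
GraphLoader::loadOperators() above builds kernels in plan order when every node carries an execution-plan annotation, placing each node at its recorded position rather than relying on post-order traversal. A simplified standalone sketch of that ordering step (hypothetical names, not part of this patch):

#include <cstdio>
#include <string>
#include <vector>

struct PlannedNode
{
  std::string name;
  size_t order_in_plan; // position recorded by the execution planner
};

int main()
{
  const std::vector<PlannedNode> graph_nodes = {{"Conv2D", 1}, {"Input", 0}, {"Softmax", 2}};

  // Scatter each node into the slot given by its plan position.
  std::vector<const PlannedNode *> ordered(graph_nodes.size(), nullptr);
  for (const auto &node : graph_nodes)
    ordered.at(node.order_in_plan) = &node;

  for (const auto *node : ordered)
    std::printf("%s\n", node->name.c_str()); // prints: Input, Conv2D, Softmax
}
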
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h
deleted file mode 100644 (file)
index fe066ec..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
-#define LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
-
-#include "core/RuntimeGraph.h"
-#include "loader/RuntimeToIR.h"
-#include "luci_interpreter/MemoryManager.h"
-
-#include <loco/IR/Graph.h>
-
-#include <unordered_map>
-
-namespace luci_interpreter
-{
-
-class GraphLoader
-{
-public:
-  GraphLoader(const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
-              const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
-              std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
-              IMemoryManager *memory_manager);
-
-  void loadTensors();
-  void initInputOutputTensors() const;
-  void loadOperators();
-
-private:
-  const loco::Graph *_graph;
-  RuntimeGraph *_runtime_graph;
-  RuntimeToIR &_runtime_to_ir;
-  IMemoryManager *_memory_manager;
-
-  const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph;
-  std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp
deleted file mode 100644 (file)
index 8483a9a..0000000
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "loader/KernelBuilder.h"
-#include "loader/nodes/Builders.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-#define CIRCLE_NODE(OPCODE, CLASS) CLASS,
-#define CIRCLE_VNODE(OPCODE, CLASS) CLASS,
-
-// This enum is auxiliary.
-// It is a duplicate of luci::CircleOpcode but initialized with CLASS instead of OPCODE,
-// because the list of target operators is given as CLASS names.
-enum class BuilderId
-{
-#include <luci/IR/CircleNodes.lst>
-  Size // equals the number of values in the BuilderId enum
-};
-
-#undef CIRCLE_VNODE
-#undef CIRCLE_NODE
-
-/**
- * @brief Registry of kernel builders
- *
- * This class contains mapping from Opcodes to kernel builder functions
- */
-
-class KernelBuilderRegistry
-{
-public:
-  using KernelBuilderFunc = std::unique_ptr<Kernel>(const luci::CircleNode *,
-                                                    KernelBuilderHelper &);
-
-  KernelBuilderRegistry() : _operator_builders(size_t(BuilderId::Size), nullptr)
-  {
-#define REGISTER_KERNEL(name) \
-  register_kernel_builder(BuilderId::Circle##name, build_kernel_Circle##name);
-
-#include "KernelsToBuild.lst"
-
-#undef REGISTER_KERNEL
-  }
-
-  KernelBuilderFunc *get_kernel_builder_func(luci::CircleOpcode opcode) const
-  {
-    return _operator_builders.at(size_t(opcode));
-  }
-
-private:
-  std::vector<KernelBuilderFunc *> _operator_builders;
-
-  void register_kernel_builder(BuilderId id, KernelBuilderFunc *func)
-  {
-    // Since BuilderId is a duplicate of luci::CircleOpcode,
-    // size_t(id) is equal to size_t(corresponding operation opcode).
-    assert(size_t(id) < _operator_builders.size());
-    _operator_builders[size_t(id)] = func;
-  }
-};
-
-KernelBuilder::KernelBuilder(
-  const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
-  const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
-  : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
-{
-  _builder_registry = std::make_unique<KernelBuilderRegistry>();
-}
-
-KernelBuilder::~KernelBuilder()
-{
-  // Need to define in this CPP to hide KernelBuilderRegistry internals.
-  // This destructor deletes _builder_registry
-}
-
-std::unique_ptr<Kernel> KernelBuilder::build(const luci::CircleNode *node)
-{
-  auto specific_builder = _builder_registry->get_kernel_builder_func(node->opcode());
-  if (specific_builder != nullptr)
-    return specific_builder(node, *this);
-
-  std::string msg = "Unsupported operator: ";
-  msg += std::to_string(static_cast<uint32_t>(node->opcode())) + " " + std::string(node->name());
-  throw std::invalid_argument(msg.c_str());
-}
-
-} // namespace luci_interpreter
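
KernelBuilderRegistry above uses an X-macro include (KernelsToBuild.lst expands REGISTER_KERNEL once per kernel) to populate its table of builder functions. A condensed standalone sketch of the same pattern (hypothetical builders, expansion inlined by hand, not part of this patch):

#include <cstdio>
#include <functional>
#include <map>
#include <string>

using BuilderFunc = std::function<void()>;

static std::map<std::string, BuilderFunc> registry;

void build_kernel_Add() { std::printf("building Add kernel\n"); }
void build_kernel_Mul() { std::printf("building Mul kernel\n"); }

// Each expansion registers one builder under the kernel's name.
#define REGISTER_KERNEL(name) registry[#name] = build_kernel_##name;

int main()
{
  // In the real builder these lines come from including KernelsToBuild.lst.
  REGISTER_KERNEL(Add)
  REGISTER_KERNEL(Mul)

  registry.at("Add")(); // look up and invoke the builder for a given kernel
  return 0;
}
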
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h
deleted file mode 100644 (file)
index b1f3833..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_LOADER_KERNELBUILDER_H
-#define LUCI_INTERPRETER_LOADER_KERNELBUILDER_H
-
-#include "loader/KernelBuilderHelper.h"
-
-#include "core/Kernel.h"
-#include "core/RuntimeGraph.h"
-
-#include <luci/IR/CircleNodeVisitor.h>
-
-#include <memory>
-#include <unordered_map>
-
-namespace luci_interpreter
-{
-
-class KernelBuilderRegistry;
-
-class KernelBuilder : public KernelBuilderHelper
-{
-public:
-  KernelBuilder(
-    const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
-    const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor);
-
-  ~KernelBuilder();
-
-  std::unique_ptr<Kernel> build(const luci::CircleNode *node);
-
-private:
-  std::unique_ptr<KernelBuilderRegistry> _builder_registry;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_LOADER_KERNELBUILDER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp
deleted file mode 100644 (file)
index b221b69..0000000
+++ /dev/null
@@ -1,1376 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "loader/GraphLoader.h"
-#include "loader/KernelBuilder.h"
-#include "luci_interpreter/SimpleMemoryManager.h"
-
-#include <kernels/Add.h>
-#include <kernels/ArgMax.h>
-#include <kernels/AveragePool2D.h>
-#include <kernels/BatchMatMul.h>
-#include <kernels/Cast.h>
-#include <kernels/Concatenation.h>
-#include <kernels/Conv2D.h>
-#include <kernels/DepthToSpace.h>
-#include <kernels/DepthwiseConv2D.h>
-#include <kernels/Div.h>
-#include <kernels/Elu.h>
-#include <kernels/Exp.h>
-#include <kernels/Floor.h>
-#include <kernels/FloorDiv.h>
-#include <kernels/Equal.h>
-#include <kernels/FullyConnected.h>
-#include <kernels/Greater.h>
-#include <kernels/GreaterEqual.h>
-#include <kernels/InstanceNorm.h>
-#include <kernels/L2Normalize.h>
-#include <kernels/L2Pool2D.h>
-#include <kernels/LeakyRelu.h>
-#include <kernels/Less.h>
-#include <kernels/LessEqual.h>
-#include <kernels/LocalResponseNormalization.h>
-#include <kernels/LogicalAnd.h>
-#include <kernels/LogicalNot.h>
-#include <kernels/LogicalOr.h>
-#include <kernels/Logistic.h>
-#include <kernels/LogSoftmax.h>
-#include <kernels/Maximum.h>
-#include <kernels/MaxPool2D.h>
-#include <kernels/Mean.h>
-#include <kernels/Minimum.h>
-#include <kernels/Mul.h>
-#include <kernels/Neg.h>
-#include <kernels/NotEqual.h>
-#include <kernels/OneHot.h>
-#include <kernels/Pad.h>
-#include <kernels/PadV2.h>
-#include <kernels/Pow.h>
-#include <kernels/PRelu.h>
-#include <kernels/Relu.h>
-#include <kernels/Relu6.h>
-#include <kernels/Reshape.h>
-#include <kernels/ResizeBilinear.h>
-#include <kernels/ResizeNearestNeighbor.h>
-#include <kernels/ReverseV2.h>
-#include <kernels/Rsqrt.h>
-#include <kernels/Slice.h>
-#include <kernels/Softmax.h>
-#include <kernels/SpaceToDepth.h>
-#include <kernels/Split.h>
-#include <kernels/SplitV.h>
-#include <kernels/Sqrt.h>
-#include <kernels/SquaredDifference.h>
-#include <kernels/Squeeze.h>
-#include <kernels/StridedSlice.h>
-#include <kernels/Sub.h>
-#include <kernels/Tanh.h>
-#include <kernels/Transpose.h>
-#include <kernels/TransposeConv.h>
-#include <kernels/Unpack.h>
-
-#include <gmock/gmock.h>
-
-namespace luci_interpreter
-{
-namespace
-{
-
-using namespace testing;
-
-class KernelBuilderTest : public Test
-{
-protected:
-  luci::CircleInput *createInputNode() { return createNode<luci::CircleInput>(); }
-  void SetUp() override { _memory_manager = std::make_unique<SimpleMemoryManager>(); }
-
-  std::unique_ptr<IMemoryManager> _memory_manager;
-
-  template <typename NodeT, typename... Args> NodeT *createNode(Args &&... args)
-  {
-    auto *node = _graph.nodes()->create<NodeT>(std::forward<Args>(args)...);
-    // The actual type does not matter for the purpose of the tests.
-    // NOTE The type is meaningless for nodes with multiple outputs (corresponding *Out nodes carry
-    //  actual output types).
-    node->dtype(loco::DataType::FLOAT32);
-    return node;
-  }
-
-  template <typename NodeOutT> NodeOutT *createNodeOut(loco::Node *node, int index)
-  {
-    auto *node_out = createNode<NodeOutT>();
-    node_out->input(node);
-    node_out->index(index);
-    return node_out;
-  }
-
-  template <typename KernelT> std::unique_ptr<KernelT> buildKernel(const luci::CircleNode *op)
-  {
-    std::unordered_map<const loco::Graph *, RuntimeGraph *> graph_to_runtime_graph;
-
-    RuntimeGraph runtime_graph(nullptr, _memory_manager.get());
-    graph_to_runtime_graph[&_graph] = &runtime_graph;
-    RuntimeToIR runtime_to_ir;
-    GraphLoader graph_loader(&_graph, &runtime_graph, runtime_to_ir, graph_to_runtime_graph,
-                             _node_to_tensor, _memory_manager.get());
-    graph_loader.loadTensors();
-
-    KernelBuilder kernel_builder(graph_to_runtime_graph, _node_to_tensor);
-
-    auto kernel = kernel_builder.build(op);
-    return std::unique_ptr<KernelT>(dynamic_cast<KernelT *>(kernel.release()));
-  }
-
-  void checkTensor(const Tensor *tensor, const loco::Node *node)
-  {
-    EXPECT_THAT(tensor, Eq(_node_to_tensor.at(node)));
-  }
-
-private:
-  loco::Graph _graph;
-  std::unordered_map<const loco::Node *, Tensor *> _node_to_tensor;
-};
-
-TEST_F(KernelBuilderTest, Add)
-{
-  auto *input1 = createInputNode();
-  auto *input2 = createInputNode();
-
-  auto *op = createNode<luci::CircleAdd>();
-  op->x(input1);
-  op->y(input2);
-
-  op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
-  auto kernel = buildKernel<kernels::Add>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input1(), input1);
-  checkTensor(kernel->input2(), input2);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, ArgMax)
-{
-  auto *input = createInputNode();
-  auto *axis = createInputNode();
-
-  auto *op = createNode<luci::CircleArgMax>();
-  op->input(input);
-  op->dimension(axis);
-
-  op->output_type(loco::DataType::FLOAT32);
-
-  auto kernel = buildKernel<kernels::ArgMax>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->axis(), axis);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().output_type, Eq(op->output_type()));
-}
-
-TEST_F(KernelBuilderTest, AveragePool2D)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleAveragePool2D>();
-  op->value(input);
-
-  op->padding(luci::Padding::SAME);
-  op->filter()->h(11);
-  op->filter()->w(13);
-  op->stride()->h(17);
-  op->stride()->w(19);
-  op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
-  auto kernel = buildKernel<kernels::AveragePool2D>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
-  EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h()));
-  EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w()));
-  EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
-  EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
-  EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, BatchMatMul)
-{
-  auto *lhs = createInputNode();
-  auto *rhs = createInputNode();
-
-  auto *op = createNode<luci::CircleBatchMatMul>();
-  op->x(lhs);
-  op->y(rhs);
-  op->adj_x(false);
-  op->adj_y(false);
-
-  auto kernel = buildKernel<kernels::BatchMatMul>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->x(), lhs);
-  checkTensor(kernel->y(), rhs);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().adj_x, Eq(op->adj_x()));
-  EXPECT_THAT(kernel->params().adj_y, Eq(op->adj_y()));
-}
-
-TEST_F(KernelBuilderTest, Cast)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleCast>();
-  op->x(input);
-
-  auto kernel = buildKernel<kernels::Cast>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Concatenation)
-{
-  auto *input1 = createInputNode();
-  auto *input2 = createInputNode();
-
-  auto *op = createNode<luci::CircleConcatenation>(2);
-  op->values(0, input1);
-  op->values(1, input2);
-  op->axis(11);
-
-  auto kernel = buildKernel<kernels::Concatenation>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(0), input1);
-  checkTensor(kernel->input(1), input2);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
-  EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, Conv2D)
-{
-  auto *input = createInputNode();
-  auto *filter = createInputNode();
-  auto *bias = createInputNode();
-
-  auto *op = createNode<luci::CircleConv2D>();
-  op->input(input);
-  op->filter(filter);
-  op->bias(bias);
-
-  op->padding(luci::Padding::SAME);
-  op->stride()->h(11);
-  op->stride()->w(13);
-  op->dilation()->h(17);
-  op->dilation()->w(19);
-  op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
-  auto kernel = buildKernel<kernels::Conv2D>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->filter(), filter);
-  checkTensor(kernel->bias(), bias);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
-  EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
-  EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
-  EXPECT_THAT(kernel->params().dilation_height_factor, Eq(op->dilation()->h()));
-  EXPECT_THAT(kernel->params().dilation_width_factor, Eq(op->dilation()->w()));
-  EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, DepthToSpace)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleDepthToSpace>();
-  op->input(input);
-
-  op->block_size(11);
-
-  auto kernel = buildKernel<kernels::DepthToSpace>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().block_size, Eq(op->block_size()));
-}
-
-TEST_F(KernelBuilderTest, DepthwiseConv2D)
-{
-  auto *input = createInputNode();
-  auto *filter = createInputNode();
-  auto *bias = createInputNode();
-
-  auto *op = createNode<luci::CircleDepthwiseConv2D>();
-  op->input(input);
-  op->filter(filter);
-  op->bias(bias);
-
-  op->padding(luci::Padding::SAME);
-  op->depthMultiplier(11);
-  op->stride()->h(13);
-  op->stride()->w(17);
-  op->dilation()->h(19);
-  op->dilation()->w(23);
-  op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
-  auto kernel = buildKernel<kernels::DepthwiseConv2D>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->filter(), filter);
-  checkTensor(kernel->bias(), bias);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
-  EXPECT_THAT(kernel->params().depth_multiplier, Eq(op->depthMultiplier()));
-  EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
-  EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
-  EXPECT_THAT(kernel->params().dilation_height_factor, Eq(op->dilation()->h()));
-  EXPECT_THAT(kernel->params().dilation_width_factor, Eq(op->dilation()->w()));
-  EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, Div)
-{
-  auto *input1 = createInputNode();
-  auto *input2 = createInputNode();
-
-  auto *op = createNode<luci::CircleDiv>();
-  op->x(input1);
-  op->y(input2);
-
-  op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
-  auto kernel = buildKernel<kernels::Div>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input1(), input1);
-  checkTensor(kernel->input2(), input2);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, Elu)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleElu>();
-  op->features(input);
-
-  auto kernel = buildKernel<kernels::Elu>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Exp)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleExp>();
-  op->x(input);
-
-  auto kernel = buildKernel<kernels::Exp>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Floor)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleFloor>();
-  op->x(input);
-
-  auto kernel = buildKernel<kernels::Floor>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, FloorDiv)
-{
-  auto *x = createInputNode();
-  auto *y = createInputNode();
-
-  auto *op = createNode<luci::CircleFloorDiv>();
-  op->x(x);
-  op->y(y);
-
-  auto kernel = buildKernel<kernels::FloorDiv>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->x(), x);
-  checkTensor(kernel->y(), y);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Equal)
-{
-  auto *x_input = createInputNode();
-  auto *y_input = createInputNode();
-
-  auto *op = createNode<luci::CircleEqual>();
-  op->x(x_input);
-  op->y(y_input);
-
-  auto kernel = buildKernel<kernels::Equal>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->x(), x_input);
-  checkTensor(kernel->y(), y_input);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, FullyConnected)
-{
-  auto *input = createInputNode();
-  auto *weights = createInputNode();
-  auto *bias = createInputNode();
-
-  auto *op = createNode<luci::CircleFullyConnected>();
-  op->input(input);
-  op->weights(weights);
-  op->bias(bias);
-
-  op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
-  auto kernel = buildKernel<kernels::FullyConnected>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->weights(), weights);
-  checkTensor(kernel->bias(), bias);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, Greater)
-{
-  auto *x_input = createInputNode();
-  auto *y_input = createInputNode();
-
-  auto *op = createNode<luci::CircleGreater>();
-  op->x(x_input);
-  op->y(y_input);
-
-  auto kernel = buildKernel<kernels::Greater>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->x(), x_input);
-  checkTensor(kernel->y(), y_input);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, GreaterEqual)
-{
-  auto *x_input = createInputNode();
-  auto *y_input = createInputNode();
-
-  auto *op = createNode<luci::CircleGreaterEqual>();
-  op->x(x_input);
-  op->y(y_input);
-
-  auto kernel = buildKernel<kernels::GreaterEqual>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->x(), x_input);
-  checkTensor(kernel->y(), y_input);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, InstanceNorm)
-{
-  auto *input = createInputNode();
-  auto *gamma = createInputNode();
-  auto *beta = createInputNode();
-
-  auto *op = createNode<luci::CircleInstanceNorm>();
-  op->input(input);
-  op->gamma(gamma);
-  op->beta(beta);
-
-  op->epsilon(1e-05);
-  op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
-  auto kernel = buildKernel<kernels::InstanceNorm>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->gamma(), gamma);
-  checkTensor(kernel->beta(), beta);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().epsilon, Eq(op->epsilon()));
-  EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, L2Normalize)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleL2Normalize>();
-  op->x(input);
-
-  op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
-  auto kernel = buildKernel<kernels::L2Normalize>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, L2Pool2D)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleL2Pool2D>();
-  op->value(input);
-
-  op->padding(luci::Padding::SAME);
-  op->filter()->h(11);
-  op->filter()->w(13);
-  op->stride()->h(17);
-  op->stride()->w(19);
-  op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
-  auto kernel = buildKernel<kernels::L2Pool2D>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
-  EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h()));
-  EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w()));
-  EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
-  EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
-  EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, LeakyRelu)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleLeakyRelu>();
-  op->features(input);
-
-  op->alpha(11.0f);
-
-  auto kernel = buildKernel<kernels::LeakyRelu>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().alpha, Eq(op->alpha()));
-}
-
-TEST_F(KernelBuilderTest, Less)
-{
-  auto *x_input = createInputNode();
-  auto *y_input = createInputNode();
-
-  auto *op = createNode<luci::CircleLess>();
-  op->x(x_input);
-  op->y(y_input);
-
-  auto kernel = buildKernel<kernels::Less>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->x(), x_input);
-  checkTensor(kernel->y(), y_input);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, LessEqual)
-{
-  auto *x_input = createInputNode();
-  auto *y_input = createInputNode();
-
-  auto *op = createNode<luci::CircleLessEqual>();
-  op->x(x_input);
-  op->y(y_input);
-
-  auto kernel = buildKernel<kernels::LessEqual>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->x(), x_input);
-  checkTensor(kernel->y(), y_input);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, LocalResponseNormalization)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleLocalResponseNormalization>();
-  op->input(input);
-
-  op->radius(11);
-  op->bias(13.0f);
-  op->alpha(15.0f);
-  op->beta(17.0f);
-
-  auto kernel = buildKernel<kernels::LocalResponseNormalization>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().radius, Eq(op->radius()));
-  EXPECT_THAT(kernel->params().bias, Eq(op->bias()));
-  EXPECT_THAT(kernel->params().alpha, Eq(op->alpha()));
-  EXPECT_THAT(kernel->params().beta, Eq(op->beta()));
-}
-
-TEST_F(KernelBuilderTest, LogicalAnd)
-{
-  auto *input1 = createInputNode();
-  auto *input2 = createInputNode();
-
-  auto *op = createNode<luci::CircleLogicalAnd>();
-  op->x(input1);
-  op->y(input2);
-
-  auto kernel = buildKernel<kernels::LogicalAnd>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input1(), input1);
-  checkTensor(kernel->input2(), input2);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, LogicalNot)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleLogicalNot>();
-  op->x(input);
-
-  auto kernel = buildKernel<kernels::LogicalNot>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, LogicalOr)
-{
-  auto *input1 = createInputNode();
-  auto *input2 = createInputNode();
-
-  auto *op = createNode<luci::CircleLogicalOr>();
-  op->x(input1);
-  op->y(input2);
-
-  auto kernel = buildKernel<kernels::LogicalOr>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input1(), input1);
-  checkTensor(kernel->input2(), input2);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Logistic)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleLogistic>();
-  op->x(input);
-
-  auto kernel = buildKernel<kernels::Logistic>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, LogSoftmax)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleLogSoftmax>();
-  op->logits(input);
-
-  auto kernel = buildKernel<kernels::LogSoftmax>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Maximum)
-{
-  auto *input1 = createInputNode();
-  auto *input2 = createInputNode();
-
-  auto *op = createNode<luci::CircleMaximum>();
-  op->x(input1);
-  op->y(input2);
-
-  auto kernel = buildKernel<kernels::Maximum>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input1(), input1);
-  checkTensor(kernel->input2(), input2);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, MaxPool2D)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleMaxPool2D>();
-  op->value(input);
-
-  op->padding(luci::Padding::SAME);
-  op->filter()->h(11);
-  op->filter()->w(13);
-  op->stride()->h(17);
-  op->stride()->w(19);
-  op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
-  auto kernel = buildKernel<kernels::MaxPool2D>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
-  EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h()));
-  EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w()));
-  EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
-  EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
-  EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, Mean)
-{
-  auto *input = createInputNode();
-  auto *axes = createInputNode();
-
-  auto *op = createNode<luci::CircleMean>();
-  op->input(input);
-  op->reduction_indices(axes);
-
-  op->keep_dims(true);
-
-  auto kernel = buildKernel<kernels::Mean>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->axes(), axes);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().keep_dims, Eq(op->keep_dims()));
-}
-
-TEST_F(KernelBuilderTest, Minimum)
-{
-  auto *input1 = createInputNode();
-  auto *input2 = createInputNode();
-
-  auto *op = createNode<luci::CircleMinimum>();
-  op->x(input1);
-  op->y(input2);
-
-  auto kernel = buildKernel<kernels::Minimum>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input1(), input1);
-  checkTensor(kernel->input2(), input2);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Mul)
-{
-  auto *input1 = createInputNode();
-  auto *input2 = createInputNode();
-
-  auto *op = createNode<luci::CircleMul>();
-  op->x(input1);
-  op->y(input2);
-
-  op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
-  auto kernel = buildKernel<kernels::Mul>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input1(), input1);
-  checkTensor(kernel->input2(), input2);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, Neg)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleNeg>();
-  op->x(input);
-
-  auto kernel = buildKernel<kernels::Neg>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, NotEqual)
-{
-  auto *x_input = createInputNode();
-  auto *y_input = createInputNode();
-
-  auto *op = createNode<luci::CircleNotEqual>();
-  op->x(x_input);
-  op->y(y_input);
-
-  auto kernel = buildKernel<kernels::NotEqual>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->x(), x_input);
-  checkTensor(kernel->y(), y_input);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, OneHot)
-{
-  auto *indices = createInputNode();
-  auto *depth = createInputNode();
-  auto *on_value = createInputNode();
-  auto *off_value = createInputNode();
-  auto axis = 1;
-
-  auto *op = createNode<luci::CircleOneHot>();
-  op->indices(indices);
-  op->depth(depth);
-  op->on_value(on_value);
-  op->off_value(off_value);
-  op->axis(axis);
-
-  auto kernel = buildKernel<kernels::OneHot>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->indices(), indices);
-  checkTensor(kernel->depth(), depth);
-  checkTensor(kernel->on_value(), on_value);
-  checkTensor(kernel->off_value(), off_value);
-  EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
-}
-
-TEST_F(KernelBuilderTest, Pad)
-{
-  auto *input = createInputNode();
-  auto *paddings = createInputNode();
-
-  auto *op = createNode<luci::CirclePad>();
-  op->input(input);
-  op->paddings(paddings);
-
-  auto kernel = buildKernel<kernels::Pad>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->paddings(), paddings);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, PadV2)
-{
-  auto *input = createInputNode();
-  auto *paddings = createInputNode();
-  auto *constant_values = createInputNode();
-
-  auto *op = createNode<luci::CirclePadV2>();
-  op->input(input);
-  op->paddings(paddings);
-  op->constant_values(constant_values);
-
-  auto kernel = buildKernel<kernels::PadV2>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->paddings(), paddings);
-  checkTensor(kernel->constant_values(), constant_values);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Pow)
-{
-  auto *input1 = createInputNode();
-  auto *input2 = createInputNode();
-
-  auto *op = createNode<luci::CirclePow>();
-  op->x(input1);
-  op->y(input2);
-
-  auto kernel = buildKernel<kernels::Pow>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input1(), input1);
-  checkTensor(kernel->input2(), input2);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, PRelu)
-{
-  auto *input = createInputNode();
-  auto *alpha = createInputNode();
-
-  auto *op = createNode<luci::CirclePRelu>();
-  op->input(input);
-  op->alpha(alpha);
-
-  auto kernel = buildKernel<kernels::PRelu>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->alpha(), alpha);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Relu)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleRelu>();
-  op->features(input);
-
-  auto kernel = buildKernel<kernels::Relu>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Relu6)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleRelu6>();
-  op->features(input);
-
-  auto kernel = buildKernel<kernels::Relu6>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Reshape)
-{
-  auto *input = createInputNode();
-  auto *shape = createInputNode();
-
-  auto *op = createNode<luci::CircleReshape>();
-  op->tensor(input);
-  op->shape(shape);
-
-  auto kernel = buildKernel<kernels::Reshape>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->shape(), shape);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, ResizeBilinear)
-{
-  auto *input = createInputNode();
-  auto *size = createInputNode();
-
-  auto *op = createNode<luci::CircleResizeBilinear>();
-  op->input(input);
-  op->size(size);
-  op->align_corners(true);
-  op->half_pixel_centers(true);
-
-  auto kernel = buildKernel<kernels::ResizeBilinear>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->size(), size);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().align_corners, Eq(op->align_corners()));
-  EXPECT_THAT(kernel->params().half_pixel_centers, Eq(op->half_pixel_centers()));
-}
-
-TEST_F(KernelBuilderTest, ResizeNearestNeighbor)
-{
-  auto *input = createInputNode();
-  auto *size = createInputNode();
-
-  auto *op = createNode<luci::CircleResizeNearestNeighbor>();
-  op->input(input);
-  op->size(size);
-  op->align_corners(true);
-
-  auto kernel = buildKernel<kernels::ResizeNearestNeighbor>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->size(), size);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().align_corners, Eq(op->align_corners()));
-  // TODO half_pixel_centers is not yet implemented on CircleResizeNearestNeighbor;
-  // update this check once it is added.
-}
-
-TEST_F(KernelBuilderTest, ReverseV2)
-{
-  auto *input = createInputNode();
-  auto *axes = createInputNode();
-
-  auto *op = createNode<luci::CircleReverseV2>();
-  op->tensor(input);
-  op->axis(axes);
-
-  auto kernel = buildKernel<kernels::ReverseV2>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->axes(), axes);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Rsqrt)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleRsqrt>();
-  op->x(input);
-
-  auto kernel = buildKernel<kernels::Rsqrt>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Slice)
-{
-  auto *input = createInputNode();
-  auto *begin = createInputNode();
-  auto *size = createInputNode();
-
-  auto *op = createNode<luci::CircleSlice>();
-  op->input(input);
-  op->begin(begin);
-  op->size(size);
-
-  auto kernel = buildKernel<kernels::Slice>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->begin(), begin);
-  checkTensor(kernel->size(), size);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Softmax)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleSoftmax>();
-  op->logits(input);
-
-  op->beta(11.0f);
-
-  auto kernel = buildKernel<kernels::Softmax>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().beta, Eq(op->beta()));
-}
-
-TEST_F(KernelBuilderTest, SpaceToDepth)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleSpaceToDepth>();
-  op->input(input);
-
-  op->block_size(11);
-
-  auto kernel = buildKernel<kernels::SpaceToDepth>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().block_size, Eq(op->block_size()));
-}
-
-TEST_F(KernelBuilderTest, Split)
-{
-  auto *axis = createInputNode();
-  auto *input = createInputNode();
-  auto *op = createNode<luci::CircleSplit>();
-  auto *output1 = createNodeOut<luci::CircleSplitOut>(op, 0);
-  auto *output2 = createNodeOut<luci::CircleSplitOut>(op, 1);
-
-  op->split_dim(axis);
-  op->input(input);
-
-  op->num_split(2);
-
-  auto kernel = buildKernel<kernels::Split>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->axis(), axis);
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(0), output1);
-  checkTensor(kernel->output(1), output2);
-}
-
-TEST_F(KernelBuilderTest, SplitV)
-{
-  auto *input = createInputNode();
-  auto *size_splits = createInputNode();
-  auto *axis = createInputNode();
-  auto *op = createNode<luci::CircleSplitV>();
-  auto *output0 = createNodeOut<luci::CircleSplitVOut>(op, 0);
-  auto *output1 = createNodeOut<luci::CircleSplitVOut>(op, 1);
-
-  op->input(input);
-  op->size_splits(size_splits);
-  op->split_dim(axis);
-
-  op->num_split(2);
-
-  auto kernel = buildKernel<kernels::SplitV>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->size_splits(), size_splits);
-  checkTensor(kernel->axis(), axis);
-  checkTensor(kernel->output(0), output0);
-  checkTensor(kernel->output(1), output1);
-}
-
-TEST_F(KernelBuilderTest, Sqrt)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleSqrt>();
-  op->x(input);
-
-  auto kernel = buildKernel<kernels::Sqrt>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, SquaredDifference)
-{
-  auto *input1 = createInputNode();
-  auto *input2 = createInputNode();
-
-  auto *op = createNode<luci::CircleSquaredDifference>();
-  op->x(input1);
-  op->y(input2);
-
-  auto kernel = buildKernel<kernels::SquaredDifference>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input1(), input1);
-  checkTensor(kernel->input2(), input2);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Squeeze)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleSqueeze>();
-  op->input(input);
-
-  op->squeeze_dims({11, 13});
-
-  auto kernel = buildKernel<kernels::Squeeze>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().squeeze_dims, ElementsAreArray(op->squeeze_dims()));
-}
-
-TEST_F(KernelBuilderTest, StridedSlice)
-{
-  auto *input = createInputNode();
-  auto *begin = createInputNode();
-  auto *end = createInputNode();
-  auto *strides = createInputNode();
-
-  auto *op = createNode<luci::CircleStridedSlice>();
-  op->input(input);
-  op->begin(begin);
-  op->end(end);
-  op->strides(strides);
-
-  op->begin_mask(11);
-  op->ellipsis_mask(13);
-  op->end_mask(17);
-  op->new_axis_mask(19);
-  op->shrink_axis_mask(23);
-
-  auto kernel = buildKernel<kernels::StridedSlice>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->begin(), begin);
-  checkTensor(kernel->end(), end);
-  checkTensor(kernel->strides(), strides);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().begin_mask, Eq(op->begin_mask()));
-  EXPECT_THAT(kernel->params().ellipsis_mask, Eq(op->ellipsis_mask()));
-  EXPECT_THAT(kernel->params().end_mask, Eq(op->end_mask()));
-  EXPECT_THAT(kernel->params().new_axis_mask, Eq(op->new_axis_mask()));
-  EXPECT_THAT(kernel->params().shrink_axis_mask, Eq(op->shrink_axis_mask()));
-}
-
-TEST_F(KernelBuilderTest, Sub)
-{
-  auto *input1 = createInputNode();
-  auto *input2 = createInputNode();
-
-  auto *op = createNode<luci::CircleSub>();
-  op->x(input1);
-  op->y(input2);
-
-  op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
-  auto kernel = buildKernel<kernels::Sub>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input1(), input1);
-  checkTensor(kernel->input2(), input2);
-  checkTensor(kernel->output(), op);
-  EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, Tanh)
-{
-  auto *input = createInputNode();
-
-  auto *op = createNode<luci::CircleTanh>();
-  op->x(input);
-
-  auto kernel = buildKernel<kernels::Tanh>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Transpose)
-{
-  auto *input = createInputNode();
-  auto *perm = createInputNode();
-
-  auto *op = createNode<luci::CircleTranspose>();
-  op->a(input);
-  op->perm(perm);
-
-  auto kernel = buildKernel<kernels::Transpose>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->perm(), perm);
-  checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, TransposeConv)
-{
-  auto *output_shape = createInputNode();
-  auto *filter = createInputNode();
-  auto *input = createInputNode();
-  auto *bias = createInputNode();
-
-  auto *op = createNode<luci::CircleTransposeConv>();
-  op->inputSizes(output_shape);
-  op->filter(filter);
-  op->outBackprop(input);
-  op->bias(bias);
-
-  op->padding(luci::Padding::SAME);
-  op->stride()->h(11);
-  op->stride()->w(13);
-
-  auto kernel = buildKernel<kernels::TransposeConv>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->output_shape(), output_shape);
-  checkTensor(kernel->filter(), filter);
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(), op);
-  checkTensor(kernel->bias(), bias);
-  EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
-  EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
-  EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
-}
-
-TEST_F(KernelBuilderTest, Unpack)
-{
-  auto *input = createInputNode();
-  auto *op = createNode<luci::CircleUnpack>();
-  auto *output1 = createNodeOut<luci::CircleUnpackOut>(op, 0);
-  auto *output2 = createNodeOut<luci::CircleUnpackOut>(op, 1);
-
-  op->value(input);
-
-  op->num(2);
-  op->axis(11);
-
-  auto kernel = buildKernel<kernels::Unpack>(op);
-  ASSERT_THAT(kernel, NotNull());
-
-  checkTensor(kernel->input(), input);
-  checkTensor(kernel->output(0), output1);
-  checkTensor(kernel->output(1), output2);
-  EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
-}
-
-TEST_F(KernelBuilderTest, NonExisting1_NEG)
-{
-  auto *op = createNode<luci::CircleConst>();
-  ASSERT_ANY_THROW(buildKernel<Kernel>(op));
-}
-
-TEST_F(KernelBuilderTest, NonExisting2_NEG)
-{
-  auto *op = createNode<luci::CircleInput>();
-  ASSERT_ANY_THROW(buildKernel<Kernel>(op));
-}
-
-TEST_F(KernelBuilderTest, NonExisting3_NEG)
-{
-  auto *op = createNode<luci::CircleOutput>();
-  ASSERT_ANY_THROW(buildKernel<Kernel>(op));
-}
-
-} // namespace
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp
deleted file mode 100644 (file)
index 23c96a6..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "loader/KernelBuilderHelper.h"
-
-#include <luci/IR/Nodes/CircleOutput.h>
-
-namespace luci_interpreter
-{
-
-const Tensor *KernelBuilderHelper::getInputTensor(const loco::Node *node) const
-{
-  const Tensor *tensor = _node_to_tensor.at(node);
-  assert(tensor != nullptr);
-  return tensor;
-}
-
-const Tensor *KernelBuilderHelper::getOptionalInputTensor(const loco::Node *node) const
-{
-  if (dynamic_cast<const luci::CircleOutputExclude *>(node))
-  {
-    return nullptr;
-  }
-  return getInputTensor(node);
-}
-
-Tensor *KernelBuilderHelper::getOutputTensor(const loco::Node *node) const
-{
-  Tensor *tensor = _node_to_tensor.at(node);
-  assert(tensor != nullptr);
-  return tensor;
-}
-
-std::vector<Tensor *>
-KernelBuilderHelper::getOutputTensors(const std::vector<const loco::Node *> &nodes) const
-{
-  std::vector<Tensor *> tensors;
-  tensors.reserve(nodes.size());
-  for (const loco::Node *node : nodes)
-    tensors.push_back(getOutputTensor(node));
-  return tensors;
-}
-
-RuntimeGraph *KernelBuilderHelper::getRuntimeGraph(const loco::Graph *graph) const
-{
-  RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph);
-  assert(runtime_graph != nullptr);
-  return runtime_graph;
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h
deleted file mode 100644 (file)
index d6fb253..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H
-#define LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H
-
-#include "core/Kernel.h"
-#include "core/RuntimeGraph.h"
-
-#include <loco/IR/Graph.h>
-#include <loco/IR/Node.h>
-
-#include <vector>
-#include <unordered_map>
-
-namespace luci_interpreter
-{
-
-class KernelBuilderHelper
-{
-public:
-  KernelBuilderHelper(
-    const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
-    const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
-    : _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
-  {
-  }
-
-public:
-  const Tensor *getInputTensor(const loco::Node *node) const;
-  const Tensor *getOptionalInputTensor(const loco::Node *node) const;
-
-  Tensor *getOutputTensor(const loco::Node *node) const;
-  std::vector<Tensor *> getOutputTensors(const std::vector<const loco::Node *> &nodes) const;
-
-  RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const;
-
-public:
-  const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph() const
-  {
-    return _graph_to_runtime_graph;
-  }
-
-  const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor() const
-  {
-    return _node_to_tensor;
-  }
-
-private:
-  const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph;
-  const std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
-};
-
-template <typename CircleNodeOut>
-std::vector<const loco::Node *> collectOutputNodes(const loco::Node *node)
-{
-  std::vector<const CircleNodeOut *> output_nodes;
-  for (const loco::Node *loco_node : loco::succs(node))
-  {
-    output_nodes.push_back(loco::must_cast<const CircleNodeOut *>(loco_node));
-  }
-  std::sort(output_nodes.begin(), output_nodes.end(),
-            [](const CircleNodeOut *node1, const CircleNodeOut *node2) {
-              return node1->index() < node2->index();
-            });
-  return {output_nodes.cbegin(), output_nodes.cend()};
-}
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp
deleted file mode 100644 (file)
index 2f278b0..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ModuleLoader.h"
-
-#include "GraphLoader.h"
-
-namespace luci_interpreter
-{
-
-ModuleLoader::ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module,
-                           RuntimeToIR &runtime_to_ir,
-                           std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
-                           IMemoryManager *memory_manager)
-  : _module(module), _runtime_module(runtime_module), _runtime_to_ir(runtime_to_ir),
-    _node_to_tensor(node_to_tensor), _memory_manager(memory_manager)
-{
-}
-
-void ModuleLoader::load()
-{
-  // Runtime graphs have to be created in advance, because they will be needed during the loading
-  // process for control flow nodes.
-  for (size_t i = 0; i < _module->size(); ++i)
-  {
-    _graph_to_runtime_graph.emplace(_module->graph(i), _runtime_module->addGraph(_memory_manager));
-  }
-  for (size_t i = 0; i < _module->size(); ++i)
-  {
-    const loco::Graph *graph = _module->graph(i);
-    RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph);
-    GraphLoader loader(graph, runtime_graph, _runtime_to_ir, _graph_to_runtime_graph,
-                       _node_to_tensor, _memory_manager);
-    loader.loadTensors();
-    loader.initInputOutputTensors();
-    loader.loadOperators();
-  }
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h
deleted file mode 100644 (file)
index 11326a2..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_LOADER_MODULELOADER_H
-#define LUCI_INTERPRETER_LOADER_MODULELOADER_H
-
-#include "core/RuntimeModule.h"
-#include "loader/RuntimeToIR.h"
-#include "luci_interpreter/MemoryManager.h"
-
-#include <luci/IR/Module.h>
-
-#include <unordered_map>
-
-namespace luci_interpreter
-{
-
-class ModuleLoader
-{
-public:
-  ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module,
-               RuntimeToIR &runtime_to_ir,
-               std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
-               IMemoryManager *memory_manager);
-
-  void load();
-
-private:
-  IMemoryManager *_memory_manager;
-  const luci::Module *_module;
-  RuntimeModule *_runtime_module;
-  RuntimeToIR &_runtime_to_ir;
-  std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
-  std::unordered_map<const loco::Graph *, RuntimeGraph *> _graph_to_runtime_graph;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_LOADER_MODULELOADER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h b/compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h
deleted file mode 100644 (file)
index 9ea8b1f..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H
-#define LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H
-
-#include "luci_interpreter/core/Tensor.h"
-
-#include <luci/IR/CircleNode.h>
-
-#include <unordered_map>
-
-namespace luci_interpreter
-{
-
-// Maps runtime entities back to IR entities. It is used to implement observing functionality.
-struct RuntimeToIR
-{
-  std::unordered_map<const Tensor *, const luci::CircleNode *> tensor_to_node;
-  std::unordered_map<const Kernel *, const luci::CircleNode *> kernel_to_node;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp
deleted file mode 100644 (file)
index 501e847..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Add.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleAdd(const luci::CircleNode *circle_node,
-                                               KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleAdd *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *input1 = helper.getInputTensor(node->x());
-  const Tensor *input2 = helper.getInputTensor(node->y());
-  Tensor *output = helper.getOutputTensor(node);
-
-  AddParams params{};
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::Add>(input1, input2, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp
deleted file mode 100644 (file)
index f3ca557..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/ArgMax.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleArgMax(const luci::CircleNode *circle_node,
-                                                  KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleArgMax *>(circle_node);
-  assert(node->arity() == 2);
-  const Tensor *input = helper.getInputTensor(node->input());
-  const Tensor *axis = helper.getInputTensor(node->dimension());
-  Tensor *output = helper.getOutputTensor(node);
-
-  ArgMaxParams params{};
-  params.output_type = node->output_type();
-
-  return std::make_unique<kernels::ArgMax>(input, axis, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
deleted file mode 100644 (file)
index a813570..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/AveragePool2D.h"
-#include <luci/Plan/CircleNodeExecutionPlan.h>
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleAveragePool2D(const luci::CircleNode *circle_node,
-                                                         KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleAveragePool2D *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->value());
-  Tensor *output = helper.getOutputTensor(node);
-
-  Pool2DParams params{};
-  params.padding = node->padding();
-  params.filter_height = node->filter()->h();
-  params.filter_width = node->filter()->w();
-  params.stride_height = node->stride()->h();
-  params.stride_width = node->stride()->w();
-  params.activation = node->fusedActivationFunction();
-
-  // It is unknown what data will be stored in the scratchpad tensor,
-  // so UINT8 is used as the most general option.
-  auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
-  scratchpad->set_observable(false);
-  scratchpad->set_data_buffer(nullptr);
-  // If the node has an execution plan, read the memory offset for the scratchpad temporary
-  // tensor from the beginning of the shared memory buffer.
-  // Used by the Static Memory Manager.
-  // TODO move tensor offset initialization to one place
-  if (luci::has_execution_plan(node))
-  {
-    const auto execution_plan = luci::get_execution_plan(node);
-    // Check whether the offset for the current CircleAveragePool2D temporary was found.
-    if (execution_plan.offsets().size() > 1)
-      // If so, keep this offset in the scratchpad tensor.
-      scratchpad->set_offset(execution_plan.offsets().at(1));
-  }
-  Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
-
-  return std::make_unique<kernels::AveragePool2D>(input, output, tmp, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
deleted file mode 100644 (file)
index 9da2f6d..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/BatchMatMul.h"
-#include <luci/Plan/CircleNodeExecutionPlan.h>
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleBatchMatMul(const luci::CircleNode *circle_node,
-                                                       KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleBatchMatMul *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *lhs = helper.getInputTensor(node->x());
-  const Tensor *rhs = helper.getInputTensor(node->y());
-  Tensor *output = helper.getOutputTensor(node);
-
-  auto lhs_scratchpad =
-    std::make_unique<Tensor>(lhs->element_type(), Shape({}), AffineQuantization{}, "");
-  lhs_scratchpad->set_observable(false);
-  lhs_scratchpad->set_data_buffer(nullptr);
-  auto rhs_scratchpad =
-    std::make_unique<Tensor>(rhs->element_type(), Shape({}), AffineQuantization{}, "");
-  rhs_scratchpad->set_observable(false);
-  rhs_scratchpad->set_data_buffer(nullptr);
-  // If the node has an execution plan, read the memory offsets for the scratchpad temporary
-  // tensors from the beginning of the shared memory buffer.
-  // Used by the Static Memory Manager.
-  // TODO move tensor offset initialization to one place
-  if (luci::has_execution_plan(node))
-  {
-    const auto execution_plan = luci::get_execution_plan(node);
-    // Check whether the offset for the current BatchMatMul temporary was found.
-    if (execution_plan.offsets().size() > 1)
-    {
-      assert(execution_plan.offsets().size() == 3);
-
-      // If so, keep these offsets in the scratchpad tensors.
-      lhs_scratchpad->set_offset(execution_plan.offsets().at(1));
-      rhs_scratchpad->set_offset(execution_plan.offsets().at(2));
-    }
-  }
-  Tensor *lhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(lhs_scratchpad));
-  Tensor *rhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(rhs_scratchpad));
-
-  BatchMatMulParams params;
-  params.adj_x = node->adj_x();
-  params.adj_y = node->adj_y();
-
-  return std::make_unique<kernels::BatchMatMul>(lhs, rhs, output, lhs_tmp, rhs_tmp, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp
deleted file mode 100644 (file)
index ac6ebb3..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/BatchToSpaceND.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleBatchToSpaceND(const luci::CircleNode *circle_node,
-                                                          KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleBatchToSpaceND *>(circle_node);
-  assert(node->arity() == 3);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  const Tensor *block_shape = helper.getInputTensor(node->block_shape());
-  const Tensor *crops = helper.getInputTensor(node->crops());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::BatchToSpaceND>(input, block_shape, crops, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h
deleted file mode 100644 (file)
index eab2840..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
-#define LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
-
-#include "loader/KernelBuilderHelper.h"
-
-#include "luci/IR/CircleNodes.h"
-
-namespace luci_interpreter
-{
-
-#define REGISTER_KERNEL(name)                                                            \
-  std::unique_ptr<Kernel> build_kernel_Circle##name(const luci::CircleNode *circle_node, \
-                                                    KernelBuilderHelper &helper);
-
-#include "KernelsToBuild.lst"
-
-#undef REGISTER_KERNEL
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp
deleted file mode 100644 (file)
index a16354c..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Cast.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleCast(const luci::CircleNode *circle_node,
-                                                KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleCast *>(circle_node);
-
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->x());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Cast>(input, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp
deleted file mode 100644 (file)
index ba2564e..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Concatenation.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleConcatenation(const luci::CircleNode *circle_node,
-                                                         KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleConcatenation *>(circle_node);
-  std::vector<const Tensor *> inputs(node->numValues());
-  for (uint32_t i = 0; i < node->numValues(); ++i)
-  {
-    inputs[i] = helper.getInputTensor(node->values(i));
-  }
-  Tensor *output = helper.getOutputTensor(node);
-
-  ConcatenationParams params{};
-  params.axis = node->axis();
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::Concatenation>(std::move(inputs), output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp
deleted file mode 100644 (file)
index 218165e..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Conv2D.h"
-#include <luci/Plan/CircleNodeExecutionPlan.h>
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle_node,
-                                                  KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleConv2D *>(circle_node);
-  assert(node->arity() == 3);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  const Tensor *filter = helper.getInputTensor(node->filter());
-  const Tensor *bias = helper.getOptionalInputTensor(node->bias());
-  Tensor *output = helper.getOutputTensor(node);
-
-  // It is unknown what data will be stored in the scratchpad tensor,
-  // so UINT8 is used as the most general option
-  auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
-  scratchpad->set_observable(false);
-  scratchpad->set_data_buffer(nullptr);
-  // If node has execution plan then read memory offsets for scratchpad temporary tensor
-  // from the beginning of shared memory buffer.
-  // Used in Static Memory Manager.
-  // TODO move tensors offset initialization to one place
-  if (luci::has_execution_plan(node))
-  {
-    const auto execution_plan = luci::get_execution_plan(node);
-    // Check whether the offset for the current CircleConv2D temporary was found.
-    if (execution_plan.offsets().size() > 1)
-      // If this is true, then we keep this offset in scratchpad.
-      scratchpad->set_offset(execution_plan.offsets().at(1));
-  }
-  Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
-
-  Conv2DParams params{};
-  params.padding = node->padding();
-  params.stride_height = node->stride()->h();
-  params.stride_width = node->stride()->w();
-  params.dilation_height_factor = node->dilation()->h();
-  params.dilation_width_factor = node->dilation()->w();
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::Conv2D>(input, filter, bias, output, tmp, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp
deleted file mode 100644 (file)
index 1749463..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/DepthToSpace.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleDepthToSpace(const luci::CircleNode *circle_node,
-                                                        KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleDepthToSpace *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  Tensor *output = helper.getOutputTensor(node);
-
-  DepthToSpaceParams params{};
-  params.block_size = node->block_size();
-
-  return std::make_unique<kernels::DepthToSpace>(input, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
deleted file mode 100644 (file)
index 8af1e3b..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/DepthwiseConv2D.h"
-#include <luci/Plan/CircleNodeExecutionPlan.h>
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(const luci::CircleNode *circle_node,
-                                                           KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleDepthwiseConv2D *>(circle_node);
-  assert(node->arity() == 3);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  const Tensor *filter = helper.getInputTensor(node->filter());
-  const Tensor *bias = helper.getInputTensor(node->bias());
-  Tensor *output = helper.getOutputTensor(node);
-
-  DepthwiseConv2DParams params{};
-  params.padding = node->padding();
-  params.depth_multiplier = node->depthMultiplier();
-  params.stride_height = node->stride()->h();
-  params.stride_width = node->stride()->w();
-  params.dilation_height_factor = node->dilation()->h();
-  params.dilation_width_factor = node->dilation()->w();
-  params.activation = node->fusedActivationFunction();
-
-  // It is unknown what data will be stored in the scratchpad tensor,
-  // so UINT8 is used as the most general option
-  auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
-  scratchpad->set_observable(false);
-  scratchpad->set_data_buffer(nullptr);
-  // If node has execution plan then read memory offsets for scratchpad temporary tensor
-  // from the beginning of shared memory buffer.
-  // Used in Static Memory Manager.
-  // TODO move tensors offset initialization to one place
-  if (luci::has_execution_plan(node))
-  {
-    const auto execution_plan = luci::get_execution_plan(node);
-    // Check whether the offset for the current CircleDepthwiseConv2D temporary was found.
-    if (execution_plan.offsets().size() > 1)
-      // If this is true, then we keep this offset in scratchpad.
-      scratchpad->set_offset(execution_plan.offsets().at(1));
-  }
-  Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
-
-  return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, tmp, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp
deleted file mode 100644 (file)
index 787322e..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Dequantize.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleDequantize(const luci::CircleNode *circle_node,
-                                                      KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleDequantize *>(circle_node);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Dequantize>(input, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp
deleted file mode 100644 (file)
index 0611dfd..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Div.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleDiv(const luci::CircleNode *circle_node,
-                                               KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleDiv *>(circle_node);
-  assert(node->arity() == 2);
-  const Tensor *input1 = helper.getInputTensor(node->x());
-  const Tensor *input2 = helper.getInputTensor(node->y());
-  Tensor *output = helper.getOutputTensor(node);
-
-  DivParams params{};
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::Div>(input1, input2, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp
deleted file mode 100644 (file)
index a79985e..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Elu.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleElu(const luci::CircleNode *circle_node,
-                                               KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleElu *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->features());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Elu>(input, output);
-}
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp
deleted file mode 100644 (file)
index 5969288..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Equal.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleEqual(const luci::CircleNode *circle_node,
-                                                 KernelBuilderHelper &helper)
-
-{
-  const auto *node = loco::must_cast<const luci::CircleEqual *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *x = helper.getInputTensor(node->x());
-  const Tensor *y = helper.getInputTensor(node->y());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Equal>(x, y, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp
deleted file mode 100644 (file)
index 30d11cb..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Exp.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleExp(const luci::CircleNode *circle_node,
-                                               KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleExp *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->x());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Exp>(input, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp
deleted file mode 100644 (file)
index 9840c34..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/ExpandDims.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleExpandDims(const luci::CircleNode *circle_node,
-                                                      KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleExpandDims *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  const Tensor *axis = helper.getInputTensor(node->axis());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::ExpandDims>(input, axis, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp
deleted file mode 100644 (file)
index 3aefdf1..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Fill.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleFill(const luci::CircleNode *circle_node,
-                                                KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleFill *>(circle_node);
-  assert(node->arity() == 2);
-
-  const auto dims = helper.getInputTensor(node->dims());
-  const auto value = helper.getInputTensor(node->value());
-  auto output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Fill>(dims, value, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp
deleted file mode 100644 (file)
index e0a2231..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Floor.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleFloor(const luci::CircleNode *circle_node,
-                                                 KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleFloor *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->x());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Floor>(input, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp
deleted file mode 100644 (file)
index a45d89e..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/FloorDiv.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleFloorDiv(const luci::CircleNode *circle_node,
-                                                    KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleFloorDiv *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *x = helper.getInputTensor(node->x());
-  const Tensor *y = helper.getInputTensor(node->y());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::FloorDiv>(x, y, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp
deleted file mode 100644 (file)
index b7b742b..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/FullyConnected.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleFullyConnected(const luci::CircleNode *circle_node,
-                                                          KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleFullyConnected *>(circle_node);
-  assert(node->arity() == 3);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  const Tensor *weights = helper.getInputTensor(node->weights());
-  const Tensor *bias = helper.getOptionalInputTensor(node->bias());
-  Tensor *output = helper.getOutputTensor(node);
-
-  FullyConnectedParams params{};
-  params.activation = node->fusedActivationFunction();
-  params.keep_num_dims = node->keep_num_dims();
-
-  return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp
deleted file mode 100644 (file)
index 2ee2906..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Gather.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleGather(const luci::CircleNode *circle_node,
-                                                  KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleGather *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *params = helper.getInputTensor(node->params());
-  const Tensor *indices = helper.getInputTensor(node->indices());
-  Tensor *output = helper.getOutputTensor(node);
-
-  GatherParams gparams{};
-  gparams.axis = node->axis();
-  // TODO support batch_dims
-  gparams.batch_dims = 0;
-
-  return std::make_unique<kernels::Gather>(params, indices, output, gparams);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp
deleted file mode 100644 (file)
index 80aa63c..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Greater.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleGreater(const luci::CircleNode *circle_node,
-                                                   KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleGreater *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *x = helper.getInputTensor(node->x());
-  const Tensor *y = helper.getInputTensor(node->y());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Greater>(x, y, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp
deleted file mode 100644 (file)
index 272f284..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/GreaterEqual.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleGreaterEqual(const luci::CircleNode *circle_node,
-                                                        KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleGreaterEqual *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *x = helper.getInputTensor(node->x());
-  const Tensor *y = helper.getInputTensor(node->y());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::GreaterEqual>(x, y, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp
deleted file mode 100644 (file)
index 3ac7d49..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/If.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleIf(const luci::CircleNode *circle_node,
-                                              KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleIf *>(circle_node);
-  auto output_nodes = collectOutputNodes<luci::CircleIfOut>(node);
-  assert(node->arity() == 1 + node->input_count());
-  assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
-
-  const Tensor *cond = helper.getInputTensor(node->cond());
-  std::vector<const Tensor *> inputs(node->input_count());
-  for (uint32_t i = 0; i < node->input_count(); ++i)
-  {
-    inputs[i] = helper.getInputTensor(node->input(i));
-  }
-  std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
-
-  RuntimeGraph *then_graph = helper.getRuntimeGraph(node->then_graph());
-  RuntimeGraph *else_graph = helper.getRuntimeGraph(node->else_graph());
-
-  return std::make_unique<kernels::If>(cond, std::move(inputs), std::move(outputs), then_graph,
-                                       else_graph);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp
deleted file mode 100644 (file)
index 06031e5..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/InstanceNorm.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleInstanceNorm(const luci::CircleNode *circle_node,
-                                                        KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleInstanceNorm *>(circle_node);
-  assert(node->arity() == 3);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  const Tensor *gamma = helper.getInputTensor(node->gamma());
-  const Tensor *beta = helper.getInputTensor(node->beta());
-
-  Tensor *output = helper.getOutputTensor(node);
-
-  InstanceNormParams params{};
-  params.epsilon = node->epsilon();
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::InstanceNorm>(input, gamma, beta, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp
deleted file mode 100644 (file)
index 6e22e6d..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/L2Normalize.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleL2Normalize(const luci::CircleNode *circle_node,
-                                                       KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleL2Normalize *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->x());
-  Tensor *output = helper.getOutputTensor(node);
-
-  L2NormParams params{};
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::L2Normalize>(input, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp
deleted file mode 100644 (file)
index 95b5589..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/L2Pool2D.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleL2Pool2D(const luci::CircleNode *circle_node,
-                                                    KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleL2Pool2D *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->value());
-  Tensor *output = helper.getOutputTensor(node);
-
-  Pool2DParams params{};
-  params.padding = node->padding();
-  params.filter_height = node->filter()->h();
-  params.filter_width = node->filter()->w();
-  params.stride_height = node->stride()->h();
-  params.stride_width = node->stride()->w();
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::L2Pool2D>(input, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp
deleted file mode 100644 (file)
index bbf5067..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/LeakyRelu.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleLeakyRelu(const luci::CircleNode *circle_node,
-                                                     KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleLeakyRelu *>(circle_node);
-  assert(node->arity() == 1);
-  const Tensor *input = helper.getInputTensor(node->features());
-  Tensor *output = helper.getOutputTensor(node);
-
-  LeakyReluParams params{};
-  params.alpha = node->alpha();
-
-  return std::make_unique<kernels::LeakyRelu>(input, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp
deleted file mode 100644 (file)
index ae914ec..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Less.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleLess(const luci::CircleNode *circle_node,
-                                                KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleLess *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *x = helper.getInputTensor(node->x());
-  const Tensor *y = helper.getInputTensor(node->y());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Less>(x, y, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp
deleted file mode 100644 (file)
index f1b424b..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/LessEqual.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleLessEqual(const luci::CircleNode *circle_node,
-                                                     KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleLessEqual *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *x = helper.getInputTensor(node->x());
-  const Tensor *y = helper.getInputTensor(node->y());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::LessEqual>(x, y, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp
deleted file mode 100644 (file)
index 962ca2d..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/LocalResponseNormalization.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel>
-build_kernel_CircleLocalResponseNormalization(const luci::CircleNode *circle_node,
-                                              KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleLocalResponseNormalization *>(circle_node);
-  assert(node->arity() == 1);
-  const Tensor *input = helper.getInputTensor(node->input());
-  Tensor *output = helper.getOutputTensor(node);
-
-  LocalResponseNormalizationParams params{};
-  params.radius = node->radius();
-  params.bias = node->bias();
-  params.alpha = node->alpha();
-  params.beta = node->beta();
-
-  return std::make_unique<kernels::LocalResponseNormalization>(input, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp
deleted file mode 100644 (file)
index 4322041..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/LogSoftmax.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleLogSoftmax(const luci::CircleNode *circle_node,
-                                                      KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleLogSoftmax *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->logits());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::LogSoftmax>(input, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp
deleted file mode 100644 (file)
index bf3cb67..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/LogicalAnd.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleLogicalAnd(const luci::CircleNode *circle_node,
-                                                      KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleLogicalAnd *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *input1 = helper.getInputTensor(node->x());
-  const Tensor *input2 = helper.getInputTensor(node->y());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::LogicalAnd>(input1, input2, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp
deleted file mode 100644 (file)
index fefcd9a..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/LogicalNot.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleLogicalNot(const luci::CircleNode *circle_node,
-                                                      KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleLogicalNot *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->x());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::LogicalNot>(input, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp
deleted file mode 100644 (file)
index a416cb4..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/LogicalOr.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleLogicalOr(const luci::CircleNode *circle_node,
-                                                     KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleLogicalOr *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *input1 = helper.getInputTensor(node->x());
-  const Tensor *input2 = helper.getInputTensor(node->y());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::LogicalOr>(input1, input2, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp
deleted file mode 100644 (file)
index 4a69dee..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Logistic.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleLogistic(const luci::CircleNode *circle_node,
-                                                    KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleLogistic *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->x());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Logistic>(input, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp
deleted file mode 100644 (file)
index f66a206..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/MaxPool2D.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleMaxPool2D(const luci::CircleNode *circle_node,
-                                                     KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleMaxPool2D *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->value());
-  Tensor *output = helper.getOutputTensor(node);
-
-  Pool2DParams params{};
-  params.padding = node->padding();
-  params.filter_height = node->filter()->h();
-  params.filter_width = node->filter()->w();
-  params.stride_height = node->stride()->h();
-  params.stride_width = node->stride()->w();
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::MaxPool2D>(input, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp
deleted file mode 100644 (file)
index d0bff77..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Maximum.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleMaximum(const luci::CircleNode *circle_node,
-                                                   KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleMaximum *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *input1 = helper.getInputTensor(node->x());
-  const Tensor *input2 = helper.getInputTensor(node->y());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Maximum>(input1, input2, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp
deleted file mode 100644 (file)
index 0dec63e..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Mean.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleMean(const luci::CircleNode *circle_node,
-                                                KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleMean *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  const Tensor *axes = helper.getInputTensor(node->reduction_indices());
-  Tensor *output = helper.getOutputTensor(node);
-
-  auto temp_index_unique =
-    std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
-  temp_index_unique->set_observable(false);
-  temp_index_unique->set_data_buffer(nullptr);
-  Tensor *temp_index =
-    helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique));
-
-  auto resolved_axes_unique =
-    std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
-  resolved_axes_unique->set_observable(false);
-  resolved_axes_unique->set_data_buffer(nullptr);
-  Tensor *resolved_axes =
-    helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique));
-
-  auto temp_sum_unique =
-    std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, "");
-  temp_sum_unique->set_observable(false);
-  temp_sum_unique->set_data_buffer(nullptr);
-  Tensor *temp_sum = helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_sum_unique));
-
-  ReducerParams params{};
-  params.keep_dims = node->keep_dims();
-
-  return std::make_unique<kernels::Mean>(input, axes, output, temp_index, resolved_axes, temp_sum,
-                                         params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp
deleted file mode 100644 (file)
index 1a49c10..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Minimum.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleMinimum(const luci::CircleNode *circle_node,
-                                                   KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleMinimum *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *input1 = helper.getInputTensor(node->x());
-  const Tensor *input2 = helper.getInputTensor(node->y());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Minimum>(input1, input2, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp
deleted file mode 100644 (file)
index b221b45..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/MirrorPad.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleMirrorPad(const luci::CircleNode *circle_node,
-                                                     KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleMirrorPad *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  const Tensor *paddings = helper.getInputTensor(node->paddings());
-  Tensor *output = helper.getOutputTensor(node);
-
-  MirrorPadParams params{};
-  params.mode = node->mode();
-
-  return std::make_unique<kernels::MirrorPad>(input, paddings, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp
deleted file mode 100644 (file)
index f998485..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Mul.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleMul(const luci::CircleNode *circle_node,
-                                               KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleMul *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *input1 = helper.getInputTensor(node->x());
-  const Tensor *input2 = helper.getInputTensor(node->y());
-  Tensor *output = helper.getOutputTensor(node);
-
-  MulParams params{};
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::Mul>(input1, input2, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp
deleted file mode 100644 (file)
index 9a9ecf9..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Neg.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleNeg(const luci::CircleNode *circle_node,
-                                               KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleNeg *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->x());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Neg>(input, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp
deleted file mode 100644 (file)
index 3916a58..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/NotEqual.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleNotEqual(const luci::CircleNode *circle_node,
-                                                    KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleNotEqual *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *x = helper.getInputTensor(node->x());
-  const Tensor *y = helper.getInputTensor(node->y());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::NotEqual>(x, y, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp
deleted file mode 100644 (file)
index a401609..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/OneHot.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleOneHot(const luci::CircleNode *circle_node,
-                                                  KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleOneHot *>(circle_node);
-  assert(node->arity() == 4);
-
-  const Tensor *indices = helper.getInputTensor(node->indices());
-  const Tensor *depth = helper.getInputTensor(node->depth());
-  const Tensor *on_value = helper.getInputTensor(node->on_value());
-  const Tensor *off_value = helper.getInputTensor(node->off_value());
-  Tensor *output = helper.getOutputTensor(node);
-
-  OneHotParams params{};
-  params.axis = node->axis();
-
-  return std::make_unique<kernels::OneHot>(indices, depth, on_value, off_value, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp
deleted file mode 100644 (file)
index f3d700c..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/PRelu.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CirclePRelu(const luci::CircleNode *circle_node,
-                                                 KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CirclePRelu *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  const Tensor *alpha = helper.getInputTensor(node->alpha());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::PRelu>(input, alpha, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp
deleted file mode 100644 (file)
index efc5850..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Pack.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CirclePack(const luci::CircleNode *circle_node,
-                                                KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CirclePack *>(circle_node);
-  assert(node->arity() == node->values_count());
-
-  std::vector<const Tensor *> inputs(node->values_count());
-  for (uint32_t i = 0; i < node->values_count(); ++i)
-  {
-    inputs[i] = helper.getInputTensor(node->values(i));
-  }
-  Tensor *output = helper.getOutputTensor(node);
-
-  PackParams params{};
-  params.axis = node->axis();
-  params.values_count = node->values_count();
-
-  return std::make_unique<kernels::Pack>(std::move(inputs), output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp
deleted file mode 100644 (file)
index 67ce997..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Pad.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CirclePad(const luci::CircleNode *circle_node,
-                                               KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CirclePad *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  const Tensor *paddings = helper.getInputTensor(node->paddings());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Pad>(input, paddings, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp
deleted file mode 100644 (file)
index e378a97..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/PadV2.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CirclePadV2(const luci::CircleNode *circle_node,
-                                                 KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CirclePadV2 *>(circle_node);
-  assert(node->arity() == 3);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  const Tensor *paddings = helper.getInputTensor(node->paddings());
-  const Tensor *constant_values = helper.getInputTensor(node->constant_values());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::PadV2>(input, paddings, constant_values, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp
deleted file mode 100644 (file)
index d32fc3d..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Pow.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CirclePow(const luci::CircleNode *circle_node,
-                                               KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CirclePow *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *input1 = helper.getInputTensor(node->x());
-  const Tensor *input2 = helper.getInputTensor(node->y());
-
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Pow>(input1, input2, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp
deleted file mode 100644 (file)
index cb36fb6..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Quantize.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleQuantize(const luci::CircleNode *circle_node,
-                                                    KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleQuantize *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Quantize>(input, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp
deleted file mode 100644 (file)
index 1d64c1c..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Relu.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleRelu(const luci::CircleNode *circle_node,
-                                                KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleRelu *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->features());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Relu>(input, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp
deleted file mode 100644 (file)
index e50cd25..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Relu6.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleRelu6(const luci::CircleNode *circle_node,
-                                                 KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleRelu6 *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->features());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Relu6>(input, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp
deleted file mode 100644 (file)
index 76ddd88..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Reshape.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleReshape(const luci::CircleNode *circle_node,
-                                                   KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleReshape *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *input = helper.getInputTensor(node->tensor());
-  const Tensor *shape = helper.getInputTensor(node->shape());
-  Tensor *output = helper.getOutputTensor(node);
-
-  // NOTE 'newShape' attribute is ignored.
-  return std::make_unique<kernels::Reshape>(input, shape, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
deleted file mode 100644 (file)
index dc2b88a..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/ResizeBilinear.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleResizeBilinear(const luci::CircleNode *circle_node,
-                                                          KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleResizeBilinear *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  const Tensor *size = helper.getInputTensor(node->size());
-  Tensor *output = helper.getOutputTensor(node);
-
-  ResizeBilinearParams params{};
-  params.align_corners = node->align_corners();
-  params.half_pixel_centers = node->half_pixel_centers();
-
-  return std::make_unique<kernels::ResizeBilinear>(input, size, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
deleted file mode 100644 (file)
index c7058ae..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/ResizeNearestNeighbor.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel>
-build_kernel_CircleResizeNearestNeighbor(const luci::CircleNode *circle_node,
-                                         KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleResizeNearestNeighbor *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  const Tensor *size = helper.getInputTensor(node->size());
-  Tensor *output = helper.getOutputTensor(node);
-
-  ResizeNearestNeighborParams params{};
-  params.align_corners = node->align_corners();
-  // TODO Update half_pixel_centers once CircleResizeNearestNeighbor supports it.
-  // The current CircleResizeNearestNeighbor node does not provide half_pixel_centers,
-  // so the default value of false is used here.
-  // This needs to be revisited when CircleResizeNearestNeighbor is updated.
-  params.half_pixel_centers = false;
-
-  return std::make_unique<kernels::ResizeNearestNeighbor>(input, size, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp
deleted file mode 100644 (file)
index c1a7f53..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/ReverseV2.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleReverseV2(const luci::CircleNode *circle_node,
-                                                     KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleReverseV2 *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *input = helper.getInputTensor(node->tensor());
-  const Tensor *axes = helper.getInputTensor(node->axis());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::ReverseV2>(input, axes, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp
deleted file mode 100644 (file)
index 0714a5d..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Rsqrt.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleRsqrt(const luci::CircleNode *circle_node,
-                                                 KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleRsqrt *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->x());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Rsqrt>(input, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp
deleted file mode 100644 (file)
index d172ef4..0000000
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/SVDF.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSVDF(const luci::CircleNode *circle_node,
-                                                KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleSVDF *>(circle_node);
-  assert(node->arity() == 5);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  const Tensor *feature = helper.getInputTensor(node->weight_feature());
-  const Tensor *time = helper.getInputTensor(node->weight_time());
-  const Tensor *bias = helper.getOptionalInputTensor(node->bias());
-  const Tensor *input_activation_state = helper.getInputTensor(node->input_activation_state());
-  Tensor *output = helper.getOutputTensor(node);
-
-  auto scratchpad_tensor = std::make_unique<Tensor>(input_activation_state->element_type(),
-                                                    Shape({}), AffineQuantization{}, "");
-  scratchpad_tensor->set_observable(false);
-  scratchpad_tensor->set_data_buffer(nullptr);
-  Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
-
-  DataType data_type = input->element_type() == DataType::S8 ? DataType::S32 : DataType::FLOAT32;
-
-  scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
-  scratchpad_tensor->set_observable(false);
-  scratchpad_tensor->set_data_buffer(nullptr);
-  Tensor *tmp_1 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
-
-  if (data_type == DataType::FLOAT32 &&
-      (feature->element_type() == DataType::S8 || feature->element_type() == DataType::U8))
-  {
-    data_type = feature->element_type();
-  }
-
-  scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
-  scratchpad_tensor->set_observable(false);
-  scratchpad_tensor->set_data_buffer(nullptr);
-  Tensor *tmp_2 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
-
-  data_type = DataType::FLOAT32;
-
-  scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
-  scratchpad_tensor->set_observable(false);
-  scratchpad_tensor->set_data_buffer(nullptr);
-  Tensor *tmp_3 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
-
-  scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
-  scratchpad_tensor->set_observable(false);
-  scratchpad_tensor->set_data_buffer(nullptr);
-  Tensor *tmp_4 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
-
-  scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
-  scratchpad_tensor->set_observable(false);
-  scratchpad_tensor->set_data_buffer(nullptr);
-  Tensor *tmp_5 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
-
-  scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
-  scratchpad_tensor->set_observable(false);
-  scratchpad_tensor->set_data_buffer(nullptr);
-  Tensor *tmp_6 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
-
-  SVDFParams params{};
-  params.activation = node->fusedActivationFunction();
-  params.svdf_rank = node->svdf_rank();
-  params.asymmetric_quantize_inputs = node->asymmetric_quantize_inputs();
-
-  return std::make_unique<kernels::SVDF>(input, feature, time, bias, input_activation_state, output,
-                                         tmp, tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp
deleted file mode 100644 (file)
index d1edbc7..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Shape.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleShape(const luci::CircleNode *circle_node,
-                                                 KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleShape *>(circle_node);
-  assert(node->arity() == 1);
-
-  const auto input = helper.getInputTensor(node->input());
-  auto output = helper.getOutputTensor(node);
-
-  ShapeParams shape_params{};
-  shape_params.out_type = node->out_type();
-
-  return std::make_unique<kernels::ShapeKernel>(input, output, shape_params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp
deleted file mode 100644 (file)
index 60ac641..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Slice.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSlice(const luci::CircleNode *circle_node,
-                                                 KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleSlice *>(circle_node);
-  assert(node->arity() == 3);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  const Tensor *begin = helper.getInputTensor(node->begin());
-  const Tensor *size = helper.getInputTensor(node->size());
-
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Slice>(input, begin, size, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp
deleted file mode 100644 (file)
index f41f63f..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Softmax.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSoftmax(const luci::CircleNode *circle_node,
-                                                   KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleSoftmax *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->logits());
-  Tensor *output = helper.getOutputTensor(node);
-
-  SoftmaxParams params{};
-  params.beta = node->beta();
-
-  return std::make_unique<kernels::Softmax>(input, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
deleted file mode 100644 (file)
index b6e6cf5..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/SpaceToBatchND.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSpaceToBatchND(const luci::CircleNode *circle_node,
-                                                          KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleSpaceToBatchND *>(circle_node);
-  assert(node->arity() == 3);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  const Tensor *block_shape = helper.getInputTensor(node->block_shape());
-  const Tensor *paddings = helper.getInputTensor(node->paddings());
-
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::SpaceToBatchND>(input, block_shape, paddings, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
deleted file mode 100644 (file)
index 63fdb95..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/SpaceToDepth.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSpaceToDepth(const luci::CircleNode *circle_node,
-                                                        KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleSpaceToDepth *>(circle_node);
-  assert(node->arity() == 1);
-  const Tensor *input = helper.getInputTensor(node->input());
-
-  Tensor *output = helper.getOutputTensor(node);
-
-  SpaceToDepthParams params{};
-  params.block_size = node->block_size();
-
-  return std::make_unique<kernels::SpaceToDepth>(input, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp
deleted file mode 100644 (file)
index 3f6d4a7..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Split.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSplit(const luci::CircleNode *circle_node,
-                                                 KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleSplit *>(circle_node);
-  auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node);
-  assert(node->arity() == 2);
-  assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
-
-  const Tensor *axis = helper.getInputTensor(node->split_dim());
-  const Tensor *input = helper.getInputTensor(node->input());
-  std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
-
-  // NOTE 'num_splits' attribute is ignored.
-  return std::make_unique<kernels::Split>(axis, input, std::move(outputs));
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp
deleted file mode 100644 (file)
index 0788822..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/SplitV.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSplitV(const luci::CircleNode *circle_node,
-                                                  KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleSplitV *>(circle_node);
-  auto output_nodes = collectOutputNodes<luci::CircleSplitVOut>(node);
-  assert(node->arity() == 3);
-  assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  const Tensor *sizes_data = helper.getInputTensor(node->size_splits());
-  const Tensor *axis = helper.getInputTensor(node->split_dim());
-  std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
-
-  // NOTE 'num_splits' attribute is ignored.
-  return std::make_unique<kernels::SplitV>(input, sizes_data, axis, std::move(outputs));
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp
deleted file mode 100644 (file)
index b9843fe..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Sqrt.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSqrt(const luci::CircleNode *circle_node,
-                                                KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleSqrt *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->x());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Sqrt>(input, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp
deleted file mode 100644 (file)
index 0ad7c17..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Square.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSquare(const luci::CircleNode *circle_node,
-                                                  KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleSquare *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->x());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Square>(input, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp
deleted file mode 100644 (file)
index e4c6fd8..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/SquaredDifference.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSquaredDifference(const luci::CircleNode *circle_node,
-                                                             KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleSquaredDifference *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *input1 = helper.getInputTensor(node->x());
-  const Tensor *input2 = helper.getInputTensor(node->y());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::SquaredDifference>(input1, input2, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp
deleted file mode 100644 (file)
index 6885f80..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Squeeze.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSqueeze(const luci::CircleNode *circle_node,
-                                                   KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleSqueeze *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  Tensor *output = helper.getOutputTensor(node);
-
-  SqueezeParams params{};
-  params.squeeze_dims = node->squeeze_dims();
-
-  return std::make_unique<kernels::Squeeze>(input, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp
deleted file mode 100644 (file)
index 359b4e3..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/StridedSlice.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleStridedSlice(const luci::CircleNode *circle_node,
-                                                        KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleStridedSlice *>(circle_node);
-  assert(node->arity() == 4);
-
-  const Tensor *input = helper.getInputTensor(node->input());
-  const Tensor *begin = helper.getInputTensor(node->begin());
-  const Tensor *end = helper.getInputTensor(node->end());
-  const Tensor *strides = helper.getInputTensor(node->strides());
-
-  Tensor *output = helper.getOutputTensor(node);
-
-  StridedSliceParams params{};
-  params.begin_mask = node->begin_mask();
-  params.ellipsis_mask = node->ellipsis_mask();
-  params.end_mask = node->end_mask();
-  params.new_axis_mask = node->new_axis_mask();
-  params.shrink_axis_mask = node->shrink_axis_mask();
-
-  return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp
deleted file mode 100644 (file)
index a6252cb..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Sub.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSub(const luci::CircleNode *circle_node,
-                                               KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleSub *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *input1 = helper.getInputTensor(node->x());
-  const Tensor *input2 = helper.getInputTensor(node->y());
-  Tensor *output = helper.getOutputTensor(node);
-
-  SubParams params{};
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::Sub>(input1, input2, output, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp
deleted file mode 100644 (file)
index a58ef60..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Tanh.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleTanh(const luci::CircleNode *circle_node,
-                                                KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleTanh *>(circle_node);
-  assert(node->arity() == 1);
-
-  const Tensor *input = helper.getInputTensor(node->x());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Tanh>(input, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp
deleted file mode 100644 (file)
index ea17d83..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Transpose.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleTranspose(const luci::CircleNode *circle_node,
-                                                     KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleTranspose *>(circle_node);
-  assert(node->arity() == 2);
-
-  const Tensor *input = helper.getInputTensor(node->a());
-  const Tensor *perm = helper.getInputTensor(node->perm());
-  Tensor *output = helper.getOutputTensor(node);
-
-  return std::make_unique<kernels::Transpose>(input, perm, output);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp
deleted file mode 100644 (file)
index d773e30..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/TransposeConv.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleTransposeConv(const luci::CircleNode *circle_node,
-                                                         KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleTransposeConv *>(circle_node);
-  assert(node->arity() == 4);
-
-  const Tensor *input_sizes = helper.getInputTensor(node->inputSizes());
-  const Tensor *filter = helper.getInputTensor(node->filter());
-  const Tensor *out_backprop = helper.getInputTensor(node->outBackprop());
-  const Tensor *bias = helper.getOptionalInputTensor(node->bias());
-
-  Tensor *output = helper.getOutputTensor(node);
-
-  DataType scratch_data_type =
-    helper.getInputTensor(node)->element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
-
-  auto scratch_tensor =
-    std::make_unique<Tensor>(scratch_data_type, Shape({}), AffineQuantization{}, "");
-  scratch_tensor->set_observable(false);
-  scratch_tensor->set_data_buffer(nullptr);
-  Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratch_tensor));
-
-  TransposeConvParams params{};
-  params.padding = node->padding();
-  params.stride_height = node->stride()->h();
-  params.stride_width = node->stride()->w();
-
-  return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output,
-                                                  tmp, params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp
deleted file mode 100644 (file)
index a1c0d32..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Unpack.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleUnpack(const luci::CircleNode *circle_node,
-                                                  KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleUnpack *>(circle_node);
-  auto output_nodes = collectOutputNodes<luci::CircleUnpackOut>(node);
-  assert(node->arity() == 1);
-  assert(output_nodes.size() == static_cast<size_t>(node->num()));
-
-  const Tensor *input = helper.getInputTensor(node->value());
-  std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
-
-  UnpackParams params{};
-  params.axis = node->axis();
-
-  // NOTE 'num' attribute is ignored.
-  return std::make_unique<kernels::Unpack>(input, std::move(outputs), params);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp
deleted file mode 100644 (file)
index 8fde6ec..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/While.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleWhile(const luci::CircleNode *circle_node,
-                                                 KernelBuilderHelper &helper)
-{
-  const auto *node = loco::must_cast<const luci::CircleWhile *>(circle_node);
-
-  auto output_nodes = collectOutputNodes<luci::CircleWhileOut>(node);
-  assert(node->arity() == node->input_count());
-  assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
-
-  std::vector<const Tensor *> inputs(node->input_count());
-  for (uint32_t i = 0; i < node->input_count(); ++i)
-  {
-    inputs[i] = helper.getInputTensor(node->input(i));
-  }
-  std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
-
-  RuntimeGraph *cond_graph = helper.getRuntimeGraph(node->cond_graph());
-  RuntimeGraph *body_graph = helper.getRuntimeGraph(node->body_graph());
-
-  return std::make_unique<kernels::While>(std::move(inputs), std::move(outputs), cond_graph,
-                                          body_graph);
-}
-
-} // namespace luci_interpreter
diff --git a/compiler/luci-micro/requires.cmake b/compiler/luci-micro/requires.cmake
deleted file mode 100644 (file)
index 5913aa9..0000000
+++ /dev/null
@@ -1 +0,0 @@
-require(luci-interpreter)
diff --git a/compiler/luci-micro/standalone/CMakeLists.txt b/compiler/luci-micro/standalone/CMakeLists.txt
deleted file mode 100644 (file)
index d304826..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-cmake_minimum_required(VERSION 3.10)
-project(luci_interpreter_micro_standalone)
-
-# Add fake target, so nothing is build
-set(BUILD_WHITELIST "dummy")
-
-add_subdirectory(${NNAS_ROOT}/infra/nncc ${CMAKE_CURRENT_BINARY_DIR}/nncc)
-
-set(ONE_COMPILER_SRC_DIR "${NNAS_PROJECT_SOURCE_DIR}/compiler")
-nnas_find_package(FlatBuffersSource EXACT 2.0 QUIET)
-
-include_directories(${FlatBuffersSource_DIR}/include)
-
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/loco ${CMAKE_CURRENT_BINARY_DIR}/loco)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/angkor ${CMAKE_CURRENT_BINARY_DIR}/angkor)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/oops ${CMAKE_CURRENT_BINARY_DIR}/oops)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/pepper-str ${CMAKE_CURRENT_BINARY_DIR}/pepper-str)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/logo ${CMAKE_CURRENT_BINARY_DIR}/logo)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/logo-core ${CMAKE_CURRENT_BINARY_DIR}/logo-core)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/hermes-std ${CMAKE_CURRENT_BINARY_DIR}/hermes-std)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/hermes ${CMAKE_CURRENT_BINARY_DIR}/hermes)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/pepper-strcast ${CMAKE_CURRENT_BINARY_DIR}/pepper-strcast)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/foder ${CMAKE_CURRENT_BINARY_DIR}/foder)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/mio-circle04 ${CMAKE_CURRENT_BINARY_DIR}/mio-circle04)
-
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/locomotiv ${CMAKE_CURRENT_BINARY_DIR}/locomotiv)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/lang ${CMAKE_CURRENT_BINARY_DIR}/luci/lang)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/import ${CMAKE_CURRENT_BINARY_DIR}/luci/import)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/profile ${CMAKE_CURRENT_BINARY_DIR}/luci/profile)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/env ${CMAKE_CURRENT_BINARY_DIR}/luci/env)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/plan ${CMAKE_CURRENT_BINARY_DIR}/luci/plan)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/log ${CMAKE_CURRENT_BINARY_DIR}/luci/log)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/logex ${CMAKE_CURRENT_BINARY_DIR}/luci/logex)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/locop ${CMAKE_CURRENT_BINARY_DIR}/locop)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/pp ${CMAKE_CURRENT_BINARY_DIR}/pp)
-
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci-micro/luci-interpreter ${CMAKE_CURRENT_BINARY_DIR}/luci-interpreter)
index 3489f1e..c86d2cd 100644 (file)
@@ -53,3 +53,14 @@ add_test(NAME luci_pass_value_test
           "$<TARGET_FILE:luci_eval_driver>"
           ${LUCI_PASS_VALUE_TESTS}
 )
+
+if(ONE_UBUNTU_CODENAME_JAMMY)
+  add_test(NAME luci_pass_value_210_test
+    COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/eval_driver.sh"
+            "${CMAKE_CURRENT_BINARY_DIR}"
+            "${ARTIFACTS_BIN_PATH}"
+            "${NNCC_OVERLAY_DIR}/venv_2_10_1"
+            "$<TARGET_FILE:luci_eval_driver>"
+            ${LUCI_PASS_VALUE_TESTS}
+  )
+endif(ONE_UBUNTU_CODENAME_JAMMY)
index cdff159..b0b938c 100644 (file)
@@ -13,11 +13,13 @@ addeval(Net_Conv_Add_Mul_001 fuse_batchnorm_with_conv)
 addeval(Net_Conv_Add_Mul_002 fuse_batchnorm_with_conv)
 addeval(Net_Conv_Min_Max_000 transform_min_max_to_relu6)
 addeval(Net_Conv_Min_Relu_000 transform_min_relu_to_relu6)
+addeval(Net_Conv_PReluGraph_000 fuse_prelu)
 addeval(Net_Conv_Relu6_000 fuse_activation_function)
 addeval(Net_Densify_Add_000 fold_densify)
 addeval(Net_Dequantize_Add_000 fold_dequantize)
 addeval(Net_DwConv_BN_000 fuse_batchnorm_with_dwconv)
 addeval(Net_DwConv_BN_001 fuse_batchnorm_with_dwconv)
+addeval(Net_FullyConnected_Add_000 fold_fully_connected)
 addeval(Net_Reshape_Neg_000 forward_reshape_to_unaryop)
 addeval(Net_Reshape_Reshape_000 remove_redundant_reshape)
 addeval(Net_Squeeze_Squeeze_000 substitute_squeeze_to_reshape)
@@ -29,11 +31,16 @@ addeval(Net_TConv_BN_001 fuse_batchnorm_with_tconv)
 addeval(Net_TConv_BN_002 fuse_batchnorm_with_tconv)
 addeval(Net_TConv_BN_003 fuse_batchnorm_with_tconv)
 addeval(Net_TConv_BN_004 fuse_batchnorm_with_tconv)
+addeval(Net_TConv_BN_005 fuse_batchnorm_with_tconv)
 addeval(Net_InstanceNorm_001 fuse_instnorm)
 addeval(Net_InstanceNorm_002 fuse_instnorm)
 addeval(Net_InstanceNorm_003 fuse_instnorm)
 addeval(Net_StridedSlice_StridedSlice_000 remove_unnecessary_strided_slice)
 addeval(FullyConnected_007 replace_non_const_fc_with_batch_matmul)
+addeval(Net_Transpose_Add_000 forward_transpose_op)
+addeval(Net_Transpose_Abs_000 forward_transpose_op)
+addeval(UnidirectionalSequenceLSTM_003 unroll_unidirseqlstm)
+addeval(UnidirectionalSequenceLSTM_004 unroll_unidirseqlstm)
 
 # test for limited support for FLOAT16
 addeval(Net_Dequantize_Add_000 fold_dequantize)
index ebf9c59..b55f602 100644 (file)
@@ -45,6 +45,28 @@ if(NOT CMAKE_CROSSCOMPILING)
     )
   endif()
 
+  if(ONE_UBUNTU_CODENAME_JAMMY)
+    add_test(NAME luci_value_210_test
+      COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify.sh"
+              "${CMAKE_CURRENT_BINARY_DIR}"
+              "${ARTIFACTS_BIN_PATH}"
+              "${NNCC_OVERLAY_DIR}/venv_2_10_1"
+              "$<TARGET_FILE:luci_eval_driver>"
+              ${LUCI_VALUE_TESTS}
+    )
+
+    if(DEFINED LUCI_VALUE_TESTS_TOL)
+      add_test(NAME luci_value_tol_210_test
+        COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverifytol.sh"
+                "${CMAKE_CURRENT_BINARY_DIR}"
+                "${ARTIFACTS_BIN_PATH}"
+                "${NNCC_OVERLAY_DIR}/venv_2_10_1"
+                "$<TARGET_FILE:luci_eval_driver>"
+                ${LUCI_VALUE_TESTS_TOL}
+      )
+    endif()
+  endif(ONE_UBUNTU_CODENAME_JAMMY)
+
 else(NOT CMAKE_CROSSCOMPILING)
   # NOTE target test is carried out using reference input/output data from host
   #      test results. this is because it would be difficult to prepare
index 560e34f..f74b220 100755 (executable)
@@ -24,11 +24,16 @@ circle_model = args.model + ".circle"
 
 rtolf32 = 1e-5
 atolf32 = 1e-5
+# NOTE reuse f32 value as int value too
+rtolint = 0
+atolint = 0
 try:
     if args.rtolf32 != None:
         rtolf32 = float(args.rtolf32)
+        rtolint = int(rtolf32)
     if args.atolf32 != None:
         atolf32 = float(args.atolf32)
+        atolint = int(atolf32)
 except ValueError:
     print("rtolf32 or atolf32 is not a number")
     quit(128)
@@ -117,7 +122,11 @@ for idx in range(len(inpt_output_details)):
     intp_output_data = interpreter.get_tensor(output_tensor)
     try:
         if output_details["dtype"] == np.uint8:
-            if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+            if np.allclose(
+                    luci_output_data, intp_output_data, rtol=rtolint,
+                    atol=atolint) == False:
+                print("intp_output_data", intp_output_data)
+                print("luci_output_data", luci_output_data)
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
             output_dtype = "uint8"
@@ -125,26 +134,42 @@ for idx in range(len(inpt_output_details)):
             if np.allclose(
                     luci_output_data, intp_output_data, rtol=rtolf32,
                     atol=atolf32) == False:
+                print("intp_output_data", intp_output_data)
+                print("luci_output_data", luci_output_data)
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
             output_dtype = "float32"
         elif output_details["dtype"] == np.int64:
-            if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+            if np.allclose(
+                    luci_output_data, intp_output_data, rtol=rtolint,
+                    atol=atolint) == False:
+                print("intp_output_data", intp_output_data)
+                print("luci_output_data", luci_output_data)
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
             output_dtype = "int64"
         elif output_details["dtype"] == np.int32:
-            if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+            if np.allclose(
+                    luci_output_data, intp_output_data, rtol=rtolint,
+                    atol=atolint) == False:
+                print("intp_output_data", intp_output_data)
+                print("luci_output_data", luci_output_data)
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
             output_dtype = "int32"
         elif output_details["dtype"] == np.int16:
-            if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+            if np.allclose(
+                    luci_output_data, intp_output_data, rtol=rtolint,
+                    atol=atolint) == False:
+                print("intp_output_data", intp_output_data)
+                print("luci_output_data", luci_output_data)
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
             output_dtype = "int16"
         elif output_details["dtype"] == np.bool_:
             if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+                print("intp_output_data", intp_output_data)
+                print("luci_output_data", luci_output_data)
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
             output_dtype = "bool"
index 5313e33..3e4d938 100755 (executable)
@@ -55,11 +55,16 @@ circle_model = args.work_path + ".circle"
 
 rtolf32 = 1e-5
 atolf32 = 1e-5
+# NOTE reuse f32 value as int value too
+rtolint = 0
+atolint = 0
 try:
     if args.rtolf32 != None:
         rtolf32 = float(args.rtolf32)
+        rtolint = int(rtolf32)
     if args.atolf32 != None:
         atolf32 = float(args.atolf32)
+        atolint = int(atolf32)
 except ValueError:
     print("rtolf32 or atolf32 is not a number")
     quit(128)
@@ -113,28 +118,42 @@ for idx in range(num_outputs):
     try:
         if output_dtype == np.uint8:
             if np.allclose(
-                    luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False:
+                    luci_output_data, luci_output_data_ref, rtol=rtolint,
+                    atol=atolint) == False:
+                print("luci_output_data_ref", luci_output_data_ref)
+                print("luci_output_data", luci_output_data)
                 raise SystemExit("Execution result of " + circle_model_ref +
                                  " does not match with " + circle_model)
         elif output_dtype == np.float32:
             if np.allclose(
                     luci_output_data, luci_output_data_ref, rtol=rtolf32,
                     atol=atolf32) == False:
+                print("luci_output_data_ref", luci_output_data_ref)
+                print("luci_output_data", luci_output_data)
                 raise SystemExit("Execution result of " + circle_model_ref +
                                  " does not match with " + circle_model)
         elif output_dtype == np.int64:
             if np.allclose(
-                    luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False:
+                    luci_output_data, luci_output_data_ref, rtol=rtolint,
+                    atol=atolint) == False:
+                print("luci_output_data_ref", luci_output_data_ref)
+                print("luci_output_data", luci_output_data)
                 raise SystemExit("Execution result of " + circle_model_ref +
                                  " does not match with " + circle_model)
         elif output_dtype == np.int32:
             if np.allclose(
-                    luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False:
+                    luci_output_data, luci_output_data_ref, rtol=rtolint,
+                    atol=atolint) == False:
+                print("luci_output_data_ref", luci_output_data_ref)
+                print("luci_output_data", luci_output_data)
                 raise SystemExit("Execution result of " + circle_model_ref +
                                  " does not match with " + circle_model)
         elif output_dtype == np.int16:
             if np.allclose(
-                    luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False:
+                    luci_output_data, luci_output_data_ref, rtol=rtolint,
+                    atol=atolint) == False:
+                print("luci_output_data_ref", luci_output_data_ref)
+                print("luci_output_data", luci_output_data)
                 raise SystemExit("Execution result of " + circle_model_ref +
                                  " does not match with " + circle_model)
         elif output_dtype == np.bool_:
index 932da95..70c3fb1 100644 (file)
@@ -174,17 +174,20 @@ addeval(Tanh_000)
 #addeval(TopKV2_001)
 addeval(Transpose_000)
 addeval(TransposeConv_000)
+addeval(UnidirectionalSequenceLSTM_002)
+addeval(UnidirectionalSequenceLSTM_003)
+addeval(UnidirectionalSequenceLSTM_004)
 addeval(Unpack_000)
 addeval(Unpack_001)
 addeval(Unpack_002)
 addeval(Unpack_003)
+addeval(UnidirectionalSequenceLSTM_002)
 #addeval(Where_000)
 #addeval(Where_001)
 #addeval(While_000)
 #addeval(While_001)
 #addeval(While_002)
 #addeval(While_003)
-addeval(YUV_TO_RGB_U8_000)
 #addeval(ZerosLike_000)
 
 # Simple Network test
@@ -194,3 +197,5 @@ addeval(Part_While_001)
 # Tests with tolerance
 addevaltol(SVDF_000 8e-3 8e-3)
 addevaltol(SVDF_001 8e-3 8e-3)
+# refer https://github.com/Samsung/ONE/issues/10438
+addevaltol(YUV_TO_RGB_U8_000 1 1)
index 59a08b5..4634be7 100644 (file)
@@ -47,9 +47,6 @@ CircleNode *GraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext
     {
       // If there is no tensor, insert CircleOutputExclude.
       auto *node = context->graph()->nodes()->create<luci::CircleOutputExclude>();
-      // CircleOutputExclude doesn't need a type, but since all nodes must have a type,
-      // a dummy type is inserted.
-      node->dtype(loco::DataType::FLOAT32);
       input_nodes.push_back(node);
     }
   }
index 4df8d1e..7bcb57a 100644 (file)
@@ -48,9 +48,6 @@ CircleNode *GraphBuilderMultiOutput::build(const circle::OperatorT &op,
     {
       // If there is no tensor, insert CircleOutputExclude.
       auto *node = context->graph()->nodes()->create<luci::CircleOutputExclude>();
-      // CircleOutputExclude doesn't need a type, but since all nodes must have a type,
-      // a dummy type is inserted.
-      node->dtype(loco::DataType::FLOAT32);
       input_nodes.push_back(node);
     }
   }
index f8bdcff..c04b957 100644 (file)
@@ -39,40 +39,51 @@ CircleNode *CircleBidirectionalSequenceLSTMGraphBuilder::build_node(const BuildN
   auto *node = bna.context->graph()->nodes()->create<CircleBidirectionalSequenceLSTM>();
   auto &inputs = bna.input_nodes;
   node->input(inputs.at(0));
+
   node->fw_input_to_input_weights(inputs.at(1)); // Optional
   node->fw_input_to_cell_weights(inputs.at(2));
   node->fw_input_to_forget_weights(inputs.at(3));
   node->fw_input_to_output_weights(inputs.at(4));
+
   node->fw_recurrent_to_input_weights(inputs.at(5)); // Optional
   node->fw_recurrent_to_cell_weights(inputs.at(6));
   node->fw_recurrent_to_forget_weights(inputs.at(7));
   node->fw_recurrent_to_output_weights(inputs.at(8));
+
   node->fw_cell_to_input_weights(inputs.at(9));   // Optional
   node->fw_cell_to_forget_weights(inputs.at(10)); // Optional
   node->fw_cell_to_output_weights(inputs.at(11)); // Optional
-  node->fw_input_gate_bias(inputs.at(12));        // Optional
+
+  node->fw_input_gate_bias(inputs.at(12)); // Optional
   node->fw_forget_gate_bias(inputs.at(13));
   node->fw_cell_gate_bias(inputs.at(14));
   node->fw_output_gate_bias(inputs.at(15));
-  node->fw_projection_weights(inputs.at(16));     // Optional
-  node->fw_projection_bias(inputs.at(17));        // Optional
+
+  node->fw_projection_weights(inputs.at(16)); // Optional
+  node->fw_projection_bias(inputs.at(17));    // Optional
+
   node->bw_input_to_input_weights(inputs.at(18)); // Optional
   node->bw_input_to_cell_weights(inputs.at(19));
   node->bw_input_to_forget_weights(inputs.at(20));
   node->bw_input_to_output_weights(inputs.at(21));
+
   node->bw_recurrent_to_input_weights(inputs.at(22)); // Optional
   node->bw_recurrent_to_cell_weights(inputs.at(23));
   node->bw_recurrent_to_forget_weights(inputs.at(24));
   node->bw_recurrent_to_output_weights(inputs.at(25));
+
   node->bw_cell_to_input_weights(inputs.at(26));  // Optional
   node->bw_cell_to_forget_weights(inputs.at(27)); // Optional
   node->bw_cell_to_output_weights(inputs.at(28)); // Optional
-  node->bw_input_gate_bias(inputs.at(29));        // Optional
+
+  node->bw_input_gate_bias(inputs.at(29)); // Optional
   node->bw_forget_gate_bias(inputs.at(30));
   node->bw_cell_gate_bias(inputs.at(31));
   node->bw_output_gate_bias(inputs.at(32));
+
   node->bw_projection_weights(inputs.at(33)); // Optional
   node->bw_projection_bias(inputs.at(34));    // Optional
+
   node->fw_activation_state(inputs.at(35));
   node->fw_cell_state(inputs.at(36));
   node->bw_activation_state(inputs.at(37));
@@ -83,6 +94,7 @@ CircleNode *CircleBidirectionalSequenceLSTMGraphBuilder::build_node(const BuildN
   node->fw_auxillary_input_to_forget_weights(inputs.at(41)); // Optional
   node->fw_auxillary_input_to_cell_weights(inputs.at(42));   // Optional
   node->fw_auxillary_input_to_output_weights(inputs.at(43)); // Optional
+
   node->bw_auxillary_input_to_input_weights(inputs.at(44));  // Optional
   node->bw_auxillary_input_to_forget_weights(inputs.at(45)); // Optional
   node->bw_auxillary_input_to_cell_weights(inputs.at(46));   // Optional
index 83a0251..ef57a13 100644 (file)
@@ -43,9 +43,6 @@ CircleNode *CircleSVDFBuilder::build_node(const circle::OperatorT &op,
   if (inputs.size() == 4)
   {
     auto *bias = graph->nodes()->create<CircleOutputExclude>();
-    // CircleOutputExclude doesn't need a type, but since all nodes must have a type,
-    // a dummy type is inserted.
-    bias->dtype(inputs.at(0)->dtype());
     node->bias(bias);
 
     node->input_activation_state(inputs.at(3));
index 041983d..01a28cb 100644 (file)
@@ -65,9 +65,6 @@ CircleNode *CircleTransposeConvGraphBuilder::build_node(const circle::OperatorT
   if (inputs.size() == 3)
   {
     auto *bias = graph->nodes()->create<CircleOutputExclude>();
-    // CircleOutputExclude doesn't need a type, but since all nodes must have a type,
-    // a dummy type is inserted.
-    bias->dtype(loco::DataType::FLOAT32);
     node->bias(bias);
   }
   else
index d9cc3f8..7ab6d68 100644 (file)
@@ -34,24 +34,30 @@ CircleNode *CircleUnidirectionalSequenceLSTMGraphBuilder::build_node(
   auto *node = graph->nodes()->create<CircleUnidirectionalSequenceLSTM>();
   node->input(inputs.at(0));
   node->input_to_input_weights(inputs.at(1)); // Optional
-  node->input_to_cell_weights(inputs.at(2));
-  node->input_to_forget_weights(inputs.at(3));
+  node->input_to_forget_weights(inputs.at(2));
+  node->input_to_cell_weights(inputs.at(3));
   node->input_to_output_weights(inputs.at(4));
+
   node->recurrent_to_input_weights(inputs.at(5)); // Optional
-  node->recurrent_to_cell_weights(inputs.at(6));
-  node->recurrent_to_forget_weights(inputs.at(7));
+  node->recurrent_to_forget_weights(inputs.at(6));
+  node->recurrent_to_cell_weights(inputs.at(7));
   node->recurrent_to_output_weights(inputs.at(8));
+
   node->cell_to_input_weights(inputs.at(9));   // Optional
   node->cell_to_forget_weights(inputs.at(10)); // Optional
   node->cell_to_output_weights(inputs.at(11)); // Optional
-  node->input_gate_bias(inputs.at(12));        // Optional
+
+  node->input_gate_bias(inputs.at(12)); // Optional
   node->forget_gate_bias(inputs.at(13));
   node->cell_gate_bias(inputs.at(14));
   node->output_gate_bias(inputs.at(15));
+
   node->projection_weights(inputs.at(16)); // Optional
   node->projection_bias(inputs.at(17));    // Optional
-  node->activation_state(inputs.at(18));
+
+  node->output_state(inputs.at(18));
   node->cell_state(inputs.at(19));
+
   node->input_layer_norm_coefficients(inputs.at(20));  // Optional
   node->forget_layer_norm_coefficients(inputs.at(21)); // Optional
   node->cell_layer_norm_coefficients(inputs.at(22));   // Optional
index faf0ec9..7b9e445 100644 (file)
@@ -76,8 +76,8 @@ public:
   loco::Node *projection_bias(void) const { return at(17)->node(); }
   void projection_bias(loco::Node *node) { at(17)->node(node); }
 
-  loco::Node *activation_state(void) const { return at(18)->node(); }
-  void activation_state(loco::Node *node) { at(18)->node(node); }
+  loco::Node *output_state(void) const { return at(18)->node(); }
+  void output_state(loco::Node *node) { at(18)->node(node); }
   loco::Node *cell_state(void) const { return at(19)->node(); }
   void cell_state(loco::Node *node) { at(19)->node(node); }
 
index 6b00d6f..2b10930 100644 (file)
@@ -52,7 +52,7 @@ TEST(CircleUnidirectionalSequenceLSTMTest, constructor_P)
   ASSERT_EQ(nullptr, trc_node.projection_weights());
   ASSERT_EQ(nullptr, trc_node.projection_bias());
 
-  ASSERT_EQ(nullptr, trc_node.activation_state());
+  ASSERT_EQ(nullptr, trc_node.output_state());
   ASSERT_EQ(nullptr, trc_node.cell_state());
 
   ASSERT_EQ(nullptr, trc_node.input_layer_norm_coefficients());
index 0cc45e8..27049be 100644 (file)
@@ -79,28 +79,6 @@ void LoggerConfig::configure(const hermes::Source *source, hermes::Source::Setti
 
 void LoggerConfig::configure(const Logger *, hermes::Source::Setting &setting) const
 {
-  // TODO remove deprecated codes
-#if 0
-  setting.filter(hermes::SeverityCategory::FATAL).reject_all();
-  setting.filter(hermes::SeverityCategory::ERROR).reject_all();
-  setting.filter(hermes::SeverityCategory::WARN).reject_all();
-  setting.filter(hermes::SeverityCategory::INFO).reject_all();
-  setting.filter(hermes::SeverityCategory::VERBOSE).reject_all();
-
-  // TODO enable FATAL and ERROR
-  if (_show_warn)
-  {
-    setting.filter(hermes::SeverityCategory::WARN).accept_all();
-  }
-  if (_show_info)
-  {
-    setting.filter(hermes::SeverityCategory::INFO).accept_all();
-  }
-  if (_show_verbose)
-  {
-    setting.filter(hermes::SeverityCategory::VERBOSE).accept_upto(_show_verbose);
-  }
-#endif
   setting.reject_all();
   setting.filter(hermes::SeverityCategory::FATAL).accept_upto(_show_verbose);
   setting.filter(hermes::SeverityCategory::ERROR).accept_upto(_show_verbose);
index 48e4579..42f11be 100644 (file)
@@ -63,6 +63,10 @@ std::string to_str(loco::DataType type)
   }
 }
 
+std::string to_str(float value) { return std::to_string(value); }
+
+std::string to_str(int32_t value) { return std::to_string(value); }
+
 std::string to_str(bool value) { return value ? "true" : "false"; }
 
 std::string to_str(luci::FusedActFunc fused)
@@ -1047,7 +1051,7 @@ CircleUnidirectionalSequenceLSTMSummaryBuilder::get_input_names(const luci::Circ
           "output_gate_bias",
           "projection_weights",
           "projection_bias",
-          "activation_state",
+          "output_state",
           "cell_state",
           "input_layer_norm_coefficients",
           "forget_layer_norm_coefficients",
index 3323014..e8c834b 100644 (file)
@@ -61,8 +61,7 @@ void connect(luci::ConnectNode *cn, const luci::CircleUnidirectionalSequenceLSTM
     loco::must_cast<luci::CircleNode *>(node->projection_weights());
   luci::CircleNode *projection_bias = loco::must_cast<luci::CircleNode *>(node->projection_bias());
 
-  luci::CircleNode *activation_state =
-    loco::must_cast<luci::CircleNode *>(node->activation_state());
+  luci::CircleNode *output_state = loco::must_cast<luci::CircleNode *>(node->output_state());
   luci::CircleNode *cell_state = loco::must_cast<luci::CircleNode *>(node->cell_state());
 
   luci::CircleNode *input_layer_norm_coefficients =
@@ -98,7 +97,7 @@ void connect(luci::ConnectNode *cn, const luci::CircleUnidirectionalSequenceLSTM
   cloned->projection_weights(cn->find_clone(projection_weights));
   cloned->projection_bias(cn->find_clone(projection_bias));
 
-  cloned->activation_state(cn->find_clone(activation_state));
+  cloned->output_state(cn->find_clone(output_state));
   cloned->cell_state(cn->find_clone(cell_state));
 
   cloned->input_layer_norm_coefficients(cn->find_clone(input_layer_norm_coefficients));
index 2630461..6472b58 100644 (file)
@@ -79,7 +79,7 @@ public:
     node()->projection_weights(input(16));
     node()->projection_bias(input(17));
 
-    node()->activation_state(input(18));
+    node()->output_state(input(18));
     node()->cell_state(input(19));
 
     node()->input_layer_norm_coefficients(input(20));
index 251dbea..9912305 100644 (file)
@@ -22,6 +22,8 @@
 
 #include <loco.h>
 
+#include <cassert>
+
 namespace
 {
 
@@ -88,11 +90,14 @@ std::unique_ptr<loco::Graph> clone_graph(loco::Graph *graph_org, luci::CloneCont
 {
   auto graph = loco::make_graph();
   auto graph_clone = graph.get();
+  auto &graph_name = graph_org->name();
 
-  graph_clone->name(graph_org->name());
+  graph_clone->name(graph_name);
 
   // clone inputs
-  for (uint32_t n = 0; n < graph_org->inputs()->size(); ++n)
+  auto inputs = graph_org->inputs();
+  assert(inputs);
+  for (uint32_t n = 0; n < inputs->size(); ++n)
   {
     auto input_org = luci::input_node(graph_org, n);
     assert(input_org != nullptr);
index d9d004d..ac18a5f 100644 (file)
@@ -31,7 +31,7 @@ target_link_libraries(luci_pass PRIVATE luci_log)
 target_link_libraries(luci_pass PRIVATE luci_service)
 target_link_libraries(luci_pass PRIVATE luci_logex)
 target_link_libraries(luci_pass PRIVATE luci_profile)
-target_link_libraries(luci_pass PRIVATE mio_tflite280_inc)
+target_link_libraries(luci_pass PRIVATE luci_compute)
 target_link_libraries(luci_pass PRIVATE nncc_common)
 target_link_libraries(luci_pass PRIVATE pepper_csv2vec)
 target_link_libraries(luci_pass PRIVATE oops)
index b94822c..d77e89d 100644 (file)
@@ -52,14 +52,17 @@ public:
       FoldCast,
       FoldDensify,
       FoldDepthwiseConv2D,
+      FoldFullyConnected,
       FoldDequantize,
       FoldGather,
       FoldSparseToDense,
       ForwardReshapeToUnaryOp,
+      ForwardTransposeOp,
       SparsifyTensorPass,
       FusePreActivationBatchNorm,
       MakeBatchNormGammaPositive,
       FuseActivationFunction,
+      FusePRelu,
       ShuffleWeightTo16x1Float32,
       RemoveRedundantTranspose,
       ReplaceMulAddWithDepthwiseConv,
@@ -83,6 +86,8 @@ public:
       RemoveRedundantReshape,
       RemoveFakeQuant,
       RemoveQuantDequantSeq,
+      RemoveDuplicateConst,
+      UnrollUnidirSeqLSTM,
     };
 
     enum AlgorithmParameters
diff --git a/compiler/luci/pass/include/luci/Pass/FoldFullyConnectedPass.h b/compiler/luci/pass/include/luci/Pass/FoldFullyConnectedPass.h
new file mode 100644 (file)
index 0000000..bd36ff1
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_FULLY_CONNECTED_PASS_H__
+#define __LUCI_FOLD_FULLY_CONNECTED_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to fold FullyConnected with constant input and filter into a
+ * constant tensor
+ */
+struct FoldFullyConnectedPass final : public logo::Pass
+{
+  const char *name(void) const final { return "luci::FoldFullyConnectedPass"; }
+
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_FULLY_CONNECTED_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/ForwardTransposeOpPass.h b/compiler/luci/pass/include/luci/Pass/ForwardTransposeOpPass.h
new file mode 100644 (file)
index 0000000..b44b1bd
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FORWARD_TRANSPOSE_OP_PASS_H__
+#define __LUCI_FORWARD_TRANSPOSE_OP_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to Forward Transpose Ops for further optimization.
+ */
+struct ForwardTransposeOpPass final : public logo::Pass
+{
+  const char *name(void) const final { return "luci::ForwardTransposeOpPass"; }
+
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FORWARD_TRANSPOSE_OP_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FusePReluPass.h b/compiler/luci/pass/include/luci/Pass/FusePReluPass.h
new file mode 100644 (file)
index 0000000..a21acf4
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_PRELU_PASS_H__
+#define __LUCI_FUSE_PRELU_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to fuse certain pattern of subgraph into CirclePRelu
+ *         with auxiliary nodes
+ *
+ * For detailed subgraph pattern to be fused, please check its implementation.
+ */
+struct FusePReluPass final : public logo::Pass
+{
+  const char *name(void) const final { return "luci::FusePReluPass"; }
+
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_PRELU_PASS_H__
index ea6db85..6874046 100644 (file)
@@ -39,29 +39,12 @@ public:
     loco::DataType input_model_dtype = loco::DataType::Unknown;
     loco::DataType output_model_dtype = loco::DataType::Unknown;
     QuantizationGranularity granularity = QuantizationGranularity::ChannelWise;
-    loco::DataType input_type = loco::DataType::Unknown;
-    loco::DataType output_type = loco::DataType::Unknown;
+    std::vector<loco::DataType> input_types;
+    std::vector<loco::DataType> output_types;
     bool TF_style_maxpool = false;
     std::vector<LayerInfo> layers_info;
   };
 
-  // For backward-compatibility
-  // TODO Remove this constructor
-public:
-  QuantizeWithMinMaxPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype,
-                         QuantizationGranularity granularity)
-  {
-    _ctx = std::make_unique<Context>();
-    {
-      _ctx->input_model_dtype = input_model_dtype;
-      _ctx->output_model_dtype = output_model_dtype;
-      _ctx->granularity = granularity;
-      _ctx->input_type = output_model_dtype;
-      _ctx->output_type = output_model_dtype;
-      _ctx->TF_style_maxpool = false;
-    }
-  }
-
 public:
   QuantizeWithMinMaxPass(std::unique_ptr<Context> &&ctx) : _ctx{std::move(ctx)}
   {
diff --git a/compiler/luci/pass/include/luci/Pass/RemoveDuplicateConstPass.h b/compiler/luci/pass/include/luci/Pass/RemoveDuplicateConstPass.h
new file mode 100644 (file)
index 0000000..000cdcc
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_DUPLICATE_CONST_PASS_H__
+#define __LUCI_REMOVE_DUPLICATE_CONST_PASS_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to remove duplicate Const nodes.
+ */
+struct RemoveDuplicateConstPass final : public logo::Pass
+{
+  const char *name(void) const final { return "luci::RemoveDuplicateConstPass"; }
+
+  bool run(loco::Graph *g) final;
+
+private:
+  bool remove_duplicate_const();
+
+  template <loco::DataType DT> void add_to_map(luci::CircleConst *const_node);
+
+  std::map<float, std::vector<CircleConst *>> _sum_to_const;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_DUPLICATE_CONST_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/UnrollUnidirectionalSequenceLSTMPass.h b/compiler/luci/pass/include/luci/Pass/UnrollUnidirectionalSequenceLSTMPass.h
new file mode 100644 (file)
index 0000000..fd5a708
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_UNROLL_UNIDIRECTIONALSEQUENCELSTM_PASS_H__
+#define __LUCI_UNROLL_UNIDIRECTIONALSEQUENCELSTM_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to Unroll UnidirectionalSequenceLSTM
+ */
+struct UnrollUnidirectionalSequenceLSTMPass final : public logo::Pass
+{
+  const char *name(void) const final { return "luci::UnrollUnidirectionalSequenceLSTMPass"; }
+
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_UNROLL_UNIDIRECTIONALSEQUENCELSTM_PASS_H__
index 74c569d..5e1613a 100644 (file)
 #include "luci/Pass/FoldDensifyPass.h"
 #include "luci/Pass/FoldDepthwiseConv2DPass.h"
 #include "luci/Pass/FoldDequantizePass.h"
+#include "luci/Pass/FoldFullyConnectedPass.h"
 #include "luci/Pass/FoldGatherPass.h"
 #include "luci/Pass/FoldSparseToDensePass.h"
 #include "luci/Pass/ForwardReshapeToUnaryOpPass.h"
+#include "luci/Pass/ForwardTransposeOpPass.h"
 #include "luci/Pass/FuseActivationFunctionPass.h"
 #include "luci/Pass/FuseAddWithFullyConnectedPass.h"
 #include "luci/Pass/FuseAddWithTConvPass.h"
 #include "luci/Pass/FuseInstanceNormPass.h"
 #include "luci/Pass/FuseMeanWithMeanPass.h"
 #include "luci/Pass/FusePreActivationBatchNormPass.h"
+#include "luci/Pass/FusePReluPass.h"
 #include "luci/Pass/FuseTransposeWithMeanPass.h"
 #include "luci/Pass/MakeBatchNormGammaPositivePass.h"
+#include "luci/Pass/RemoveDuplicateConstPass.h"
 #include "luci/Pass/RemoveFakeQuantPass.h"
 #include "luci/Pass/RemoveQuantDequantSeqPass.h"
 #include "luci/Pass/RemoveRedundantReshapePass.h"
@@ -66,6 +70,7 @@
 #include "luci/Pass/SubstituteTransposeToReshapePass.h"
 #include "luci/Pass/TransformMinMaxToRelu6Pass.h"
 #include "luci/Pass/TransformMinReluToRelu6Pass.h"
+#include "luci/Pass/UnrollUnidirectionalSequenceLSTMPass.h"
 // TODO add more passes
 
 #include "luci/Pass/CircleShapeInferencePass.h"
@@ -274,6 +279,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
   {
     phase.emplace_back(std::make_unique<FuseActivationFunctionPass>());
   }
+  if (_options->query(Options::Algorithm::FusePRelu))
+  {
+    phase.emplace_back(std::make_unique<FusePReluPass>());
+  }
   if (_options->query(Options::Algorithm::FuseTransposeWithMean))
   {
     phase.emplace_back(std::make_unique<FuseTransposeWithMeanPass>());
@@ -298,6 +307,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
   {
     phase.emplace_back(std::make_unique<luci::FoldDequantizePass>());
   }
+  if (_options->query(Options::Algorithm::FoldFullyConnected))
+  {
+    phase.emplace_back(std::make_unique<luci::FoldFullyConnectedPass>());
+  }
   if (_options->query(Options::Algorithm::FoldGather))
   {
     phase.emplace_back(std::make_unique<luci::FoldGatherPass>());
@@ -310,6 +323,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
   {
     phase.emplace_back(std::make_unique<luci::ForwardReshapeToUnaryOpPass>());
   }
+  if (_options->query(Options::Algorithm::ForwardTransposeOp))
+  {
+    phase.emplace_back(std::make_unique<luci::ForwardTransposeOpPass>());
+  }
   if (_options->query(Options::Algorithm::FusePreActivationBatchNorm))
   {
     phase.emplace_back(std::make_unique<luci::FusePreActivationBatchNormPass>());
@@ -326,6 +343,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
   {
     phase.emplace_back(std::make_unique<luci::ExpandBroadcastConstPass>());
   }
+  if (_options->query(Options::Algorithm::RemoveDuplicateConst))
+  {
+    phase.emplace_back(std::make_unique<luci::RemoveDuplicateConstPass>());
+  }
   if (_options->query(Options::Algorithm::RemoveFakeQuant))
   {
     phase.emplace_back(std::make_unique<luci::RemoveFakeQuantPass>());
@@ -407,6 +428,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
   {
     phase.emplace_back(std::make_unique<luci::TransformMinReluToRelu6Pass>());
   }
+  if (_options->query(Options::Algorithm::UnrollUnidirSeqLSTM))
+  {
+    phase.emplace_back(std::make_unique<luci::UnrollUnidirectionalSequenceLSTMPass>());
+  }
 
   /* TRANSFORM DECLARATION END */
 
index 9a6550b..3ffa118 100644 (file)
@@ -40,6 +40,7 @@
 
 #include <luci/IR/CircleNode.h>
 #include <logo/Phase.h>
+#include <pepper/csv2vec.h>
 
 #include <memory>
 
@@ -49,6 +50,154 @@ namespace
 using namespace luci;
 using LayerParam = luci::CircleQuantizer::Options::LayerParam;
 
+// This function updates user-given input_type to match with the input signature of graph
+// If user gives only one input_type, it will be expanded to the number of graph inputs
+void canonicalize_input_type(loco::Graph *g, std::vector<loco::DataType> &input_type)
+{
+  if (g == nullptr)
+    return;
+
+  const auto inputs = g->inputs();
+
+  assert(inputs); // FIX_CALLER_UNLESS
+
+  // Check validity of the number of input dtype given by a user
+  if (input_type.size() != 1 and input_type.size() != inputs->size())
+  {
+    throw std::runtime_error(
+      "Invalid number of input dtype. The number of input dtype should be 1 or "
+      "the same as the number of graph inputs.");
+  }
+
+  // Handle the case when a user gives only one input dtype
+  if (input_type.size() == 1)
+  {
+    const auto user_given_dtype = input_type[0];
+    input_type.clear();
+
+    // Expand input dtype to the number of graph inputs
+    // Since quantizer can only quantize float32, user_given_dtype is set only for float32 inputs
+    auto input_nodes = loco::input_nodes(g);
+    for (uint32_t i = 0; i < input_nodes.size(); i++)
+    {
+      auto input = loco::must_cast<luci::CircleInput *>(input_nodes[i]);
+
+      if (input->dtype() == loco::DataType::FLOAT32)
+        input_type.push_back(user_given_dtype);
+      else
+        input_type.push_back(input->dtype());
+    }
+  }
+
+  // Finally, check validity of input_type
+  // input_type is valid if
+  // C1. for non-float32 model input, input_type == model's input dtype
+  // or
+  // C2. for float32 model input, input_type == uint8, int16, or float32
+  auto input_nodes = loco::input_nodes(g);
+  for (uint32_t i = 0; i < input_nodes.size(); i++)
+  {
+    auto input = loco::must_cast<luci::CircleInput *>(input_nodes[i]);
+    assert(i == input->index()); // FIX_ME_UNLESS
+
+    if (input->dtype() != loco::DataType::FLOAT32)
+    {
+      // C1
+      if (input->dtype() != input_type[i])
+        throw std::runtime_error(
+          "Input dtype of " + input->name() +
+          " is invalid. It has to be the same with the model's input dtype.");
+    }
+    else
+    {
+      // C2
+      if (input_type[i] != loco::DataType::FLOAT32 and input_type[i] != loco::DataType::U8 and
+          input_type[i] != loco::DataType::S16)
+      {
+        throw std::runtime_error("Input dtype of " + input->name() +
+                                 " is invalid. For float32 input, the input dtype after "
+                                 "quantization must be one of uint8, int16, or float32.");
+      }
+    }
+  }
+}
+
+// This function updates user-given output_type to match with the output signature of graph
+// If user gives only one output_type, it will be expanded to the number of graph outputs
+// NOTE This function is almost same with canonicalize_input_type, but it is written as a
+// separate function for more precise error messaging.
+// TODO Find a way to reduce duplicate codes
+void canonicalize_output_type(loco::Graph *g, std::vector<loco::DataType> &output_type)
+{
+  if (g == nullptr)
+    return;
+
+  const auto outputs = g->outputs();
+
+  assert(outputs); // FIX_CALLER_UNLESS
+
+  // Check validity of the number of output dtype given by a user
+  if (output_type.size() != 1 and output_type.size() != outputs->size())
+  {
+    throw std::runtime_error(
+      "Invalid number of output dtype. The number of output dtype should be 1 or "
+      "the same as the number of graph outputs.");
+  }
+
+  // Handle the case when a user gives only one output dtype
+  if (output_type.size() == 1)
+  {
+    const auto user_given_dtype = output_type[0];
+    output_type.clear();
+
+    // Expand output dtype to the number of graph outputs
+    // If dtype of graph output is float32, it will be replaced with user_given_dtype
+    // Otherwise, it will not change
+    auto output_nodes = loco::output_nodes(g);
+    for (uint32_t i = 0; i < output_nodes.size(); i++)
+    {
+      auto output = loco::must_cast<luci::CircleOutput *>(output_nodes[i]);
+
+      if (output->dtype() == loco::DataType::FLOAT32)
+        output_type.push_back(user_given_dtype);
+      else
+        output_type.push_back(output->dtype());
+    }
+  }
+
+  // Finally, check validity of output_type
+  // output_type is valid if
+  // C1. for non-float32 model output, output_type == model's output dtype
+  // or
+  // C2. for float32 model output, output_type == uint8, int16, or float32
+  auto output_nodes = loco::output_nodes(g);
+  for (uint32_t i = 0; i < output_nodes.size(); i++)
+  {
+    auto output = loco::must_cast<luci::CircleOutput *>(output_nodes[i]);
+    assert(i == output->index()); // FIX_ME_UNLESS
+
+    if (output->dtype() != loco::DataType::FLOAT32)
+    {
+      // C1
+      if (output->dtype() != output_type[i])
+        throw std::runtime_error(
+          "Output dtype of " + output->name() +
+          " is invalid. It has to be the same with the model's output dtype.");
+    }
+    else
+    {
+      // C2
+      if (output_type[i] != loco::DataType::FLOAT32 and output_type[i] != loco::DataType::U8 and
+          output_type[i] != loco::DataType::S16)
+      {
+        throw std::runtime_error("Output dtype of " + output->name() +
+                                 " is invalid. For float32 output, the output dtype after "
+                                 "quantization must be one of uint8, int16, or float32.");
+      }
+    }
+  }
+}
+
 template <typename T> T lexical_cast(const std::string &str)
 {
   std::istringstream ss;
@@ -253,8 +402,10 @@ void CircleQuantizer::quantize(loco::Graph *g) const
     static const std::vector<std::string> qwmm_supported_input_model_dtype{"float32"};
     static const std::vector<std::string> qwmm_supported_output_model_dtype{"uint8", "int16"};
     static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"};
-    static const std::vector<std::string> qwmm_supported_input_type{"uint8", "int16", "float32"};
-    static const std::vector<std::string> qwmm_supported_output_type{"uint8", "int16", "float32"};
+    static const std::vector<std::string> qwmm_supported_input_type{"uint8", "int16",   "int32",
+                                                                    "int64", "float32", "bool"};
+    static const std::vector<std::string> qwmm_supported_output_type{"uint8", "int16",   "int32",
+                                                                     "int64", "float32", "bool"};
 
     auto input_model_dtype =
       _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
@@ -268,6 +419,9 @@ void CircleQuantizer::quantize(loco::Graph *g) const
     if (output_type.empty())
       output_type = output_model_dtype;
 
+    auto input_type_vec = pepper::csv_to_vector<std::string>(input_type);
+    auto output_type_vec = pepper::csv_to_vector<std::string>(output_type);
+
     bool TF_style_maxpool =
       _options->param(Options::AlgorithmParameters::Quantize_TF_style_maxpool) == "True";
 
@@ -285,13 +439,19 @@ void CircleQuantizer::quantize(loco::Graph *g) const
       throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
                                to_string(qwmm_supported_granularity));
 
-    if (!in_array(to_lower_case(input_type), qwmm_supported_input_type))
-      throw std::runtime_error("Unsupported input type. List of supported input types: " +
-                               to_string(qwmm_supported_input_type));
+    for (auto dtype : input_type_vec)
+    {
+      if (!in_array(to_lower_case(dtype), qwmm_supported_input_type))
+        throw std::runtime_error("Unsupported input type. List of supported input types: " +
+                                 to_string(qwmm_supported_input_type));
+    }
 
-    if (!in_array(to_lower_case(output_type), qwmm_supported_output_type))
-      throw std::runtime_error("Unsupported output type. List of supported output types: " +
-                               to_string(qwmm_supported_output_type));
+    for (auto dtype : output_type_vec)
+    {
+      if (!in_array(to_lower_case(dtype), qwmm_supported_output_type))
+        throw std::runtime_error("Unsupported output type. List of supported output types: " +
+                                 to_string(qwmm_supported_output_type));
+    }
 
     if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise &&
         str_to_dtype(output_model_dtype) != loco::DataType::U8)
@@ -314,6 +474,13 @@ void CircleQuantizer::quantize(loco::Graph *g) const
       }
     }
 
+    auto input_types = str_vec_to_dtype_vec(input_type_vec);
+    auto output_types = str_vec_to_dtype_vec(output_type_vec);
+
+    // Canonicalize user-given input/output_type (match with # of inputs/outputs)
+    canonicalize_input_type(g, input_types);
+    canonicalize_output_type(g, output_types);
+
     // Input model checker for quantization
     luci::QuantizePreCheckerPass input_model_checker{};
     input_model_checker.run(g);
@@ -323,8 +490,8 @@ void CircleQuantizer::quantize(loco::Graph *g) const
       ctx->input_model_dtype = str_to_dtype(input_model_dtype);
       ctx->output_model_dtype = str_to_dtype(output_model_dtype);
       ctx->granularity = str_to_granularity(granularity);
-      ctx->input_type = str_to_dtype(input_type);
-      ctx->output_type = str_to_dtype(output_type);
+      ctx->input_types = input_types;
+      ctx->output_types = output_types;
       ctx->TF_style_maxpool = TF_style_maxpool;
 
       for (auto layer_param : layer_params)
@@ -347,8 +514,8 @@ void CircleQuantizer::quantize(loco::Graph *g) const
     {
       verify_ctx->output_model_dtype = str_to_dtype(output_model_dtype);
       verify_ctx->granularity = str_to_granularity(granularity);
-      verify_ctx->input_type = str_to_dtype(input_type);
-      verify_ctx->output_type = str_to_dtype(output_type);
+      verify_ctx->input_types = input_types;
+      verify_ctx->output_types = output_types;
       verify_ctx->TF_style_maxpool = TF_style_maxpool;
 
       for (auto layer_param : layer_params)
index 55a29d1..99e1e29 100644 (file)
@@ -503,43 +503,30 @@ bool is_NCHW(const luci::CirclePadV2 *node)
   return true;
 }
 
-// NOTE Following conditions can be extended later
-// NOTE Used for Maximum, Miminum as ReLU/ReLU6
-//
-// Find T with an NCHW pattern described below
-//   - Input (non-constant) shape : [N, C, H, W]
-//   - Input (constant) shape : [1] or []
-//   - Output shape : [N, C, H, W]
-template <class T>
-bool is_NCHW_with_s_const(const T *node, luci::CircleNode *&pred_node,
-                          luci::CircleConst *&comp_const)
+bool is_const(const loco::Node *node)
 {
-  auto x = dynamic_cast<luci::CircleConst *>(node->x());
-  auto y = dynamic_cast<luci::CircleConst *>(node->y());
-
-  if (x != nullptr && y == nullptr)
-  {
-    pred_node = loco::must_cast<luci::CircleNode *>(node->y());
-    comp_const = x;
-  }
-  else if (x == nullptr && y != nullptr)
-  {
-    pred_node = loco::must_cast<luci::CircleNode *>(node->x());
-    comp_const = y;
-  }
-  else
-  {
-    // Ignore if T does not have a comp_const input.
+  if (not dynamic_cast<const luci::CircleConst *>(node))
     return false;
-  }
 
-  if (pred_node->rank() != 4)
+  return true;
+}
+
+bool is_scalar_const(const loco::Node *node)
+{
+  auto const_node = dynamic_cast<const luci::CircleConst *>(node);
+  if (not const_node)
     return false;
 
-  // Check if scalar
-  const auto const_rank = comp_const->rank();
-  if (const_rank == 0 || (const_rank == 1 && comp_const->dim(0).value() == 1))
+  const auto const_rank = const_node->rank();
+  // shape of scalar
+  // 1. rank = 0
+  // 2. rank = 1, dimension = 1
+  if (const_rank == 0)
+    return true;
+
+  if (const_rank == 1 && const_node->dim(0).value() == 1)
     return true;
+
   return false;
 }
 
@@ -854,22 +841,30 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool>
 
   bool visit(luci::CircleLogistic *node) { return convert_unary_x<luci::CircleLogistic>(node); }
 
-  bool visit(luci::CircleLogSoftmax *node)
-  {
-    return convert_unary_logits<luci::CircleLogSoftmax>(node);
-  }
-
   bool visit(luci::CircleMaximum *node)
   {
-    luci::CircleNode *pred_node = nullptr;
-    luci::CircleConst *comp_constant = nullptr;
-
-    if (is_NCHW_with_s_const<luci::CircleMaximum>(node, pred_node, comp_constant))
+    if ((not is_const(node->x())) and is_scalar_const(node->y()))
     {
       auto pre_trans = create_pre_transpose(node);
-      pre_trans->a(pred_node);
+      pre_trans->a(node->x());
       node->x(pre_trans);
     }
+    else if (is_scalar_const(node->x()) and (not is_const(node->y())))
+    {
+      auto pre_trans = create_pre_transpose(node);
+      pre_trans->a(node->y());
+      node->y(pre_trans);
+    }
+    else if ((not is_const(node->x())) and (not is_const(node->y())))
+    {
+      auto pre_trans_x = create_pre_transpose(node);
+      pre_trans_x->a(node->x());
+      node->x(pre_trans_x);
+
+      auto pre_trans_y = create_pre_transpose(node);
+      pre_trans_y->a(node->y());
+      node->y(pre_trans_y);
+    }
     else
     {
       // TODO support other cases
@@ -963,15 +958,18 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool>
 
   bool visit(luci::CircleMinimum *node)
   {
-    luci::CircleNode *pred_node = nullptr;
-    luci::CircleConst *comp_constant = nullptr;
-
-    if (is_NCHW_with_s_const<luci::CircleMinimum>(node, pred_node, comp_constant))
+    if ((not is_const(node->x())) and is_scalar_const(node->y()))
     {
       auto pre_trans = create_pre_transpose(node);
-      pre_trans->a(pred_node);
+      pre_trans->a(node->x());
       node->x(pre_trans);
     }
+    else if (is_scalar_const(node->x()) and (not is_const(node->y())))
+    {
+      auto pre_trans = create_pre_transpose(node);
+      pre_trans->a(node->y());
+      node->y(pre_trans);
+    }
     else
     {
       // TODO support other cases
@@ -1168,14 +1166,88 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool>
     return true;
   }
 
+  // TODO Reduce duplicate codes with CircleReduceMax
+  bool visit(luci::CircleReduceMin *node)
+  {
+    auto input = loco::must_cast<luci::CircleNode *>(node->input());
+    if (input->rank() != 4)
+      return false;
+
+    auto rindices = dynamic_cast<luci::CircleConst *>(node->reduction_indices());
+    if (not rindices)
+      return false;
+
+    auto nhwc_rindices = create_NHWC_rindices(rindices);
+    if (not nhwc_rindices)
+      return false;
+
+    auto pre_trans = create_pre_transpose(node);
+    pre_trans->a(input);
+    node->input(pre_trans);
+
+    // Do shape inference for this node again.
+    node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+    node->reduction_indices(nhwc_rindices);
+
+    if (node->keep_dims())
+    {
+      auto post_trans = create_post_transpose(node);
+      loco::replace(node).with(post_trans);
+
+      post_trans->a(node);
+
+      return true;
+    }
+
+    // The below codes handle the cases where node->keep_dims() == false
+    // 1D output never needs a transpose
+    if (node->rank() <= 1)
+      return true;
+
+    std::vector<bool> reduced_dims_nhwc(4, false);
+    uint32_t num_reduced_indices = nhwc_rindices->size<loco::DataType::S32>();
+
+    for (uint32_t ri = 0; ri < num_reduced_indices; ++ri)
+    {
+      reduced_dims_nhwc[nhwc_rindices->at<loco::DataType::S32>(ri)] = true;
+    }
+
+    // if channel dimension has been reduced, we don't need a transpose
+    if (reduced_dims_nhwc[3])
+      return true;
+
+    // likewise, if both space dimensions are reduced, no transpose is needed
+    if (reduced_dims_nhwc[1] && reduced_dims_nhwc[2])
+      return true;
+
+    std::vector<int32_t> post_trans_ind;
+    // case 1: only N is reduced
+    if (num_reduced_indices == 1 && reduced_dims_nhwc[0])
+      post_trans_ind = {2, 0, 1};
+
+    // case 2: only H or W is reduced
+    if (num_reduced_indices == 1 && (reduced_dims_nhwc[1] || reduced_dims_nhwc[2]))
+      post_trans_ind = {0, 2, 1};
+
+    // case 3: N and either H or W are reduced
+    if (num_reduced_indices == 2)
+      post_trans_ind = {1, 0};
+
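+    // Illustrative example (a sketch): reducing only H (NHWC axis 1) of an
+    // [N, H, W, C] input yields [N, W, C]; the {0, 2, 1} transpose below restores
+    // the [N, C, W] layout that the original NCHW graph produced.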
+    auto post_trans = create_Nd_transpose(node, post_trans_ind);
+    loco::replace(node).with(post_trans);
+
+    post_trans->a(node);
+
+    return true;
+  }
+
   bool visit(luci::CircleRelu *node) { return convert_unary_features<luci::CircleRelu>(node); }
 
   bool visit(luci::CircleRelu6 *node) { return convert_unary_features<luci::CircleRelu6>(node); }
 
   bool visit(luci::CircleRsqrt *node) { return convert_unary_x<luci::CircleRsqrt>(node); }
 
-  bool visit(luci::CircleSoftmax *node) { return convert_unary_logits<luci::CircleSoftmax>(node); }
-
   bool visit(luci::CircleSplitV *node)
   {
     // Change split dimension
@@ -1375,6 +1447,10 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g)
       collect_intermediate = [&](loco::Node *n) {
         for (auto succ : loco::succs(n))
         {
+          // Skip unnecessary traversal
+          if (intermediate.find(succ) != intermediate.end())
+            continue;
+
           // Exit condition
           if (is_post_transpose(succ) || is_post_reshape(succ))
             continue;
@@ -1429,12 +1505,13 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g)
           set_data_format(node, DataFormat::NCHW);
         }
         break;
+      // SOFTMAX and LOG_SOFTMAX are not converted, because
+      // tflite/circle assumes their axis is always the last dimension
       case luci::CircleOpcode::ADD:
       case luci::CircleOpcode::CONCATENATION:
       case luci::CircleOpcode::ELU:
       case luci::CircleOpcode::LEAKY_RELU:
       case luci::CircleOpcode::LOGISTIC:
-      case luci::CircleOpcode::LOG_SOFTMAX:
       case luci::CircleOpcode::MAXIMUM:
       case luci::CircleOpcode::MEAN:
       case luci::CircleOpcode::MINIMUM:
@@ -1443,10 +1520,10 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g)
       case luci::CircleOpcode::PAD:
       case luci::CircleOpcode::PADV2:
       case luci::CircleOpcode::REDUCE_MAX:
+      case luci::CircleOpcode::REDUCE_MIN:
       case luci::CircleOpcode::RELU:
       case luci::CircleOpcode::RELU6:
       case luci::CircleOpcode::RSQRT:
-      case luci::CircleOpcode::SOFTMAX:
       case luci::CircleOpcode::SPLIT_V:
       case luci::CircleOpcode::SQUARED_DIFFERENCE:
       case luci::CircleOpcode::SUB:
@@ -1487,7 +1564,8 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g)
       {
         // TODO replace the check above with the input rank check, and remove the condition below
         if (not dynamic_cast<luci::CircleMean *>(node) and
-            not dynamic_cast<luci::CircleReduceMax *>(node))
+            not dynamic_cast<luci::CircleReduceMax *>(node) and
+            not dynamic_cast<luci::CircleReduceMin *>(node))
           continue;
       }
 
index 6bb3d32..fd32651 100644 (file)
@@ -483,22 +483,6 @@ public:
   luci::CircleLogistic *logistic = nullptr;
 };
 
-class LogSoftmaxGraph final : public SimpleGraph
-{
-protected:
-  loco::Node *insertGraphBody(loco::Node *input) override
-  {
-    log_softmax = g.nodes()->create<luci::CircleLogSoftmax>();
-    log_softmax->logits(input);
-    log_softmax->name("log_softmax");
-
-    return log_softmax;
-  }
-
-public:
-  luci::CircleLogSoftmax *log_softmax = nullptr;
-};
-
 class MaximumGraph final : public SimpleGraph
 {
 protected:
@@ -530,6 +514,27 @@ public:
   luci::CircleConst *limit = nullptr;
 };
 
+class MaximumNonConstGraph final : public SimpleGraph
+{
+protected:
+  loco::Node *insertGraphBody(loco::Node *input) override
+  {
+    max = g.nodes()->create<luci::CircleMaximum>();
+    max->dtype(loco::DataType::FLOAT32);
+    max->shape({1, 16, 4, 4});
+
+    max->x(input);
+    max->y(input);
+
+    max->name("max");
+
+    return max;
+  }
+
+public:
+  luci::CircleMaximum *max = nullptr;
+};
+
 class MeanGraph final : public SimpleGraph
 {
 protected:
@@ -874,6 +879,51 @@ private:
   std::initializer_list<uint32_t> _shape = {1, 16, 1, 1};
 };
 
+class ReduceMinGraph final : public SimpleGraph
+{
+protected:
+  loco::Node *insertGraphBody(loco::Node *input) override
+  {
+    rm = g.nodes()->create<luci::CircleReduceMin>();
+    rindices = g.nodes()->create<luci::CircleConst>();
+
+    rm->dtype(loco::DataType::FLOAT32);
+    rindices->dtype(loco::DataType::S32);
+
+    rm->shape(_shape);
+    rindices->shape({static_cast<uint32_t>(_axes.size())});
+
+    rindices->size<loco::DataType::S32>(_axes.size());
+    for (uint32_t i = 0; i < _axes.size(); ++i)
+    {
+      rindices->at<loco::DataType::S32>(i) = _axes[i];
+    }
+
+    rm->input(input);
+    rm->reduction_indices(rindices);
+    rm->keep_dims(_keep_dims);
+
+    rm->name("reduce_min");
+    rindices->name("rindices");
+
+    return rm;
+  }
+
+public:
+  void keep_dims(bool val) { _keep_dims = val; }
+  void axes(std::vector<int32_t> val) { _axes = val; }
+  void shape(std::initializer_list<uint32_t> val) { _shape = val; }
+
+public:
+  luci::CircleReduceMin *rm = nullptr;
+  luci::CircleConst *rindices = nullptr;
+
+private:
+  bool _keep_dims = true;
+  std::vector<int32_t> _axes = {2, 3};
+  std::initializer_list<uint32_t> _shape = {1, 16, 1, 1};
+};
+
 class ReluGraph final : public SimpleGraph
 {
 protected:
@@ -922,22 +972,6 @@ public:
   luci::CircleRsqrt *rsqrt = nullptr;
 };
 
-class SoftmaxGraph final : public SimpleGraph
-{
-protected:
-  loco::Node *insertGraphBody(loco::Node *input) override
-  {
-    softmax = g.nodes()->create<luci::CircleSoftmax>();
-    softmax->logits(input);
-    softmax->name("softmax");
-
-    return softmax;
-  }
-
-public:
-  luci::CircleSoftmax *softmax = nullptr;
-};
-
 class SplitVGraphlet
 {
 public:
@@ -1357,44 +1391,50 @@ TEST(ConvertNCHWToNHWC, Logistic)
   EXPECT_EQ(16, g.logistic->dim(3).value());
 }
 
-TEST(ConvertNCHWToNHWC, LogSoftmax)
+TEST(ConvertNCHWToNHWC, Maximum)
 {
-  LogSoftmaxGraph g;
+  MaximumGraph g;
   g.init();
 
-  run_phase(&g.g, true, true);
+  run_phase(&g.g, false, false);
+
+  auto input_succs = loco::succs(g.input);
+  EXPECT_EQ(1, input_succs.size());
+  check_post_trans(*input_succs.begin());
 
-  check_pre_trans(g.log_softmax->logits());
+  check_pre_trans(g.max->x());
 
-  auto log_softmax_succs = loco::succs(g.log_softmax);
-  EXPECT_EQ(1, log_softmax_succs.size());
-  check_post_trans(*log_softmax_succs.begin());
+  auto max_succs = loco::succs(g.max);
+  EXPECT_EQ(1, max_succs.size());
+  check_post_trans(*max_succs.begin());
 
-  // Check log_softmax shape
-  EXPECT_EQ(1, g.log_softmax->dim(0).value());
-  EXPECT_EQ(4, g.log_softmax->dim(1).value());
-  EXPECT_EQ(4, g.log_softmax->dim(2).value());
-  EXPECT_EQ(16, g.log_softmax->dim(3).value());
+  check_pre_trans(g.output->from());
 }
 
-TEST(ConvertNCHWToNHWC, Maximum)
+TEST(ConvertNCHWToNHWC, Maximum_non_scalar_NEG)
 {
   MaximumGraph g;
   g.init();
 
-  run_phase(&g.g, false, false);
+  g.limit->shape({3});
 
-  auto input_succs = loco::succs(g.input);
-  EXPECT_EQ(1, input_succs.size());
-  check_post_trans(*input_succs.begin());
+  luci::ConvertNCHWToNHWCPass pass(true, true);
+  EXPECT_FALSE(pass.run(&g.g));
+}
+
+TEST(ConvertNCHWToNHWC, MaximumNonConst)
+{
+  MaximumNonConstGraph g;
+  g.init();
+
+  run_phase(&g.g, true, true);
 
   check_pre_trans(g.max->x());
+  check_pre_trans(g.max->y());
 
   auto max_succs = loco::succs(g.max);
   EXPECT_EQ(1, max_succs.size());
   check_post_trans(*max_succs.begin());
-
-  check_pre_trans(g.output->from());
 }
 
 TEST(ConvertNCHWToNHWC, Mean)
@@ -1553,6 +1593,17 @@ TEST(ConvertNCHWToNHWC, Minimum)
   check_pre_trans(g.output->from());
 }
 
+TEST(ConvertNCHWToNHWC, Minimum_non_scalar_NEG)
+{
+  MinimumGraph g;
+  g.init();
+
+  g.limit->shape({3});
+
+  luci::ConvertNCHWToNHWCPass pass(true, true);
+  EXPECT_FALSE(pass.run(&g.g));
+}
+
 TEST(ConvertNCHWToNHWC, Mul)
 {
   MulGraph g;
@@ -1893,6 +1944,85 @@ TEST(ConvertNCHWToNHWC, ReduceMax_keep_dims_false)
   }
 }
 
+TEST(ConvertNCHWToNHWC, ReduceMin)
+{
+  ReduceMinGraph g;
+  g.init();
+
+  run_phase(&g.g, true, true);
+
+  check_pre_trans(g.rm->input());
+
+  auto rm_succs = loco::succs(g.rm);
+  EXPECT_EQ(1, rm_succs.size());
+  check_post_trans(*rm_succs.begin());
+
+  auto new_rindices = dynamic_cast<luci::CircleConst *>(g.rm->reduction_indices());
+  EXPECT_NE(nullptr, new_rindices);
+  EXPECT_EQ(1, new_rindices->rank());
+  EXPECT_EQ(2, new_rindices->dim(0).value());
+  EXPECT_EQ(2, new_rindices->size<loco::DataType::S32>());
+  EXPECT_EQ(1, new_rindices->at<loco::DataType::S32>(0));
+  EXPECT_EQ(2, new_rindices->at<loco::DataType::S32>(1));
+}
+
+TEST(ConvertNCHWToNHWC, ReduceMin_keep_dims_false)
+{
+  struct TC
+  {
+    std::vector<int32_t> nchw_ind;
+    std::vector<int32_t> nhwc_ind;
+    std::initializer_list<uint32_t> shape;
+    bool needs_transpose = false;
+  };
+
+  uint32_t n = 1;
+  uint32_t c = 16;
+  uint32_t h = 4;
+  uint32_t w = 4;
+
+  std::vector<TC> test_cases{{{0}, {0}, {c, h, w}, true},       {{1}, {3}, {n, h, w}, false},
+                             {{2}, {1}, {n, c, w}, true},       {{3}, {2}, {n, c, h}, true},
+                             {{0, 1}, {0, 3}, {h, w}, false},   {{0, 2}, {0, 1}, {c, w}, true},
+                             {{0, 3}, {0, 2}, {c, h}, true},    {{1, 2}, {3, 1}, {n, w}, false},
+                             {{1, 3}, {3, 2}, {n, h}, false},   {{2, 3}, {1, 2}, {n, c}, false},
+                             {{0, 1, 2}, {0, 3, 1}, {w}, false}};
+
+  for (auto &tc : test_cases)
+  {
+    ReduceMinGraph g;
+    g.keep_dims(false);
+    g.axes(tc.nchw_ind);
+    g.shape(tc.shape);
+    g.init();
+
+    run_phase(&g.g, true, true);
+
+    check_pre_trans(g.rm->input());
+
+    auto rm_succs = loco::succs(g.rm);
+    EXPECT_EQ(1, rm_succs.size());
+    if (tc.needs_transpose)
+    {
+      EXPECT_NE(nullptr, dynamic_cast<luci::CircleTranspose *>(*rm_succs.begin()));
+    }
+    else
+    {
+      EXPECT_NE(nullptr, dynamic_cast<luci::CircleOutput *>(*rm_succs.begin()));
+    }
+
+    auto new_rindices = dynamic_cast<luci::CircleConst *>(g.rm->reduction_indices());
+    EXPECT_NE(nullptr, new_rindices);
+    EXPECT_EQ(1, new_rindices->rank());
+    EXPECT_EQ(tc.nhwc_ind.size(), new_rindices->dim(0).value());
+    EXPECT_EQ(tc.nhwc_ind.size(), new_rindices->size<loco::DataType::S32>());
+    for (uint32_t i = 0; i < tc.nhwc_ind.size(); ++i)
+    {
+      EXPECT_EQ(tc.nhwc_ind[i], new_rindices->at<loco::DataType::S32>(i));
+    }
+  }
+}
+
 TEST(ConvertNCHWToNHWC, Relu)
 {
   ReluGraph g;
@@ -1953,26 +2083,6 @@ TEST(ConvertNCHWToNHWC, Rsqrt)
   EXPECT_EQ(16, g.rsqrt->dim(3).value());
 }
 
-TEST(ConvertNCHWToNHWC, Softmax)
-{
-  SoftmaxGraph g;
-  g.init();
-
-  run_phase(&g.g, true, true);
-
-  check_pre_trans(g.softmax->logits());
-
-  auto softmax_succs = loco::succs(g.softmax);
-  EXPECT_EQ(1, softmax_succs.size());
-  check_post_trans(*softmax_succs.begin());
-
-  // Check softmax shape
-  EXPECT_EQ(1, g.softmax->dim(0).value());
-  EXPECT_EQ(4, g.softmax->dim(1).value());
-  EXPECT_EQ(4, g.softmax->dim(2).value());
-  EXPECT_EQ(16, g.softmax->dim(3).value());
-}
-
 TEST(ConvertNCHWToNHWC, SplitV)
 {
   SplitVGraph g;
index 72f5901..aacfce3 100644 (file)
@@ -31,7 +31,10 @@ namespace
 luci::CircleQuantize *create_quantize(luci::CircleNode *node)
 {
   auto quantize = node->graph()->nodes()->create<luci::CircleQuantize>();
-  quantize->name(node->name() + "_Quantize");
+  // DESIGN NOTE: Why use '_FQ_Quantize' instead of '_Quantize'?
+  // '_Quantize' is used in mixed-precision quantization
+  // We add '_FQ' to distinguish this Op from those used in mixed-precision quantization
+  quantize->name(node->name() + "_FQ_Quantize");
   quantize->dtype(node->dtype());
   quantize->rank(node->rank());
   for (uint32_t i = 0; i < node->rank(); i++)
@@ -50,7 +53,10 @@ luci::CircleQuantize *create_quantize(luci::CircleNode *node)
 luci::CircleDequantize *create_dequantize(luci::CircleNode *node)
 {
   auto dequantize = node->graph()->nodes()->create<luci::CircleDequantize>();
-  dequantize->name(node->name() + "_Dequantize");
+  // DESIGN NOTE: Why use '_FQ_Dequantize' instead of '_Dequantize'?
+  // '_Dequantize' is used in mixed-precision quantization
+  // We add '_FQ' to distinguish this Op from those used in mixed-precision quantization
+  dequantize->name(node->name() + "_FQ_Dequantize");
   dequantize->dtype(loco::DataType::FLOAT32);
   dequantize->rank(node->rank());
   for (uint32_t i = 0; i < node->rank(); i++)
@@ -184,6 +190,7 @@ struct FakeQuantize final : public luci::CircleNodeMutableVisitor<void>
 
   // For non-const activation, insert Quantize-Dequantize Ops
   // and dequantize the node
+  void visit(luci::CircleAbs *node) { fq_activation(node); }
   void visit(luci::CircleAdd *node) { fq_activation(node); }
   void visit(luci::CircleAveragePool2D *node) { fq_activation(node); }
   void visit(luci::CircleBatchMatMul *node) { fq_activation(node); }
@@ -201,6 +208,7 @@ struct FakeQuantize final : public luci::CircleNodeMutableVisitor<void>
   void visit(luci::CirclePad *node) { fq_activation(node); }
   void visit(luci::CirclePRelu *node) { fq_activation(node); }
   void visit(luci::CircleMean *node) { fq_activation(node); }
+  void visit(luci::CircleReduceProd *node) { fq_activation(node); }
   void visit(luci::CircleReduceMax *node) { fq_activation(node); }
   void visit(luci::CircleRelu *node) { fq_activation(node); }
   void visit(luci::CircleRelu6 *node) { fq_activation(node); }
@@ -216,15 +224,20 @@ struct FakeQuantize final : public luci::CircleNodeMutableVisitor<void>
   // (dtype will be automatically updated by type inference)
   void visit(luci::CircleCast *) {}
   void visit(luci::CircleConcatenation *) {}
+  void visit(luci::CircleDepthToSpace *) {}
   void visit(luci::CircleGather *) {}
   void visit(luci::CircleSlice *) {}
   void visit(luci::CircleStridedSlice *) {}
   void visit(luci::CircleReshape *) {}
+  void visit(luci::CircleSpaceToDepth *) {}
   void visit(luci::CircleSplit *) {}
   void visit(luci::CircleSplitOut *) {}
   void visit(luci::CircleSplitV *) {}
   void visit(luci::CircleSplitVOut *) {}
   void visit(luci::CircleTranspose *) {}
+  void visit(luci::CirclePack *) {}
+  void visit(luci::CircleUnpack *) {}
+  void visit(luci::CircleUnpackOut *) {}
 
   // For Ops that return index, fake quantization is unnecessary
   void visit(luci::CircleArgMax *) {}
index 0734e07..5df1b72 100644 (file)
@@ -19,6 +19,8 @@
 
 #include <luci/IR/CircleNodes.h>
 
+#include <limits> // std::numeric_limits
+
 #include <gtest/gtest.h>
 
 namespace
index 6e423e3..33f9f1d 100644 (file)
@@ -23,6 +23,8 @@
 
 #include <luci/Log.h>
 
+#include <limits> // std::numeric_limits
+
 namespace
 {
 
index b1ef568..36cae04 100644 (file)
@@ -19,6 +19,8 @@
 
 #include <luci/IR/CircleNodes.h>
 
+#include <limits> // std::numeric_limits
+
 #include <gtest/gtest.h>
 
 namespace
diff --git a/compiler/luci/pass/src/FoldFullyConnectedPass.cpp b/compiler/luci/pass/src/FoldFullyConnectedPass.cpp
new file mode 100644 (file)
index 0000000..a3bca7e
--- /dev/null
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldFullyConnectedPass.h"
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/AttrFusedActFunc.h>
+
+#include <luci/Log.h>
+
+#include <limits> // std::numeric_limits
+
+namespace
+{
+
+bool set_kernel_parameters(tflite::FullyConnectedParams *params, luci::CircleFullyConnected *node)
+{
+  switch (node->fusedActivationFunction())
+  {
+    case luci::FusedActFunc::NONE:
+    case luci::FusedActFunc::TANH:
+      params->float_activation_min = std::numeric_limits<float>::lowest();
+      params->float_activation_max = std::numeric_limits<float>::max();
+      break;
+    case luci::FusedActFunc::RELU:
+      params->float_activation_min = 0;
+      params->float_activation_max = std::numeric_limits<float>::max();
+      break;
+    case luci::FusedActFunc::RELU_N1_TO_1:
+      params->float_activation_min = -1;
+      params->float_activation_max = 1;
+      break;
+    case luci::FusedActFunc::RELU6:
+      params->float_activation_min = 0;
+      params->float_activation_max = 6;
+      break;
+    default:
+    {
+      LOGGER(l);
+      WARN(l) << "Unsupported activation: " << uint32_t(node->fusedActivationFunction());
+      return false;
+    }
+  }
+
+  assert(node->weights_format() ==
+         luci::CircleFullyConnected::WeightsFormat::DEFAULT); // FIX_CALLER_UNLESS
+  params->weights_format = tflite::FullyConnectedWeightsFormat::kDefault;
+
+  return true;
+}
+
+#define RETURN_FALSE_UNLESS(cond) \
+  if (not(cond))                  \
+    return false;
+
+/**
+ * Fold FullyConnected with constant input and filter into a constant tensor
+ *
+ *    BEFORE
+ *
+ *    [CircleConst] [CircleConst]
+ *               |   |
+ *       [CircleFullyConnected]
+ *
+ *    AFTER
+ *
+ *           [CircleConst]
+ */
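+// Illustrative note (a sketch, not part of the original comment): a const input of
+// shape [80] folded against weights of shape [32, 80], with an optional bias of
+// shape [32], yields a const of shape [32].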
+bool fold_fully_connected(luci::CircleFullyConnected *node)
+{
+  RETURN_FALSE_UNLESS(node != nullptr);
+
+  LOGGER(l);
+
+  auto const input = dynamic_cast<luci::CircleConst *>(node->input());
+  auto const weights = dynamic_cast<luci::CircleConst *>(node->weights());
+  auto const bias = dynamic_cast<luci::CircleConst *>(node->bias());
+  auto const no_bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias());
+
+  RETURN_FALSE_UNLESS(input != nullptr);
+  RETURN_FALSE_UNLESS(weights != nullptr);
+  RETURN_FALSE_UNLESS(node->weights_format() == luci::CircleFullyConnected::WeightsFormat::DEFAULT);
+  RETURN_FALSE_UNLESS(bias != nullptr or no_bias != nullptr);
+
+  RETURN_FALSE_UNLESS(input->dtype() == loco::DataType::FLOAT32);
+  RETURN_FALSE_UNLESS(weights->dtype() == loco::DataType::FLOAT32);
+  if (bias)
+    RETURN_FALSE_UNLESS(bias->dtype() == loco::DataType::FLOAT32);
+
+  auto const input_elems = input->size<loco::DataType::FLOAT32>();
+
+  RETURN_FALSE_UNLESS(weights->rank() == 2);
+  RETURN_FALSE_UNLESS(input_elems % weights->dim(1).value() == 0);
+  auto const batch_size = input_elems / weights->dim(1).value();
+  auto const num_units = weights->dim(0).value();
+
+  if (bias)
+    RETURN_FALSE_UNLESS(bias->size<loco::DataType::FLOAT32>() == num_units);
+
+  tflite::FullyConnectedParams params{};
+  if (!set_kernel_parameters(&params, node))
+    return false; // Unsupported kernel parameter values
+
+  std::vector<uint32_t> output_shape;
+  if (node->keep_num_dims() == false)
+  {
+    output_shape.push_back(batch_size);
+    output_shape.push_back(num_units);
+  }
+  else
+  {
+    output_shape.resize(input->rank());
+    for (uint32_t i = 0; i < input->rank(); i++)
+      output_shape[i] = input->dim(i).value();
+    output_shape[input->rank() - 1] = num_units;
+  }
+
+  auto constant = node->graph()->nodes()->create<luci::CircleConst>();
+  {
+    constant->name(node->name());
+    constant->dtype(node->dtype());
+    constant->rank(node->rank());
+    constant->shape_status(luci::ShapeStatus::VALID);
+    uint32_t num_elem = 1;
+    for (uint32_t i = 0; i < node->rank(); ++i)
+    {
+      constant->dim(i).set(node->dim(i).value());
+      num_elem *= node->dim(i).value();
+    }
+    constant->size<loco::DataType::FLOAT32>(num_elem);
+  }
+
+  auto tensor_shape = [](luci::CircleNode *node) {
+    if (node == nullptr)
+      return tflite::RuntimeShape();
+
+    tflite::RuntimeShape runtime_shape(node->rank());
+    for (uint32_t i = 0; i < node->rank(); ++i)
+      runtime_shape.SetDim(i, node->dim(i).value());
+    return runtime_shape;
+  };
+
+  auto tensor_data = [](luci::CircleConst *node) -> float * {
+    if (node == nullptr)
+      return nullptr;
+
+    return &node->at<loco::DataType::FLOAT32>(0);
+  };
+
+  tflite::reference_ops::FullyConnected(
+    params, tensor_shape(input), tensor_data(input), tensor_shape(weights), tensor_data(weights),
+    tensor_shape(bias), tensor_data(bias), tensor_shape(constant), tensor_data(constant));
+
+  loco::replace(node).with(constant);
+
+  return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Constant Folding for FullyConnected Op
+ **/
+bool FoldFullyConnectedPass::run(loco::Graph *g)
+{
+  bool changed = false;
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto fc = dynamic_cast<CircleFullyConnected *>(node);
+
+    if (fold_fully_connected(fc))
+      changed = true;
+  }
+
+  return changed;
+}
+
+} // namespace luci
+
+#undef RETURN_FALSE_UNLESS
diff --git a/compiler/luci/pass/src/FoldFullyConnectedPass.test.cpp b/compiler/luci/pass/src/FoldFullyConnectedPass.test.cpp
new file mode 100644 (file)
index 0000000..a8e64a2
--- /dev/null
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldFullyConnectedPass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <limits> // std::numeric_limits
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ *  Graph has a FullyConnected Op with constant inputs
+ *
+ *    BEFORE
+ *
+ *    [CircleConst] [CircleConst]
+ *               |   |
+ *       [CircleFullyConnected]
+ *
+ *    AFTER
+ *
+ *           [CircleConst]
+ */
+class FoldFullyConnectedTest : public luci::ConstantFoldingTestGraph, public ::testing::Test
+{
+#define INPUT_DIM 80
+#define NUM_UNITS 32
+
+public:
+  FoldFullyConnectedTest() : luci::ConstantFoldingTestGraph({INPUT_DIM}, loco::DataType::FLOAT32)
+  {
+    _fc = _g.nodes()->create<luci::CircleFullyConnected>();
+    _fc_input = _g.nodes()->create<luci::CircleConst>();
+    _fc_weights = _g.nodes()->create<luci::CircleConst>();
+    _fc_bias = _g.nodes()->create<luci::CircleConst>();
+
+    _fc->dtype(loco::DataType::FLOAT32);
+    _fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _fc->input(_fc_input);
+    _fc->weights(_fc_weights);
+    _fc->bias(_fc_bias);
+    _fc->shape({NUM_UNITS});
+    _fc->weights_format(luci::CircleFullyConnected::WeightsFormat::DEFAULT);
+    _fc->keep_num_dims(true);
+
+    _fc_input->dtype(loco::DataType::FLOAT32);
+    _fc_input->shape({INPUT_DIM});
+    _fc_input->size<loco::DataType::FLOAT32>(INPUT_DIM);
+
+    _fc_weights->dtype(loco::DataType::FLOAT32);
+    _fc_weights->shape({NUM_UNITS, INPUT_DIM});
+    _fc_weights->size<loco::DataType::FLOAT32>(NUM_UNITS * INPUT_DIM);
+
+    _fc_bias->dtype(loco::DataType::FLOAT32);
+    _fc_bias->shape({1, NUM_UNITS});
+    _fc_bias->size<loco::DataType::FLOAT32>(NUM_UNITS);
+
+    for (uint32_t i = 0; i < INPUT_DIM; ++i)
+      _fc_input->at<loco::DataType::FLOAT32>(i) = 1.0;
+
+    for (uint32_t i = 0; i < INPUT_DIM * NUM_UNITS; ++i)
+      _fc_weights->at<loco::DataType::FLOAT32>(i) = 1.0;
+
+    for (uint32_t i = 0; i < NUM_UNITS; ++i)
+      _fc_bias->at<loco::DataType::FLOAT32>(i) = 0.0;
+
+    _output->from(_fc);
+  }
+
+protected:
+  void init() final {}
+
+protected:
+  loco::Node *createFoldedPattern() final { return nullptr; }
+
+protected:
+  luci::CircleConst *getFoldedPattern() final
+  {
+    return loco::must_cast<luci::CircleConst *>(_output->from());
+  }
+
+protected:
+  luci::CircleFullyConnected *_fc = nullptr;
+  luci::CircleConst *_fc_input = nullptr;
+  luci::CircleConst *_fc_weights = nullptr;
+  luci::CircleConst *_fc_bias = nullptr;
+#undef INPUT_DIM
+#undef NUM_UNITS
+};
+
+} // namespace
+
+TEST_F(FoldFullyConnectedTest, fold_fc)
+{
+  luci::FoldFullyConnectedPass pass;
+  ASSERT_TRUE(pass.run(&_g));
+
+  auto folded_const = getFoldedPattern();
+  EXPECT_EQ(folded_const->dtype(), loco::DataType::FLOAT32);
+  EXPECT_EQ(1, folded_const->rank());
+  EXPECT_EQ(32, folded_const->dim(0));
+  EXPECT_EQ(32, folded_const->size<loco::DataType::FLOAT32>());
+  for (uint32_t i = 0; i < 32; ++i)
+    EXPECT_NEAR(folded_const->at<loco::DataType::FLOAT32>(i), 80,
+                std::numeric_limits<float>::min());
+}
+
+TEST_F(FoldFullyConnectedTest, fold_fc_no_bias)
+{
+  auto no_bias = _g.nodes()->create<luci::CircleOutputExclude>();
+  _fc->bias(no_bias);
+
+  luci::FoldFullyConnectedPass pass;
+  ASSERT_TRUE(pass.run(&_g));
+
+  auto folded_const = getFoldedPattern();
+  EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+  EXPECT_EQ(1, folded_const->rank());
+  EXPECT_EQ(32, folded_const->dim(0));
+  EXPECT_EQ(32, folded_const->size<loco::DataType::FLOAT32>());
+  for (uint32_t i = 0; i < 32; ++i)
+    EXPECT_NEAR(folded_const->at<loco::DataType::FLOAT32>(i), 80,
+                std::numeric_limits<float>::min());
+}
+
+TEST_F(FoldFullyConnectedTest, fold_fc_NEG)
+{
+  auto new_fc = _g.nodes()->create<luci::CircleFullyConnected>();
+  _fc->input(new_fc);
+
+  luci::FoldFullyConnectedPass pass;
+  ASSERT_FALSE(pass.run(&_g));
+}
+
+TEST_F(FoldFullyConnectedTest, fold_fc_weight_format_NEG)
+{
+  auto new_fc = _g.nodes()->create<luci::CircleFullyConnected>();
+  _fc->weights_format(luci::CircleFullyConnected::WeightsFormat::SHUFFLED4x16INT8);
+
+  luci::FoldFullyConnectedPass pass;
+  ASSERT_FALSE(pass.run(&_g));
+}
index bc09abe..3494a6e 100644 (file)
@@ -76,6 +76,26 @@ luci::CircleReshape *create_cloned_reshape(luci::CircleReshape *reshape)
   return new_reshape;
 }
 
+bool forward_reshape(luci::CircleReshape *reshape, luci::CircleAbs *abs)
+{
+  assert(reshape != nullptr); // FIX_CALLER_UNLESS
+  assert(abs != nullptr);     // FIX_CALLER_UNLESS
+
+  auto new_reshape = create_cloned_reshape(reshape);
+  if (not new_reshape)
+    return false;
+
+  // reconnect network
+  loco::replace(abs).with(new_reshape);
+  abs->x(reshape->tensor());
+  new_reshape->tensor(abs);
+
+  // Do shape inference for this node again.
+  abs->shape_status(luci::ShapeStatus::UNDEFINED);
+
+  return true;
+}
+
 bool forward_reshape(luci::CircleReshape *reshape, luci::CircleNeg *neg)
 {
   assert(reshape != nullptr);
@@ -136,6 +156,14 @@ protected:
     return false;
   }
 
+  bool visit(luci::CircleAbs *node)
+  {
+    auto reshape = as_reshape(node->x());
+    if (reshape == nullptr)
+      return false;
+    return forward_reshape(reshape, node);
+  }
+
   bool visit(luci::CircleNeg *node)
   {
     auto reshape = as_reshape(node->x());
diff --git a/compiler/luci/pass/src/ForwardTransposeOpPass.cpp b/compiler/luci/pass/src/ForwardTransposeOpPass.cpp
new file mode 100644 (file)
index 0000000..c76d733
--- /dev/null
@@ -0,0 +1,366 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ForwardTransposeOpPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Service/CircleNodeClone.h>
+
+using namespace luci;
+
+namespace
+{
+
+// Create new Transpose Op including perm
+// Return nullptr if failed
+CircleTranspose *create_cloned_transpose(CircleTranspose *transpose)
+{
+  assert(transpose != nullptr); // FIX_CALLER_UNLESS
+
+  auto perm = dynamic_cast<CircleConst *>(transpose->perm());
+  if (not perm)
+    return nullptr;
+
+  CircleConst *cloned_perm = clone(perm);
+  if (cloned_perm == nullptr)
+    return nullptr;
+
+  cloned_perm->name(perm->name() + "_C");
+  luci::add_origin(cloned_perm, luci::get_origin(perm));
+
+  auto cloned_node = clone_node(transpose, transpose->graph());
+  if (cloned_node == nullptr)
+    return nullptr;
+
+  auto new_transpose = loco::must_cast<luci::CircleTranspose *>(cloned_node);
+  new_transpose->perm(cloned_perm);
+  new_transpose->name(transpose->name() + "_C");
+  luci::add_origin(new_transpose, luci::get_origin(transpose));
+
+  return new_transpose;
+}
+
+uint32_t cal_offset(const std::vector<uint32_t> &shape, const std::vector<uint32_t> &indices)
+{
+  assert(shape.size() == indices.size()); // FIX_CALLER_UNLESS
+
+  uint32_t offset = 0;
+  for (uint32_t i = 0; i < indices.size(); i++)
+  {
+    uint32_t index = indices[i];
+    for (uint32_t j = shape.size() - 1; j > i; j--)
+    {
+      index *= shape[j];
+    }
+    offset += index;
+  }
+  return offset;
+}
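+// Worked example for cal_offset (illustrative only): for shape {1, 2, 3, 4} and
+// indices {0, 1, 2, 3}, the row-major offset is 0*24 + 1*12 + 2*4 + 3 = 23.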
+
+// Return reverse-transpose of 'node'
+// i.e., Transpose(return value) = node
+CircleConst *reverse_transposed(CircleConst *node, std::vector<uint32_t> &t)
+{
+  assert(node->rank() == t.size()); // FIX_CALLER_UNLESS
+  assert(node->rank() == 4);        // FIX_CALLER_UNLESS
+
+  std::vector<uint32_t> orig_shape(node->rank());
+  std::vector<uint32_t> new_shape(node->rank());
+
+  for (uint32_t i = 0; i < node->rank(); i++)
+  {
+    assert(t[i] < node->rank()); // FIX_CALLER_UNLESS
+
+    orig_shape[i] = node->dim(i).value();
+    new_shape[t[i]] = node->dim(i).value();
+  }
+
+  auto clone_const = clone(node);
+  for (uint32_t i = 0; i < node->rank(); i++)
+    clone_const->dim(i).set(new_shape[i]);
+
+  clone_const->name(clone_const->name() + "_r_transposed");
+  add_origin(clone_const, luci::get_origin(node));
+
+  for (uint32_t n = 0; n < clone_const->dim(0).value(); n++)
+  {
+    for (uint32_t h = 0; h < clone_const->dim(1).value(); h++)
+    {
+      for (uint32_t w = 0; w < clone_const->dim(2).value(); w++)
+      {
+        for (uint32_t c = 0; c < clone_const->dim(3).value(); c++)
+        {
+          std::vector<uint32_t> new_indices{n, h, w, c};
+          std::vector<uint32_t> orig_indices{new_indices[t[0]], new_indices[t[1]],
+                                             new_indices[t[2]], new_indices[t[3]]};
+
+          const auto data = node->at<loco::DataType::FLOAT32>(cal_offset(orig_shape, orig_indices));
+          clone_const->at<loco::DataType::FLOAT32>(cal_offset(new_shape, new_indices)) = data;
+        }
+      }
+    }
+  }
+
+  return clone_const;
+}
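+// Illustrative example for reverse_transposed (a sketch): with t = {0, 3, 2, 1},
+// a const of shape {1, 1, 51, 64} is rewritten to shape {1, 64, 51, 1}, so
+// transposing the result with t reproduces the original constant.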
+
+bool check_rank_four(const CircleConst *c) { return c->rank() == 4; }
+
+// Return true if below conditions are met
+// 1. t->perm() is CircleConst
+// 2. t->perm() is S32
+bool check_perm(const CircleTranspose *t)
+{
+  auto perm = dynamic_cast<CircleConst *>(t->perm());
+  if (not perm)
+    return false;
+
+  switch (perm->dtype())
+  {
+    case loco::DataType::S32:
+      for (uint32_t i = 0; i < perm->size<loco::DataType::S32>(); i++)
+      {
+        auto data = perm->at<loco::DataType::S32>(i);
+        // TODO Support non-normalized indices
+        if (data < 0 or data >= static_cast<int32_t>(t->rank()))
+          return false;
+      }
+      break;
+    // TODO Support S64 data type
+    default:
+      return false;
+  }
+
+  return true;
+}
+
+#define RETURN_FALSE_UNLESS(COND) \
+  if (not(COND))                  \
+    return false;
+
+// Elementwise Binary Operator with const
+class EBOWithConstPattern final : public CircleNodeMutableVisitor<bool>
+{
+private:
+  template <typename CIRCLE_OP_PTR> bool has_pattern(CIRCLE_OP_PTR node)
+  {
+    if (auto x = dynamic_cast<luci::CircleConst *>(node->x()))
+    {
+      if (auto y = dynamic_cast<luci::CircleTranspose *>(node->y()))
+      {
+        RETURN_FALSE_UNLESS(check_rank_four(x));
+        RETURN_FALSE_UNLESS(check_perm(y));
+
+        auto new_const = gen_new_const(y, x);
+        assert(new_const); // FIX_ME_UNLESS
+
+        auto new_transpose = create_cloned_transpose(y);
+        assert(new_transpose); // FIX_ME_UNLESS
+
+        // Reconnect network
+        node->x(new_const);
+        node->y(y->a());
+        loco::replace(node).with(new_transpose);
+        new_transpose->a(node);
+
+        // Do shape inference for this node again.
+        node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+        return true;
+      }
+    }
+
+    if (auto y = dynamic_cast<luci::CircleConst *>(node->y()))
+    {
+      if (auto x = dynamic_cast<luci::CircleTranspose *>(node->x()))
+      {
+        RETURN_FALSE_UNLESS(check_rank_four(y));
+        RETURN_FALSE_UNLESS(check_perm(x));
+
+        auto new_const = gen_new_const(x, y);
+        assert(new_const); // FIX_ME_UNLESS
+
+        auto new_transpose = create_cloned_transpose(x);
+        assert(new_transpose); // FIX_ME_UNLESS
+
+        // Reconnect network
+        node->y(new_const);
+        node->x(x->a());
+        loco::replace(node).with(new_transpose);
+        new_transpose->a(node);
+
+        // Do shape inference for this node again.
+        node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+        return true;
+      }
+    }
+
+    return false;
+  }
+
+public:
+  // Default
+  bool visit(luci::CircleNode *) { return false; }
+
+  bool visit(luci::CircleAdd *node) { return has_pattern(node); }
+
+  bool visit(luci::CircleMul *node) { return has_pattern(node); }
+
+private:
+  // Return a new const node after Transpose Op is forwarded
+  // Return nullptr if unsupported cases
+  CircleConst *gen_new_const(CircleTranspose *t, CircleConst *c)
+  {
+    const auto perm = dynamic_cast<CircleConst *>(t->perm());
+
+    // Only support constant perm
+    if (not perm)
+      return nullptr;
+
+    std::vector<uint32_t> perm_data;
+    switch (perm->dtype())
+    {
+      case loco::DataType::S32:
+        for (uint32_t i = 0; i < perm->size<loco::DataType::S32>(); i++)
+        {
+          auto data = perm->at<loco::DataType::S32>(i);
+          assert(data >= 0 and data < static_cast<int32_t>(t->rank()));
+          perm_data.emplace_back(static_cast<uint32_t>(data));
+        }
+        break;
+      // TODO Support S64 data type
+      default:
+        return nullptr;
+    }
+
+    assert(perm_data.size() == t->rank()); // FIX_CALLER_UNLESS
+
+    return reverse_transposed(c, perm_data);
+  }
+};
+
+// Elementwise Unary Operator
+class EwUnaryPattern final : public CircleNodeMutableVisitor<bool>
+{
+private:
+  // input is 'x'
+  template <typename CIRCLE_OP_PTR> bool has_pattern_x(CIRCLE_OP_PTR node)
+  {
+    if (auto x = dynamic_cast<luci::CircleTranspose *>(node->x()))
+    {
+      RETURN_FALSE_UNLESS(check_perm(x));
+
+      auto new_transpose = create_cloned_transpose(x);
+      assert(new_transpose); // FIX_ME_UNLESS
+
+      // Reconnect network
+      node->x(x->a());
+      loco::replace(node).with(new_transpose);
+      new_transpose->a(node);
+
+      // Do shape inference for this node again.
+      node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+      return true;
+    }
+
+    return false;
+  }
+
+public:
+  // Default
+  bool visit(luci::CircleNode *) { return false; }
+
+  bool visit(luci::CircleAbs *node) { return has_pattern_x(node); }
+};
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ *                       |
+ *                  [CircleNode]  [CircleConst]
+ *                       |       /
+ *              [CircleTranspose] [CircleConst]
+ *                /      |       /
+ *     [CircleNode]  [(BinaryOp)]
+ *          |            |     \
+ *          |            |      [CircleNode]
+ *          |            |           |
+ *
+ *   BinaryOp: CircleAdd, CircleMul, ...
+ *
+ *                       |
+ *                  [CircleNode]  [CircleConst]
+ *                       |       /
+ *              [CircleTranspose]
+ *                /      |
+ *     [CircleNode]  [(UnaryOp)]
+ *          |            |     \
+ *          |            |      [CircleNode]
+ *          |            |           |
+ *
+ *   UnaryOp: CircleAbs, ...
+ *
+ * AFTER
+ *                       |
+ *   [CircleConst]  [CircleNode]  [CircleConst(updated)]
+ *         |       /     |       /
+ *  [CircleTranspose] [(BinaryOp)] [CircleConst]
+ *         |             |        /
+ *   [CircleNode] [CircleTranspose]
+ *         |             |      \
+ *         |             |       [CircleNode]
+ *         |             |            |
+ *
+ *                       |
+ *   [CircleConst]  [CircleNode]
+ *         |       /     |
+ *  [CircleTranspose] [(UnaryOp)] [CircleConst]
+ *         |             |        /
+ *   [CircleNode] [CircleTranspose]
+ *         |             |      \
+ *         |             |       [CircleNode]
+ *         |             |            |
+ *
+ *   Note: new [CircleTranspose] is added after [(BinaryOp)]
+ */
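+// Illustrative summary (a sketch, not from the original): Add(Transpose(x, perm), c)
+// is rewritten to Transpose(Add(x, c'), perm), where c' is the reverse-transposed
+// constant, so the binary Op consumes x directly and the Transpose moves below it.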
+bool ForwardTransposeOpPass::run(loco::Graph *g)
+{
+  bool changed = false;
+  EBOWithConstPattern eboc;
+  EwUnaryPattern ewu;
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+    if (circle_node->accept(&eboc))
+      changed = true;
+    else if (circle_node->accept(&ewu))
+      changed = true;
+  }
+  return changed;
+}
+
+#undef RETURN_FALSE_UNLESS
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ForwardTransposeOpPass.test.cpp b/compiler/luci/pass/src/ForwardTransposeOpPass.test.cpp
new file mode 100644 (file)
index 0000000..2d061c2
--- /dev/null
@@ -0,0 +1,524 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ForwardTransposeOpPass.h"
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <logo/Phase.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+namespace
+{
+
+using namespace luci::test;
+
+template <typename T> class TransposeBinaryOpGraphlet
+{
+public:
+  TransposeBinaryOpGraphlet() = default;
+
+public:
+  virtual ~TransposeBinaryOpGraphlet() = default;
+
+public:
+  void init(loco::Graph *g, const ShapeU32 shape_in, const ShapeU32 perm)
+  {
+    std::vector<uint32_t> shape_in_v = shape_in;
+    std::vector<uint32_t> perm_v = perm;
+
+    assert(shape_in_v.size() == perm_v.size()); // FIX_CALLER_UNLESS
+
+    _perm = g->nodes()->create<luci::CircleConst>();
+    _const = g->nodes()->create<luci::CircleConst>();
+    _transpose = g->nodes()->create<luci::CircleTranspose>();
+    _binary = g->nodes()->create<T>();
+
+    _perm->dtype(loco::DataType::S32);
+    _perm->rank(1);
+    _perm->dim(0).set(perm_v.size());
+    _perm->shape_status(luci::ShapeStatus::VALID);
+
+    _const->dtype(loco::DataType::FLOAT32);
+    _const->rank(shape_in_v.size());
+    for (uint32_t i = 0; i < shape_in_v.size(); i++)
+      _const->dim(i).set(shape_in_v[perm_v[i]]);
+    _const->shape_status(luci::ShapeStatus::VALID);
+
+    // values
+    const auto size = perm_v.size();
+    _perm->size<loco::DataType::S32>(size);
+    for (uint32_t i = 0; i < size; i++)
+      _perm->at<loco::DataType::S32>(i) = perm_v[i];
+
+    uint32_t elems = 1;
+    for (uint32_t i = 0; i < size; i++)
+      elems *= shape_in_v[i];
+
+    _const->size<loco::DataType::FLOAT32>(elems);
+    for (uint32_t i = 0; i < elems; i++)
+      _const->at<loco::DataType::FLOAT32>(i) = i;
+
+    _perm->name("transpose_perm");
+    _transpose->name("transpose");
+    _binary->name("binary");
+  }
+
+  luci::CircleTranspose *transpose(void) { return _transpose; }
+
+  void switch_xy(void)
+  {
+    assert(_binary); // FIX_CALLER_UNLESS
+    auto temp = _binary->x();
+    _binary->x(_binary->y());
+    _binary->y(temp);
+  }
+
+protected:
+  luci::CircleTranspose *_transpose = nullptr;
+  T *_binary = nullptr;
+  luci::CircleConst *_perm = nullptr;
+  luci::CircleConst *_const = nullptr;
+};
+
+using TransposeAddGraphlet = TransposeBinaryOpGraphlet<luci::CircleAdd>;
+using TransposeMulGraphlet = TransposeBinaryOpGraphlet<luci::CircleMul>;
+
+class ForwardTransposeToAddGraph : public TestIOGraph, public TransposeAddGraphlet
+{
+public:
+  void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+  {
+    TestIOGraph::init(shape_in, shape_out);
+    TransposeAddGraphlet::init(g(), shape_in, shape_out);
+
+    // connect network
+    _transpose->a(input());
+    _transpose->perm(_perm);
+    _binary->x(_transpose);
+    _binary->y(_const);
+
+    output()->from(_binary);
+  }
+};
+
+class ForwardTransposeToAddInvalidGraph : public TestIOGraph, public TransposeAddGraphlet
+{
+public:
+  void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+  {
+    TestIOGraph::init(shape_in, shape_out);
+    TransposeAddGraphlet::init(g(), shape_in, shape_out);
+
+    // connect network
+    _transpose->a(input());
+    _transpose->perm(_perm);
+    _binary->x(_transpose);
+    _binary->y(_transpose);
+
+    output()->from(_binary);
+  }
+};
+
+class ForwardTransposeToMulGraph : public TestIOGraph, public TransposeMulGraphlet
+{
+public:
+  void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+  {
+    TestIOGraph::init(shape_in, shape_out);
+    TransposeMulGraphlet::init(g(), shape_in, shape_out);
+
+    // connect network
+    _transpose->a(input());
+    _transpose->perm(_perm);
+    _binary->x(_transpose);
+    _binary->y(_const);
+
+    output()->from(_binary);
+  }
+};
+
+void run_phase(loco::Graph *g)
+{
+  logo::Phase phase;
+
+  // Default passes.
+  phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+
+  // Pass to test
+  phase.emplace_back(std::make_unique<luci::ForwardTransposeOpPass>());
+
+  logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
+  phase_runner.run(phase);
+}
+
+class ForwardTransposeToAddGraphTest : public ::testing::Test
+{
+public:
+  void run_pass(void) { run_phase(_graph.g()); }
+
+protected:
+  ForwardTransposeToAddGraph _graph;
+};
+
+class ForwardTransposeToAddGraphNegTest : public ::testing::Test
+{
+public:
+  void run_pass(void) { run_phase(_graph.g()); }
+
+protected:
+  ForwardTransposeToAddInvalidGraph _graph;
+};
+
+class ForwardTransposeToMulGraphTest : public ::testing::Test
+{
+public:
+  void run_pass(void) { run_phase(_graph.g()); }
+
+protected:
+  ForwardTransposeToMulGraph _graph;
+};
+
+} // namespace
+
+TEST_F(ForwardTransposeToAddGraphTest, forward_add_xy)
+{
+  _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+  run_pass();
+
+  auto transpose = dynamic_cast<luci::CircleTranspose *>(_graph.output()->from());
+  EXPECT_NE(nullptr, transpose);
+  EXPECT_EQ(4, transpose->rank());
+  EXPECT_EQ(1, transpose->dim(0).value());
+  EXPECT_EQ(1, transpose->dim(1).value());
+  EXPECT_EQ(51, transpose->dim(2).value());
+  EXPECT_EQ(64, transpose->dim(3).value());
+
+  auto add = dynamic_cast<luci::CircleAdd *>(transpose->a());
+  EXPECT_NE(nullptr, add);
+  EXPECT_EQ(4, add->rank());
+  EXPECT_EQ(1, add->dim(0).value());
+  EXPECT_EQ(64, add->dim(1).value());
+  EXPECT_EQ(51, add->dim(2).value());
+  EXPECT_EQ(1, add->dim(3).value());
+
+  auto add_const = dynamic_cast<luci::CircleConst *>(add->y());
+  EXPECT_NE(nullptr, add_const);
+  EXPECT_EQ(4, add_const->rank());
+  EXPECT_EQ(1, add_const->dim(0).value());
+  EXPECT_EQ(64, add_const->dim(1).value());
+  EXPECT_EQ(51, add_const->dim(2).value());
+  EXPECT_EQ(1, add_const->dim(3).value());
+}
+
+TEST_F(ForwardTransposeToAddGraphTest, forward_add_yx)
+{
+  _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+  _graph.switch_xy();
+
+  run_pass();
+
+  auto transpose = dynamic_cast<luci::CircleTranspose *>(_graph.output()->from());
+  EXPECT_NE(nullptr, transpose);
+  EXPECT_EQ(4, transpose->rank());
+  EXPECT_EQ(1, transpose->dim(0).value());
+  EXPECT_EQ(1, transpose->dim(1).value());
+  EXPECT_EQ(51, transpose->dim(2).value());
+  EXPECT_EQ(64, transpose->dim(3).value());
+
+  auto add = dynamic_cast<luci::CircleAdd *>(transpose->a());
+  EXPECT_NE(nullptr, add);
+  EXPECT_EQ(4, add->rank());
+  EXPECT_EQ(1, add->dim(0).value());
+  EXPECT_EQ(64, add->dim(1).value());
+  EXPECT_EQ(51, add->dim(2).value());
+  EXPECT_EQ(1, add->dim(3).value());
+
+  auto add_const = dynamic_cast<luci::CircleConst *>(add->x());
+  EXPECT_NE(nullptr, add_const);
+  EXPECT_EQ(4, add_const->rank());
+  EXPECT_EQ(1, add_const->dim(0).value());
+  EXPECT_EQ(64, add_const->dim(1).value());
+  EXPECT_EQ(51, add_const->dim(2).value());
+  EXPECT_EQ(1, add_const->dim(3).value());
+}
+
+TEST_F(ForwardTransposeToMulGraphTest, forward_mul_xy)
+{
+  _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+  run_pass();
+
+  auto transpose = dynamic_cast<luci::CircleTranspose *>(_graph.output()->from());
+  EXPECT_NE(nullptr, transpose);
+  EXPECT_EQ(4, transpose->rank());
+  EXPECT_EQ(1, transpose->dim(0).value());
+  EXPECT_EQ(1, transpose->dim(1).value());
+  EXPECT_EQ(51, transpose->dim(2).value());
+  EXPECT_EQ(64, transpose->dim(3).value());
+
+  auto mul = dynamic_cast<luci::CircleMul *>(transpose->a());
+  EXPECT_NE(nullptr, mul);
+  EXPECT_EQ(4, mul->rank());
+  EXPECT_EQ(1, mul->dim(0).value());
+  EXPECT_EQ(64, mul->dim(1).value());
+  EXPECT_EQ(51, mul->dim(2).value());
+  EXPECT_EQ(1, mul->dim(3).value());
+
+  auto mul_const = dynamic_cast<luci::CircleConst *>(mul->y());
+  EXPECT_NE(nullptr, mul_const);
+  EXPECT_EQ(4, mul_const->rank());
+  EXPECT_EQ(1, mul_const->dim(0).value());
+  EXPECT_EQ(64, mul_const->dim(1).value());
+  EXPECT_EQ(51, mul_const->dim(2).value());
+  EXPECT_EQ(1, mul_const->dim(3).value());
+}
+
+TEST_F(ForwardTransposeToMulGraphTest, forward_mul_yx)
+{
+  _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+  _graph.switch_xy();
+
+  run_pass();
+
+  auto transpose = dynamic_cast<luci::CircleTranspose *>(_graph.output()->from());
+  EXPECT_NE(nullptr, transpose);
+  EXPECT_EQ(4, transpose->rank());
+  EXPECT_EQ(1, transpose->dim(0).value());
+  EXPECT_EQ(1, transpose->dim(1).value());
+  EXPECT_EQ(51, transpose->dim(2).value());
+  EXPECT_EQ(64, transpose->dim(3).value());
+
+  auto mul = dynamic_cast<luci::CircleMul *>(transpose->a());
+  EXPECT_NE(nullptr, mul);
+  EXPECT_EQ(4, mul->rank());
+  EXPECT_EQ(1, mul->dim(0).value());
+  EXPECT_EQ(64, mul->dim(1).value());
+  EXPECT_EQ(51, mul->dim(2).value());
+  EXPECT_EQ(1, mul->dim(3).value());
+
+  auto mul_const = dynamic_cast<luci::CircleConst *>(mul->x());
+  EXPECT_NE(nullptr, mul_const);
+  EXPECT_EQ(4, mul_const->rank());
+  EXPECT_EQ(1, mul_const->dim(0).value());
+  EXPECT_EQ(64, mul_const->dim(1).value());
+  EXPECT_EQ(51, mul_const->dim(2).value());
+  EXPECT_EQ(1, mul_const->dim(3).value());
+}
+
+TEST_F(ForwardTransposeToAddGraphTest, forward_transpose_add_NEG)
+{
+  _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+  // Remove add
+  _graph.output()->from(_graph.transpose());
+
+  luci::ForwardTransposeOpPass pass;
+  EXPECT_FALSE(pass.run(_graph.g()));
+}
+
+TEST_F(ForwardTransposeToAddGraphNegTest, forward_transpose_add_non_const_NEG)
+{
+  _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+  luci::ForwardTransposeOpPass pass;
+  EXPECT_FALSE(pass.run(_graph.g()));
+}
+
+TEST_F(ForwardTransposeToMulGraphTest, forward_transpose_mul_NEG)
+{
+  _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+  // Remove mul
+  _graph.output()->from(_graph.transpose());
+
+  luci::ForwardTransposeOpPass pass;
+  EXPECT_FALSE(pass.run(_graph.g()));
+}
+
+// Unary
+
+namespace
+{
+
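+// Graphlet for a Transpose followed by a unary Op (e.g. Abs).
+// NOTE _const is created and filled with values but is not connected to the
+//      unary Op; only _transpose and _unary are wired by the graphs below.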
+template <typename T> class TransposeUnaryOpGraphlet
+{
+public:
+  TransposeUnaryOpGraphlet() = default;
+
+public:
+  virtual ~TransposeUnaryOpGraphlet() = default;
+
+public:
+  void init(loco::Graph *g, const ShapeU32 shape_in, const ShapeU32 perm)
+  {
+    std::vector<uint32_t> shape_in_v = shape_in;
+    std::vector<uint32_t> perm_v = perm;
+
+    assert(shape_in_v.size() == perm_v.size()); // FIX_CALLER_UNLESS
+
+    _perm = g->nodes()->create<luci::CircleConst>();
+    _const = g->nodes()->create<luci::CircleConst>();
+    _transpose = g->nodes()->create<luci::CircleTranspose>();
+    _unary = g->nodes()->create<T>();
+
+    _perm->dtype(loco::DataType::S32);
+    _perm->rank(1);
+    _perm->dim(0).set(perm_v.size());
+    _perm->shape_status(luci::ShapeStatus::VALID);
+
+    _const->dtype(loco::DataType::FLOAT32);
+    _const->rank(shape_in_v.size());
+    for (uint32_t i = 0; i < shape_in_v.size(); i++)
+      _const->dim(i).set(shape_in_v[perm_v[i]]);
+    _const->shape_status(luci::ShapeStatus::VALID);
+
+    // values
+    const auto size = perm_v.size();
+    _perm->size<loco::DataType::S32>(size);
+    for (uint32_t i = 0; i < size; i++)
+      _perm->at<loco::DataType::S32>(i) = perm_v[i];
+
+    uint32_t elems = 1;
+    for (uint32_t i = 0; i < size; i++)
+      elems *= shape_in_v[i];
+
+    _const->size<loco::DataType::FLOAT32>(elems);
+    for (uint32_t i = 0; i < elems; i++)
+      _const->at<loco::DataType::FLOAT32>(i) = i;
+
+    _perm->name("transpose_perm");
+    _transpose->name("transpose");
+    _unary->name("unary");
+  }
+
+  luci::CircleTranspose *transpose(void) { return _transpose; }
+
+protected:
+  luci::CircleTranspose *_transpose = nullptr;
+  T *_unary = nullptr;
+  luci::CircleConst *_perm = nullptr;
+  luci::CircleConst *_const = nullptr;
+};
+
+using TransposeAbsGraphlet = TransposeUnaryOpGraphlet<luci::CircleAbs>;
+
+class ForwardTransposeToAbsGraph : public TestIOGraph, public TransposeAbsGraphlet
+{
+public:
+  void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+  {
+    TestIOGraph::init(shape_in, shape_out);
+    TransposeAbsGraphlet::init(g(), shape_in, shape_out);
+
+    // connect network
+    _transpose->a(input());
+    _transpose->perm(_perm);
+    _unary->x(_transpose);
+
+    output()->from(_unary);
+  }
+};
+
+class ForwardTransposeToAbsInvalidGraph : public TestIOGraph, public TransposeAbsGraphlet
+{
+public:
+  void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+  {
+    TestIOGraph::init(shape_in, shape_out);
+    TransposeAbsGraphlet::init(g(), shape_in, shape_out);
+
+    _relu = g()->nodes()->create<luci::CircleRelu>();
+    _relu->dtype(loco::DataType::FLOAT32);
+    _relu->name("relu");
+
+    // connect network
+    _relu->features(input());
+    _unary->x(_relu);
+
+    output()->from(_unary);
+  }
+
+protected:
+  luci::CircleRelu *_relu = nullptr;
+};
+
+class ForwardTransposeToAbsGraphTest : public ::testing::Test
+{
+public:
+  void run_pass(void) { run_phase(_graph.g()); }
+
+protected:
+  ForwardTransposeToAbsGraph _graph;
+};
+
+class ForwardTransposeToAbsGraphNegTest : public ::testing::Test
+{
+public:
+  void run_pass(void) { run_phase(_graph.g()); }
+
+protected:
+  ForwardTransposeToAbsInvalidGraph _graph;
+};
+
+} // namespace
+
+TEST_F(ForwardTransposeToAbsGraphTest, forward_abs_x)
+{
+  _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+  run_pass();
+
+  auto transpose = dynamic_cast<luci::CircleTranspose *>(_graph.output()->from());
+  EXPECT_NE(nullptr, transpose);
+  EXPECT_EQ(4, transpose->rank());
+  EXPECT_EQ(1, transpose->dim(0).value());
+  EXPECT_EQ(1, transpose->dim(1).value());
+  EXPECT_EQ(51, transpose->dim(2).value());
+  EXPECT_EQ(64, transpose->dim(3).value());
+
+  auto abs = dynamic_cast<luci::CircleAbs *>(transpose->a());
+  EXPECT_NE(nullptr, abs);
+  EXPECT_EQ(4, abs->rank());
+  EXPECT_EQ(1, abs->dim(0).value());
+  EXPECT_EQ(64, abs->dim(1).value());
+  EXPECT_EQ(51, abs->dim(2).value());
+  EXPECT_EQ(1, abs->dim(3).value());
+}
+
+TEST_F(ForwardTransposeToAbsGraphTest, forward_transpose_abs_NEG)
+{
+  _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+  // Remove abs
+  _graph.output()->from(_graph.transpose());
+
+  luci::ForwardTransposeOpPass pass;
+  EXPECT_FALSE(pass.run(_graph.g()));
+}
+
+TEST_F(ForwardTransposeToAbsGraphNegTest, forward_transpose_abs_non_transpose_NEG)
+{
+  _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+  luci::ForwardTransposeOpPass pass;
+  EXPECT_FALSE(pass.run(_graph.g()));
+}
index 3cf31ed..1d4a2e3 100644 (file)
@@ -86,6 +86,14 @@ bool fuse_add_with_fc(luci::CircleFullyConnected *fc)
   if (not(addition->dim(rank - 1) == weights->dim(0)))
     return false;
 
+  auto bias = loco::must_cast<luci::CircleNode *>(fc->bias());
+
+  // We only support (1) constant bias (2) no bias
+  // If bias is neither (1) nor (2), it would be a feature map
+  if (bias->opcode() != luci::CircleOpcode::CIRCLECONST and
+      bias->opcode() != luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE)
+    return false;
+
   auto fused_bias = luci::clone(addition);
 
   // Add existing bias values
index 4cc2eb5..3007965 100644 (file)
@@ -125,6 +125,15 @@ public:
 public:
   luci::CircleFullyConnected *fc() { return _fc; }
 
+public:
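+  // Replace the constant bias of _fc with a feature map (the output of a new FC
+  // node) so that fuse_add_with_fc() must reject the fusion (see fm_bias_NEG below)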
+  void to_fm_bias(void)
+  {
+    assert(_fc != nullptr); // FIX_ME_UNLESS
+
+    auto new_fc = _fc->graph()->nodes()->create<luci::CircleFullyConnected>();
+    _fc->bias(new_fc);
+  }
+
 protected:
   luci::CircleFullyConnected *_fc = nullptr;
   luci::CircleAdd *_add = nullptr;
@@ -174,3 +183,14 @@ TEST_F(FuseAddWithFullyConnectedPassTest, simple_test)
     EXPECT_EQ(i, bias->at<loco::DataType::FLOAT32>(i));
   }
 }
+
+TEST_F(FuseAddWithFullyConnectedPassTest, fm_bias_NEG)
+{
+  g.init();
+
+  // Bias is a feature map. Add is not fused.
+  g.to_fm_bias();
+
+  auto ret = pass.run(g.g());
+  EXPECT_EQ(false, ret);
+}
index 09180d8..3f8f700 100644 (file)
@@ -679,7 +679,6 @@ bool FuseBCQPass::run(luci::Module *m)
         if (output_node->index() == 0 || (int)output_node->index() > original_output_cnt)
         {
           auto noOp = main_graph->nodes()->create<luci::CircleOutputExclude>();
-          noOp->dtype(loco::DataType::FLOAT32); // TODO Remove this setting
           output_node->from(noOp);
           changed = true;
         }
index e6b54df..265a839 100644 (file)
 
 namespace
 {
+
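+// Create an activation node of type CIRCLENODE (CircleRelu or CircleRelu6) fed by
+// `feature`, inherit the origin of `target`, and replace `target` with the new node.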
+template <class CIRCLENODE>
+void replace_with_relu(luci::CircleNode *target, luci::CircleNode *feature,
+                       const std::string &relu_name)
+{
+  assert(target != nullptr);
+  assert(feature != nullptr);
+
+  auto relu = target->graph()->nodes()->create<CIRCLENODE>();
+  relu->features(feature);
+  relu->name(relu_name);
+  luci::add_origin(relu, luci::get_origin(target));
+
+  replace(target).with(relu);
+}
+
+} // namespace
+
+namespace
+{
 /**
  *  Fuse Mul-Add to TransposeConv if possible.
  *
@@ -49,10 +69,10 @@ namespace
  *                     |  / /                                |     /
  *     [CircleTransposeConv]                            [CircleAdd]
  *                     |
- *              ([CircleRelu6])
+ *        ([CircleRelu]/[CircleRelu6])
  *                     |
  *
- * Note: CircleRelu6 is inserted if Add activation is ReLU6
+ * Note: CircleRelu or CircleRelu6 is inserted if Add activation is ReLU/ReLU6
  */
 bool fused_batch_norm_with_tconv(luci::CircleAdd *add)
 {
@@ -80,7 +100,8 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add)
   if (add->dtype() != loco::DataType::FLOAT32)
     return false;
   if (add->fusedActivationFunction() != luci::FusedActFunc::NONE &&
-      add->fusedActivationFunction() != luci::FusedActFunc::RELU6)
+      add->fusedActivationFunction() != luci::FusedActFunc::RELU6 &&
+      add->fusedActivationFunction() != luci::FusedActFunc::RELU)
     return false;
 
   // tconv bias is optional
@@ -202,19 +223,23 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add)
     luci::add_origin(fused_tconv, luci::get_origin(bias));
   }
 
-  if (add->fusedActivationFunction() == luci::FusedActFunc::RELU6)
+  switch (add->fusedActivationFunction())
   {
-    // separate relu op from add op
-    auto relu = add->graph()->nodes()->create<luci::CircleRelu6>();
-    relu->features(fused_tconv);
-    relu->name(name + "/Relu6");
-    luci::add_origin(relu, luci::get_origin(add));
+    case luci::FusedActFunc::RELU6:
+      replace_with_relu<luci::CircleRelu6>(add, fused_tconv, name + "/Relu6");
+      break;
 
-    replace(add).with(relu);
-  }
-  else
-  {
-    replace(add).with(fused_tconv);
+    case luci::FusedActFunc::RELU:
+      replace_with_relu<luci::CircleRelu>(add, fused_tconv, name + "/Relu");
+      break;
+
+    case luci::FusedActFunc::NONE:
+      replace(add).with(fused_tconv);
+      break;
+
+    default:
+      assert(false);
+      break;
   }
 
   return true;
diff --git a/compiler/luci/pass/src/FusePReluPass.cpp b/compiler/luci/pass/src/FusePReluPass.cpp
new file mode 100644 (file)
index 0000000..a5ce60e
--- /dev/null
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FusePReluPass.h"
+#include "helpers/NodeFiller.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Service/CircleNodeClone.h>
+
+#include <cassert>
+
+// Helper to fuse PRelu
+namespace
+{
+
+/**
+ * Below diagram shows PRelu pattern to fuse.
+ * - this pattern will be replaced with one PRelu
+ *
+ *           [In]
+ *            |
+ *            V
+ *     +---- ifm ----+
+ *     |      |      |
+ *     |      |      V
+ *     |      |     abs
+ *     |      V      |
+ *     |     sub <---+
+ *     |      |
+ *     |      V
+ *     |   mul_alpha (alpha of PRelu)
+ *     |      |
+ *     V      V
+ *    relu mul_half (0.5)
+ *     |      |
+ *     |      V
+ *     +---> add
+ *            |
+ *            V
+ *          [Out]
+ *
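+ * Equivalence: relu(ifm) = max(ifm, 0) and (ifm - abs(ifm)) = 2 * min(ifm, 0),
+ * so the pattern computes max(ifm, 0) + alpha * min(ifm, 0) = PRelu(ifm, alpha).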
+ */
+class PReluPattern final
+{
+public:
+  PReluPattern(luci::CircleAdd *candidate)
+  {
+    assert(candidate);
+    _add_ofm = candidate;
+  }
+
+public:
+  bool matched();
+
+public:
+  luci::CircleNode *_ifm = nullptr;
+  luci::CircleRelu *_relu = nullptr;
+  luci::CircleAbs *_abs = nullptr;
+  luci::CircleSub *_sub = nullptr;
+  luci::CircleMul *_mul_alpha = nullptr;
+  luci::CircleMul *_mul_half = nullptr;
+  luci::CircleAdd *_add_ofm = nullptr;
+  luci::CircleConst *_const_alpha = nullptr;
+  luci::CircleConst *_const_half = nullptr;
+};
+
+#define CHECK_OR_FALSE(condition) \
+  if (not(condition))             \
+    return false;
+
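+// Try to match the decomposed PRelu pattern rooted at _add_ofm.
+// On success, all member node pointers of PReluPattern are filled.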
+bool PReluPattern::matched()
+{
+  // check pattern
+  CHECK_OR_FALSE(luci::fill(&_relu, &_mul_half).with_commutative_args_of(_add_ofm));
+  CHECK_OR_FALSE(luci::fill(&_mul_alpha, &_const_half).with_commutative_args_of(_mul_half));
+  CHECK_OR_FALSE(luci::fill(&_sub, &_const_alpha).with_commutative_args_of(_mul_alpha));
+
+  CHECK_OR_FALSE(luci::fill(&_ifm, &_abs).with_args_of(_sub));
+
+  CHECK_OR_FALSE(_relu->features() == _ifm);
+  CHECK_OR_FALSE(_abs->x() == _ifm);
+
+  // Check Activation to be NONE
+  CHECK_OR_FALSE(_sub->fusedActivationFunction() == luci::FusedActFunc::NONE);
+  CHECK_OR_FALSE(_mul_alpha->fusedActivationFunction() == luci::FusedActFunc::NONE);
+  CHECK_OR_FALSE(_mul_half->fusedActivationFunction() == luci::FusedActFunc::NONE);
+  CHECK_OR_FALSE(_add_ofm->fusedActivationFunction() == luci::FusedActFunc::NONE);
+
+  // TODO support other types?
+  // check if _const_half is really FLOAT32 & 0.5
+  CHECK_OR_FALSE(_const_half->dtype() == loco::DataType::FLOAT32);
+  CHECK_OR_FALSE(_const_half->size<loco::DataType::FLOAT32>() == 1);
+  CHECK_OR_FALSE(_const_half->at<loco::DataType::FLOAT32>(0) == 0.5);
+
+  // check _const_alpha condition
+  CHECK_OR_FALSE(_const_alpha->dtype() == loco::DataType::FLOAT32);
+  // TODO add more if needed
+
+  return true;
+}
+
+#undef CHECK_OR_FALSE
+
+class FusePRelu final
+{
+public:
+  FusePRelu(const PReluPattern &p) : _p(p) {}
+
+public:
+  void apply(void);
+
+private:
+  luci::CirclePRelu *create_prelu(loco::Graph *graph);
+
+private:
+  const PReluPattern &_p;
+};
+
+luci::CirclePRelu *FusePRelu::create_prelu(loco::Graph *graph)
+{
+  assert(graph);
+
+  auto prelu = graph->nodes()->create<luci::CirclePRelu>();
+  prelu->input(_p._ifm);
+  prelu->alpha(_p._const_alpha);
+  prelu->name(_p._add_ofm->name() + "_prelu");
+  return prelu;
+}
+
+void FusePRelu::apply()
+{
+  auto graph = _p._add_ofm->graph();
+
+  auto prelu = create_prelu(graph);
+
+  // set origin
+  std::vector<std::shared_ptr<luci::CircleNodeOrigin>> origin_vec{
+    luci::get_origin(_p._relu),      luci::get_origin(_p._abs),      luci::get_origin(_p._sub),
+    luci::get_origin(_p._mul_alpha), luci::get_origin(_p._mul_half), luci::get_origin(_p._add_ofm)};
+
+  luci::add_origin(prelu, luci::composite_origin(origin_vec));
+
+  replace(_p._add_ofm).with(prelu);
+}
+
+} // namespace
+
+namespace
+{
+
+bool fuse_prelu(luci::CircleAdd *add)
+{
+  assert(add);
+
+  PReluPattern pattern(add);
+  if (pattern.matched())
+  {
+    FusePRelu fuse(pattern);
+    fuse.apply();
+    return true;
+  }
+  return false;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FusePReluPass::run(loco::Graph *g)
+{
+  bool changed = false;
+
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto add = dynamic_cast<luci::CircleAdd *>(node);
+    if (not add)
+      continue;
+
+    if (fuse_prelu(add))
+      changed = true;
+  }
+
+  return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FusePReluPass.test.cpp b/compiler/luci/pass/src/FusePReluPass.test.cpp
new file mode 100644 (file)
index 0000000..209fe39
--- /dev/null
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FusePReluPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class PReluGraphlet
+{
+public:
+  PReluGraphlet() = default;
+
+  void init(loco::Graph *g)
+  {
+    _abs = g->nodes()->create<luci::CircleAbs>();
+    _sub = g->nodes()->create<luci::CircleSub>();
+    _mul_alpha = g->nodes()->create<luci::CircleMul>();
+    _mul_half = g->nodes()->create<luci::CircleMul>();
+    _relu = g->nodes()->create<luci::CircleRelu>();
+    _add = g->nodes()->create<luci::CircleAdd>();
+    _const_alpha = g->nodes()->create<luci::CircleConst>();
+    _const_half = g->nodes()->create<luci::CircleConst>();
+
+    _sub->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _mul_alpha->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _mul_half->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _add->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+    _abs->name("abs");
+    _sub->name("sub");
+    _mul_alpha->name("mul_alpha");
+    _mul_half->name("mul_half");
+    _relu->name("relu");
+    _add->name("add");
+    _const_alpha->name("const_alpha");
+    _const_half->name("const_half");
+
+    _const_alpha->dtype(loco::DataType::FLOAT32);
+    _const_alpha->size<loco::DataType::FLOAT32>(1);
+    _const_alpha->shape({1});
+    _const_alpha->at<loco::DataType::FLOAT32>(0) = 0.1;
+    _const_alpha->shape_status(luci::ShapeStatus::VALID);
+
+    _const_half->dtype(loco::DataType::FLOAT32);
+    _const_half->size<loco::DataType::FLOAT32>(1);
+    _const_half->shape({1});
+    _const_half->at<loco::DataType::FLOAT32>(0) = 0.5;
+    _const_half->shape_status(luci::ShapeStatus::VALID);
+  }
+
+  void invalid_half() { _const_half->at<loco::DataType::FLOAT32>(0) = 0.1; }
+  void invalid_act() { _add->fusedActivationFunction(luci::FusedActFunc::RELU); }
+
+protected:
+  luci::CircleAbs *_abs = nullptr;
+  luci::CircleSub *_sub = nullptr;
+  luci::CircleMul *_mul_alpha = nullptr;
+  luci::CircleMul *_mul_half = nullptr;
+  luci::CircleRelu *_relu = nullptr;
+  luci::CircleAdd *_add = nullptr;
+  luci::CircleConst *_const_alpha = nullptr;
+  luci::CircleConst *_const_half = nullptr;
+};
+
+class FusePReluTestGraph : public TestIOGraph, public PReluGraphlet
+{
+public:
+  FusePReluTestGraph() = default;
+
+  void init(void)
+  {
+    TestIOGraph::init({1}, {1});
+    PReluGraphlet::init(g());
+
+    _relu->features(input());
+    _abs->x(input());
+    _sub->x(input());
+    _sub->y(_abs);
+    _mul_alpha->x(_sub);
+    _mul_alpha->y(_const_alpha);
+    _mul_half->x(_mul_alpha);
+    _mul_half->y(_const_half);
+    _add->x(_relu);
+    _add->y(_mul_half);
+
+    output()->from(_add);
+  }
+};
+
+class FusePReluTestNegGraph : public TestIOGraph, public PReluGraphlet
+{
+public:
+  FusePReluTestNegGraph() = default;
+
+  void init(void)
+  {
+    TestIOGraph::init({1}, {1});
+    PReluGraphlet::init(g());
+
+    _relu->features(input());
+    _abs->x(input());
+    // NOTE x and y are incorrect
+    _sub->x(_abs);
+    _sub->y(input());
+    _mul_alpha->x(_sub);
+    _mul_alpha->y(_const_alpha);
+    _mul_half->x(_mul_alpha);
+    _mul_half->y(_const_half);
+    _add->x(_relu);
+    _add->y(_mul_half);
+
+    output()->from(_add);
+  }
+};
+
+} // namespace
+
+TEST(FusePReluPassTest, name)
+{
+  luci::FusePReluPass pass;
+  auto const name = pass.name();
+  ASSERT_NE(nullptr, name);
+}
+
+TEST(FusePReluPassTest, fuse)
+{
+  FusePReluTestGraph g;
+  luci::FusePReluPass pass;
+
+  g.init();
+
+  EXPECT_TRUE(pass.run(g.g()));
+}
+
+TEST(FusePReluPassTest, fuse_invalid_half_NEG)
+{
+  FusePReluTestNegGraph g;
+  luci::FusePReluPass pass;
+
+  g.init();
+  g.invalid_half();
+
+  EXPECT_FALSE(pass.run(g.g()));
+}
+
+TEST(FusePReluPassTest, fuse_invalid_act_NEG)
+{
+  FusePReluTestNegGraph g;
+  luci::FusePReluPass pass;
+
+  g.init();
+  g.invalid_act();
+
+  EXPECT_FALSE(pass.run(g.g()));
+}
+
+TEST(FusePReluPassTest, fuse_NEG)
+{
+  FusePReluTestNegGraph g;
+  luci::FusePReluPass pass;
+
+  g.init();
+
+  EXPECT_FALSE(pass.run(g.g()));
+}
index 06a4ae9..45d229a 100644 (file)
@@ -34,6 +34,8 @@ bool is_quantized(const CircleNode *node)
           node->dtype() == loco::DataType::S64);  // bias (int16 quant)
 }
 
+bool is_fp32(const CircleNode *node) { return node->dtype() == loco::DataType::FLOAT32; }
+
 uint8_t fp32_to_uint8_cast(float f)
 {
   assert(std::numeric_limits<uint8_t>::min() <= f);
@@ -124,8 +126,8 @@ void compute_sym_scale_zp(float min, float max, float &scaling_factor, int64_t &
                      : scale_factor_from_max_side;
 
   // protect scale from being very low to avoid overflow/underflow
-  if (scaling_factor < 1e-9)
-    scaling_factor = 1e-9;
+  if (scaling_factor < 1e-8)
+    scaling_factor = 1e-8;
 
   zp = 0;
   nudged_min = static_cast<float>(qmin_double * scaling_factor);
index 4d5316c..0720c98 100644 (file)
@@ -60,6 +60,9 @@ void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2);
 // Return true if the node is quantized
 bool is_quantized(const CircleNode *node);
 
+// Return true if the node is fp32
+bool is_fp32(const CircleNode *node);
+
 enum ActivationQType
 {
   MinMax,             // Quantize using recorded min/max
index 95251a8..214e61c 100644 (file)
@@ -44,12 +44,8 @@ void QuantizeActivation::visit(luci::CircleNode *node)
   LOGGER(l);
   INFO(l) << "QuantizeActivation visit node: " << node->name() << std::endl;
 
-  // Check if this is already quantized
-  if (is_quantized(node))
-    return;
-
-  // Check if this is bool type (bool type is not quantized)
-  if (node->dtype() == loco::DataType::BOOL)
+  // Check if node is fp32
+  if (not is_fp32(node))
     return;
 
   // Check if this is const (const activation is handled by QuantizeConstInputActivation)
@@ -185,7 +181,7 @@ void QuantizeConstInputActivation::visit(luci::CircleNode *node)
   {                                                             \
     auto input = node->INPUT_NAME();                            \
     auto const_node = dynamic_cast<luci::CircleConst *>(input); \
-    if (const_node && !is_quantized(const_node))                \
+    if (const_node && is_fp32(const_node))                      \
     {                                                           \
       auto new_const = luci::clone(const_node);                 \
       quant_const(new_const, _output_type);                     \
@@ -199,7 +195,7 @@ void QuantizeConstInputActivation::visit(luci::CircleNode *node)
   {                                                               \
     auto input1 = node->INPUT_NAME1();                            \
     auto const_node1 = dynamic_cast<luci::CircleConst *>(input1); \
-    if (const_node1 && !is_quantized(const_node1))                \
+    if (const_node1 && is_fp32(const_node1))                      \
     {                                                             \
       auto new_const1 = luci::clone(const_node1);                 \
       quant_const(new_const1, _output_type);                      \
@@ -207,7 +203,7 @@ void QuantizeConstInputActivation::visit(luci::CircleNode *node)
     }                                                             \
     auto input2 = node->INPUT_NAME2();                            \
     auto const_node2 = dynamic_cast<luci::CircleConst *>(input2); \
-    if (const_node2 && !is_quantized(const_node2))                \
+    if (const_node2 && is_fp32(const_node2))                      \
     {                                                             \
       auto new_const2 = luci::clone(const_node2);                 \
       quant_const(new_const2, _output_type);                      \
@@ -216,6 +212,7 @@ void QuantizeConstInputActivation::visit(luci::CircleNode *node)
   }
 
 // Ops that receive a single activation as an input
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleAbs, x)
 QUANTIZE_SINGLE_CONST_INPUT(luci::CircleArgMax, input)
 QUANTIZE_SINGLE_CONST_INPUT(luci::CircleArgMin, input)
 QUANTIZE_SINGLE_CONST_INPUT(luci::CircleBatchToSpaceND, input)
@@ -278,7 +275,7 @@ void QuantizeConstInputActivation::visit(luci::CircleAddN *node)
   {
     auto input_node = node->inputs(i);
     auto const_node = dynamic_cast<luci::CircleConst *>(input_node);
-    if (const_node && !is_quantized(const_node))
+    if (const_node && is_fp32(const_node))
     {
       auto new_const = luci::clone(const_node);
       quant_const(new_const, _output_type);
index fc32d1c..c6c991a 100644 (file)
@@ -102,6 +102,7 @@ private:
   void visit(luci::CircleNode *node);
 
   // Ops that receive a single activation as an input
+  void visit(luci::CircleAbs *node);
   void visit(luci::CircleArgMax *node);
   void visit(luci::CircleArgMin *node);
   void visit(luci::CircleBatchToSpaceND *node);
index 500ae12..29cdaff 100644 (file)
@@ -90,6 +90,118 @@ void asym_wquant_per_channel(CircleConst *node, std::vector<float> &min,
   }
 }
 
+// TODO Reduce duplicate code with QuantizeDequantizeWeights
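+// Symmetric per-channel quantization of a FLOAT32 const tensor into the int16
+// range [-32767, 32767], using scales derived from the given per-channel min/max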
+void sym_wquant_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max,
+                            std::vector<float> &scaling_factor, std::vector<int64_t> &zp,
+                            std::vector<float> &nudged_min, std::vector<float> &nudged_max,
+                            int32_t &channel_dim_index)
+{
+  assert(node->dtype() == loco::DataType::FLOAT32);
+  const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
+  const int32_t kMinScale = -kMaxScale;
+
+  uint32_t size = node->size<loco::DataType::FLOAT32>();
+  std::vector<int32_t> quantized_values(size);
+
+  for (size_t i = 0; i < min.size(); ++i)
+  {
+    compute_sym_scale_zp(min[i], max[i], scaling_factor[i], zp[i], nudged_min[i], nudged_max[i]);
+  }
+
+  auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+    int channel_idx = indices[channel_dim_index];
+    const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+    auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+    data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
+    data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
+    quantized_values[cal_offset(dimension, indices)] =
+      static_cast<int32_t>(std::round(data * scaling_factor_inv));
+  };
+
+  iterate_per_channel(node, channel_dim_index, quantize);
+
+  node->dtype(loco::DataType::S16);      // change the type of tensor
+  node->size<loco::DataType::S16>(size); // resize tensor
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    node->at<loco::DataType::S16>(i) =
+      std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+  }
+}
+
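+// Compute per-channel min/max of a FLOAT32 const tensor along its channel dimension
+// (throws if the channel dimension cannot be determined)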
+void cal_minmax_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max,
+                            int32_t &channel_dim_index)
+{
+  loco::TensorShape dimension;
+  dimension.rank(4);
+
+  if (!get_channel_dim_index(node, dimension, channel_dim_index))
+  {
+    throw std::runtime_error("Failed to find channel index in " + node->name());
+  }
+  auto size = dimension.dim(channel_dim_index).value();
+
+  std::vector<bool> has_min_max_value(size, false);
+  min.resize(size);
+  max.resize(size);
+
+  auto cal_minmax = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+    int channel_idx = indices[channel_dim_index];
+    auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+    if (has_min_max_value[channel_idx])
+    {
+      min[channel_idx] = data < min[channel_idx] ? data : min[channel_idx];
+      max[channel_idx] = data > max[channel_idx] ? data : max[channel_idx];
+    }
+    else
+    {
+      min[channel_idx] = data;
+      max[channel_idx] = data;
+      has_min_max_value[channel_idx] = true;
+    }
+  };
+
+  iterate_per_channel(node, channel_dim_index, cal_minmax);
+}
+
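+// Asymmetric per-channel quantization of a FLOAT32 const tensor into the uint8
+// range [0, 255], using scales/zero-points derived from the given per-channel min/max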
+void asymmetric_wquant_per_channel(CircleConst *node, std::vector<float> &min,
+                                   std::vector<float> &max, std::vector<float> &scaling_factor,
+                                   std::vector<int64_t> &zp, std::vector<float> &nudged_min,
+                                   std::vector<float> &nudged_max, int32_t &channel_dim_index)
+{
+  assert(node->dtype() == loco::DataType::FLOAT32);
+
+  const int32_t kMinScale = 0;
+  const int32_t kMaxScale = 255;
+
+  uint32_t size = node->size<loco::DataType::FLOAT32>();
+  std::vector<int32_t> quantized_values(size);
+
+  for (size_t i = 0; i < min.size(); ++i)
+  {
+    compute_asym_scale_zp(min[i], max[i], scaling_factor[i], zp[i], nudged_min[i], nudged_max[i]);
+  }
+
+  auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+    int channel_idx = indices[channel_dim_index];
+    const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+    auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+    data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
+    data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
+    quantized_values[cal_offset(dimension, indices)] =
+      static_cast<int32_t>(std::round((data - nudged_min[channel_idx]) * scaling_factor_inv));
+  };
+
+  iterate_per_channel(node, channel_dim_index, quantize);
+
+  node->dtype(loco::DataType::U8);      // change the type of tensor
+  node->size<loco::DataType::U8>(size); // resize tensor
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+  }
+}
+
 void sym_wquant_per_channel(CircleConst *node, std::vector<float> &scaling_factor,
                             int32_t &channel_dim_index)
 {
@@ -250,7 +362,37 @@ void QuantizeWeights::quantize_weights(luci::CircleConst *weights)
     auto quantparam = weights->quantparam();
     if (quantparam == nullptr)
     {
-      assert(false && "quantparam is nullptr");
+      // Find min/max on the fly
+      // NOTE This is for the case when QuantizeDequantizeWeights is skipped
+      // TODO Reduce duplicate code
+      std::vector<float> min;
+      std::vector<float> max;
+      int32_t channel_dim_index = 0;
+
+      cal_minmax_per_channel(weights, min, max, channel_dim_index);
+
+      std::vector<float> nudged_min(min.size());
+      std::vector<float> nudged_max(min.size());
+      std::vector<float> scaling_factor(min.size());
+      std::vector<int64_t> zp(min.size());
+
+      if (output_type == loco::DataType::U8)
+      {
+        asymmetric_wquant_per_channel(weights, min, max, scaling_factor, zp, nudged_min, nudged_max,
+                                      channel_dim_index);
+      }
+      else
+      {
+        sym_wquant_per_channel(weights, min, max, scaling_factor, zp, nudged_min, nudged_max,
+                               channel_dim_index);
+      }
+
+      auto quantparam = std::make_unique<CircleQuantParam>();
+      quantparam->scale = scaling_factor;
+      quantparam->zerop = zp;
+      quantparam->quantized_dimension = channel_dim_index;
+      weights->quantparam(std::move(quantparam));
+
       return;
     }
 
@@ -273,8 +415,35 @@ void QuantizeWeights::quantize_weights(luci::CircleConst *weights)
   // Find min/max per layer-wise
   else
   {
-    // Quantize using recorded quantparam
     auto quantparam = weights->quantparam();
+    if (quantparam == nullptr)
+    {
+      // Find min/max on the fly
+      // NOTE This is for the case when QuantizeDequantizeWeights is skipped
+      // TODO Reduce duplicate code
+      float min = std::numeric_limits<float>::max();
+      float max = std::numeric_limits<float>::lowest();
+      for (uint32_t i = 0; i < weights->size<loco::DataType::FLOAT32>(); i++)
+      {
+        auto data = weights->at<loco::DataType::FLOAT32>(i);
+        min = data < min ? data : min;
+        max = data > max ? data : max;
+      }
+      float scaling_factor{0};
+      int64_t zp{0};
+      float nudged_min{0};
+      float nudged_max{0};
+
+      asymmetric_wquant_with_minmax_per_layer(weights, min, max, scaling_factor, zp, nudged_min,
+                                              nudged_max);
+      auto quantparam = std::make_unique<CircleQuantParam>();
+      quantparam->scale.push_back(scaling_factor);
+      quantparam->zerop.push_back(zp);
+      weights->quantparam(std::move(quantparam));
+      return;
+    }
+
+    // Quantize using recorded quantparam
     assert(quantparam != nullptr);
     assert(quantparam->min.size() == 1);   // only support layer-wise quant
     assert(quantparam->scale.size() == 1); // only support layer-wise quant
index 0051445..c68e067 100644 (file)
@@ -32,8 +32,6 @@
 #include <luci/Log.h>
 #include <logo/Phase.h>
 
-#include <oops/UserExn.h>
-
 #include <iostream>
 #include <cmath>
 
@@ -154,8 +152,8 @@ namespace
  * 2. After output feature map
  *
  * For example, if default_dtype = U8 and op_dtype = S16,
- * 1. Quantize Op for U8->S16 is inserted before ifm
- * 2. Quantize Op for S16->U8 is inserted after ofm
+ * 1. Quantize (U8->S16) is inserted before ifm
+ * 2. Quantize (S16->U8) is inserted after ofm
  *
  * Why not insert Quantize Op for const ifm?
  * We quantize const tensor at once to preserve precision.
@@ -181,6 +179,10 @@ private:
     if (input->opcode() == luci::CircleOpcode::CIRCLECONST)
       return nullptr;
 
+    // input is not quantizable (ex: index)
+    if (input->quantparam() == nullptr)
+      return nullptr;
+
     auto input_quant = create_quantize_op(input, _op_dtype);
     input_quant->input(input);
     auto origin_node = loco::must_cast<luci::CircleNode *>(origin);
@@ -192,6 +194,11 @@ private:
   {
     auto output = loco::must_cast<luci::CircleNode *>(node);
     assert(output->opcode() != luci::CircleOpcode::CIRCLECONST); // FIX_CALLER_UNLESS
+
+    // output is not quantizable (ex: index)
+    if (output->quantparam() == nullptr)
+      return;
+
     auto output_quant = create_quantize_op(output, _default_dtype);
 
     luci::add_origin(output_quant, luci::get_origin(output));
@@ -253,6 +260,7 @@ private:
   void visit(luci::CircleUnpackOut *) {}
 
   // Ops that receive a single activation as an input
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleAbs, x)
   INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleAveragePool2D, value)
   INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleBatchToSpaceND, input)
   INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleConv2D, input)
@@ -365,10 +373,20 @@ private:
 void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const
 {
   auto inputs = g->inputs();
-  for (auto node : loco::input_nodes(g))
+
+  assert(inputs);                                     // FIX_CALLER_UNLESS
+  assert(inputs->size() == _ctx->input_types.size()); // FIX_CALLER_UNLESS
+
+  // NOTE loco::input_nodes returns input nodes following the order of InputIndex
+  auto input_nodes = loco::input_nodes(g);
+  for (uint32_t i = 0; i < input_nodes.size(); i++)
   {
-    auto input = loco::must_cast<luci::CircleInput *>(node);
-    if (input->dtype() == _ctx->input_type)
+    auto input = loco::must_cast<luci::CircleInput *>(input_nodes[i]);
+    assert(i == input->index()); // Fix input_type logic
+
+    const auto user_given_dtype = _ctx->input_types[i];
+
+    if (input->dtype() == user_given_dtype)
       continue;
 
     // Bool type is not quantizable
@@ -394,7 +412,7 @@ void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const
 
     // Update qparam of input
     // This step is skipped if input_type is float32
-    if (_ctx->input_type != loco::DataType::FLOAT32)
+    if (user_given_dtype != loco::DataType::FLOAT32)
     {
       auto quantparam = input->quantparam();
       assert(quantparam);
@@ -408,13 +426,13 @@ void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const
       float nudged_min{0};
       float nudged_max{0};
 
-      if (_ctx->input_type == loco::DataType::U8)
+      if (user_given_dtype == loco::DataType::U8)
       {
         compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
       }
       else
       {
-        assert(_ctx->input_type == loco::DataType::S16);
+        assert(user_given_dtype == loco::DataType::S16);
         compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
       }
       input->quantparam()->scale[0] = scaling_factor;
@@ -422,20 +440,29 @@ void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const
     }
 
     // Update dtype of input
-    input->dtype(_ctx->input_type);
+    input->dtype(user_given_dtype);
 
     auto graph_input = inputs->at(input->index());
-    graph_input->dtype(_ctx->input_type);
+    graph_input->dtype(user_given_dtype);
   }
 }
 
 void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const
 {
   auto outputs = g->outputs();
-  for (auto node : loco::output_nodes(g))
+  assert(outputs);                                      // FIX_CALLER_UNLESS
+  assert(outputs->size() == _ctx->output_types.size()); // Fix CircleQuantizer unless
+
+  // NOTE loco::output_nodes returns output nodes following the order of OutputIndex
+  auto output_nodes = loco::output_nodes(g);
+  for (uint32_t i = 0; i < output_nodes.size(); i++)
   {
-    auto output = loco::must_cast<luci::CircleOutput *>(node);
-    if (output->dtype() == _ctx->output_type)
+    auto output = loco::must_cast<luci::CircleOutput *>(output_nodes[i]);
+    assert(i == output->index()); // Fix output_type logic
+
+    const auto user_given_dtype = _ctx->output_types[i];
+
+    if (output->dtype() == user_given_dtype)
       continue;
 
     // Bool type is not quantizable
@@ -444,12 +471,12 @@ void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const
 
     auto from = loco::must_cast<luci::CircleNode *>(output->from());
 
-    // The last Op is not quantizable Op (ex: ArgMax)
+    // The last Op is not quantizable (ex: ArgMax)
     if (not from->quantparam())
       continue;
 
     // Insert Dequantize Op for float32 output_type
-    if (_ctx->output_type == loco::DataType::FLOAT32)
+    if (user_given_dtype == loco::DataType::FLOAT32)
     {
       auto dequant_op = create_dequantize(from);
       loco::replace(from).with(dequant_op);
@@ -458,7 +485,7 @@ void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const
     else
     {
       // Insert Quantize Op for non-float32 output_type
-      auto quant_op = create_quantize_op(from, _ctx->output_type);
+      auto quant_op = create_quantize_op(from, user_given_dtype);
       loco::replace(from).with(quant_op);
       quant_op->input(from);
 
@@ -467,10 +494,10 @@ void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const
     }
 
     // Update dtype of output
-    output->dtype(_ctx->output_type);
+    output->dtype(user_given_dtype);
 
     auto graph_output = outputs->at(output->index());
-    graph_output->dtype(_ctx->output_type);
+    graph_output->dtype(user_given_dtype);
   }
 }
 
@@ -493,9 +520,9 @@ void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const
  * Weights is quantized using min/max of its value
  *
  * Bias is quantized using input scale (s_i) and weights scale (s_w)
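  *   (i.e. scale_bias = s_i * s_w, so the stored integer bias approximates bias / (s_i * s_w))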
- * - Activation and weights should be quantized earlier than bias
+ * - Therefore, activation and weights should be quantized earlier than bias
  *
- * Quantization Steps
+ * Overall Quantization Steps
  * 1. Quantize Activation
  *   - Quantize using recorded min/max (QuantizeActivation)
  *   - Insert Quantize Ops for mixed-precision quantization (InsertQuantizeOp)
@@ -550,7 +577,10 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g)
   };
 
   // Quantize activation
-  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  // Why all_nodes?
+  // Models can have inactive (unused) inputs.
+  // We do not reject such models, but quantize them too
+  for (auto node : loco::all_nodes(g))
   {
     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
     QuantizeActivation qa(_ctx->input_model_dtype, quantize_dtype(circle_node));
index d5fa21f..49c2d46 100644 (file)
@@ -53,8 +53,14 @@ public:
 
 TEST(QuantizeWithMinMaxPassTest, name)
 {
-  luci::QuantizeWithMinMaxPass pass(loco::DataType::FLOAT32, loco::DataType::U8,
-                                    luci::QuantizationGranularity::LayerWise);
+  auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
+  {
+    ctx->input_model_dtype = loco::DataType::FLOAT32;
+    ctx->output_model_dtype = loco::DataType::U8;
+    ctx->granularity = luci::QuantizationGranularity::LayerWise;
+  }
+
+  luci::QuantizeWithMinMaxPass pass(std::move(ctx));
   auto const name = pass.name();
   ASSERT_NE(nullptr, name);
 }
@@ -65,8 +71,14 @@ TEST(QuantizeWithMinMaxPassTest, int_concat)
 {
   SimpleConcatGraph g(loco::DataType::S32);
 
-  luci::QuantizeWithMinMaxPass qwmm(loco::DataType::FLOAT32, loco::DataType::U8,
-                                    luci::QuantizationGranularity::LayerWise);
+  auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
+  {
+    ctx->input_model_dtype = loco::DataType::FLOAT32;
+    ctx->output_model_dtype = loco::DataType::U8;
+    ctx->granularity = luci::QuantizationGranularity::LayerWise;
+  }
+
+  luci::QuantizeWithMinMaxPass qwmm(std::move(ctx));
 
   qwmm.run(&g.g);
 
@@ -74,3 +86,22 @@ TEST(QuantizeWithMinMaxPassTest, int_concat)
   EXPECT_EQ(nullptr, g.input_1->quantparam());
   EXPECT_EQ(nullptr, g.input_2->quantparam());
 }
+
+TEST(QuantizeWithMinMaxPassTest, inactive_input)
+{
+  SimpleConcatGraph g(loco::DataType::FLOAT32);
+
+  // Unused input
+  g.g.nodes()->create<luci::CircleInput>();
+
+  auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
+  {
+    ctx->input_model_dtype = loco::DataType::FLOAT32;
+    ctx->output_model_dtype = loco::DataType::U8;
+    ctx->granularity = luci::QuantizationGranularity::LayerWise;
+  }
+
+  luci::QuantizeWithMinMaxPass qwmm(std::move(ctx));
+
+  EXPECT_NO_THROW(qwmm.run(&g.g));
+}
index 7409a51..d9bea43 100644 (file)
@@ -38,26 +38,13 @@ public:
   {
     loco::DataType output_model_dtype = loco::DataType::Unknown;
     QuantizationGranularity granularity = QuantizationGranularity::ChannelWise;
-    loco::DataType input_type = loco::DataType::Unknown;
-    loco::DataType output_type = loco::DataType::Unknown;
+    std::vector<loco::DataType> input_types;
+    std::vector<loco::DataType> output_types;
     bool TF_style_maxpool = false;
     std::vector<LayerInfo> layers_info;
   };
 
 public:
-  QuantizedModelVerifier(loco::DataType quantized_dtype, QuantizationGranularity granularity)
-  {
-    _ctx = std::make_unique<Context>();
-    {
-      _ctx->output_model_dtype = quantized_dtype;
-      _ctx->granularity = granularity;
-      _ctx->input_type = quantized_dtype;
-      _ctx->output_type = quantized_dtype;
-      _ctx->TF_style_maxpool = false;
-    }
-  }
-
-public:
   QuantizedModelVerifier(std::unique_ptr<Context> &&ctx) : _ctx{std::move(ctx)}
   {
     // DO NOTHING
index 21b4fe1..05ec317 100644 (file)
@@ -18,7 +18,9 @@
 
 #include "luci/Pass/QuantizeWithMinMaxPass.h"
 #include "luci/Pass/QuantizationParameters.h"
+#include "luci/Pass/CircleTypeInferencePass.h"
 
+#include <logo/Phase.h>
 #include <luci/test/TestIOGraph.h>
 
 #include <gtest/gtest.h>
@@ -104,12 +106,56 @@ void insert_scale_zp(luci::CircleNode *node, float scale, int64_t zp)
   qparam->zerop.push_back(zp);
 }
 
+void run_phase(loco::Graph *g, Type quantized_dtype, Granularity granularity)
+{
+  logo::Phase phase;
+
+  // Default passes.
+  phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+
+  auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
+  {
+    ctx->input_model_dtype = loco::DataType::FLOAT32;
+    ctx->output_model_dtype = quantized_dtype;
+    ctx->granularity = granularity;
+    // Test graph has only one input/output
+    ctx->input_types = {quantized_dtype};
+    ctx->output_types = {quantized_dtype};
+  }
+
+  phase.emplace_back(std::make_unique<luci::QuantizeWithMinMaxPass>(std::move(ctx)));
+
+  logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
+  phase_runner.run(phase);
+}
+
+void run_phase(loco::Graph *g, std::unique_ptr<luci::QuantizeWithMinMaxPass::Context> &&ctx)
+{
+  logo::Phase phase;
+
+  // Default passes.
+  phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+
+  phase.emplace_back(std::make_unique<luci::QuantizeWithMinMaxPass>(std::move(ctx)));
+
+  logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
+  phase_runner.run(phase);
+}
+
 void quantize_and_verify(loco::Graph *g, Type quantized_dtype, Granularity granularity)
 {
-  luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
-  pass.run(g);
+  run_phase(g, quantized_dtype, granularity);
 
-  luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
+  auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();
+  {
+    ctx->output_model_dtype = quantized_dtype;
+    ctx->granularity = granularity;
+    // Test graph has only one input/output
+    ctx->input_types = {quantized_dtype};
+    ctx->output_types = {quantized_dtype};
+  }
+
+  luci::QuantizedModelVerifier verifier(std::move(ctx));
   verifier.verify(g);
 }
 
@@ -132,14 +178,14 @@ void quantize_and_verify_with_layer_info(loco::Graph *g, Type quantized_dtype,
       ctx->input_model_dtype = Type::FLOAT32;
       ctx->output_model_dtype = quantized_dtype;
       ctx->granularity = granularity;
-      ctx->input_type = quantized_dtype;
-      ctx->output_type = quantized_dtype;
+      // Test graph has only one input/output
+      ctx->input_types = {quantized_dtype};
+      ctx->output_types = {quantized_dtype};
       ctx->TF_style_maxpool = false;
       ctx->layers_info.push_back(info);
     }
 
-    luci::QuantizeWithMinMaxPass pass(std::move(ctx));
-    pass.run(g);
+    run_phase(g, std::move(ctx));
   }
 
   // Do verification
@@ -148,8 +194,8 @@ void quantize_and_verify_with_layer_info(loco::Graph *g, Type quantized_dtype,
     {
       ctx->output_model_dtype = quantized_dtype;
       ctx->granularity = granularity;
-      ctx->input_type = quantized_dtype;
-      ctx->output_type = quantized_dtype;
+      ctx->input_types = {quantized_dtype};
+      ctx->output_types = {quantized_dtype};
       ctx->TF_style_maxpool = false;
       ctx->layers_info.push_back(info);
     }
@@ -164,13 +210,21 @@ void quantize_and_verify_with_layer_info(loco::Graph *g, Type quantized_dtype,
 void quantize_and_verify_with_wrong_type(luci::test::TestIOGraph *g, Type quantized_dtype,
                                          Granularity granularity, Type wrong_dtype)
 {
-  luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
-  pass.run(g->g());
+  run_phase(g->g(), quantized_dtype, granularity);
 
   auto node = loco::must_cast<luci::CircleNode *>(g->output()->from());
   node->dtype(wrong_dtype);
 
-  luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
+  auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();
+  {
+    ctx->output_model_dtype = quantized_dtype;
+    ctx->granularity = granularity;
+    // Test graph has only one input/output
+    ctx->input_types = {quantized_dtype};
+    ctx->output_types = {quantized_dtype};
+  }
+
+  luci::QuantizedModelVerifier verifier(std::move(ctx));
   verifier.verify(g->g());
 }
 
@@ -179,13 +233,21 @@ void quantize_and_verify_with_wrong_type(luci::test::TestIOGraph *g, Type quanti
 void quantize_and_verify_with_wrong_granularity(luci::test::TestIOGraph *g, Type quantized_dtype,
                                                 Granularity granularity)
 {
-  luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
-  pass.run(g->g());
+  run_phase(g->g(), quantized_dtype, granularity);
 
   auto node = loco::must_cast<luci::CircleNode *>(g->output()->from());
   insert_scale_zp(node, 1.0, 1);
 
-  luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
+  auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();
+  {
+    ctx->output_model_dtype = quantized_dtype;
+    ctx->granularity = granularity;
+    // Test graph has only one input/output
+    ctx->input_types = {quantized_dtype};
+    ctx->output_types = {quantized_dtype};
+  }
+
+  luci::QuantizedModelVerifier verifier(std::move(ctx));
   verifier.verify(g->g());
 }
 
@@ -238,6 +300,24 @@ public:
   virtual void init(void) = 0;
 };
 
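+// Test graph whose input/output dtype is parameterized; used by the integer-typed
+// (S32/S64) Add/Mul test graphs below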
+class TypedTestGraph : public luci::test::TestIOGraph
+{
+protected:
+  void init(Type T, const luci::test::ShapeU32 shape_in, const luci::test::ShapeU32 shape_out)
+  {
+    TestIOGraph::init(shape_in, shape_out);
+
+    input()->dtype(T);
+    output()->dtype(T);
+
+    g()->inputs()->at(0)->dtype(T);
+    g()->outputs()->at(0)->dtype(T);
+  }
+
+public:
+  virtual void init(void) = 0;
+};
+
 class InstanceNormTestGraph final : public SimpleTestGraph
 {
 public:
@@ -603,6 +683,9 @@ public:
     output()->from(_argmax);
 
     set_minmax_to_non_const(g(), -1, 1);
+
+    // Sync output dtype with graph's output dtype
+    g()->outputs()->at(0)->dtype(output()->dtype());
   }
 
 public:
@@ -904,6 +987,9 @@ public:
     output()->from(_op);
 
     set_minmax_to_non_const(g(), -1, 1);
+
+    // Sync output dtype with graph's output dtype
+    g()->outputs()->at(0)->dtype(output()->dtype());
   }
 
   loco::Node *x(void) const { return _op->x(); }
@@ -934,6 +1020,9 @@ public:
     output()->from(_op);
 
     set_minmax_to_non_const(g(), -1, 1);
+
+    // Sync output dtype with graph's output dtype
+    g()->outputs()->at(0)->dtype(output()->dtype());
   }
 
   loco::Node *x(void) const { return _op->x(); }
@@ -1218,6 +1307,33 @@ private:
   luci::CircleConst *_const = nullptr;
 };
 
+template <Type T> class IntMulTestGraph final : public TypedTestGraph
+{
+public:
+  void init(void) override
+  {
+    TypedTestGraph::init(T, {32}, {32});
+
+    _const = create_dummy_const<T>(g(), {32});
+    _mul = g()->nodes()->create<luci::CircleMul>();
+    {
+      _mul->x(input());
+      _mul->y(_const);
+      _mul->fusedActivationFunction(luci::FusedActFunc::NONE);
+      _mul->name("test");
+      _mul->dtype(T);
+    }
+    output()->from(_mul);
+  }
+
+  loco::Node *x() { return _mul->x(); }
+  loco::Node *y() { return _mul->y(); }
+
+private:
+  luci::CircleMul *_mul = nullptr;
+  luci::CircleConst *_const = nullptr;
+};
+
 class AddTestGraph final : public SimpleTestGraph
 {
 public:
@@ -1246,6 +1362,33 @@ private:
   luci::CircleConst *_const = nullptr;
 };
 
+template <Type T> class IntAddTestGraph final : public TypedTestGraph
+{
+public:
+  void init(void) override
+  {
+    TypedTestGraph::init(T, {32}, {32});
+
+    _const = create_dummy_const<T>(g(), {32});
+    _add = g()->nodes()->create<luci::CircleAdd>();
+    {
+      _add->x(input());
+      _add->y(_const);
+      _add->fusedActivationFunction(luci::FusedActFunc::NONE);
+      _add->name("test");
+      _add->dtype(T);
+    }
+    output()->from(_add);
+  }
+
+  loco::Node *x() { return _add->x(); }
+  loco::Node *y() { return _add->y(); }
+
+private:
+  luci::CircleAdd *_add = nullptr;
+  luci::CircleConst *_const = nullptr;
+};
+
 } // namespace
 
 // Quantize and verify with given configurations
@@ -1286,34 +1429,46 @@ private:
 
 // Quantize and verify with wrong type
 // Users can specify the test target
-#define TEST_WITH_WRONG_TYPE_TARGET(graph, type, granularity, wrong_dtype, target) \
-  do                                                                               \
-  {                                                                                \
-    graph g;                                                                       \
-    g.init();                                                                      \
-    auto node = loco::must_cast<luci::CircleNode *>(target);                       \
-    luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, type, granularity);           \
-    pass.run(g.g());                                                               \
-    auto after_node = loco::must_cast<luci::CircleNode *>(target);                 \
-    after_node->dtype(wrong_dtype);                                                \
-    luci::QuantizedModelVerifier verifier(type, granularity);                      \
-    EXPECT_ANY_THROW(verifier.verify(g.g()));                                      \
+#define TEST_WITH_WRONG_TYPE_TARGET(graph, type, granularity_, wrong_dtype, target) \
+  do                                                                                \
+  {                                                                                 \
+    graph g;                                                                        \
+    g.init();                                                                       \
+    auto node = loco::must_cast<luci::CircleNode *>(target);                        \
+    run_phase(g.g(), type, granularity_);                                           \
+    auto after_node = loco::must_cast<luci::CircleNode *>(target);                  \
+    after_node->dtype(wrong_dtype);                                                 \
+    auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();           \
+    {                                                                               \
+      ctx->output_model_dtype = type;                                               \
+      ctx->granularity = granularity_;                                              \
+      ctx->input_types = {type};                                                    \
+      ctx->output_types = {type};                                                   \
+    }                                                                               \
+    luci::QuantizedModelVerifier verifier(std::move(ctx));                          \
+    EXPECT_ANY_THROW(verifier.verify(g.g()));                                       \
   } while (0)
 
 // Quantize and verify with wrong granularity
 // Users can specify the test target
-#define TEST_WITH_WRONG_GRANULARITY_TARGET(graph, type, granularity, target) \
-  do                                                                         \
-  {                                                                          \
-    graph g;                                                                 \
-    g.init();                                                                \
-    auto node = loco::must_cast<luci::CircleNode *>(target);                 \
-    luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, type, granularity);     \
-    pass.run(g.g());                                                         \
-    auto after_node = loco::must_cast<luci::CircleNode *>(target);           \
-    insert_scale_zp(after_node, 1.0, 1);                                     \
-    luci::QuantizedModelVerifier verifier(type, granularity);                \
-    EXPECT_ANY_THROW(verifier.verify(g.g()));                                \
+#define TEST_WITH_WRONG_GRANULARITY_TARGET(graph, type, granularity_, target) \
+  do                                                                          \
+  {                                                                           \
+    graph g;                                                                  \
+    g.init();                                                                 \
+    auto node = loco::must_cast<luci::CircleNode *>(target);                  \
+    run_phase(g.g(), type, granularity_);                                     \
+    auto after_node = loco::must_cast<luci::CircleNode *>(target);            \
+    insert_scale_zp(after_node, 1.0, 1);                                      \
+    auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();     \
+    {                                                                         \
+      ctx->output_model_dtype = type;                                         \
+      ctx->granularity = granularity_;                                        \
+      ctx->input_types = {type};                                              \
+      ctx->output_types = {type};                                             \
+    }                                                                         \
+    luci::QuantizedModelVerifier verifier(std::move(ctx));                    \
+    EXPECT_ANY_THROW(verifier.verify(g.g()));                                 \
   } while (0)
 
 // Test a local helper function
@@ -2512,6 +2667,29 @@ TEST(QuantizedModelVerifierTest, Add_wrong_granularity_NEG)
   SUCCEED();
 }
 
+TEST(QuantizedModelVerifierTest, Add_inttype)
+{
+  // Tests for S32
+  TEST_WITH_GRAPH(IntAddTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+  TEST_WITH_GRAPH(IntAddTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_GRAPH(IntAddTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(IntAddTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(IntAddTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(IntAddTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+  // Tests for S64
+  TEST_WITH_GRAPH(IntAddTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+  TEST_WITH_GRAPH(IntAddTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_GRAPH(IntAddTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(IntAddTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(IntAddTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(IntAddTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+
+  SUCCEED();
+}
+
 TEST(QuantizedModelVerifierTest, Mul)
 {
   TEST_WITH_GRAPH(MulTestGraph, Type::U8, Granularity::LayerWise);
@@ -2544,6 +2722,29 @@ TEST(QuantizedModelVerifierTest, Mul_wrong_granularity_NEG)
   SUCCEED();
 }
 
+TEST(QuantizedModelVerifierTest, Mul_inttype)
+{
+  // Tests for S32
+  TEST_WITH_GRAPH(IntMulTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+  TEST_WITH_GRAPH(IntMulTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_GRAPH(IntMulTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(IntMulTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(IntMulTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(IntMulTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+  // Tests for S64
+  TEST_WITH_GRAPH(IntMulTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+  TEST_WITH_GRAPH(IntMulTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_GRAPH(IntMulTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(IntMulTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(IntMulTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(IntMulTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+
+  SUCCEED();
+}
+
 // TODO Add following testcases
 //
 // CircleConv2D
diff --git a/compiler/luci/pass/src/RemoveDuplicateConstPass.cpp b/compiler/luci/pass/src/RemoveDuplicateConstPass.cpp
new file mode 100644 (file)
index 0000000..e50dda9
--- /dev/null
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveDuplicateConstPass.h"
+
+#include <luci/Log.h>
+
+namespace
+{
+
+bool compare_quant_params(luci::CircleConst *left, luci::CircleConst *right)
+{
+  const auto left_quant_param = left->quantparam();
+  const auto right_quant_param = right->quantparam();
+
+  if (left_quant_param == right_quant_param)
+    return true;
+
+  if (left_quant_param != nullptr and right_quant_param != nullptr)
+  {
+    if (left_quant_param->scale == right_quant_param->scale and
+        left_quant_param->quantized_dimension == right_quant_param->quantized_dimension and
+        left_quant_param->zerop == right_quant_param->zerop and
+        left_quant_param->min == right_quant_param->min and
+        left_quant_param->max == right_quant_param->max)
+    {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool compare_dim_values(luci::CircleConst *left, luci::CircleConst *right)
+{
+  const auto left_rank = left->rank();
+  const auto right_rank = right->rank();
+
+  if (left_rank != right_rank)
+    return false;
+
+  for (uint32_t i = 0; i < left_rank; ++i)
+  {
+    if (left->dim(i).value() != right->dim(i).value())
+      return false;
+  }
+
+  return true;
+}
+
+template <loco::DataType DT> bool is_equal_consts(luci::CircleConst *left, luci::CircleConst *right)
+{
+  if (not compare_quant_params(left, right))
+    return false;
+
+  if (not compare_dim_values(left, right))
+    return false;
+
+  for (uint32_t i = 0; i < left->size<DT>(); ++i)
+  {
+    if (left->at<DT>(i) != right->at<DT>(i))
+      return false;
+  }
+
+  return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool RemoveDuplicateConstPass::remove_duplicate_const()
+{
+  bool changed = false;
+
+  for (auto &cur_pair : _sum_to_const)
+  {
+    // nothing to deduplicate if there is only a single const
+    if (cur_pair.second.size() == 1)
+      continue;
+
+    for (auto reference_const : cur_pair.second)
+    {
+      if (reference_const == nullptr)
+        continue;
+
+      for (uint32_t i = 0; i < cur_pair.second.size(); ++i)
+      {
+        auto cur_const = cur_pair.second.at(i);
+        if (cur_const == nullptr or cur_const == reference_const)
+          continue;
+
+        if (cur_const->dtype() != reference_const->dtype())
+          continue;
+
+        bool is_equal = false;
+
+        switch (cur_const->dtype())
+        {
+          case loco::DataType::FLOAT32:
+            is_equal = is_equal_consts<loco::DataType::FLOAT32>(reference_const, cur_const);
+            break;
+          case loco::DataType::S32:
+            is_equal = is_equal_consts<loco::DataType::S32>(reference_const, cur_const);
+            break;
+          case loco::DataType::S16:
+            is_equal = is_equal_consts<loco::DataType::S16>(reference_const, cur_const);
+            break;
+          case loco::DataType::S8:
+            is_equal = is_equal_consts<loco::DataType::S8>(reference_const, cur_const);
+            break;
+          case loco::DataType::U8:
+            is_equal = is_equal_consts<loco::DataType::U8>(reference_const, cur_const);
+            break;
+          default:
+            continue;
+        }
+
+        if (not is_equal)
+          continue;
+
+        loco::replace(cur_const).with(reference_const);
+
+        // Exclude this const from further comparisons
+        cur_pair.second[i] = nullptr;
+
+        changed = true;
+      }
+    }
+  }
+
+  return changed;
+}
+
+template <loco::DataType DT>
+void RemoveDuplicateConstPass::add_to_map(luci::CircleConst *const_node)
+{
+  const auto const_size = const_node->size<DT>();
+  float sum = 0.0;
+
+  for (uint32_t i = 0; i < const_size; ++i)
+  {
+    sum += const_node->at<DT>(i);
+  }
+
+  if (_sum_to_const.find(sum) == _sum_to_const.end())
+  {
+    _sum_to_const[sum] = {const_node};
+  }
+  else
+  {
+    _sum_to_const.at(sum).push_back(const_node);
+  }
+}
+
+/**
+ * Remove duplicate Const nodes.
+ *
+ * BEFORE
+ *    [CircleNode]   [CircleConst]
+ *          |        /
+ *          |      /
+ *    [CircleNode]    [CircleConst]
+ *          |        /
+ *          |      /
+ *    [CircleNode]
+ *
+ * AFTER
+ *
+ *    [CircleNode]   [CircleConst]
+ *          |        /     /
+ *          |      /     /
+ *    [CircleNode]     /
+ *          |        /
+ *          |      /
+ *    [CircleNode]
+ *
+ */
+bool RemoveDuplicateConstPass::run(loco::Graph *g)
+{
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto const_node = dynamic_cast<luci::CircleConst *>(node);
+    if (const_node == nullptr)
+      continue;
+
+    switch (const_node->dtype())
+    {
+      case loco::DataType::FLOAT32:
+        add_to_map<loco::DataType::FLOAT32>(const_node);
+        break;
+      case loco::DataType::S32:
+        add_to_map<loco::DataType::S32>(const_node);
+        break;
+      case loco::DataType::S16:
+        add_to_map<loco::DataType::S16>(const_node);
+        break;
+      case loco::DataType::S8:
+        add_to_map<loco::DataType::S8>(const_node);
+        break;
+      case loco::DataType::U8:
+        add_to_map<loco::DataType::U8>(const_node);
+        break;
+      default:
+        continue;
+    }
+  }
+
+  return remove_duplicate_const();
+}
+
+} // namespace luci
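
Since run() reports whether any duplicate was folded in that invocation, callers typically re-run the pass until it reaches a fixed point, exactly as the test below does. A minimal driver sketch (loading and otherwise preparing the graph is assumed to happen elsewhere):

  #include "luci/Pass/RemoveDuplicateConstPass.h"

  void deduplicate_consts(loco::Graph *g)
  {
    luci::RemoveDuplicateConstPass pass;
    // run() returns true as long as it keeps replacing duplicate consts
    while (pass.run(g))
      ;
  }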
diff --git a/compiler/luci/pass/src/RemoveDuplicateConstPass.test.cpp b/compiler/luci/pass/src/RemoveDuplicateConstPass.test.cpp
new file mode 100644 (file)
index 0000000..5052a3e
--- /dev/null
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveDuplicateConstPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/test/TestIOGraph.h>
+#include <gtest/gtest.h>
+
+namespace
+{
+using namespace luci::test;
+
+class DuplicateConstsGraphlet
+{
+public:
+  DuplicateConstsGraphlet() = default;
+
+public:
+  void init(loco::Graph *g, bool is_duplicate)
+  {
+    _reshape_shape = g->nodes()->create<luci::CircleConst>();
+    _reshape_shape->rank(1);
+    _reshape_shape->dim(0).set(1);
+    _reshape_shape->shape_status(luci::ShapeStatus::VALID);
+    _reshape_shape->dtype(loco::DataType::S32);
+
+    _reshape_shape->size<loco::DataType::S32>(1);
+    _reshape_shape->at<loco::DataType::S32>(0) = 5;
+    _reshape_shape->name("reshape_shape_1");
+
+    _reshape_shape_duplicate = g->nodes()->create<luci::CircleConst>();
+    _reshape_shape_duplicate->rank(1);
+    _reshape_shape_duplicate->dim(0).set(1);
+    _reshape_shape_duplicate->shape_status(luci::ShapeStatus::VALID);
+    _reshape_shape_duplicate->dtype(loco::DataType::S32);
+    if (is_duplicate)
+    {
+      _reshape_shape_duplicate->size<loco::DataType::S32>(1);
+      _reshape_shape_duplicate->at<loco::DataType::S32>(0) = 5;
+    }
+    else
+    {
+      _reshape_shape_duplicate->size<loco::DataType::S32>(2);
+      _reshape_shape_duplicate->at<loco::DataType::S32>(0) = 1;
+      _reshape_shape_duplicate->at<loco::DataType::S32>(1) = 5;
+    }
+    _reshape_shape_duplicate->name("reshape_shape_2");
+
+    _reshape_f = g->nodes()->create<luci::CircleReshape>();
+    _reshape_f->newShape()->rank(1);
+    _reshape_f->newShape()->dim(0) = 5;
+    _reshape_f->name("reshape_f");
+
+    _reshape_s = g->nodes()->create<luci::CircleReshape>();
+    if (is_duplicate)
+    {
+      _reshape_s->newShape()->rank(1);
+      _reshape_s->newShape()->dim(0) = 5;
+    }
+    else
+    {
+      _reshape_s->newShape()->rank(2);
+      _reshape_s->newShape()->dim(0) = 1;
+      _reshape_s->newShape()->dim(1) = 5;
+    }
+    _reshape_s->name("reshape_s");
+  }
+
+protected:
+  luci::CircleReshape *_reshape_f = nullptr;
+  luci::CircleReshape *_reshape_s = nullptr;
+  luci::CircleConst *_reshape_shape = nullptr;
+  luci::CircleConst *_reshape_shape_duplicate = nullptr;
+};
+
+class DuplicateConstsGraph : public TestIOGraph, public DuplicateConstsGraphlet
+{
+public:
+  DuplicateConstsGraph() = default;
+
+public:
+  void init(const ShapeU32 in_shape, const ShapeU32 out_shape, bool is_duplicate)
+  {
+    TestIOGraph::init(in_shape, out_shape);
+
+    DuplicateConstsGraphlet::init(g(), is_duplicate);
+
+    // connect graph
+    _reshape_f->tensor(input());
+    _reshape_f->shape(_reshape_shape);
+
+    _reshape_s->tensor(_reshape_f);
+    _reshape_s->shape(_reshape_shape_duplicate);
+
+    output()->from(_reshape_s);
+  }
+};
+} // namespace
+
+TEST(RemoveDuplicateConstPass, name)
+{
+  luci::RemoveDuplicateConstPass pass;
+  auto const name = pass.name();
+  ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveDuplicateConstPass, remove_duplicate)
+{
+  DuplicateConstsGraph g;
+  g.init({1, 5}, {5}, true);
+
+  luci::RemoveDuplicateConstPass pass;
+  while (pass.run(g.g()))
+    ;
+
+  uint32_t const_num = 0;
+  for (auto node : loco::active_nodes(loco::output_nodes(g.g())))
+  {
+    auto target_node = dynamic_cast<luci::CircleConst *>(node);
+    if (target_node != nullptr)
+      const_num++;
+  }
+
+  ASSERT_EQ(const_num, 1);
+}
+
+TEST(RemoveDuplicateConstPass, remove_duplicate_NEG)
+{
+  DuplicateConstsGraph g;
+  g.init({1, 5}, {1, 5}, false);
+
+  luci::RemoveDuplicateConstPass pass;
+  while (pass.run(g.g()))
+    ;
+
+  uint32_t const_num = 0;
+  for (auto node : loco::active_nodes(loco::output_nodes(g.g())))
+  {
+    auto target_node = dynamic_cast<luci::CircleConst *>(node);
+    if (target_node != nullptr)
+      const_num++;
+  }
+
+  ASSERT_EQ(const_num, 2);
+}
index 741b709..07457c1 100644 (file)
@@ -64,6 +64,40 @@ luci::CircleNode *fromActivation(luci::CircleNode *inp, luci::FusedActFunc act)
   }
 }
 
+// Create CircleReshape where
+// - dtype is the same as node's
+// - shape is the same as node's
+// NOTE: The caller should set the input (tensor) of the returned Op.
+luci::CircleReshape *create_reshape(luci::CircleFullyConnected *node)
+{
+  assert(node); // FIX_CALLER_UNLESS
+
+  auto g = node->graph();
+
+  auto reshape = g->nodes()->create<luci::CircleReshape>();
+  reshape->name(node->name() + "/reshape");
+  reshape->dtype(node->dtype());
+  luci::add_origin(reshape, luci::get_origin(node));
+
+  auto shape_const = g->nodes()->create<luci::CircleConst>();
+  shape_const->dtype(loco::DataType::S32);
+  shape_const->rank(1);
+  shape_const->dim(0).set(node->rank());
+  shape_const->size<loco::DataType::S32>(node->rank());
+  for (uint32_t i = 0; i < node->rank(); i++)
+  {
+    assert(node->dim(i).known()); // FIX_CALLER_UNLESS
+    shape_const->at<loco::DataType::S32>(i) = node->dim(i).value();
+  }
+  shape_const->shape_status(luci::ShapeStatus::VALID);
+  shape_const->name(node->name() + "/shape");
+  luci::add_origin(shape_const, luci::get_origin(node));
+
+  reshape->shape(shape_const);
+
+  return reshape;
+}
+
 /**
  *  Replace Fully Connected with Batched MatMul
  *
@@ -79,19 +113,23 @@ luci::CircleNode *fromActivation(luci::CircleNode *inp, luci::FusedActFunc act)
  *
  *              [Node1]  [Node2]
  *                  \      /
- *               [BatchMatMul] [BiasValue]?
+ *               [BatchMatMul]
+ *                      |
+ *                 [Reshape]   [BiasValue]?
  *                        \       /
  *                          [Add]?
  *                            |
  *                       [Activation]?
  *
  * Nodes with "?" denote optional elements
+ * NOTE A Reshape Op is inserted to keep the original output shape of the FullyConnected Op.
+ * The Reshape can be redundant (input shape == output shape); such a redundant Reshape
+ * can be removed by RemoveUnnecessaryReshapePass.
  */
 bool replace_fc_with_matmul(luci::CircleFullyConnected *fc)
 {
   luci::CircleNode *x = nullptr;
   luci::CircleNode *y = nullptr;
-  luci::CircleNode *b = nullptr;
   luci::CircleTranspose *ty = nullptr;
   luci::CircleTranspose *tx = nullptr;
   bool adj_x = false;
@@ -122,10 +160,13 @@ bool replace_fc_with_matmul(luci::CircleFullyConnected *fc)
     x = loco::must_cast<luci::CircleNode *>(fc->input());
   }
 
-  b = loco::must_cast<luci::CircleNode *>(fc->bias());
+  if (x->dtype() != loco::DataType::FLOAT32 || y->dtype() != loco::DataType::FLOAT32)
+    return false;
 
-  if (x->dtype() != loco::DataType::FLOAT32 || y->dtype() != loco::DataType::FLOAT32 ||
-      b->dtype() != loco::DataType::FLOAT32)
+  auto bc = dynamic_cast<luci::CircleConst *>(fc->bias());
+  // NOTE bias can be empty, i.e. of CircleOutputExclude type
+  // NOTE we can only handle a FLOAT32 bias as of now
+  if (nullptr != bc && bc->dtype() != loco::DataType::FLOAT32)
     return false;
 
   auto name = fc->name();
@@ -141,6 +182,9 @@ bool replace_fc_with_matmul(luci::CircleFullyConnected *fc)
 
   luci::add_origin(matmul, luci::get_origin(fc));
 
+  auto reshape = create_reshape(fc);
+  reshape->tensor(matmul);
+
   auto all_zero = [](const luci::CircleConst *c) {
     bool ac = true;
     for (uint32_t i = 0; i < c->size<loco::DataType::FLOAT32>() && ac; i++)
@@ -150,12 +194,11 @@ bool replace_fc_with_matmul(luci::CircleFullyConnected *fc)
     return ac;
   };
 
-  auto bc = dynamic_cast<luci::CircleConst *>(b);
-  if ((nullptr != bc) && !all_zero(bc))
+  if (nullptr != bc && !all_zero(bc))
   {
     auto bias_add = fc->graph()->nodes()->create<luci::CircleAdd>();
-    bias_add->x(matmul);
-    bias_add->y(b);
+    bias_add->x(reshape);
+    bias_add->y(bc);
     bias_add->name(fc->name() + "/bias_add");
     bias_add->dtype(fc->dtype());
     add_origin(bias_add, get_origin(fc));
@@ -164,7 +207,8 @@ bool replace_fc_with_matmul(luci::CircleFullyConnected *fc)
   }
   else
   {
-    auto n = fromActivation(matmul, fc->fusedActivationFunction());
+    // NOTE bias doesn't exist or bias is all zero
+    auto n = fromActivation(reshape, fc->fusedActivationFunction());
     add_origin(n, luci::get_origin(fc));
     n->name(fc->name() + "fusedActivation");
     n->dtype(fc->dtype());
index 7606a61..93024f3 100644 (file)
@@ -159,8 +159,8 @@ TEST_F(ReplaceNonConstFCWithBatchMatMulPassTest, simple_test)
   auto ret = pass.run(g.g());
   EXPECT_EQ(true, ret);
 
-  auto mm = dynamic_cast<luci::CircleBatchMatMul *>(g.output()->from());
-  EXPECT_NE(nullptr, mm);
+  auto res = dynamic_cast<luci::CircleReshape *>(g.output()->from());
+  EXPECT_NE(nullptr, res);
 }
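
The updated expectation only checks the topmost node. If one also wants to confirm that the Reshape is fed by the newly created BatchMatMul, a follow-up check along these lines could be added (a sketch; it assumes CircleReshape::tensor() exposes the reshaped input node):

  auto res = dynamic_cast<luci::CircleReshape *>(g.output()->from());
  ASSERT_NE(nullptr, res);
  auto mm = dynamic_cast<luci::CircleBatchMatMul *>(res->tensor());
  EXPECT_NE(nullptr, mm);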
 
 TEST_F(ReplaceNonConstFCWithBatchMatMulPassTest, nonzero_bias_test)
index 1e8f681..f618827 100644 (file)
@@ -153,7 +153,6 @@ bool resolve_matmul(luci::CircleCustom *cop)
   }
 
   auto empty_bias = graph->nodes()->create<luci::CircleOutputExclude>();
-  empty_bias->dtype(loco::DataType::FLOAT32); // Needed for type inference
 
   auto fc_node = graph->nodes()->create<luci::CircleFullyConnected>();
   fc_node->input(lhs);
index f37f277..7c038d5 100644 (file)
@@ -23,6 +23,7 @@
 
 #include <loco.h>
 #include <oops/InternalExn.h>
+#include <limits> // std::numeric_limits
 
 #include <flatbuffers/flexbuffers.h>
 
index a650658..5a09e39 100644 (file)
@@ -20,6 +20,8 @@
 #include <luci/Profile/CircleNodeOrigin.h>
 #include <luci/Service/Nodes/CircleConst.h>
 
+#include <limits> // std::numeric_limits
+
 namespace
 {
 
diff --git a/compiler/luci/pass/src/UnrollUnidirectionalSequenceLSTMPass.cpp b/compiler/luci/pass/src/UnrollUnidirectionalSequenceLSTMPass.cpp
new file mode 100644 (file)
index 0000000..b73efaf
--- /dev/null
@@ -0,0 +1,672 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/UnrollUnidirectionalSequenceLSTMPass.h"
+
+#include "helpers/NodeFiller.h"
+#include "helpers/TypeMapper.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#include <string>
+#include <vector>
+
+/**
+ *  BEFORE
+ *        [CircleNode]
+ *              |
+ *   [UnidirectionalSequenceLSTM]
+ *              |
+ *        [CircleNode]
+ *
+ *  AFTER
+ *
+ *        [CircleNode]
+ *              |
+ *      [CircleTranspose]
+ *              |
+ *        [CircleUnpack]
+ *              |
+ *       [CircleUnpackOut]
+ *              |
+ *      (Unrolled sub network)
+ *              |
+ *        [CirclePack]
+ *              |                        |
+ *      [CircleTranspose]     [UnidirectionalSequenceLSTM]
+ *              |                        |
+ *        [CircleNode]
+ *
+ *  NOTE for timesteps = 1,
+ *       first [CircleTranspose] is not added and
+ *       last [CirclePack] + [CircleTranspose] is replaced with [CircleReshape]
+ *
+ *  First unrolled sub network is as follows
+ *    - [] and 'Circle' are omitted
+ *    - all FC has one or two Const for Weight/Bias
+ *
+ *            (input)
+ *              |
+ *              FC
+ *              |
+ *            Split
+ *    +---------+----------+----------+
+ *    |         |          |          |
+ *    |      Logistic   Logistic     Tanh
+ *    |  Const  |          |          |
+ *    |    |    |          |          |
+ *    |    +-- Mul         +-- Mul ---+
+ *    |         |               |
+ *    |         +---- Add ------+
+ *    |                |
+ *    |           +----+----+
+ *    |           |         |
+ *  Logistic     Tanh       |
+ *    |           |         |
+ *    +-- Mul ----+         |
+ *         |                |
+ *       (output)          (A)
+ *
+ *  and following unrolled sub networks are;
+ *
+ *   (prev-output) (input)
+ *        |          |
+ *        FC         FC
+ *        |          |
+ *        +--- Add --+
+ *   Const      |
+ *     |        |
+ *     +------ Add
+ *              |
+ *            Split
+ *              |
+ *    +---------+----------+----------+
+ * SplitOut SplitOut   SplitOut   SplitOut
+ *    |         |          |          |
+ *    |      Logistic   Logistic     Tanh
+ *    |  (A')   |          |          |
+ *    |   |     |          |          |
+ *    |   +--- Mul         +-- Mul ---+
+ *    |         |               |
+ *    |         +---- Add ------+
+ *    |                |
+ *    |           +----+----+
+ *    |           |         |
+ *  Logistic     Tanh       |
+ *    |           |         |
+ *    +-- Mul ----+         |
+ *         |                |
+ *      (output)          (next)
+ *
+ * where (A) and (A') are connected
+ *
+ */
+
+namespace
+{
+
+struct UnrollLSTM
+{
+  luci::CircleConst *transpose_perm(void);
+  luci::CircleTranspose *first_transpose(luci::CircleNode *input);
+  std::vector<luci::CircleUnpackOut *> input_unpacks(luci::CircleNode *input);
+  luci::CircleConst *merged_weights(luci::CircleConst *iw, luci::CircleConst *fw,
+                                    luci::CircleConst *cw, luci::CircleConst *ow);
+  luci::CircleFullyConnected *create_input_matmul(luci::CircleNode *input);
+  luci::CircleAdd *create_input_matmul(luci::CircleNode *input, luci::CircleMul *mul,
+                                       uint32_t step);
+  std::vector<luci::CircleSplitOut *> matmul_splits(luci::CircleNode *input, uint32_t step);
+  luci::CircleConst *forget_zero(void);
+  luci::CircleMul *forget_gate_cell(std::vector<luci::CircleSplitOut *> &splits,
+                                    luci::CircleNode *prev, uint32_t step,
+                                    luci::CircleNode **retadd);
+  luci::CircleReshape *last_reshape(luci::CircleNode *input);
+  luci::CircleTranspose *last_transpose(std::vector<luci::CircleMul *> &output_muls);
+
+  luci::CircleUnidirectionalSequenceLSTM *_lstm{nullptr};
+  loco::Graph::NodeContext *_nctx{nullptr};
+  std::string _name;
+  uint32_t _batch{0};
+  uint32_t _timesteps{0};
+  uint32_t _units{0}; // output space dim
+};
+
+luci::CircleConst *UnrollLSTM::transpose_perm(void)
+{
+  auto perm = _nctx->create<luci::CircleConst>();
+  perm->dtype(loco::DataType::S32);
+  perm->rank(1);
+  perm->dim(0) = 3;
+  perm->size<loco::DataType::S32>(3);
+  perm->at<loco::DataType::S32>(0) = 1;
+  perm->at<loco::DataType::S32>(1) = 0;
+  perm->at<loco::DataType::S32>(2) = 2;
+  perm->shape_status(luci::ShapeStatus::VALID);
+
+  return perm;
+}
+
+luci::CircleTranspose *UnrollLSTM::first_transpose(luci::CircleNode *input)
+{
+  assert(input != nullptr);
+
+  auto perm = transpose_perm();
+  perm->name(_name + "_perm1");
+  luci::add_origin(perm, luci::get_origin(_lstm));
+
+  auto transpose = _nctx->create<luci::CircleTranspose>();
+  transpose->a(input);
+  transpose->perm(perm);
+  transpose->name(_name + "_trans1");
+  luci::add_origin(transpose, luci::get_origin(_lstm));
+
+  return transpose;
+}
+
+std::vector<luci::CircleUnpackOut *> UnrollLSTM::input_unpacks(luci::CircleNode *input)
+{
+  assert(input != nullptr);
+
+  // NOTE unpack input can be LSTM or Transpose
+  auto unpack = _nctx->create<luci::CircleUnpack>();
+  unpack->num(_timesteps);
+  unpack->axis(0);
+  unpack->value(input);
+  unpack->name(_name + "_unpack");
+  luci::add_origin(unpack, luci::get_origin(_lstm));
+
+  std::vector<luci::CircleUnpackOut *> outs;
+  for (uint32_t idx = 0; idx < _timesteps; ++idx)
+  {
+    auto unpackout = _nctx->create<luci::CircleUnpackOut>();
+    unpackout->input(unpack);
+    unpackout->index(idx);
+    unpackout->name(_name + "_unpackout_" + std::to_string(idx));
+    luci::add_origin(unpackout, luci::get_origin(_lstm));
+    outs.push_back(unpackout);
+  }
+
+  return outs;
+}
+
+luci::CircleConst *UnrollLSTM::merged_weights(luci::CircleConst *iw, luci::CircleConst *fw,
+                                              luci::CircleConst *cw, luci::CircleConst *ow)
+{
+  assert(iw != nullptr);
+  assert(fw != nullptr);
+  assert(cw != nullptr);
+  assert(ow != nullptr);
+
+  auto iw_rank = iw->rank();
+  assert(iw_rank == fw->rank());
+  assert(iw_rank == cw->rank());
+  assert(iw_rank == ow->rank());
+
+  uint32_t ne_w = 1;
+  for (uint32_t i = 0; i < iw_rank; i++)
+    ne_w *= iw->dim(i).value();
+
+  assert(iw->dtype() == loco::DataType::FLOAT32);
+  assert(fw->dtype() == loco::DataType::FLOAT32);
+  assert(cw->dtype() == loco::DataType::FLOAT32);
+  assert(ow->dtype() == loco::DataType::FLOAT32);
+
+  // merged weights
+  auto mw = _nctx->create<luci::CircleConst>();
+  mw->dtype(iw->dtype());
+  mw->rank(iw_rank);
+  mw->dim(0) = 4u * iw->dim(0).value();
+  for (uint32_t i = 1; i < iw_rank; i++)
+    mw->dim(i) = iw->dim(i);
+  mw->size<loco::DataType::FLOAT32>(4 * ne_w);
+  mw->shape_status(luci::ShapeStatus::VALID);
+  for (uint32_t i = 0; i < ne_w; ++i)
+  {
+    mw->at<loco::DataType::FLOAT32>(i + ne_w * 0) = iw->at<loco::DataType::FLOAT32>(i);
+    mw->at<loco::DataType::FLOAT32>(i + ne_w * 1) = fw->at<loco::DataType::FLOAT32>(i);
+    mw->at<loco::DataType::FLOAT32>(i + ne_w * 2) = cw->at<loco::DataType::FLOAT32>(i);
+    mw->at<loco::DataType::FLOAT32>(i + ne_w * 3) = ow->at<loco::DataType::FLOAT32>(i);
+  }
+  return mw;
+}
+
+luci::CircleFullyConnected *UnrollLSTM::create_input_matmul(luci::CircleNode *input)
+{
+  assert(input != nullptr);
+
+  // weights
+  auto iw = loco::must_cast<luci::CircleConst *>(_lstm->input_to_input_weights());
+  auto fw = loco::must_cast<luci::CircleConst *>(_lstm->input_to_forget_weights());
+  auto cw = loco::must_cast<luci::CircleConst *>(_lstm->input_to_cell_weights());
+  auto ow = loco::must_cast<luci::CircleConst *>(_lstm->input_to_output_weights());
+
+  auto fcw = merged_weights(iw, fw, cw, ow);
+  fcw->name(_name + "_fc_w");
+  luci::add_origin(fcw, luci::get_origin(_lstm));
+
+  // bias
+  auto ib = loco::must_cast<luci::CircleConst *>(_lstm->input_gate_bias());
+  auto fb = loco::must_cast<luci::CircleConst *>(_lstm->forget_gate_bias());
+  auto cb = loco::must_cast<luci::CircleConst *>(_lstm->cell_gate_bias());
+  auto ob = loco::must_cast<luci::CircleConst *>(_lstm->output_gate_bias());
+
+  auto fcb = merged_weights(ib, fb, cb, ob);
+  fcb->name(_name + "_fc_b");
+  luci::add_origin(fcb, luci::get_origin(_lstm));
+
+  auto fc = _nctx->create<luci::CircleFullyConnected>();
+  fc->input(input);
+  fc->weights(fcw);
+  fc->bias(fcb);
+  fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+  fc->name(_name + "_fc");
+  luci::add_origin(fc, luci::get_origin(_lstm));
+
+  return fc;
+}
+
+luci::CircleAdd *UnrollLSTM::create_input_matmul(luci::CircleNode *input, luci::CircleMul *mul,
+                                                 uint32_t step)
+{
+  assert(input != nullptr);
+  assert(mul != nullptr);
+  assert(step < _timesteps);
+
+  auto base_name = _name + "_matmul" + std::to_string(step);
+
+  // input weights
+  auto iw = loco::must_cast<luci::CircleConst *>(_lstm->input_to_input_weights());
+  auto fw = loco::must_cast<luci::CircleConst *>(_lstm->input_to_forget_weights());
+  auto cw = loco::must_cast<luci::CircleConst *>(_lstm->input_to_cell_weights());
+  auto ow = loco::must_cast<luci::CircleConst *>(_lstm->input_to_output_weights());
+
+  auto fcw = merged_weights(iw, fw, cw, ow);
+  fcw->name(base_name + "_fc_w");
+  luci::add_origin(fcw, luci::get_origin(_lstm));
+
+  auto fcb = _nctx->create<luci::CircleOutputExclude>();
+
+  auto fc = _nctx->create<luci::CircleFullyConnected>();
+  fc->input(input);
+  fc->weights(fcw);
+  fc->bias(fcb);
+  fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+  fc->name(base_name + "_fc");
+  luci::add_origin(fc, luci::get_origin(_lstm));
+
+  // recurrent weights
+  auto ri = loco::must_cast<luci::CircleConst *>(_lstm->recurrent_to_input_weights());
+  auto rf = loco::must_cast<luci::CircleConst *>(_lstm->recurrent_to_forget_weights());
+  auto rc = loco::must_cast<luci::CircleConst *>(_lstm->recurrent_to_cell_weights());
+  auto ro = loco::must_cast<luci::CircleConst *>(_lstm->recurrent_to_output_weights());
+
+  auto fcrw = merged_weights(ri, rf, rc, ro);
+  fcrw->name(base_name + "_fcr_w");
+  luci::add_origin(fcrw, luci::get_origin(_lstm));
+
+  auto fcrb = _nctx->create<luci::CircleOutputExclude>();
+
+  auto fcr = _nctx->create<luci::CircleFullyConnected>();
+  fcr->input(mul);
+  fcr->weights(fcrw);
+  fcr->bias(fcrb);
+  fcr->fusedActivationFunction(luci::FusedActFunc::NONE);
+  fcr->name(base_name + "_fcr");
+  luci::add_origin(fcr, luci::get_origin(_lstm));
+
+  auto add_fc = _nctx->create<luci::CircleAdd>();
+  add_fc->x(fcr);
+  add_fc->y(fc);
+  add_fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+  add_fc->name(base_name + "_addfc");
+  luci::add_origin(add_fc, luci::get_origin(_lstm));
+
+  // bias
+  auto ib = loco::must_cast<luci::CircleConst *>(_lstm->input_gate_bias());
+  auto fb = loco::must_cast<luci::CircleConst *>(_lstm->forget_gate_bias());
+  auto cb = loco::must_cast<luci::CircleConst *>(_lstm->cell_gate_bias());
+  auto ob = loco::must_cast<luci::CircleConst *>(_lstm->output_gate_bias());
+
+  auto bias = merged_weights(ib, fb, cb, ob);
+  bias->name(base_name + "_bias");
+
+  auto add_bias = _nctx->create<luci::CircleAdd>();
+  add_bias->x(add_fc);
+  add_bias->y(bias);
+  add_bias->fusedActivationFunction(luci::FusedActFunc::NONE);
+  add_bias->name(base_name + "_addbias");
+  luci::add_origin(add_bias, luci::get_origin(_lstm));
+
+  return add_bias;
+}
+
+std::vector<luci::CircleSplitOut *> UnrollLSTM::matmul_splits(luci::CircleNode *input,
+                                                              uint32_t step)
+{
+  assert(input != nullptr);
+  assert(step < _timesteps);
+
+  std::string split_name = _name + "_sp" + std::to_string(step);
+
+  auto split_dim = _nctx->create<luci::CircleConst>();
+  split_dim->dtype(loco::DataType::S32);
+  split_dim->rank(1);
+  split_dim->dim(0) = 1;
+  split_dim->size<loco::DataType::S32>(1);
+  split_dim->at<loco::DataType::S32>(0) = 1;
+  split_dim->shape_status(luci::ShapeStatus::VALID);
+  split_dim->name(split_name + "_dim");
+  luci::add_origin(split_dim, luci::get_origin(_lstm));
+
+  auto split = _nctx->create<luci::CircleSplit>();
+  split->num_split(4);
+  split->split_dim(split_dim);
+  split->input(input);
+  split->name(split_name);
+  luci::add_origin(split, luci::get_origin(_lstm));
+
+  auto split_o0 = _nctx->create<luci::CircleSplitOut>();
+  split_o0->input(split);
+  split_o0->index(0);
+  split_o0->name(split_name + "_spo0");
+  luci::add_origin(split_o0, luci::get_origin(_lstm));
+
+  auto split_o1 = _nctx->create<luci::CircleSplitOut>();
+  split_o1->input(split);
+  split_o1->index(1);
+  split_o1->name(split_name + "_spo1");
+  luci::add_origin(split_o1, luci::get_origin(_lstm));
+
+  auto split_o2 = _nctx->create<luci::CircleSplitOut>();
+  split_o2->input(split);
+  split_o2->index(2);
+  split_o2->name(split_name + "_spo2");
+  luci::add_origin(split_o2, luci::get_origin(_lstm));
+
+  auto split_o3 = _nctx->create<luci::CircleSplitOut>();
+  split_o3->input(split);
+  split_o3->index(3);
+  split_o3->name(split_name + "_spo3");
+  luci::add_origin(split_o3, luci::get_origin(_lstm));
+
+  std::vector<luci::CircleSplitOut *> outs;
+  outs.push_back(split_o0);
+  outs.push_back(split_o1);
+  outs.push_back(split_o2);
+  outs.push_back(split_o3);
+  return outs;
+}
+
+luci::CircleConst *UnrollLSTM::forget_zero(void)
+{
+  uint32_t amount = _batch * _units;
+
+  auto zero = _nctx->create<luci::CircleConst>();
+  zero->dtype(loco::DataType::FLOAT32);
+  zero->rank(2);
+  zero->dim(0) = _batch;
+  zero->dim(1) = _units;
+  zero->size<loco::DataType::FLOAT32>(amount);
+  for (uint32_t idx = 0; idx < amount; ++idx)
+    zero->at<loco::DataType::FLOAT32>(idx) = 0.0;
+  zero->shape_status(luci::ShapeStatus::VALID);
+  zero->name(_name + "_zero");
+  luci::add_origin(zero, luci::get_origin(_lstm));
+  return zero;
+}
+
+luci::CircleMul *UnrollLSTM::forget_gate_cell(std::vector<luci::CircleSplitOut *> &splits,
+                                              luci::CircleNode *prev, uint32_t step,
+                                              luci::CircleNode **retadd)
+{
+  assert(splits.size() > 0);
+  assert(prev != nullptr);
+  assert(step < _timesteps);
+
+  std::string net_name = _name + "_net" + std::to_string(step);
+
+  auto split_0 = splits[0]; // input-input  : Logistic - Mul(c) - Add - Tanh - Mul
+  auto split_1 = splits[1]; // input-forget : Logistic - Mul(p) - Add - Tanh - Mul
+  auto split_2 = splits[2]; // input-cell   : Tanh - Mul(c) - Add - Tanh - Mul
+  auto split_3 = splits[3]; // input-output : Logistic - Mul
+
+  auto logis_0 = _nctx->create<luci::CircleLogistic>();
+  logis_0->x(split_0);
+  logis_0->name(net_name + "_log0");
+  luci::add_origin(logis_0, luci::get_origin(_lstm));
+
+  auto logis_1 = _nctx->create<luci::CircleLogistic>();
+  logis_1->x(split_1);
+  logis_1->name(net_name + "_log1");
+  luci::add_origin(logis_1, luci::get_origin(_lstm));
+
+  auto tanh_2 = _nctx->create<luci::CircleTanh>();
+  tanh_2->x(split_2);
+  tanh_2->name(net_name + "_tanh2");
+  luci::add_origin(tanh_2, luci::get_origin(_lstm));
+
+  auto logis_3 = _nctx->create<luci::CircleLogistic>();
+  logis_3->x(split_3);
+  logis_3->name(net_name + "_log3");
+  luci::add_origin(logis_3, luci::get_origin(_lstm));
+
+  auto mul_c = _nctx->create<luci::CircleMul>();
+  mul_c->x(logis_0);
+  mul_c->y(tanh_2);
+  mul_c->fusedActivationFunction(luci::FusedActFunc::NONE);
+  mul_c->name(net_name + "_mul1");
+  luci::add_origin(mul_c, luci::get_origin(_lstm));
+
+  auto mul_p = _nctx->create<luci::CircleMul>();
+  mul_p->x(logis_1);
+  mul_p->y(prev);
+  mul_p->fusedActivationFunction(luci::FusedActFunc::NONE);
+  mul_p->name(net_name + "_mul2");
+  luci::add_origin(mul_p, luci::get_origin(_lstm));
+
+  auto add_cp = _nctx->create<luci::CircleAdd>();
+  add_cp->x(mul_c);
+  add_cp->y(mul_p);
+  add_cp->fusedActivationFunction(luci::FusedActFunc::NONE);
+  add_cp->name(net_name + "_add1");
+  luci::add_origin(add_cp, luci::get_origin(_lstm));
+
+  if (retadd != nullptr)
+    *retadd = add_cp;
+
+  auto tanh_cp = _nctx->create<luci::CircleTanh>();
+  tanh_cp->x(add_cp);
+  tanh_cp->name(net_name + "_tanh3");
+  luci::add_origin(tanh_cp, luci::get_origin(_lstm));
+
+  auto mul_out = _nctx->create<luci::CircleMul>();
+  mul_out->x(logis_3);
+  mul_out->y(tanh_cp);
+  mul_out->fusedActivationFunction(luci::FusedActFunc::NONE);
+  mul_out->name(net_name + "_mul3");
+  luci::add_origin(mul_out, luci::get_origin(_lstm));
+
+  return mul_out;
+}
+
+luci::CircleReshape *UnrollLSTM::last_reshape(luci::CircleNode *input)
+{
+  assert(input != nullptr);
+
+  auto reshape_s = _nctx->create<luci::CircleConst>();
+  reshape_s->dtype(loco::DataType::S32);
+  reshape_s->rank(1);
+  reshape_s->dim(0) = 3;
+  reshape_s->size<loco::DataType::S32>(3);
+  reshape_s->at<loco::DataType::S32>(0) = _batch;
+  reshape_s->at<loco::DataType::S32>(1) = _timesteps;
+  reshape_s->at<loco::DataType::S32>(2) = _units;
+  reshape_s->shape_status(luci::ShapeStatus::VALID);
+  reshape_s->name(_name + "_reshape_s");
+  luci::add_origin(reshape_s, luci::get_origin(_lstm));
+
+  auto reshape = _nctx->create<luci::CircleReshape>();
+  reshape->tensor(input);
+  reshape->shape(reshape_s);
+  reshape->newShape()->rank(3);
+  reshape->newShape()->dim(0) = _batch;
+  reshape->newShape()->dim(1) = _timesteps;
+  reshape->newShape()->dim(2) = _units;
+  reshape->name(_name + "_reshape");
+  luci::add_origin(reshape, luci::get_origin(_lstm));
+
+  return reshape;
+}
+
+luci::CircleTranspose *UnrollLSTM::last_transpose(std::vector<luci::CircleMul *> &output_muls)
+{
+  assert(output_muls.size() == _timesteps);
+
+  auto pack = _nctx->create<luci::CirclePack>(_timesteps);
+  pack->axis(0);
+  for (uint32_t idx = 0; idx < _timesteps; ++idx)
+    pack->values(idx, output_muls[idx]);
+  pack->name(_name + "_pack");
+  luci::add_origin(pack, luci::get_origin(_lstm));
+
+  auto perm = transpose_perm();
+  perm->name(_name + "_perm2");
+  luci::add_origin(perm, luci::get_origin(_lstm));
+
+  auto transpose = _nctx->create<luci::CircleTranspose>();
+  transpose->a(pack);
+  transpose->perm(perm);
+  transpose->name(_name + "_trans2");
+  luci::add_origin(transpose, luci::get_origin(_lstm));
+
+  return transpose;
+}
+
+bool unroll_lstm(luci::CircleUnidirectionalSequenceLSTM *lstm)
+{
+  // NOTE shape of input of lstm is interpreted as [batch, timesteps, feature]
+  //      shape of output of lstm is interpreted as [batch, timesteps, units]
+  // TODO add more conditions to check LSTM
+  assert(lstm != nullptr);
+  assert(lstm->rank() == 3); // use assert to find out when this happens
+  if (lstm->rank() != 3)
+    return false;
+  if (!(lstm->dim(0).known() and lstm->dim(1).known() and lstm->dim(2).known()))
+    return false;
+
+  UnrollLSTM ulstm;
+  ulstm._lstm = lstm;
+  ulstm._nctx = lstm->graph()->nodes();
+  ulstm._name = lstm->name();
+  ulstm._batch = lstm->dim(0).value();
+  ulstm._timesteps = lstm->dim(1).value();
+  ulstm._units = lstm->dim(2).value(); // output space dim
+
+  luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(lstm->input());
+  assert(input->rank() == 3); // use assert to find out when this happens
+  if (input->rank() != 3)
+    return false;
+  assert(input->dim(0).value() == ulstm._batch);
+  assert(input->dim(1).value() == ulstm._timesteps);
+
+  if (ulstm._timesteps > 1)
+  {
+    // Transpose to switch batch <-> timesteps
+    // NOTE TF uses Reshape when batch is 1, but since a Transpose->Reshape
+    //      pass exists, we can just use Transpose for both cases
+    auto transpose = ulstm.first_transpose(input);
+    input = transpose;
+  }
+
+  auto unpacks = ulstm.input_unpacks(input);
+  assert(unpacks.size() == ulstm._timesteps);
+  uint32_t step = 0;
+  auto unpackout = unpacks[step];
+
+  // First FC
+  auto fc_1 = ulstm.create_input_matmul(unpackout);
+  assert(fc_1 != nullptr);
+  auto splits = ulstm.matmul_splits(fc_1, step);
+  assert(splits.size() == 4);
+
+  luci::CircleNode *prev = nullptr; // prev step CircleAdd
+  luci::CircleNode *this_add = nullptr;
+
+  prev = ulstm.forget_zero(); // provide all zero constant for first step
+
+  std::vector<luci::CircleMul *> output_muls;
+  auto mul_gc = ulstm.forget_gate_cell(splits, prev, step, &this_add);
+  assert(mul_gc != nullptr);
+  assert(this_add != nullptr);
+  // gather all Muls for last Pack
+  output_muls.push_back(mul_gc);
+
+  for (step = 1; step < ulstm._timesteps; ++step)
+  {
+    auto unpackout = unpacks[step];
+    auto add_n = ulstm.create_input_matmul(unpackout, mul_gc, step);
+
+    auto splits = ulstm.matmul_splits(add_n, step);
+    assert(splits.size() == 4);
+
+    prev = this_add;
+    mul_gc = ulstm.forget_gate_cell(splits, prev, step, &this_add);
+    assert(mul_gc != nullptr);
+    assert(this_add != nullptr);
+
+    output_muls.push_back(mul_gc);
+  }
+  assert(output_muls.size() == ulstm._timesteps);
+
+  if (ulstm._timesteps == 1)
+  {
+    // Reshape for single step
+    auto reshape = ulstm.last_reshape(mul_gc);
+    loco::replace(lstm).with(reshape);
+  }
+  else
+  {
+    // Pack + Transpose for two or more steps
+    auto transpose = ulstm.last_transpose(output_muls);
+    loco::replace(lstm).with(transpose);
+  }
+
+  return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool UnrollUnidirectionalSequenceLSTMPass::run(loco::Graph *g)
+{
+  bool changed = false;
+
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    if (auto lstm = dynamic_cast<luci::CircleUnidirectionalSequenceLSTM *>(node))
+    {
+      if (unroll_lstm(lstm))
+        changed = true;
+    }
+  }
+
+  return changed;
+}
+
+} // namespace luci
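
For readers mapping the unrolled sub-network back to the usual LSTM cell equations: each step splits the merged FC output into the four gate pre-activations (input, forget, cell, output), then updates the cell state and produces the step output. A scalar sketch of one step, using standard LSTM math rather than code from this pass (CircleLogistic corresponds to the sigmoid below):

  #include <cmath>

  struct GatePreact { float i, f, c, o; }; // the four Split outputs, in order

  // One LSTM step: updates the cell state and returns the step output.
  float lstm_step(GatePreact g, float &cell_state)
  {
    auto sigmoid = [](float x) { return 1.0f / (1.0f + std::exp(-x)); };
    float i = sigmoid(g.i);                // input gate   (Logistic)
    float f = sigmoid(g.f);                // forget gate  (Logistic)
    float c = std::tanh(g.c);              // cell update  (Tanh)
    float o = sigmoid(g.o);                // output gate  (Logistic)
    cell_state = f * cell_state + i * c;   // Mul + Mul + Add, the (A)/(A') connection
    return o * std::tanh(cell_state);      // final Tanh + Mul -> step output
  }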
diff --git a/compiler/luci/pass/src/UnrollUnidirectionalSequenceLSTMPass.test.cpp b/compiler/luci/pass/src/UnrollUnidirectionalSequenceLSTMPass.test.cpp
new file mode 100644 (file)
index 0000000..3f273cb
--- /dev/null
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/UnrollUnidirectionalSequenceLSTMPass.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/Nodes/CircleUnidirectionalSequenceLSTM.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class UniSeqLSTMGraphlet
+{
+public:
+  UniSeqLSTMGraphlet() = default;
+
+  void init(loco::Graph *g, const ShapeU32 oshape)
+  {
+    _uslstm = g->nodes()->create<luci::CircleUnidirectionalSequenceLSTM>();
+    _uslstm->dtype(loco::DataType::FLOAT32);
+    _uslstm->shape(oshape);
+    _uslstm->name("uslstm");
+
+    _uslstm->fusedActivationFunction(luci::FusedActFunc::TANH);
+    _uslstm->cell_clip(0.0);
+    _uslstm->proj_clip(0.0);
+    _uslstm->time_major(false);
+    _uslstm->asymmetric_quantize_inputs(false);
+
+    _iw = weight_1x1(g);
+    _rw = weight_1x1(g);
+    _gb = weight_1(g);
+    _ex = g->nodes()->create<luci::CircleOutputExclude>();
+  }
+
+protected:
+  luci::CircleConst *weight_1x1(loco::Graph *g)
+  {
+    auto w = g->nodes()->create<luci::CircleConst>();
+    w->dtype(loco::DataType::FLOAT32);
+    w->rank(2);
+    w->dim(0) = 1;
+    w->dim(1) = 1;
+    w->size<loco::DataType::FLOAT32>(1);
+    w->at<loco::DataType::FLOAT32>(0) = 1.0;
+    w->shape_status(luci::ShapeStatus::VALID);
+    return w;
+  }
+
+  luci::CircleConst *weight_1(loco::Graph *g)
+  {
+    auto w = g->nodes()->create<luci::CircleConst>();
+    w->dtype(loco::DataType::FLOAT32);
+    w->rank(1);
+    w->dim(0) = 1;
+    w->size<loco::DataType::FLOAT32>(1);
+    w->at<loco::DataType::FLOAT32>(0) = 1.0;
+    w->shape_status(luci::ShapeStatus::VALID);
+    return w;
+  }
+
+protected:
+  luci::CircleUnidirectionalSequenceLSTM *_uslstm = nullptr;
+  luci::CircleConst *_iw = nullptr;
+  luci::CircleConst *_rw = nullptr;
+  luci::CircleConst *_gb = nullptr;
+  luci::CircleOutputExclude *_ex = nullptr;
+};
+
+class UnrollUniSeqLSTMPassTestGraph : public TestIOGraph, public UniSeqLSTMGraphlet
+{
+public:
+  UnrollUniSeqLSTMPassTestGraph() = default;
+
+  void init(const ShapeU32 ishape, const ShapeU32 oshape)
+  {
+    TestIOGraph::init(ishape, oshape);
+    UniSeqLSTMGraphlet::init(g(), oshape);
+
+    auto inode = input();
+    _uslstm->input(inode);
+
+    _uslstm->input_to_input_weights(_iw);
+    _uslstm->input_to_forget_weights(_iw);
+    _uslstm->input_to_cell_weights(_iw);
+    _uslstm->input_to_output_weights(_iw);
+
+    _uslstm->recurrent_to_input_weights(_rw);
+    _uslstm->recurrent_to_forget_weights(_rw);
+    _uslstm->recurrent_to_cell_weights(_rw);
+    _uslstm->recurrent_to_output_weights(_rw);
+
+    _uslstm->cell_to_input_weights(_ex);
+    _uslstm->cell_to_forget_weights(_ex);
+    _uslstm->cell_to_output_weights(_ex);
+
+    _uslstm->input_gate_bias(_gb);
+    _uslstm->forget_gate_bias(_gb);
+    _uslstm->cell_gate_bias(_gb);
+    _uslstm->output_gate_bias(_gb);
+
+    _uslstm->projection_weights(_ex);
+    _uslstm->projection_bias(_ex);
+
+    _uslstm->output_state(_ex);
+    _uslstm->cell_state(_ex);
+
+    _uslstm->input_layer_norm_coefficients(_ex);
+    _uslstm->forget_layer_norm_coefficients(_ex);
+    _uslstm->cell_layer_norm_coefficients(_ex);
+    _uslstm->output_layer_norm_coefficients(_ex);
+
+    output()->from(_uslstm);
+  }
+};
+
+} // namespace
+
+namespace
+{
+
+using namespace luci::test;
+
+// FakeQuantGraphlet is for a simple negative test
+class FakeQuantGraphlet
+{
+public:
+  FakeQuantGraphlet() = default;
+
+public:
+  void init(loco::Graph *g)
+  {
+    _fq = g->nodes()->create<luci::CircleFakeQuant>();
+    _fq->name("fq");
+  }
+
+protected:
+  luci::CircleFakeQuant *_fq = nullptr;
+};
+
+class FakeQuantGraph : public TestIOGraph, public FakeQuantGraphlet
+{
+public:
+  FakeQuantGraph() = default;
+
+public:
+  void init(void)
+  {
+    TestIOGraph::init({1, 1, 1}, {1, 1, 1});
+    FakeQuantGraphlet::init(g());
+
+    _fq->inputs(input());
+
+    output()->from(_fq);
+  }
+};
+
+} // namespace
+
+TEST(UnrollUnidirectionalSequenceLSTMPassTestName, name)
+{
+  luci::UnrollUnidirectionalSequenceLSTMPass pass;
+  auto const name = pass.name();
+  ASSERT_NE(nullptr, name);
+}
+
+class UnrollUnidirectionalSequenceLSTMPassTest : public ::testing::Test
+{
+public:
+  UnrollUniSeqLSTMPassTestGraph g;
+  luci::UnrollUnidirectionalSequenceLSTMPass pass;
+};
+
+TEST_F(UnrollUnidirectionalSequenceLSTMPassTest, simple_run)
+{
+  g.init({1, 1, 1}, {1, 1, 1});
+
+  EXPECT_TRUE(pass.run(g.g()));
+}
+
+class UnrollUnidirectionalSequenceLSTMPassTestN : public ::testing::Test
+{
+public:
+  FakeQuantGraph g;
+  luci::UnrollUnidirectionalSequenceLSTMPass pass;
+};
+
+TEST_F(UnrollUnidirectionalSequenceLSTMPassTestN, simple_run_NEG)
+{
+  g.init();
+
+  EXPECT_FALSE(pass.run(g.g()));
+}
index 408e6b8..6bf7ff6 100644 (file)
@@ -133,6 +133,10 @@ private:
 
   bool visit(const luci::CircleAdd *node)
   {
+    // Skip granularity check for indices
+    if (node->dtype() == loco::DataType::S32 or node->dtype() == loco::DataType::S64)
+      return true;
+
     RETURN_FALSE_UNLESS(is_lwq(node));
     RETURN_FALSE_UNLESS(is_lwq(node->x()));
     RETURN_FALSE_UNLESS(is_lwq(node->y()));
@@ -176,6 +180,10 @@ private:
 
   bool visit(const luci::CircleMul *node)
   {
+    // Skip granularity check for indices
+    if (node->dtype() == loco::DataType::S32 or node->dtype() == loco::DataType::S64)
+      return true;
+
     RETURN_FALSE_UNLESS(is_lwq(node));
     RETURN_FALSE_UNLESS(is_lwq(node->x()));
     RETURN_FALSE_UNLESS(is_lwq(node->y()));
index cf86aca..3ce3255 100644 (file)
@@ -47,6 +47,10 @@ namespace luci
 template <loco::DataType Qtype, loco::DataType Btype>
 bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleAdd *node)
 {
+  // Allow add of indices
+  if (group_has_type(node, loco::DataType::S32) or group_has_type(node, loco::DataType::S64))
+    return true;
+
   return group_has_type(node, Qtype);
 }
 
@@ -240,6 +244,10 @@ bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleMirrorPa
 template <loco::DataType Qtype, loco::DataType Btype>
 bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleMul *node)
 {
+  // Allow mul of indices
+  if (group_has_type(node, loco::DataType::S32) or group_has_type(node, loco::DataType::S64))
+    return true;
+
   return group_has_type(node, Qtype);
 }
 
index b80f085..10113e8 100644 (file)
@@ -57,6 +57,12 @@ public:
    */
   template <class COMM_NODE> bool with_commutative_args_of(const COMM_NODE *node);
 
+  /**
+   * @note Similar to with_commutative_args_of, but not commutative:
+   *       _arg_1 and _arg_2 must match ARG_TYPE_1 and ARG_TYPE_2, in that order.
+   */
+  template <class COMM_NODE> bool with_args_of(const COMM_NODE *node);
+
 private:
   ARG_TYPE_1 **_arg_1;
   ARG_TYPE_2 **_arg_2;
@@ -101,4 +107,24 @@ bool NodeFiller<ARG_TYPE_1, ARG_TYPE_2>::with_commutative_args_of(const COMM_NOD
   return false;
 }
 
+template <class ARG_TYPE_1, class ARG_TYPE_2>
+template <class COMM_NODE>
+bool NodeFiller<ARG_TYPE_1, ARG_TYPE_2>::with_args_of(const COMM_NODE *node)
+{
+  // X == ARG_TYPE_1 / Y == ARG_TYPE_2
+  {
+    auto x = dynamic_cast<ARG_TYPE_1 *>(node->x());
+    auto y = dynamic_cast<ARG_TYPE_2 *>(node->y());
+
+    if (x && y)
+    {
+      *_arg_1 = x;
+      *_arg_2 = y;
+      return true;
+    }
+  }
+
+  return false;
+}
+
 } // namespace luci
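
A sketch of how the non-commutative filler might be used when a pattern requires its operands in a fixed order; 'mul' is an assumed luci::CircleMul *, and fill() is assumed to be the usual companion helper of NodeFiller in this header:

  luci::CircleConst *scale = nullptr;
  luci::CircleTranspose *trans = nullptr;

  // Unlike with_commutative_args_of, the operand order is not swapped:
  // this only matches when mul->x() is the Const and mul->y() is the Transpose.
  if (luci::fill(&scale, &trans).with_args_of(mul))
  {
    // 'scale' and 'trans' are now bound to mul's operands
  }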
index fcd9bbc..e014304 100644 (file)
@@ -18,6 +18,7 @@
 #ifndef __LUCI_PASS_HELPERS_SPARSITY_FORMAT_CONVERTER_H__
 #define __LUCI_PASS_HELPERS_SPARSITY_FORMAT_CONVERTER_H__
 
+#include <cstddef>
 #include <cstdint>
 #include <vector>
 
index d020f6d..2628726 100644 (file)
@@ -77,6 +77,15 @@ loco::DataType str_to_dtype(const std::string &str)
   return loco::DataType::Unknown;
 }
 
+// Convert a vector of strings to a vector of loco::DataType
+std::vector<loco::DataType> str_vec_to_dtype_vec(std::vector<std::string> &vec)
+{
+  std::vector<loco::DataType> res;
+  std::transform(vec.begin(), vec.end(), std::back_inserter(res),
+                 [](std::string s) -> loco::DataType { return str_to_dtype(to_lower_case(s)); });
+  return res;
+}
+
 QuantizationGranularity str_to_granularity(const std::string &str)
 {
   if (to_lower_case(str).compare("layer") == 0)
index 0e78185..485f379 100644 (file)
@@ -36,6 +36,8 @@ std::string to_lower_case(std::string);
 
 loco::DataType str_to_dtype(const std::string &);
 
+std::vector<loco::DataType> str_vec_to_dtype_vec(std::vector<std::string> &);
+
 QuantizationGranularity str_to_granularity(const std::string &);
 
 } // namespace luci
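
str_vec_to_dtype_vec is presumably intended for tools that accept a list of per-tensor dtype names, e.g. on the command line. A small sketch of such a call site (the option-parsing part is assumed):

  std::vector<std::string> input_type_str = {"uint8", "int16"};
  std::vector<loco::DataType> input_types = luci::str_vec_to_dtype_vec(input_type_str);
  // Unrecognized names map to loco::DataType::Unknown, as the test below shows.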
index d77b650..6d854ad 100644 (file)
@@ -48,3 +48,26 @@ TEST(StringsTest, str_to_granularity)
 
   EXPECT_THROW(luci::str_to_granularity("foo"), std::runtime_error);
 }
+
+TEST(StringsTest, str_vec_to_dtype_vec)
+{
+  std::vector<std::string> input1 = {"uint8", "int16", "float32"};
+  auto result1 = luci::str_vec_to_dtype_vec(input1);
+  ASSERT_EQ(3, result1.size());
+  ASSERT_EQ(loco::DataType::U8, result1[0]);
+  ASSERT_EQ(loco::DataType::S16, result1[1]);
+  ASSERT_EQ(loco::DataType::FLOAT32, result1[2]);
+
+  std::vector<std::string> input2 = {"uint8", "int16", "float32", ""};
+  auto result2 = luci::str_vec_to_dtype_vec(input2);
+  ASSERT_EQ(4, result2.size());
+  ASSERT_EQ(loco::DataType::U8, result2[0]);
+  ASSERT_EQ(loco::DataType::S16, result2[1]);
+  ASSERT_EQ(loco::DataType::FLOAT32, result2[2]);
+  ASSERT_EQ(loco::DataType::Unknown, result2[3]);
+
+  std::vector<std::string> input3 = {"uint8"};
+  auto result3 = luci::str_vec_to_dtype_vec(input3);
+  ASSERT_EQ(1, result3.size());
+  ASSERT_EQ(loco::DataType::U8, result3[0]);
+}
diff --git a/compiler/luci/pass/src/test/TestIOGraph.h b/compiler/luci/pass/src/test/TestIOGraph.h
deleted file mode 100644 (file)
index b1fc41f..0000000
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_PASS_TEST_IO_GRAPH_H__
-#define __LUCI_PASS_TEST_IO_GRAPH_H__
-
-#include "TestShape.h"
-
-#include <luci/IR/CircleNodes.h>
-
-namespace luci
-{
-namespace test
-{
-
-/**
- * @brief Graphlet with Inputs and loco::Graph for multiple inputs
- * @note  Every Graph will have Input(s) and Output(s)
- *        We put loco::Graph only in IsGraphlet not to declare separate
- *        class for loco::Graph
- */
-template <unsigned N> class TestIsGraphlet
-{
-public:
-  TestIsGraphlet()
-  {
-    for (uint32_t n = 0; n < N; ++n)
-    {
-      _graph_inputs[n] = nullptr;
-      _inputs[n] = nullptr;
-    }
-  }
-
-public:
-  virtual void init(loco::Graph *g, const ShapeU32 shape_in)
-  {
-    for (uint32_t n = 0; n < N; ++n)
-    {
-      _graph_inputs[n] = g->inputs()->create();
-
-      _inputs[n] = g->nodes()->create<luci::CircleInput>();
-      _inputs[n]->shape(shape_in);
-      _inputs[n]->shape_status(luci::ShapeStatus::VALID);
-      _inputs[n]->dtype(loco::DataType::FLOAT32);
-      _inputs[n]->name("input_" + std::to_string(n));
-
-      _inputs[n]->index(_graph_inputs[n]->index());
-
-      auto input_shape = std::make_unique<loco::TensorShape>();
-      set_shape_vector(input_shape.get(), shape_in);
-      _graph_inputs[n]->shape(std::move(input_shape));
-      _graph_inputs[n]->dtype(loco::DataType::FLOAT32);
-    }
-  }
-
-public:
-  loco::Graph *g(void) { return &_g; }
-  luci::CircleInput *input(int idx) { return _inputs[idx]; }
-
-protected:
-  loco::Graph _g;
-  std::array<loco::GraphInput *, N> _graph_inputs;
-  std::array<luci::CircleInput *, N> _inputs;
-};
-
-/**
- * @brief Graphlet with one Input
- */
-class TestIGraphlet : public TestIsGraphlet<1>
-{
-public:
-  luci::CircleInput *input() { return _inputs[0]; }
-};
-
-/**
- * @brief Graphlet with Outputs for multiple outputs
- */
-template <unsigned N> class TestOsGraphlet
-{
-public:
-  TestOsGraphlet()
-  {
-    for (uint32_t n = 0; n < N; ++n)
-    {
-      _graph_outputs[n] = nullptr;
-      _outputs[n] = nullptr;
-    }
-  }
-
-public:
-  virtual void init(loco::Graph *g, const ShapeU32 shape_out)
-  {
-    for (uint32_t n = 0; n < N; ++n)
-    {
-      _graph_outputs[n] = g->outputs()->create();
-
-      _outputs[n] = g->nodes()->create<luci::CircleOutput>();
-      _outputs[n]->shape(shape_out);
-      _outputs[n]->shape_status(luci::ShapeStatus::VALID);
-      _outputs[n]->dtype(loco::DataType::FLOAT32);
-      _outputs[n]->name("output_" + std::to_string(n));
-
-      _outputs[n]->index(_graph_outputs[n]->index());
-
-      auto output_shape = std::make_unique<loco::TensorShape>();
-      set_shape_vector(output_shape.get(), shape_out);
-      _graph_outputs[n]->shape(std::move(output_shape));
-      _graph_outputs[n]->dtype(loco::DataType::FLOAT32);
-    }
-  }
-
-public:
-  luci::CircleOutput *output(int idx) { return _outputs[idx]; }
-
-protected:
-  std::array<loco::GraphOutput *, N> _graph_outputs;
-  std::array<luci::CircleOutput *, N> _outputs;
-};
-
-/**
- * @brief Graphlet with one Output
- */
-class TestOGraphlet : public TestOsGraphlet<1>
-{
-public:
-  luci::CircleOutput *output() { return _outputs[0]; }
-};
-
-/**
- * @brief Graph with Input and Output
- */
-class TestIOGraph : public TestIGraphlet, public TestOGraphlet
-{
-public:
-  TestIOGraph() = default;
-
-public:
-  virtual void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
-  {
-    TestIsGraphlet<1>::init(g(), shape_in);
-    TestOsGraphlet<1>::init(g(), shape_out);
-  }
-};
-
-} // namespace test
-} // namespace luci
-
-#endif // __LUCI_PASS_TEST_IO_GRAPH_H__
diff --git a/compiler/luci/pass/src/test/TestIOGraph.test.cpp b/compiler/luci/pass/src/test/TestIOGraph.test.cpp
deleted file mode 100644 (file)
index e58a13f..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TestIOGraph.h"
-
-// This file validates "TestIOGraph.h". Pleaes DO NOT remove this file.
index 0a5e6a5..f3546b0 100644 (file)
@@ -5,7 +5,7 @@ require("locop")
 require("logo")
 require("logo-core")
 require("mio-circle04")
-require("mio-tflite280")
+require("luci-compute")
 require("oops")
 require("hermes")
 require("hermes-std")
index a368fae..9d4dbab 100644 (file)
@@ -682,30 +682,6 @@ loco::NodeShape infer_fully_connected(const luci::CircleFullyConnected *node)
   auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
   auto weights_shape = luci::shape_get(node->weights()).as<loco::TensorShape>();
 
-// TODO Remove following unused code
-#if 0
-  // Checking shape capability for fully connected layer
-  // Input: a tensor of at least rank 2 [D1, D2, ... Dn]
-  // Weight: [# of units, K]
-  // Output: [D1 * D2 * ... * Dn / K, # of units]
-  if (input_shape.rank() < 2 || weights_shape.rank() != 2)
-  {
-    // Return node own shape if shape inference is not possible
-    return use_own(node);
-  }
-
-  uint32_t input_size = 1;
-  for (uint32_t i = 0; i < input_shape.rank(); i++)
-  {
-    input_size = input_size * input_shape.dim(i).value();
-  }
-  const uint32_t batch_size = input_size / weights_shape.dim(1).value();
-  loco::TensorShape out_shape;
-  out_shape.rank(2);
-  out_shape.dim(0) = batch_size;
-  out_shape.dim(1) = weights_shape.dim(0);
-#endif
-
   loco::TensorShape out_shape;
 
   // NOTE Some recipes in some repositories are using rank 4 input for FullyConnected.
index 7616390..44c9330 100644 (file)
@@ -573,7 +573,14 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
 
   loco::DataType visit(const luci::CircleOutputDummy *node) final { return node->dtype(); }
 
-  loco::DataType visit(const luci::CircleOutputExclude *node) final { return node->dtype(); }
+  loco::DataType visit(const luci::CircleOutputExclude *node) final
+  {
+    // NOTE We don't care about CircleOutputExclude's dtype, but set it to FLOAT32
+    //      if it's Unknown to make type inference happy.
+    if (node->dtype() == loco::DataType::Unknown)
+      return loco::DataType::FLOAT32;
+    return node->dtype();
+  }
 
   loco::DataType visit(const luci::CircleCustomOut *node) final { return node->dtype(); }
 
index 277107b..7c37060 100644 (file)
@@ -31,6 +31,7 @@ luci::CircleNode *CloneNodeLet<CN::DEF>::visit(const luci::CircleFullyConnected
   {
     cloned->fusedActivationFunction(node->fusedActivationFunction());
     cloned->weights_format(node->weights_format());
+    cloned->keep_num_dims(node->keep_num_dims());
   }
   return cloned;
 }
index fbd86df..2f78e05 100644 (file)
@@ -148,6 +148,78 @@ bool validate_shape_dtype(loco::Graph *g)
   return true;
 }
 
+class MultiOutNodeValidate final : public luci::CircleNodeVisitor<bool>
+{
+public:
+  MultiOutNodeValidate() {}
+
+private:
+  template <class T> bool check(const luci::CircleNode *node)
+  {
+    auto succs = loco::succs(node);
+    if (succs.size() < 1)
+      return false;
+    for (const auto &cnode : succs)
+    {
+      auto const child = dynamic_cast<const T *>(cnode);
+      if (child == nullptr)
+        return false;
+    }
+    return true;
+  }
+
+public:
+  bool visit(const luci::CircleBidirectionalSequenceLSTM *node) final
+  {
+    return check<luci::CircleBidirectionalSequenceLSTMOut>(node);
+  }
+  bool visit(const luci::CircleCustom *node) final { return check<luci::CircleCustomOut>(node); }
+  bool visit(const luci::CircleIf *node) final { return check<luci::CircleIfOut>(node); }
+  bool visit(const luci::CircleNonMaxSuppressionV4 *node) final
+  {
+    return check<luci::CircleNonMaxSuppressionV4Out>(node);
+  }
+  bool visit(const luci::CircleNonMaxSuppressionV5 *node) final
+  {
+    return check<luci::CircleNonMaxSuppressionV5Out>(node);
+  }
+  bool visit(const luci::CircleSplit *node) final { return check<luci::CircleSplitOut>(node); }
+  bool visit(const luci::CircleSplitV *node) final { return check<luci::CircleSplitVOut>(node); }
+  bool visit(const luci::CircleTopKV2 *node) final { return check<luci::CircleTopKV2Out>(node); }
+  bool visit(const luci::CircleUnique *node) final { return check<luci::CircleUniqueOut>(node); }
+  bool visit(const luci::CircleUnpack *node) final { return check<luci::CircleUnpackOut>(node); }
+  bool visit(const luci::CircleWhile *node) final { return check<luci::CircleWhileOut>(node); }
+
+  // default true for other nodes
+  bool visit(const luci::CircleNode *) final { return true; }
+};
+
+/**
+ * @brief Validate that multi-output nodes are followed by their dedicated
+ *        output IRs such as CircleIfOut.
+ */
+bool validate_multi_outs(loco::Graph *g)
+{
+  LOGGER(l);
+
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto const cnode = loco::must_cast<luci::CircleNode *>(node);
+
+    MultiOutNodeValidate d;
+    if (cnode->accept(&d))
+      continue;
+
+    auto const name = cnode->name();
+    INFO(l) << "Node: " << name << ", " << (uint32_t)(cnode->opcode()) << " has invalid successor."
+            << std::endl;
+
+    return false;
+  }
+
+  return true;
+}
+
 class VirtualNodeDetector final : public luci::CircleNodeVisitor<bool>
 {
 public:
@@ -185,6 +257,9 @@ bool validate(loco::Graph *g)
   if (!validate_shape_dtype(g))
     return false;
 
+  if (!validate_multi_outs(g))
+    return false;
+
   // TODO add more validation
 
   return true;
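
A minimal Python sketch (not part of the patch) of the invariant the new validate_multi_outs check above enforces; the class names are stand-ins for the luci IR types:

# Sketch only: mirrors MultiOutNodeValidate::check<T> from the hunk above.
class CircleSplit:        # stand-in for a multi-output node
    pass

class CircleSplitOut:     # stand-in for its dedicated virtual output node
    pass

def successors_are_valid(succs, out_type) -> bool:
    # a multi-output node must have at least one successor,
    # and every successor must be of the dedicated *Out type
    return len(succs) >= 1 and all(isinstance(s, out_type) for s in succs)

assert successors_are_valid([CircleSplitOut(), CircleSplitOut()], CircleSplitOut)
assert not successors_are_valid([], CircleSplitOut)
assert not successors_are_valid([CircleSplitOut(), object()], CircleSplitOut)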
index 09a25ff..8291f20 100644 (file)
@@ -193,6 +193,7 @@ addread(Transpose_000)
 addread(TransposeConv_000)
 addread(UnidirectionalSequenceLSTM_000)
 addread(UnidirectionalSequenceLSTM_001)
+addread(UnidirectionalSequenceLSTM_002)
 addread(Unique_000)
 addread(Unique_001)
 addread(Unique_002)
@@ -419,6 +420,7 @@ addwrite(Transpose_000)
 addwrite(TransposeConv_000)
 addwrite(UnidirectionalSequenceLSTM_000)
 addwrite(UnidirectionalSequenceLSTM_001)
+addwrite(UnidirectionalSequenceLSTM_002)
 addwrite(Unique_000)
 addwrite(Unique_001)
 addwrite(Unique_002)
index f48711e..edf75f4 100644 (file)
@@ -38,16 +38,6 @@ target_link_libraries(mio_tflite280_example mio_tflite280)
 add_executable(mio_tflite280_validate example.cpp)
 target_link_libraries(mio_tflite280_validate mio_tflite280)
 
-nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.8.0 QUIET)
-
-if(NOT TensorFlowGEMMLowpSource_FOUND)
-  return()
-endif(NOT TensorFlowGEMMLowpSource_FOUND)
-
-add_library(mio_tflite280_inc INTERFACE)
-target_include_directories(mio_tflite280_inc SYSTEM INTERFACE "${TensorFlowSource_DIR}")
-target_include_directories(mio_tflite280_inc SYSTEM INTERFACE "${TensorFlowGEMMLowpSource_DIR}")
-
 file(GLOB_RECURSE SOURCES "src/*.cpp")
 file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
index 47a3147..ce99cdb 100644 (file)
@@ -62,7 +62,7 @@ PassData ConstantFoldTranspose::run(PassData data)
   auto matches = matcher.matchEdge(is_constant, is_transpose);
   while (!matches.empty())
   {
-    for (const auto match : matches)
+    for (const auto &match : matches)
     {
       auto constant_op = dynamic_cast<ops::ConstantOp *>(match.first);
       auto transpose_op = dynamic_cast<ops::TransposeOp *>(match.second);
index 6a464f8..c508cca 100644 (file)
@@ -1,8 +1,9 @@
+#include <string>
 int main()
 {
   Shape s{1, 2, 3};
   Tensor in_t(s);
-  NNModel model("nnmodel.params");
+  NNModel model(std::string("nnmodel.params"));
   model.set_in(in_t);
   model.doInference();
   std::shared_ptr<Tensor> out_t = model.get_out();
index 90e989a..7772b53 100644 (file)
@@ -1,3 +1,23 @@
+# NOTE find_package tries to use at least python3.8 as follows, depending on platform version
+#   Ubuntu18.04; explicitly installed python3.8 (default is python3.6)
+#   Ubuntu20.04; default python3.8
+#   Ubuntu22.04; default python3.10
+#   refer https://github.com/Samsung/ONE/issues/9962
+find_package(PythonInterp 3.8 QUIET)
+find_package(PythonLibs 3.8 QUIET)
+
+if(NOT ${PYTHONINTERP_FOUND})
+  message(STATUS "Build one-cmds: FALSE (Python3 is missing)")
+  return()
+endif()
+
+if(${PYTHON_VERSION_MINOR} LESS 8)
+  message(STATUS "Build one-cmds: FALSE (You need to install Python version higher than 3.8)")
+  return()
+endif()
+
+# NOTE these files should not have extensions.
+#      the code below removes the extension when copying and installing.
 set(ONE_COMMAND_FILES
     one-build
     one-import
@@ -12,10 +32,18 @@ set(ONE_COMMAND_FILES
     one-profile
     one-infer
     one-codegen
-    one-prepare-venv
     onecc
 )
 
+# TODO find better way for per-platform files
+if(ONE_UBUNTU_CODENAME_JAMMY)
+  # NOTE copy one-prepare-venv.u2204 as build/../one-prepare-venv
+  #      and install build/../one-prepare-venv file
+  list(APPEND ONE_COMMAND_FILES one-prepare-venv.u2204)
+else()
+  list(APPEND ONE_COMMAND_FILES one-prepare-venv)
+endif()
+
 # pytorch importer is an experimental feature, it is not used in default configuration
 if(ENABLE_ONE_IMPORT_PYTORCH)
   list(APPEND ONE_COMMAND_FILES one-import-pytorch)
@@ -25,7 +53,9 @@ foreach(ONE_COMMAND IN ITEMS ${ONE_COMMAND_FILES})
 
   set(ONE_COMMAND_FILE ${ONE_COMMAND})
   set(ONE_COMMAND_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${ONE_COMMAND_FILE}")
-  set(ONE_COMMAND_BIN "${CMAKE_CURRENT_BINARY_DIR}/${ONE_COMMAND_FILE}")
+  # strip extension from the name
+  get_filename_component(ONE_COMMAND_FILE_NAME ${ONE_COMMAND} NAME_WE)
+  set(ONE_COMMAND_BIN "${CMAKE_CURRENT_BINARY_DIR}/${ONE_COMMAND_FILE_NAME}")
   set(ONE_COMMAND_TARGET "${ONE_COMMAND}_target")
 
   add_custom_command(OUTPUT ${ONE_COMMAND_BIN}
@@ -36,7 +66,7 @@ foreach(ONE_COMMAND IN ITEMS ${ONE_COMMAND_FILES})
 
   add_custom_target(${ONE_COMMAND_TARGET} ALL DEPENDS ${ONE_COMMAND_BIN})
 
-  install(FILES ${ONE_COMMAND}
+  install(FILES ${ONE_COMMAND_BIN}
           PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
                       GROUP_READ GROUP_EXECUTE
                       WORLD_READ WORLD_EXECUTE
@@ -47,7 +77,6 @@ endforeach(ONE_COMMAND)
 set(ONE_UTILITY_FILES
     one-build.template.cfg
     onecc.template.cfg
-    utils.py
     onnx_legalizer.py
 )
 
@@ -74,13 +103,24 @@ foreach(ONE_UTILITY IN ITEMS ${ONE_UTILITY_FILES})
 
 endforeach(ONE_UTILITY)
 
+# one-pack internally uses model2nnpkg tool
+set(MODEL2NNPKG "${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.py")
+install(FILES ${MODEL2NNPKG}
+              PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+                          GROUP_READ GROUP_EXECUTE
+                          WORLD_READ WORLD_EXECUTE
+              DESTINATION bin
+              RENAME "model2nnpkg")
+
 # make python directory
 set(ONE_PYTHON_FILES constant.py
+                     export_constant.py
                      make_cmd.py
                      CfgRunner.py
                      OptionBuilder.py
                      TopologicalSortHelper.py
-                     WorkflowRunner.py)
+                     WorkflowRunner.py
+                     utils.py)
 
 foreach(ONE_PYTHON_FILE IN ITEMS ${ONE_PYTHON_FILES})
 
@@ -111,6 +151,28 @@ foreach(ONE_PYTHON_FILE IN ITEMS ${ONE_PYTHON_FILES})
 
 endforeach(ONE_PYTHON_FILE)
 
+set(CONSTANT_EXPORTING_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/${ONE_PYTHON_DIR}/export_constant.py")
+set(O1_OPTION "O1")
+set(O1_CFG_FILE "${O1_OPTION}.cfg")
+set(O1_CFG_FILE_BIN "${CMAKE_CURRENT_BINARY_DIR}/${ONE_PYTHON_DIR}/${O1_CFG_FILE}")
+
+add_custom_command(OUTPUT ${O1_CFG_FILE_BIN}
+  COMMAND ${PYTHON_EXECUTABLE} ${CONSTANT_EXPORTING_SCRIPT}
+          --constant ${O1_OPTION}
+          --format cfg
+          --output_path ${O1_CFG_FILE_BIN}
+  DEPENDS ${CONSTANT_EXPORTING_SCRIPT}
+  COMMENT "Generate ${O1_CFG_FILE}"
+)
+
+add_custom_target("O1_cfg_target" ALL DEPENDS ${O1_CFG_FILE_BIN})
+
+install(FILES ${O1_CFG_FILE_BIN}
+        PERMISSIONS OWNER_WRITE OWNER_READ
+                    GROUP_READ
+                    WORLD_READ
+        DESTINATION optimization)
+
 set(ONE_DOCUMENT_FILES
     how-to-use-one-commands.txt
     how-to-prepare-virtualenv.txt
index 2552a02..912798e 100644 (file)
@@ -6,6 +6,7 @@ set(DUMMY_INFER_V2_SRC src/dummy-inferV2.cpp)
 set(HELP_INFER_SRC src/help-infer.cpp)
 set(DUMMY_PROFILE_SRC src/dummy-profile.cpp)
 set(HELP_PROFILE_SRC src/help-profile.cpp)
+set(DUMMY_ENV_SRC src/dummyEnv-compile.cpp)
 
 add_executable(dummy-compile ${DUMMY_DRIVER_SRC})
 add_executable(help-compile ${HELP_DRIVER_SRC})
@@ -14,6 +15,7 @@ add_executable(dummy-inferV2 ${DUMMY_INFER_V2_SRC})
 add_executable(help-infer ${HELP_INFER_SRC})
 add_executable(dummy-profile ${DUMMY_PROFILE_SRC})
 add_executable(help-profile ${HELP_PROFILE_SRC})
+add_executable(dummyEnv-compile ${DUMMY_ENV_SRC})
 
 set(DUMMY_DRIVER "${CMAKE_CURRENT_BINARY_DIR}/dummy-compile")
 set(HELP_DRIVER "${CMAKE_CURRENT_BINARY_DIR}/help-compile")
@@ -22,6 +24,7 @@ set(DUMMY_INFER_V2 "${CMAKE_CURRENT_BINARY_DIR}/dummy-inferV2")
 set(HELP_INFER "${CMAKE_CURRENT_BINARY_DIR}/help-infer")
 set(DUMMY_PROFILE "${CMAKE_CURRENT_BINARY_DIR}/dummy-profile")
 set(HELP_PROFILE "${CMAKE_CURRENT_BINARY_DIR}/help-profile")
+set(DUMMY_ENV "${CMAKE_CURRENT_BINARY_DIR}/dummyEnv-compile")
 
 install(FILES ${DUMMY_DRIVER}
         PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
@@ -64,3 +67,9 @@ install(FILES ${HELP_PROFILE}
                     GROUP_READ GROUP_EXECUTE
                     WORLD_READ WORLD_EXECUTE
         DESTINATION test)
+
+install(FILES ${DUMMY_ENV}
+        PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+                    GROUP_READ GROUP_EXECUTE
+                    WORLD_READ WORLD_EXECUTE
+        DESTINATION test)
diff --git a/compiler/one-cmds/dummy-driver/src/dummyEnv-compile.cpp b/compiler/one-cmds/dummy-driver/src/dummyEnv-compile.cpp
new file mode 100644 (file)
index 0000000..b16ea30
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * dummyEnv-compile only tests its interface rather than its functionality.
+ *
+ * ./dummyEnv-compile ${DUMMY_OUTPUT}
+ */
+
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <string>
+
+int main(int argc, char **argv)
+{
+  if (argc != 2)
+    return EXIT_FAILURE;
+
+  std::string spm_size;
+
+  if (const char *env_p = std::getenv("SPM_SIZE"))
+    spm_size = std::string(env_p);
+
+  std::ofstream outfile(argv[1]);
+
+  outfile << "SPM_SIZE=" << spm_size;
+
+  outfile.close();
+
+  return EXIT_SUCCESS;
+}
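
As the comment above notes, dummyEnv-compile only exercises the tool interface: it copies the SPM_SIZE environment variable into the output file. A rough Python sketch of how a test might drive it; the binary path and the SPM_SIZE value are assumptions for illustration:

# Sketch only: run dummyEnv-compile with SPM_SIZE set and check its output.
import os
import subprocess
import tempfile

env = dict(os.environ, SPM_SIZE="256K")          # assumed test value
with tempfile.TemporaryDirectory() as tmpdir:
    out_path = os.path.join(tmpdir, "dummy_env.out")
    subprocess.run(["./dummyEnv-compile", out_path], env=env, check=True)
    with open(out_path) as f:
        assert f.read() == "SPM_SIZE=256K"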
index 8d6007f..c36650a 100644 (file)
@@ -1,15 +1,15 @@
 About
 -----
 
-Last update: 2020-09-15
+Last update: 2023-01-30
 
 This document explains about 'one-prepare-venv' command.
 
-'one-prepare-venv' will prepare python3.8 virtual environment with tensorflow-cpu
-version 2.3.0, recommanded 2.x version as of now, so that 'one-import-tf'
+'one-prepare-venv' will prepare a python3 virtual environment with tensorflow-cpu
+version 2.8.0, the recommended 2.x version as of now, so that 'one-import-tf'
 command can execute properly.
 
-'one-prepare-venv' will also prepare onnx and onnx-tensorflow version 1.7.0 so
+'one-prepare-venv' will also prepare onnx and onnx-tensorflow version 1.10.0 so
 that 'one-import-onnx' command can execute properly.
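
A small Python sketch, run inside the prepared virtual environment, that checks the package versions this document mentions; the PyPI distribution names used here are assumptions for illustration:

# Sketch only: confirm the packages one-prepare-venv is documented to install.
from importlib.metadata import PackageNotFoundError, version

expected = {"tensorflow-cpu": "2.8.0", "onnx-tf": "1.10.0"}  # assumed dist names
for dist, want in expected.items():
    try:
        got = version(dist)
    except PackageNotFoundError:
        got = "not installed"
    print(f"{dist}: expected {want}, found {got}")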
 
 
index 2352bbd..028cde4 100644 (file)
@@ -169,6 +169,7 @@ Current transformation options are
 - fuse_instnorm: This will convert instance normalization related operators to
   one InstanceNormalization operator that our onert provides for faster
   execution.
+- fuse_prelu: This will fuse a pattern of operators into one PReLU operator.
 - fuse_preactivation_batchnorm: This fuses batch normalization operators of pre-activations to Conv operators.
 - fuse_activation_function: This fuses Activation function to a preceding operator.
 - fuse_mean_with_mean: This fuses two consecutive ReduceMean operations into one.
index 4b1f980..b21a162 100644 (file)
@@ -24,7 +24,7 @@ import configparser
 import os
 import sys
 
-import utils as _utils
+import onelib.utils as oneutils
 
 # TODO Find better way to suppress trackback on error
 # This suppression is applied only to `one-build`
@@ -35,9 +35,9 @@ def _get_parser():
     parser = argparse.ArgumentParser(
         description='command line tool to run ONE drivers in customized order')
 
-    _utils._add_default_arg(parser)
+    oneutils.add_default_arg(parser)
 
-    opt_name_list = _utils._get_optimization_list(get_name=True)
+    opt_name_list = oneutils.get_optimization_list(get_name=True)
     opt_name_list = ['-' + s for s in opt_name_list]
     if not opt_name_list:
         opt_help_message = '(No available optimization options)'
@@ -54,7 +54,7 @@ def _parse_arg(parser):
     args = parser.parse_args()
     # print version
     if args.version:
-        _utils._print_version_and_exit(__file__)
+        oneutils.print_version_and_exit(__file__)
 
     return args
 
@@ -62,12 +62,12 @@ def _parse_arg(parser):
 def _verify_arg(parser, args):
     """verify given arguments"""
     # check if required arguments is given
-    if not _utils._is_valid_attr(args, 'config'):
+    if not oneutils.is_valid_attr(args, 'config'):
         parser.error('-C/--config argument is required')
     # check if given optimization option exists
-    opt_name_list = _utils._get_optimization_list(get_name=True)
-    opt_name_list = [_utils._remove_prefix(s, 'O') for s in opt_name_list]
-    if _utils._is_valid_attr(args, 'O'):
+    opt_name_list = oneutils.get_optimization_list(get_name=True)
+    opt_name_list = [oneutils.remove_prefix(s, 'O') for s in opt_name_list]
+    if oneutils.is_valid_attr(args, 'O'):
         if ' ' in getattr(args, 'O'):
             parser.error('Not allowed to have space in the optimization name')
         if not getattr(args, 'O') in opt_name_list:
@@ -88,7 +88,7 @@ def _get_driver_name(driver_name):
     }[driver_name]
 
 
-def _parse_cfg(args):
+def parse_cfg(args):
     config = configparser.ConfigParser()
     config.optionxform = str
     parsed = config.read(os.path.expanduser(getattr(args, 'config')))
@@ -121,12 +121,12 @@ def _verify_cfg(driver_list, config):
 
 # verify given optimization option file
 def _verify_opt(args):
-    if _utils._is_valid_attr(args, 'O'):
+    if oneutils.is_valid_attr(args, 'O'):
         config = configparser.ConfigParser()
         config.optionxform = str
         opt_name_path_dic = dict(
-            zip(_utils._get_optimization_list(get_name=True),
-                _utils._get_optimization_list()))
+            zip(oneutils.get_optimization_list(get_name=True),
+                oneutils.get_optimization_list()))
         parsed = config.read(opt_name_path_dic['O' + getattr(args, 'O')])
         # check if given optimization option file exists
         if not parsed:
@@ -151,11 +151,11 @@ def main():
     _verify_arg(parser, args)
 
     # parse configuration file
-    config = _parse_cfg(args)
+    config = parse_cfg(args)
 
     # verify configuration file
     bin_dir = os.path.dirname(os.path.realpath(__file__))
-    import_drivers_dict = _utils._detect_one_import_drivers(bin_dir)
+    import_drivers_dict = oneutils.detect_one_import_drivers(bin_dir)
     transform_drivers = [
         'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile',
         'one-partition'
@@ -181,10 +181,10 @@ def main():
             driver_name = _get_driver_name(section)
         driver_path = os.path.join(dir_path, driver_name)
         cmd = [driver_path, '--config', getattr(args, 'config'), '--section', section]
-        if section == 'one-optimize' and _utils._is_valid_attr(args, 'O'):
+        if section == 'one-optimize' and oneutils.is_valid_attr(args, 'O'):
             cmd += ['-O', getattr(args, 'O')]
-        _utils._run(cmd)
+        oneutils.run(cmd)
 
 
 if __name__ == '__main__':
-    _utils._safemain(main, __file__)
+    oneutils.safemain(main, __file__)
index 86e1632..5ccff0f 100644 (file)
@@ -28,7 +28,7 @@ import os
 import sys
 import shutil
 
-import utils as _utils
+import onelib.utils as oneutils
 
 # TODO Find better way to suppress trackback on error
 sys.tracebacklimit = 0
@@ -79,7 +79,7 @@ def _get_parser(backends_list):
     parser = argparse.ArgumentParser(
         description='command line tool for code generation', usage=codegen_usage)
 
-    _utils._add_default_arg(parser)
+    oneutils.add_default_arg(parser)
 
     # get backend list in the directory
     backends_name = [ntpath.basename(f) for f in backends_list]
@@ -98,7 +98,7 @@ def _verify_arg(parser, args):
     """verify given arguments"""
     # check if required arguments is given
     missing = []
-    if not _utils._is_valid_attr(args, 'backend'):
+    if not oneutils.is_valid_attr(args, 'backend'):
         missing.append('-b/--backend')
     if len(missing):
         parser.error('the following arguments are required: ' + ' '.join(missing))
@@ -113,6 +113,8 @@ def _parse_arg(parser):
     del argv[0]
     # split by '--'
     args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x]
+    if len(args) == 0:
+        codegen_args = parser.parse_args(codegen_args)
     # one-codegen has two interfaces
     # 1. one-codegen [-h] [-v] [-C CONFIG] [-b BACKEND] [COMMANDS FOR BACKEND]
     if len(args) == 1:
@@ -125,7 +127,7 @@ def _parse_arg(parser):
         codegen_args = parser.parse_args(codegen_args)
     # print version
     if len(args) and codegen_args.version:
-        _utils._print_version_and_exit(__file__)
+        oneutils.print_version_and_exit(__file__)
 
     return codegen_args, backend_args, unknown_args
 
@@ -139,7 +141,7 @@ def main():
     args, backend_args, unknown_args = _parse_arg(parser)
 
     # parse configuration file
-    _utils._parse_cfg(args, 'one-codegen')
+    oneutils.parse_cfg(args.config, 'one-codegen', args)
 
     # verify arguments
     _verify_arg(parser, args)
@@ -157,12 +159,12 @@ def main():
     if not codegen_path:
         raise FileNotFoundError(backend_base + ' not found')
     codegen_cmd = [codegen_path] + backend_args + unknown_args
-    if _utils._is_valid_attr(args, 'command'):
+    if oneutils.is_valid_attr(args, 'command'):
         codegen_cmd += getattr(args, 'command').split()
 
     # run backend driver
-    _utils._run(codegen_cmd, err_prefix=backend_base)
+    oneutils.run(codegen_cmd, err_prefix=backend_base)
 
 
 if __name__ == '__main__':
-    _utils._safemain(main, __file__)
+    oneutils.safemain(main, __file__)
index efd60bb..880b831 100644 (file)
@@ -24,7 +24,7 @@ import os
 import subprocess
 import sys
 
-import utils as _utils
+import onelib.utils as oneutils
 
 
 def _get_parser():
@@ -79,7 +79,7 @@ def _convert(args, unknown_args):
     dir_path = os.path.dirname(os.path.realpath(__file__))
     # make cmd
     cmd = [sys.executable, os.path.join(dir_path, _get_driver_name(args.driver))]
-    if _utils._is_valid_attr(args, 'config'):
+    if oneutils.is_valid_attr(args, 'config'):
         cmd.append('--config')
         cmd.append(os.path.expanduser(args.config))
     return_code = subprocess.call(cmd + unknown_args)
@@ -100,4 +100,4 @@ def main():
 
 
 if __name__ == '__main__':
-    _utils._safemain(main, __file__)
+    oneutils.safemain(main, __file__)
index c3ef0b2..fc0f75c 100644 (file)
@@ -25,7 +25,7 @@ import sys
 import tempfile
 
 import onelib.make_cmd as _make_cmd
-import utils as _utils
+import onelib.utils as oneutils
 import generate_bcq_output_arrays as _bcq_info_gen
 
 # TODO Find better way to suppress trackback on error
@@ -40,7 +40,7 @@ def _get_parser():
     parser = argparse.ArgumentParser(
         description='command line tool to convert TensorFlow with BCQ to circle')
 
-    _utils._add_default_arg(parser)
+    oneutils.add_default_arg(parser)
 
     ## tf2tfliteV2 arguments
     tf2tfliteV2_group = parser.add_argument_group('converter arguments')
@@ -94,9 +94,9 @@ def _verify_arg(parser, args):
     """verify given arguments"""
     # check if required arguments is given
     missing = []
-    if not _utils._is_valid_attr(args, 'input_path'):
+    if not oneutils.is_valid_attr(args, 'input_path'):
         missing.append('-i/--input_path')
-    if not _utils._is_valid_attr(args, 'output_path'):
+    if not oneutils.is_valid_attr(args, 'output_path'):
         missing.append('-o/--output_path')
     if len(missing):
         parser.error('the following arguments are required: ' + ' '.join(missing))
@@ -106,7 +106,7 @@ def _parse_arg(parser):
     args = parser.parse_args()
     # print version
     if args.version:
-        _utils._print_version_and_exit(__file__)
+        oneutils.print_version_and_exit(__file__)
 
     return args
 
@@ -115,15 +115,15 @@ def _make_generate_bcq_metadata_cmd(args, driver_path, output_path):
     """make a command for running generate_bcq_metadata"""
     cmd = [sys.executable, driver_path]
     # input_path
-    if _utils._is_valid_attr(args, 'input_path'):
+    if oneutils.is_valid_attr(args, 'input_path'):
         cmd.append('--input_path')
         cmd.append(os.path.expanduser(getattr(args, 'input_path')))
     # output_path
-    if _utils._is_valid_attr(args, 'output_path'):
+    if oneutils.is_valid_attr(args, 'output_path'):
         cmd.append('--output_path')
         cmd.append(os.path.expanduser(output_path))
     # output_arrays
-    if _utils._is_valid_attr(args, 'output_arrays'):
+    if oneutils.is_valid_attr(args, 'output_arrays'):
         cmd.append('--output_arrays')
         cmd.append(getattr(args, 'output_arrays'))
 
@@ -147,7 +147,7 @@ def _convert(args):
         f.write((' '.join(generate_bcq_metadata_cmd) + '\n').encode())
 
         # generate BCQ information metadata
-        _utils._run(generate_bcq_metadata_cmd, logfile=f)
+        oneutils.run(generate_bcq_metadata_cmd, logfile=f)
 
         # get output_arrays with BCQ
         bcq_output_arrays = _bcq_info_gen.get_bcq_output_arrays(
@@ -171,7 +171,7 @@ def _convert(args):
         f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode())
 
         # convert tf to tflite
-        _utils._run(tf2tfliteV2_cmd, logfile=f)
+        oneutils.run(tf2tfliteV2_cmd, logfile=f)
 
         # make a command to convert from tflite to circle
         tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
@@ -182,7 +182,7 @@ def _convert(args):
         f.write((' '.join(tflite2circle_cmd) + '\n').encode())
 
         # convert tflite to circle
-        _utils._run(tflite2circle_cmd, logfile=f)
+        oneutils.run(tflite2circle_cmd, logfile=f)
 
 
 def main():
@@ -191,7 +191,7 @@ def main():
     args = _parse_arg(parser)
 
     # parse configuration file
-    _utils._parse_cfg(args, 'one-import-bcq')
+    oneutils.parse_cfg(args.config, 'one-import-bcq', args)
 
     # verify arguments
     _verify_arg(parser, args)
@@ -201,4 +201,4 @@ def main():
 
 
 if __name__ == '__main__':
-    _utils._safemain(main, __file__)
+    oneutils.safemain(main, __file__)
index ad19c2f..24edea6 100644 (file)
@@ -35,7 +35,7 @@ except ImportError:
     _onnx_legalizer_enabled = False
 
 import onelib.make_cmd as _make_cmd
-import utils as _utils
+import onelib.utils as oneutils
 
 # TODO Find better way to suppress trackback on error
 sys.tracebacklimit = 0
@@ -49,7 +49,7 @@ def _get_parser():
     parser = argparse.ArgumentParser(
         description='command line tool to convert ONNX to circle')
 
-    _utils._add_default_arg(parser)
+    oneutils.add_default_arg(parser)
 
     ## tf2tfliteV2 arguments
     tf2tfliteV2_group = parser.add_argument_group('converter arguments')
@@ -105,9 +105,9 @@ def _verify_arg(parser, args):
     """verify given arguments"""
     # check if required arguments is given
     missing = []
-    if not _utils._is_valid_attr(args, 'input_path'):
+    if not oneutils.is_valid_attr(args, 'input_path'):
         missing.append('-i/--input_path')
-    if not _utils._is_valid_attr(args, 'output_path'):
+    if not oneutils.is_valid_attr(args, 'output_path'):
         missing.append('-o/--output_path')
     if len(missing):
         parser.error('the following arguments are required: ' + ' '.join(missing))
@@ -117,7 +117,7 @@ def _parse_arg(parser):
     args = parser.parse_args()
     # print version
     if args.version:
-        _utils._print_version_and_exit(__file__)
+        oneutils.print_version_and_exit(__file__)
 
     return args
 
@@ -203,18 +203,18 @@ def _convert(args):
 
     with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
         # save intermediate
-        if _utils._is_valid_attr(args, 'save_intermediate'):
+        if oneutils.is_valid_attr(args, 'save_intermediate'):
             tmpdir = os.path.dirname(logfile_path)
         # convert onnx to tf saved model
         onnx_model = onnx.load(getattr(args, 'input_path'))
         if _onnx_legalizer_enabled:
             options = onnx_legalizer.LegalizeOptions
-            options.unroll_rnn = _utils._is_valid_attr(args, 'unroll_rnn')
-            options.unroll_lstm = _utils._is_valid_attr(args, 'unroll_lstm')
+            options.unroll_rnn = oneutils.is_valid_attr(args, 'unroll_rnn')
+            options.unroll_lstm = oneutils.is_valid_attr(args, 'unroll_lstm')
             onnx_legalizer.legalize(onnx_model, options)
-        if _utils._is_valid_attr(args, 'keep_io_order'):
+        if oneutils.is_valid_attr(args, 'keep_io_order'):
             _remap_io_names(onnx_model)
-            if _utils._is_valid_attr(args, 'save_intermediate'):
+            if oneutils.is_valid_attr(args, 'save_intermediate'):
                 basename = os.path.basename(getattr(args, 'input_path'))
                 fixed_path = os.path.join(tmpdir,
                                           os.path.splitext(basename)[0] + '~.onnx')
@@ -238,7 +238,7 @@ def _convert(args):
         f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode())
 
         # convert tf to tflite
-        _utils._run(tf2tfliteV2_cmd, logfile=f)
+        oneutils.run(tf2tfliteV2_cmd, logfile=f)
 
         # make a command to convert from tflite to circle
         tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
@@ -249,7 +249,7 @@ def _convert(args):
         f.write((' '.join(tflite2circle_cmd) + '\n').encode())
 
         # convert tflite to circle
-        _utils._run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
+        oneutils.run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
 
 
 def main():
@@ -258,7 +258,7 @@ def main():
     args = _parse_arg(parser)
 
     # parse configuration file
-    _utils._parse_cfg(args, 'one-import-onnx')
+    oneutils.parse_cfg(args.config, 'one-import-onnx', args)
 
     # verify arguments
     _verify_arg(parser, args)
@@ -268,4 +268,4 @@ def main():
 
 
 if __name__ == '__main__':
-    _utils._safemain(main, __file__)
+    oneutils.safemain(main, __file__)
index 7f39e61..3a61d22 100644 (file)
@@ -33,21 +33,21 @@ import zipfile
 
 import onnx_legalizer
 import onelib.make_cmd as _make_cmd
-import utils as _utils
+import onelib.utils as oneutils
 
 # TODO Find better way to suppress trackback on error
 sys.tracebacklimit = 0
 
 
 def get_driver_spec():
-    return ("one-import-pytorch", _utils.DriverType.IMPORTER)
+    return ("one-import-pytorch", oneutils.DriverType.IMPORTER)
 
 
 def _get_parser():
     parser = argparse.ArgumentParser(
         description='command line tool to convert PyTorch to Circle')
 
-    _utils._add_default_arg(parser)
+    oneutils.add_default_arg(parser)
 
     ## converter arguments
     converter_group = parser.add_argument_group('converter arguments')
@@ -96,13 +96,13 @@ def _verify_arg(parser, args):
     """verify given arguments"""
     # check if required arguments is given
     missing = []
-    if not _utils._is_valid_attr(args, 'input_path'):
+    if not oneutils.is_valid_attr(args, 'input_path'):
         missing.append('-i/--input_path')
-    if not _utils._is_valid_attr(args, 'output_path'):
+    if not oneutils.is_valid_attr(args, 'output_path'):
         missing.append('-o/--output_path')
-    if not _utils._is_valid_attr(args, 'input_shapes'):
+    if not oneutils.is_valid_attr(args, 'input_shapes'):
         missing.append('-s/--input_shapes')
-    if not _utils._is_valid_attr(args, 'input_types'):
+    if not oneutils.is_valid_attr(args, 'input_types'):
         missing.append('-t/--input_types')
 
     if len(missing):
@@ -113,7 +113,7 @@ def _parse_arg(parser):
     args = parser.parse_args()
     # print version
     if args.version:
-        _utils._print_version_and_exit(__file__)
+        oneutils.print_version_and_exit(__file__)
 
     return args
 
@@ -255,7 +255,7 @@ def _convert(args):
     logfile_path = os.path.realpath(args.output_path) + '.log'
     with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
         # save intermediate
-        if _utils._is_valid_attr(args, 'save_intermediate'):
+        if oneutils.is_valid_attr(args, 'save_intermediate'):
             tmpdir = os.path.dirname(logfile_path)
         # convert pytorch to onnx model
         input_path = getattr(args, 'input_path')
@@ -310,8 +310,8 @@ def _convert(args):
         onnx_model = onnx.load(onnx_output_path)
 
         options = onnx_legalizer.LegalizeOptions()
-        options.unroll_rnn = _utils._is_valid_attr(args, 'unroll_rnn')
-        options.unroll_lstm = _utils._is_valid_attr(args, 'unroll_lstm')
+        options.unroll_rnn = oneutils.is_valid_attr(args, 'unroll_rnn')
+        options.unroll_lstm = oneutils.is_valid_attr(args, 'unroll_lstm')
         onnx_legalizer.legalize(onnx_model, options)
 
         tf_savedmodel = onnx_tf.backend.prepare(onnx_model)
@@ -334,7 +334,7 @@ def _convert(args):
         f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode())
 
         # convert tf to tflite
-        _utils._run(tf2tfliteV2_cmd, logfile=f)
+        oneutils.run(tf2tfliteV2_cmd, logfile=f)
 
         # make a command to convert from tflite to circle
         tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
@@ -345,7 +345,7 @@ def _convert(args):
         f.write((' '.join(tflite2circle_cmd) + '\n').encode())
 
         # convert tflite to circle
-        _utils._run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
+        oneutils.run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
 
 
 def main():
@@ -354,7 +354,7 @@ def main():
     args = _parse_arg(parser)
 
     # parse configuration file
-    _utils._parse_cfg(args, 'one-import-pytorch')
+    oneutils.parse_cfg(args.config, 'one-import-pytorch', args)
 
     # verify arguments
     _verify_arg(parser, args)
@@ -364,4 +364,4 @@ def main():
 
 
 if __name__ == '__main__':
-    _utils._safemain(main, __file__)
+    oneutils.safemain(main, __file__)
index 6623fa6..75d1968 100644 (file)
@@ -24,7 +24,7 @@ import os
 import tempfile
 
 import onelib.make_cmd as _make_cmd
-import utils as _utils
+import onelib.utils as oneutils
 
 
 def get_driver_cfg_section():
@@ -35,7 +35,7 @@ def _get_parser():
     parser = argparse.ArgumentParser(
         description='command line tool to convert TensorFlow to circle')
 
-    _utils._add_default_arg(parser)
+    oneutils.add_default_arg(parser)
 
     ## tf2tfliteV2 arguments
     tf2tfliteV2_group = parser.add_argument_group('converter arguments')
@@ -111,6 +111,12 @@ def _get_parser():
         action='store_true',
         help='Save intermediate files to output folder')
 
+    # experimental options
+    parser.add_argument(
+        '--experimental_disable_batchmatmul_unfold',
+        action='store_true',
+        help='Experimental disable BatchMatMul unfold')
+
     return parser
 
 
@@ -118,9 +124,9 @@ def _verify_arg(parser, args):
     """verify given arguments"""
     # check if required arguments is given
     missing = []
-    if not _utils._is_valid_attr(args, 'input_path'):
+    if not oneutils.is_valid_attr(args, 'input_path'):
         missing.append('-i/--input_path')
-    if not _utils._is_valid_attr(args, 'output_path'):
+    if not oneutils.is_valid_attr(args, 'output_path'):
         missing.append('-o/--output_path')
     if len(missing):
         parser.error('the following arguments are required: ' + ' '.join(missing))
@@ -130,7 +136,7 @@ def _parse_arg(parser):
     args = parser.parse_args()
     # print version
     if args.version:
-        _utils._print_version_and_exit(__file__)
+        oneutils.print_version_and_exit(__file__)
 
     return args
 
@@ -142,7 +148,7 @@ def _convert(args):
 
     with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
         # save intermediate
-        if _utils._is_valid_attr(args, 'save_intermediate'):
+        if oneutils.is_valid_attr(args, 'save_intermediate'):
             tmpdir = os.path.dirname(logfile_path)
         # make a command to convert from tf to tflite
         tf2tfliteV2_path = os.path.join(dir_path, 'tf2tfliteV2.py')
@@ -156,7 +162,7 @@ def _convert(args):
         f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode())
 
         # convert tf to tflite
-        _utils._run(tf2tfliteV2_cmd, logfile=f)
+        oneutils.run(tf2tfliteV2_cmd, logfile=f)
 
         # make a command to convert from tflite to circle
         tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
@@ -167,7 +173,7 @@ def _convert(args):
         f.write((' '.join(tflite2circle_cmd) + '\n').encode())
 
         # convert tflite to circle
-        _utils._run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
+        oneutils.run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
 
 
 def main():
@@ -176,7 +182,7 @@ def main():
     args = _parse_arg(parser)
 
     # parse configuration file
-    _utils._parse_cfg(args, 'one-import-tf')
+    oneutils.parse_cfg(args.config, 'one-import-tf', args)
 
     # verify arguments
     _verify_arg(parser, args)
@@ -186,4 +192,4 @@ def main():
 
 
 if __name__ == '__main__':
-    _utils._safemain(main, __file__)
+    oneutils.safemain(main, __file__)
index 3d96b11..8eba46d 100644 (file)
@@ -24,7 +24,7 @@ import os
 import sys
 
 import onelib.make_cmd as _make_cmd
-import utils as _utils
+import onelib.utils as oneutils
 
 # TODO Find better way to suppress trackback on error
 sys.tracebacklimit = 0
@@ -38,7 +38,7 @@ def _get_parser():
     parser = argparse.ArgumentParser(
         description='command line tool to convert TensorFlow lite to circle')
 
-    _utils._add_default_arg(parser)
+    oneutils.add_default_arg(parser)
 
     ## tflite2circle arguments
     tflite2circle_group = parser.add_argument_group('converter arguments')
@@ -56,9 +56,9 @@ def _verify_arg(parser, args):
     """verify given arguments"""
     # check if required arguments is given
     missing = []
-    if not _utils._is_valid_attr(args, 'input_path'):
+    if not oneutils.is_valid_attr(args, 'input_path'):
         missing.append('-i/--input_path')
-    if not _utils._is_valid_attr(args, 'output_path'):
+    if not oneutils.is_valid_attr(args, 'output_path'):
         missing.append('-o/--output_path')
     if len(missing):
         parser.error('the following arguments are required: ' + ' '.join(missing))
@@ -68,7 +68,7 @@ def _parse_arg(parser):
     args = parser.parse_args()
     # print version
     if args.version:
-        _utils._print_version_and_exit(__file__)
+        oneutils.print_version_and_exit(__file__)
 
     return args
 
@@ -88,7 +88,7 @@ def _convert(args):
         f.write((' '.join(tflite2circle_cmd) + '\n').encode())
 
         # convert tflite to circle
-        _utils._run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
+        oneutils.run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
 
 
 def main():
@@ -97,7 +97,7 @@ def main():
     args = _parse_arg(parser)
 
     # parse configuration file
-    _utils._parse_cfg(args, 'one-import-tflite')
+    oneutils.parse_cfg(args.config, 'one-import-tflite', args)
 
     # verify arguments
     _verify_arg(parser, args)
@@ -107,4 +107,4 @@ def main():
 
 
 if __name__ == '__main__':
-    _utils._safemain(main, __file__)
+    oneutils.safemain(main, __file__)
index c7fcd8a..125db3e 100644 (file)
@@ -27,51 +27,12 @@ import ntpath
 import os
 import sys
 
-import utils as _utils
+import onelib.utils as oneutils
 
 # TODO Find better way to suppress trackback on error
 sys.tracebacklimit = 0
 
 
-def _get_backends_list():
-    """
-    [one hierarchy]
-    one
-    ├── backends
-    ├── bin
-    ├── doc
-    ├── include
-    ├── lib
-    ├── optimization
-    └── test
-
-    The list where `one-infer` finds its backends
-    - `bin` folder where `one-infer` exists
-    - `backends` folder
-
-    NOTE If there are backends of the same name in different places,
-     the closer to the top in the list, the higher the priority.
-    """
-    dir_path = os.path.dirname(os.path.realpath(__file__))
-    backend_set = set()
-
-    # bin folder
-    files = [f for f in glob.glob(dir_path + '/*-infer')]
-    # backends folder
-    files += [f for f in glob.glob(dir_path + '/../backends/**/*-infer', recursive=True)]
-    # TODO find backends in `$PATH`
-
-    backends_list = []
-    for cand in files:
-        base = ntpath.basename(cand)
-        if (not base in backend_set) and os.path.isfile(cand) and os.access(
-                cand, os.X_OK):
-            backend_set.add(base)
-            backends_list.append(cand)
-
-    return backends_list
-
-
 def _search_backend_driver(driver):
     """
     [one hierarchy]
@@ -108,28 +69,25 @@ def _search_backend_driver(driver):
     return None
 
 
-def _get_parser(backends_list):
-    infer_usage = 'one-infer [-h] [-v] [-C CONFIG] [-d DRIVER | -b BACKEND] [--post-process POST_PROCESS] [--] [COMMANDS FOR BACKEND DRIVER]'
+def _get_parser():
+    infer_usage = 'one-infer [-h] [-v] [-C CONFIG] [-d DRIVER] [--post-process POST_PROCESS] [--] [COMMANDS FOR BACKEND DRIVER]'
+    infer_detail = """
+one-infer provides post-processing after invoking the backend inference driver.
+Pass a python script and its arguments to the '--post-process' argument as below:
+one-infer -d dummy-infer --post-process "script.py arg1 arg2" -- [arguments for dummy-infer]
+"""
     parser = argparse.ArgumentParser(
-        description='command line tool to infer model', usage=infer_usage)
+        description='command line tool to infer model',
+        usage=infer_usage,
+        epilog=infer_detail,
+        formatter_class=argparse.RawTextHelpFormatter)
 
-    _utils._add_default_arg(parser)
+    oneutils.add_default_arg(parser)
 
-    # TODO: add tflite/onnx-infer driver to helper message when it is implemented
     driver_help_message = 'backend inference driver name to execute'
     parser.add_argument('-d', '--driver', type=str, help=driver_help_message)
 
-    # get backend list in the directory
-    backends_name = [ntpath.basename(f) for f in backends_list]
-    if not backends_name:
-        backends_name_message = '(There is no available backend drivers)'
-    else:
-        backends_name_message = '(available backend drivers: ' + ', '.join(
-            backends_name) + ')'
-    backend_help_message = 'backend name to use ' + backends_name_message
-    parser.add_argument('-b', '--backend', type=str, help=backend_help_message)
-
-    post_process_help_message = 'post processing script to convert I/O data to standard format'
+    post_process_help_message = 'post processing python script and arguments which can be used to convert I/O data to standard format'
     parser.add_argument('--post-process', type=str, help=post_process_help_message)
 
     return parser
@@ -137,15 +95,9 @@ def _get_parser(backends_list):
 
 def _verify_arg(parser, args):
     """verify given arguments"""
-    # `-d/--driver` and `-b/--backend` are mutually exclusive arguments.
-    if _utils._is_valid_attr(args, 'driver') and _utils._is_valid_attr(args, 'backend'):
-        parser.error(
-            '-d and -b options are mutually exclusive. Please use only one of them')
-
     missing = []
-    if not _utils._is_valid_attr(args, 'driver') and not _utils._is_valid_attr(
-            args, 'backend'):
-        missing.append('{-d/--driver | -b/--backend}')
+    if not oneutils.is_valid_attr(args, 'driver'):
+        missing.append('-d/--driver')
     if len(missing):
         parser.error('the following arguments are required: ' + ' '.join(missing))
 
@@ -159,66 +111,58 @@ def _parse_arg(parser):
     # split by '--'
     args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x]
 
-    # one-infer [-h] [-v] [-C CONFIG] [-d DRIVER] [-b BACKEND] [--post-process POST_PROCESS] -- [COMMANDS FOR BACKEND DRIVER]
+    # one-infer [-h] [-v] [-C CONFIG] [-d DRIVER] [--post-process POST_PROCESS] -- [COMMANDS FOR BACKEND DRIVER]
     if len(args):
         infer_args = args[0]
         infer_args = parser.parse_args(infer_args)
         backend_args = backend_args if len(args) < 2 else args[1]
+    else:
+        infer_args = parser.parse_args(infer_args)
     # print version
     if len(args) and infer_args.version:
-        _utils._print_version_and_exit(__file__)
+        oneutils.print_version_and_exit(__file__)
 
     return infer_args, backend_args
 
 
-def _get_executable(args, backends_list):
-    driver = _utils._is_valid_attr(args, 'driver')
-    if driver:
-        executable = _search_backend_driver(driver)
-        if executable:
-            return executable
-        else:
-            raise FileNotFoundError(driver + ' not found')
+def _get_executable(args):
+    driver = oneutils.is_valid_attr(args, 'driver')
 
-    if _utils._is_valid_attr(args, 'backend'):
-        backend_base = getattr(args, 'backend') + '-infer'
-        for cand in backends_list:
-            if ntpath.basename(cand) == backend_base:
-                return cand
-        raise FileNotFoundError(backend_base + ' not found')
+    executable = _search_backend_driver(driver)
+    if executable:
+        return executable
+    else:
+        raise FileNotFoundError(driver + ' not found')
 
 
 def main():
-    # get backend list
-    backends_list = _get_backends_list()
-
     # parse arguments
-    parser = _get_parser(backends_list)
+    parser = _get_parser()
     args, backend_args = _parse_arg(parser)
 
     # parse configuration file
-    _utils._parse_cfg(args, 'one-infer')
+    oneutils.parse_cfg(args.config, 'one-infer', args)
 
     # verify arguments
     _verify_arg(parser, args)
 
     # make a command to run given backend driver
-    driver_path = _get_executable(args, backends_list)
+    driver_path = _get_executable(args)
     infer_cmd = [driver_path] + backend_args
-    if _utils._is_valid_attr(args, 'command'):
+    if oneutils.is_valid_attr(args, 'command'):
         infer_cmd += getattr(args, 'command').split()
 
     # run backend driver
-    _utils._run(infer_cmd, err_prefix=ntpath.basename(driver_path))
+    oneutils.run(infer_cmd, err_prefix=ntpath.basename(driver_path))
 
     # run post process script if it's given
-    if _utils._is_valid_attr(args, 'post_process'):
+    if oneutils.is_valid_attr(args, 'post_process'):
         # NOTE: the given python script will be executed by venv of ONE
         python_path = sys.executable
         post_process_command = [python_path] + getattr(args,
                                                        'post_process').strip().split(' ')
-        _utils._run(post_process_command, err_prefix='one-infer')
+        oneutils.run(post_process_command, err_prefix='one-infer')
 
 
 if __name__ == '__main__':
-    _utils._safemain(main, __file__)
+    oneutils.safemain(main, __file__)
index 04c4534..299255c 100644 (file)
@@ -28,12 +28,44 @@ import os
 import sys
 
 import configparser
-import utils as _utils
+import onelib.utils as oneutils
 
 # TODO Find better way to suppress trackback on error
 sys.tracebacklimit = 0
 
 
+class InputOutputPath:
+    '''
+    Class that remembers the input circle file and output circle file of section k.
+
+    After calling enter_new_section(),
+    the output path of section k will be used as the input path of section k+1.
+    '''
+
+    def __init__(self, initial_input_path: str):
+        self._first_step = True
+        self._input_path = initial_input_path
+        self._output_path = ''
+
+    def enter_new_section(self, section_output_path: str):
+        '''
+        Call this when starting a section
+        '''
+        if self._first_step == True:
+            self._output_path = section_output_path
+        else:
+            self._input_path = self._output_path
+            self._output_path = section_output_path
+
+        self._first_step = False
+
+    def input_path(self):
+        return self._input_path
+
+    def output_path(self):
+        return self._output_path
+
+
 class CommentableConfigParser(configparser.ConfigParser):
     """
     ConfigParser where comment can be stored
@@ -120,7 +152,7 @@ def _get_parser(backends_list):
         'Currently tflite and onnx models are supported',
         usage=init_usage)
 
-    _utils._add_default_arg_no_CS(parser)
+    oneutils.add_default_arg_no_CS(parser)
 
     parser.add_argument(
         '-i', '--input_path', type=str, help='full filepath of the input model file')
@@ -165,23 +197,23 @@ def _get_parser(backends_list):
 def _verify_arg(parser, args):
     # check if required arguments is given
     missing = []
-    if not _utils._is_valid_attr(args, 'input_path'):
+    if not oneutils.is_valid_attr(args, 'input_path'):
         missing.append('-i/--input_path')
-    if not _utils._is_valid_attr(args, 'output_path'):
+    if not oneutils.is_valid_attr(args, 'output_path'):
         missing.append('-o/--output_path')
-    if not _utils._is_valid_attr(args, 'backend'):
+    if not oneutils.is_valid_attr(args, 'backend'):
         missing.append('-b/--backend')
 
-    if _utils._is_valid_attr(args, 'model_type'):
+    if oneutils.is_valid_attr(args, 'model_type'):
         # TODO Support model types other than onnx and tflite (e.g., TF)
         if getattr(args, 'model_type') not in ['onnx', 'tflite']:
             parser.error('Allowed value for --model_type: "onnx" or "tflite"')
 
-    if _utils._is_valid_attr(args, 'nchw_to_nhwc_input_shape'):
-        if not _utils._is_valid_attr(args, 'convert_nchw_to_nhwc'):
+    if oneutils.is_valid_attr(args, 'nchw_to_nhwc_input_shape'):
+        if not oneutils.is_valid_attr(args, 'convert_nchw_to_nhwc'):
             missing.append('--convert_nchw_to_nhwc')
-    if _utils._is_valid_attr(args, 'nchw_to_nhwc_output_shape'):
-        if not _utils._is_valid_attr(args, 'convert_nchw_to_nhwc'):
+    if oneutils.is_valid_attr(args, 'nchw_to_nhwc_output_shape'):
+        if not oneutils.is_valid_attr(args, 'convert_nchw_to_nhwc'):
             missing.append('--convert_nchw_to_nhwc')
 
     if len(missing):
@@ -204,13 +236,13 @@ def _parse_arg(parser):
         backend_args = backend_args if len(args) < 2 else args[1]
     # print version
     if len(args) and init_args.version:
-        _utils._print_version_and_exit(__file__)
+        oneutils.print_version_and_exit(__file__)
 
     return init_args, backend_args
 
 
 def _get_executable(args, backends_list):
-    if _utils._is_valid_attr(args, 'backend'):
+    if oneutils.is_valid_attr(args, 'backend'):
         backend_base = getattr(args, 'backend') + '-init'
         for cand in backends_list:
             if ntpath.basename(cand) == backend_base:
@@ -219,24 +251,65 @@ def _get_executable(args, backends_list):
 
 
 # TODO Support workflow format (https://github.com/Samsung/ONE/pull/9354)
-def _generate():
+def _generate(args, model_type: str, inout_path: InputOutputPath):
     # generate cfg file
     config = CommentableConfigParser()
+    model_dir = os.path.dirname(args.input_path)
+    model_name = os.path.basename(args.input_path).split('.')[0]
+
+    def _assert_section(section: str):
+        if not config.has_section(section):
+            raise RuntimeError(f'Cannot find section: {section}')
 
     def _add_onecc_sections():
-        pass  # NYI
+        '''
+        This adds all sections
+        '''
+        config.add_section('onecc')
+        sections = [
+            f'one-import-{model_type}', 'one-optimize', 'one-quantize', 'one-codegen'
+        ]
+
+        for section in sections:
+            config['onecc'][section] = 'True'
+            # add an empty section in preparation for the next procedure
+            config.add_section(section)
 
     def _gen_import():
-        pass  # NYI
+        section = f'one-import-{model_type}'
+        _assert_section(section)
+
+        output_path = os.path.join(model_dir, f'{model_name}.circle')
+        inout_path.enter_new_section(section_output_path=output_path)
+        config[section]['input_path'] = inout_path.input_path()
+        config[section]['output_path'] = inout_path.output_path()
 
     def _gen_optimize():
-        pass  # NYI
+        section = 'one-optimize'
+        _assert_section(section)
+
+        output_path = os.path.join(model_dir, f'{model_name}.opt.circle')
+        inout_path.enter_new_section(section_output_path=output_path)
+        config[section]['input_path'] = inout_path.input_path()
+        config[section]['output_path'] = inout_path.output_path()
+
+        # TODO Add optimization options
 
     def _gen_quantize():
-        pass  # NYI
+        section = 'one-quantize'
+        _assert_section(section)
+
+        output_path = os.path.join(model_dir, f'{model_name}.q.circle')
+        inout_path.enter_new_section(section_output_path=output_path)
+        config[section]['input_path'] = inout_path.input_path()
+        config[section]['output_path'] = inout_path.output_path()
 
     def _gen_codegen():
-        pass  # NYI
+        section = 'one-codegen'
+        _assert_section(section)
+
+        # [backend]-init must provide default value for 'command'
+        config[section]['backend'] = args.backend
 
     #
     # NYI: one-profile, one-partition, one-pack, one-infer
@@ -253,6 +326,26 @@ def _generate():
         config.write(f)
 
 
+def _get_model_type(parser, args):
+    if oneutils.is_valid_attr(args, 'model_type'):
+        return args.model_type
+
+    if oneutils.is_valid_attr(args, 'input_path'):
+        _, ext = os.path.splitext(args.input_path)
+
+        # ext would be, e.g., '.tflite' or '.onnx'.
+        # Note: when args.input_path does not have an extension, e.g., '/home/foo'
+        # ext after os.path.splitext() is '' and ''[1:] is still ''.
+        # TODO support tensorflow model
+        ext = ext[1:]
+        if ext in ["tflite", "onnx"]:
+            return ext
+        else:
+            parser.error('the following file extensions are supported: ".onnx", ".tflite"')
+
+    parser.error(f'the following argument is required: --input_path')
+
+
 def main():
     # get backend list
     backends_list = _get_backends_list()
@@ -264,17 +357,19 @@ def main():
     # verify arguments
     _verify_arg(parser, args)
 
+    model_type = _get_model_type(parser, args)
+    inout_path = InputOutputPath(args.input_path)
+    _generate(args, model_type, inout_path)
+
     # make a command to run given backend driver
     driver_path = _get_executable(args, backends_list)
     init_cmd = [driver_path] + backend_args
 
     # run backend driver
-    _utils._run(init_cmd, err_prefix=ntpath.basename(driver_path))
-
-    #TODO generate cfg file
+    oneutils.run(init_cmd, err_prefix=ntpath.basename(driver_path))
 
     raise NotImplementedError("NYI")
 
 
 if __name__ == '__main__':
-    _utils._safemain(main, __file__)
+    oneutils.safemain(main, __file__)
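
The InputOutputPath helper added to one-init above chains each section's output path into the next section's input path. A short usage sketch, assuming the InputOutputPath class from the hunk above is in scope; the file names are made up for illustration:

# Sketch only: how one-init's InputOutputPath chains section paths.
io = InputOutputPath('model.onnx')

io.enter_new_section('model.circle')        # one-import-onnx section
assert (io.input_path(), io.output_path()) == ('model.onnx', 'model.circle')

io.enter_new_section('model.opt.circle')    # one-optimize section
assert (io.input_path(), io.output_path()) == ('model.circle', 'model.opt.circle')

io.enter_new_section('model.q.circle')      # one-quantize section
assert (io.input_path(), io.output_path()) == ('model.opt.circle', 'model.q.circle')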
index 481fc84..51668a8 100644 (file)
@@ -25,7 +25,7 @@ import sys
 
 import onelib.constant as _constant
 import onelib.make_cmd as _make_cmd
-import utils as _utils
+import onelib.utils as oneutils
 
 # TODO Find better way to suppress trackback on error
 sys.tracebacklimit = 0
@@ -35,7 +35,7 @@ def _get_parser():
     parser = argparse.ArgumentParser(
         description='command line tool to optimize circle model')
 
-    _utils._add_default_arg(parser)
+    oneutils.add_default_arg(parser)
 
     ## utility arguments
     utility_group = parser.add_argument_group('arguments for utility')
@@ -75,9 +75,9 @@ def _verify_arg(parser, args):
     """verify given arguments"""
     # check if required arguments is given
     missing = []
-    if not _utils._is_valid_attr(args, 'input_path'):
+    if not oneutils.is_valid_attr(args, 'input_path'):
         missing.append('-i/--input_path')
-    if not _utils._is_valid_attr(args, 'output_path'):
+    if not oneutils.is_valid_attr(args, 'output_path'):
         missing.append('-o/--output_path')
     if len(missing):
         parser.error('the following arguments are required: ' + ' '.join(missing))
@@ -95,7 +95,7 @@ def _parse_arg(parser):
     args = parser.parse_args()
     # print version
     if args.version:
-        _utils._print_version_and_exit(__file__)
+        oneutils.print_version_and_exit(__file__)
 
     return args
 
@@ -113,34 +113,54 @@ def _optimize(args):
                                                              getattr(args, 'output_path'))
 
         # verbose
-        if _utils._is_valid_attr(args, 'verbose'):
+        if oneutils.is_valid_attr(args, 'verbose'):
             circle2circle_cmd.append('--verbose')
-        if _utils._is_valid_attr(args, 'change_outputs'):
+        if oneutils.is_valid_attr(args, 'change_outputs'):
             circle2circle_cmd.append('--change_outputs')
             circle2circle_cmd.append(getattr(args, 'change_outputs'))
 
         f.write((' '.join(circle2circle_cmd) + '\n').encode())
 
         # optimize
-        _utils._run(circle2circle_cmd, err_prefix="circle2circle", logfile=f)
+        oneutils.run(circle2circle_cmd, err_prefix="circle2circle", logfile=f)
 
 
 def _parse_opt(args):
-    if _utils._is_valid_attr(args, 'O'):
+    if oneutils.is_valid_attr(args, 'O'):
         opt_name_path_dic = dict(
-            zip(_utils._get_optimization_list(get_name=True),
-                _utils._get_optimization_list()))
+            zip(oneutils.get_optimization_list(get_name=True),
+                oneutils.get_optimization_list()))
         config_path = opt_name_path_dic['O' + getattr(args, 'O')]
-        _utils._parse_cfg_and_overwrite(config_path, 'one-optimize', args)
+        # a group option does not overwrite existing args
+        oneutils.parse_cfg(config_path, 'one-optimize', args)
 
 
+# There are several ways the optimization options can be given:
+#  - Individual option
+#    1. From the command line
+#    2. From the cfg file
+#  - Group option
+#    3. From the command line
+#
+# Since these options can be given simultaneously, their priority is:
+#  1. Individual option from the command line
+#  2. Individual option from the cfg file
+#  3. Group option from the command line
+#
+# To honor this priority, options with higher priority must be parsed first.
+#
+# DO NOT MODIFY the order of the function calls below.
+#
+# NOTE. All optimization options are assumed to be 'store_true' options.
+# NOTE. A group option from the cfg file (`include` in the `[onecc]` section) is
+#       passed as a command line argument.
 def main():
     # parse arguments
     parser = _get_parser()
     args = _parse_arg(parser)
 
     # parse configuration file
-    _utils._parse_cfg(args, 'one-optimize')
+    oneutils.parse_cfg(args.config, 'one-optimize', args)
 
     # parse optimization file
     # NOTE if there is a `one-optimize` section in above configuration file as well,
@@ -155,4 +175,4 @@ def main():
 
 
 if __name__ == '__main__':
-    _utils._safemain(main, __file__)
+    oneutils.safemain(main, __file__)
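
A minimal sketch of the priority rule described in the comment block above, with a simplified stand-in for oneutils.parse_cfg (the real function reads a configparser section): because a later source never overwrites an attribute that is already set, parsing the higher-priority source first is enough to enforce the order.

import argparse

def parse_like_cfg(values, args):
    # stand-in for oneutils.parse_cfg: keep any value that is already set
    for key, value in values.items():
        if getattr(args, key, None):
            continue
        setattr(args, key, value)

args = argparse.Namespace(fuse_instnorm=True)                        # 1. command line
parse_like_cfg({'fuse_instnorm': False, 'fold_cast': True}, args)    # 2. cfg file
parse_like_cfg({'fold_cast': False, 'fold_dequantize': True}, args)  # 3. group option
print(args.fuse_instnorm, args.fold_cast, args.fold_dequantize)      # True True True
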
index 5cab7c7..db42466 100644 (file)
@@ -23,7 +23,7 @@ import argparse
 import os
 import sys
 
-import utils as _utils
+import onelib.utils as oneutils
 
 # TODO Find better way to suppress traceback on error
 sys.tracebacklimit = 0
@@ -33,7 +33,7 @@ def _get_parser():
     parser = argparse.ArgumentParser(
         description='command line tool to package circle and metadata into nnpackage')
 
-    _utils._add_default_arg(parser)
+    oneutils.add_default_arg(parser)
 
     ## model2nnpkg arguments
     model2nnpkg_group = parser.add_argument_group('arguments for packaging')
@@ -51,9 +51,9 @@ def _verify_arg(parser, args):
     """verify given arguments"""
     # check if required arguments are given
     missing = []
-    if not _utils._is_valid_attr(args, 'input_path'):
+    if not oneutils.is_valid_attr(args, 'input_path'):
         missing.append('-i/--input_path')
-    if not _utils._is_valid_attr(args, 'output_path'):
+    if not oneutils.is_valid_attr(args, 'output_path'):
         missing.append('-o/--output_path')
     if len(missing):
         parser.error('the following arguments are required: ' + ' '.join(missing))
@@ -63,7 +63,7 @@ def _parse_arg(parser):
     args = parser.parse_args()
     # print version
     if args.version:
-        _utils._print_version_and_exit(__file__)
+        oneutils.print_version_and_exit(__file__)
 
     return args
 
@@ -73,6 +73,7 @@ def _make_model2nnpkg_cmd(driver_path, input_path, output_path):
     cmd = [os.path.expanduser(driver_path)]
     cmd.append('-o')
     cmd.append(os.path.expanduser(output_path))
+    cmd.append('-m')
     cmd.append(os.path.expanduser(input_path))
     return cmd
 
@@ -84,7 +85,7 @@ def _pack(args):
 
     with open(logfile_path, 'wb') as f:
         # make a command to package circle model and metadata into nnpackage
-        model2nnpkg_path = os.path.join(dir_path, 'model2nnpkg.sh')
+        model2nnpkg_path = os.path.join(dir_path, 'model2nnpkg')
         model2nnpkg_cmd = _make_model2nnpkg_cmd(model2nnpkg_path,
                                                 getattr(args, 'input_path'),
                                                 getattr(args, 'output_path'))
@@ -92,7 +93,7 @@ def _pack(args):
         f.write((' '.join(model2nnpkg_cmd) + '\n').encode())
 
         # run model2nnpkg
-        _utils._run(model2nnpkg_cmd, err_prefix="model2nnpkg.sh", logfile=f)
+        oneutils.run(model2nnpkg_cmd, err_prefix="model2nnpkg", logfile=f)
 
 
 def main():
@@ -101,7 +102,7 @@ def main():
     args = _parse_arg(parser)
 
     # parse configuration file
-    _utils._parse_cfg(args, 'one-pack')
+    oneutils.parse_cfg(args.config, 'one-pack', args)
 
     # verify arguments
     _verify_arg(parser, args)
@@ -111,4 +112,4 @@ def main():
 
 
 if __name__ == '__main__':
-    _utils._safemain(main, __file__)
+    oneutils.safemain(main, __file__)
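
A small sketch, with hypothetical file names, of the command list built by the updated _make_model2nnpkg_cmd above: the input model is now passed with '-m', and the driver is the model2nnpkg binary rather than the old model2nnpkg.sh script.

import os

def make_model2nnpkg_cmd(driver_path, input_path, output_path):
    # same construction as in one-pack above
    cmd = [os.path.expanduser(driver_path)]
    cmd.append('-o')
    cmd.append(os.path.expanduser(output_path))
    cmd.append('-m')
    cmd.append(os.path.expanduser(input_path))
    return cmd

print(make_model2nnpkg_cmd('model2nnpkg', 'app.circle', 'app.nnpkg'))
# ['model2nnpkg', '-o', 'app.nnpkg', '-m', 'app.circle']
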
index c0d71e5..62ab13d 100644 (file)
@@ -24,7 +24,7 @@ import configparser
 import os
 import sys
 
-import utils as _utils
+import onelib.utils as oneutils
 
 # TODO Find better way to suppress traceback on error
 sys.tracebacklimit = 0
@@ -34,7 +34,7 @@ def _get_parser():
     parser = argparse.ArgumentParser(
         description='command line tool to partition circle model by multiple backends')
 
-    _utils._add_default_arg(parser)
+    oneutils.add_default_arg(parser)
 
     parser.add_argument(
         '--backends', type=str, help='backends in CSV to use for partitioning')
@@ -55,7 +55,7 @@ def _parse_arg(parser):
     args = parser.parse_args()
     # print version
     if args.version:
-        _utils._print_version_and_exit(__file__)
+        oneutils.print_version_and_exit(__file__)
 
     return args
 
@@ -64,9 +64,9 @@ def _verify_arg(parser, args):
     """verify given arguments"""
     # check if required arguments are given
     missing = []
-    if not _utils._is_valid_attr(args, 'part_file'):
+    if not oneutils.is_valid_attr(args, 'part_file'):
         missing.append('part_file')
-    if not _utils._is_valid_attr(args, 'input_file'):
+    if not oneutils.is_valid_attr(args, 'input_file'):
         missing.append('input_file')
     if len(missing):
         parser.error('the following arguments are required: ' + ' '.join(missing))
@@ -86,13 +86,13 @@ def _partition(args):
 
         cmd = [os.path.expanduser(circle_partitioner_path)]
 
-        if _utils._is_valid_attr(args, 'backends'):
+        if oneutils.is_valid_attr(args, 'backends'):
             cmd.append('--backends')
             cmd.append(getattr(args, 'backends'))
-        if _utils._is_valid_attr(args, 'default'):
+        if oneutils.is_valid_attr(args, 'default'):
             cmd.append('--default')
             cmd.append(getattr(args, 'default'))
-        if _utils._is_valid_attr(args, 'work_path'):
+        if oneutils.is_valid_attr(args, 'work_path'):
             cmd.append('--work_path')
             cmd.append(getattr(args, 'work_path'))
 
@@ -104,7 +104,7 @@ def _partition(args):
         f.write((' '.join(cmd) + '\n').encode())
 
         # run circle-partitioner
-        _utils._run(cmd, err_prefix='circle-partitioner', logfile=f)
+        oneutils.run(cmd, err_prefix='circle-partitioner', logfile=f)
 
 
 def main():
@@ -113,11 +113,7 @@ def main():
     args = _parse_arg(parser)
 
     # parse configuration file
-    _utils._parse_cfg(args, 'one-partition')
-
-    if _utils._is_valid_attr(args, 'config'):
-        config_path = getattr(args, 'config')
-        _utils._parse_cfg_and_overwrite(config_path, 'one-partition', args)
+    oneutils.parse_cfg(args.config, 'one-partition', args)
 
     # verify arguments
     _verify_arg(parser, args)
@@ -127,4 +123,4 @@ def main():
 
 
 if __name__ == '__main__':
-    _utils._safemain(main, __file__)
+    oneutils.safemain(main, __file__)
index b435671..baa88be 100644 (file)
@@ -37,12 +37,13 @@ VER_TENSORFLOW=2.8.0
 VER_ONNX=1.11.0
 VER_ONNXRUNTIME=1.11.0
 VER_ONNX_TF=1.10.0
+VER_PYDOT=1.4.2
 
 # Install tensorflow
 
 PIP_TRUSTED_HOST="--trusted-host pypi.org "
 PIP_TRUSTED_HOST+="--trusted-host pypi.python.org "
-PIP_TRUSTED_HOST+="--trusted-host files.pythonhost.org "
+PIP_TRUSTED_HOST+="--trusted-host files.pythonhosted.org "
 PIP_TRUSTED_HOST+="--trusted-host download.pytorch.org "
 
 PIP_TIMEOUT="--default-timeout=1000 "
@@ -65,6 +66,8 @@ fi
 ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install Pillow
 # TODO remove version fix, https://github.com/Samsung/ONE/issues/9240
 ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_probability==0.16.0
+# TODO remove version fix, https://github.com/Samsung/ONE/issues/10481
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_addons==0.16.1
 
 # Install PyTorch and ONNX related
 # NOTE set ONE_PREPVENV_TORCH_STABLE to override 'torch_stable.html' URL.
@@ -92,3 +95,6 @@ fi
 # NOTE refer https://github.com/protocolbuffers/protobuf/issues/10051
 # TODO remove this when issue is resolved
 ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install --upgrade protobuf==3.20.1
+
+# Install pydot for visq
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install pydot==${VER_PYDOT}
diff --git a/compiler/one-cmds/one-prepare-venv.u2204 b/compiler/one-cmds/one-prepare-venv.u2204
new file mode 100644 (file)
index 0000000..d4b0467
--- /dev/null
@@ -0,0 +1,97 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+DRIVER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+VENV_ACTIVATE=${DRIVER_PATH}/venv/bin/activate
+# NOTE please use venv's python instead of python after `source activation`.
+# This script is called by debian maintainer script, i.e. `postinst`.
+# Since debian maintainer script is called with sudo, `source activation` is ignored.
+VENV_PYTHON=${DRIVER_PATH}/venv/bin/python
+
+if [ ! -f ${VENV_ACTIVATE} ]; then
+  # Create python virtual environment
+  python3 -m venv "${DRIVER_PATH}/venv"
+fi
+
+# NOTE version
+# - https://github.com/onnx/onnx/blob/master/docs/Versioning.md
+# - https://github.com/onnx/onnx-tensorflow/blob/master/Versioning.md
+
+VER_TENSORFLOW=2.10.1
+VER_ONNX=1.12.0
+VER_ONNXRUNTIME=1.12.1
+VER_ONNX_TF=1.10.0
+VER_PYDOT=1.4.2
+
+# Install tensorflow
+
+PIP_TRUSTED_HOST="--trusted-host pypi.org "
+PIP_TRUSTED_HOST+="--trusted-host pypi.python.org "
+PIP_TRUSTED_HOST+="--trusted-host files.pythonhosted.org "
+PIP_TRUSTED_HOST+="--trusted-host download.pytorch.org "
+
+PIP_TIMEOUT="--default-timeout=1000 "
+
+PIP_OPTIONS="${PIP_TIMEOUT} ${PIP_TRUSTED_HOST}"
+
+# NOTE $ONE_PREPVENV_PIP_OPTION is to provide additional PIP options
+# such as a certificate file behind a firewall
+# ex) ONE_PREPVENV_PIP_OPTION="--cert SomePrivateCertificate.crt" ./one-prepare-venv
+if [[ ! -z "$ONE_PREPVENV_PIP_OPTION" ]]; then
+  PIP_OPTIONS+=" ${ONE_PREPVENV_PIP_OPTION} "
+fi
+
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install --upgrade pip setuptools
+if [ -n "${EXT_TENSORFLOW_WHL}" ]; then
+  ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install ${EXT_TENSORFLOW_WHL}
+else
+  ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow-cpu==${VER_TENSORFLOW}
+fi
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install Pillow
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_probability
+
+# Install PyTorch and ONNX related
+# NOTE set ONE_PREPVENV_TORCH_STABLE to override 'torch_stable.html' URL.
+#      torch_stable.html points to download URL of torch wheel file(s)
+#      but sometimes the server gets unstable, especially from in-house CI.
+TORCH_STABLE_URL="https://download.pytorch.org/whl/torch_stable.html"
+if [[ ! -z "$ONE_PREPVENV_TORCH_STABLE" ]]; then
+  TORCH_STABLE_URL="${ONE_PREPVENV_TORCH_STABLE}"
+fi
+# TODO remove torch message
+echo "Torch from '${ONE_PREPVENV_TORCH_STABLE}' -> '${TORCH_STABLE_URL}'"
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install torch==1.13.1+cpu -f ${TORCH_STABLE_URL}
+
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX}
+
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnxruntime==${VER_ONNXRUNTIME}
+
+# Provide install of custom onnx-tf
+if [ -n "${EXT_ONNX_TF_WHL}" ]; then
+  ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install ${EXT_ONNX_TF_WHL}
+else
+  ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx-tf==${VER_ONNX_TF}
+fi
+
+# NOTE refer https://github.com/protocolbuffers/protobuf/issues/10051
+# TODO remove this when issue is resolved
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install --upgrade protobuf==3.19.6
+
+# Install pydot for visq
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install pydot==${VER_PYDOT}
index b19c215..18a0456 100644 (file)
@@ -27,7 +27,7 @@ import ntpath
 import os
 import sys
 
-import utils as _utils
+import onelib.utils as oneutils
 
 # TODO Find better way to suppress traceback on error
 sys.tracebacklimit = 0
@@ -77,7 +77,7 @@ def _get_parser(backends_list):
     parser = argparse.ArgumentParser(
         description='command line tool for profiling backend model', usage=profile_usage)
 
-    _utils._add_default_arg(parser)
+    oneutils.add_default_arg(parser)
 
     # get backend list in the directory
     backends_name = [ntpath.basename(f) for f in backends_list]
@@ -96,7 +96,7 @@ def _verify_arg(parser, args):
     """verify given arguments"""
     # check if required arguments is given
     missing = []
-    if not _utils._is_valid_attr(args, 'backend'):
+    if not oneutils.is_valid_attr(args, 'backend'):
         missing.append('-b/--backend')
     if len(missing):
         parser.error('the following arguments are required: ' + ' '.join(missing))
@@ -111,6 +111,8 @@ def _parse_arg(parser):
     del argv[0]
     # split by '--'
     args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x]
+    if len(args) == 0:
+        profile_args = parser.parse_args(profile_args)
     # one-profile has two interfaces
     # 1. one-profile [-h] [-v] [-C CONFIG] [-b BACKEND] [COMMANDS FOR BACKEND]
     if len(args) == 1:
@@ -123,7 +125,7 @@ def _parse_arg(parser):
         profile_args = parser.parse_args(profile_args)
     # print version
     if len(args) and profile_args.version:
-        _utils._print_version_and_exit(__file__)
+        oneutils.print_version_and_exit(__file__)
 
     return profile_args, backend_args, unknown_args
 
@@ -137,7 +139,7 @@ def main():
     args, backend_args, unknown_args = _parse_arg(parser)
 
     # parse configuration file
-    _utils._parse_cfg(args, 'one-profile')
+    oneutils.parse_cfg(args.config, 'one-profile', args)
 
     # verify arguments
     _verify_arg(parser, args)
@@ -151,11 +153,11 @@ def main():
     if not profile_path:
         raise FileNotFoundError(backend_base + ' not found')
     profile_cmd = [profile_path] + backend_args + unknown_args
-    if _utils._is_valid_attr(args, 'command'):
+    if oneutils.is_valid_attr(args, 'command'):
         profile_cmd += getattr(args, 'command').split()
 
     # run backend driver
-    _utils._run(profile_cmd, err_prefix=backend_base)
+    oneutils.run(profile_cmd, err_prefix=backend_base)
 
 
 if __name__ == '__main__':
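
A short sketch, with made-up arguments, of the '--' split used by one-profile's _parse_arg above: options before the first '--' are parsed by one-profile itself, while everything after it is kept as backend arguments; the new len(args) == 0 guard covers an empty argument list.

import itertools

argv = ['-b', 'dummy', '--', '--num_runs', '3']
args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x]
print(args)   # [['-b', 'dummy'], ['--num_runs', '3']]
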
index 9282007..060892d 100644 (file)
@@ -25,8 +25,8 @@ import sys
 import tempfile
 import json
 
-import utils as _utils
-from utils import Command
+import onelib.utils as oneutils
+from onelib.Command import Command
 
 # TODO Find better way to suppress traceback on error
 sys.tracebacklimit = 0
@@ -36,7 +36,7 @@ def _get_parser():
     parser = argparse.ArgumentParser(
         description='command line tool to quantize circle model')
 
-    _utils._add_default_arg(parser)
+    oneutils.add_default_arg(parser)
 
     # input and output path.
     parser.add_argument(
@@ -221,12 +221,13 @@ def _get_parser():
 
 
 def _set_default_values(args):
-    if not _utils._is_valid_attr(args, 'input_model_dtype') and not _utils._is_valid_attr(
-            args, 'input_dtype'):
+    if not oneutils.is_valid_attr(args,
+                                  'input_model_dtype') and not oneutils.is_valid_attr(
+                                      args, 'input_dtype'):
         setattr(args, 'input_model_dtype', 'float32')
-    if not _utils._is_valid_attr(args, 'quantized_dtype'):
+    if not oneutils.is_valid_attr(args, 'quantized_dtype'):
         setattr(args, 'quantized_dtype', 'uint8')
-        if _utils._is_valid_attr(args, 'quant_config'):
+        if oneutils.is_valid_attr(args, 'quant_config'):
             # Get quantized_dtype from qconfig file
             try:
                 with open(getattr(args, 'quant_config')) as f:
@@ -237,9 +238,9 @@ def _set_default_values(args):
             except json.decoder.JSONDecodeError:
                 print('Failed to decode ' + getattr(args, 'quant_config') +
                       '. Please check it is a json file.')
-    if not _utils._is_valid_attr(args, 'granularity'):
+    if not oneutils.is_valid_attr(args, 'granularity'):
         setattr(args, 'granularity', 'layer')
-        if _utils._is_valid_attr(args, 'quant_config'):
+        if oneutils.is_valid_attr(args, 'quant_config'):
             # Get granularity from qconfig file
             try:
                 with open(getattr(args, 'quant_config')) as f:
@@ -249,11 +250,11 @@ def _set_default_values(args):
             except json.decoder.JSONDecodeError:
                 print('Failed to decode ' + getattr(args, 'quant_config') +
                       '. Please check it is a json file.')
-    if not _utils._is_valid_attr(args, 'mode'):
+    if not oneutils.is_valid_attr(args, 'mode'):
         setattr(args, 'mode', 'percentile')
-    if not _utils._is_valid_attr(args, 'min_percentile'):
+    if not oneutils.is_valid_attr(args, 'min_percentile'):
         setattr(args, 'min_percentile', '1.0')
-    if not _utils._is_valid_attr(args, 'max_percentile'):
+    if not oneutils.is_valid_attr(args, 'max_percentile'):
         setattr(args, 'max_percentile', '99.0')
 
 
@@ -261,32 +262,32 @@ def _verify_arg(parser, args):
     """verify given arguments"""
     # check if required arguments are given
     missing = []
-    if not _utils._is_valid_attr(args, 'input_path'):
+    if not oneutils.is_valid_attr(args, 'input_path'):
         missing.append('-i/--input_path')
-    if not _utils._is_valid_attr(args, 'output_path'):
+    if not oneutils.is_valid_attr(args, 'output_path'):
         missing.append('-o/--output_path')
-    if _utils._is_valid_attr(args, 'force_quantparam'):
-        if not _utils._is_valid_attr(args, 'tensor_name'):
+    if oneutils.is_valid_attr(args, 'force_quantparam'):
+        if not oneutils.is_valid_attr(args, 'tensor_name'):
             missing.append('--tensor_name')
-        if not _utils._is_valid_attr(args, 'scale'):
+        if not oneutils.is_valid_attr(args, 'scale'):
             missing.append('--scale')
-        if not _utils._is_valid_attr(args, 'zero_point'):
+        if not oneutils.is_valid_attr(args, 'zero_point'):
             missing.append('--zero_point')
-    if _utils._is_valid_attr(args, 'copy_quantparam'):
-        if not _utils._is_valid_attr(args, 'src_tensor_name'):
+    if oneutils.is_valid_attr(args, 'copy_quantparam'):
+        if not oneutils.is_valid_attr(args, 'src_tensor_name'):
             missing.append('--src_tensor_name')
-        if not _utils._is_valid_attr(args, 'dst_tensor_name'):
+        if not oneutils.is_valid_attr(args, 'dst_tensor_name'):
             missing.append('--dst_tensor_name')
     if len(missing):
         parser.error('the following arguments are required: ' + ' '.join(missing))
-    if _utils._is_valid_attr(args, 'force_quantparam'):
+    if oneutils.is_valid_attr(args, 'force_quantparam'):
         tensors = getattr(args, 'tensor_name')
         scales = getattr(args, 'scale')
         zerops = getattr(args, 'zero_point')
         if len(tensors) != len(scales) or len(tensors) != len(zerops):
             parser.error(
                 'The same number of tensor_name, scale, and zero_point should be given.')
-    if _utils._is_valid_attr(args, 'copy_quantparam'):
+    if oneutils.is_valid_attr(args, 'copy_quantparam'):
         src_tensors = getattr(args, 'src_tensor_name')
         dst_tensors = getattr(args, 'dst_tensor_name')
         if len(src_tensors) != len(dst_tensors):
@@ -298,23 +299,23 @@ def _parse_arg(parser):
     args = parser.parse_args()
     # print version
     if args.version:
-        _utils._print_version_and_exit(__file__)
+        oneutils.print_version_and_exit(__file__)
 
     return args
 
 
 def _quantize(args):
-    if _utils._is_valid_attr(args, 'force_quantparam'):
+    if oneutils.is_valid_attr(args, 'force_quantparam'):
         # write quantization parameters
         _write_qparam(args)
         return
 
-    if _utils._is_valid_attr(args, 'copy_quantparam'):
+    if oneutils.is_valid_attr(args, 'copy_quantparam'):
         # copy quantization parameters
         _copy_qparam(args)
         return
 
-    if _utils._is_valid_attr(args, 'fake_quantize'):
+    if oneutils.is_valid_attr(args, 'fake_quantize'):
         # fake-quantize model
         _fake_quantize(args)
         return
@@ -324,7 +325,7 @@ def _quantize(args):
     logfile_path = os.path.realpath(args.output_path) + '.log'
 
     with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
-        if _utils._is_valid_attr(args, 'save_intermediate'):
+        if oneutils.is_valid_attr(args, 'save_intermediate'):
             tmpdir = os.path.dirname(logfile_path)
         # get driver path
         circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
@@ -333,26 +334,26 @@ def _quantize(args):
         ## make a command to quantize and dequantize the weights of the model
         circle_quantizer_cmd = [circle_quantizer_path]
         # verbose
-        if _utils._is_valid_attr(args, 'verbose'):
+        if oneutils.is_valid_attr(args, 'verbose'):
             circle_quantizer_cmd.append('--verbose')
         # quantize_dequantize_weights
         circle_quantizer_cmd.append('--quantize_dequantize_weights')
         # Use input_model_dtype if it exists. Use input_dtype otherwise.
-        if _utils._is_valid_attr(args, 'input_model_dtype'):
+        if oneutils.is_valid_attr(args, 'input_model_dtype'):
             circle_quantizer_cmd.append(getattr(args, 'input_model_dtype'))
-        elif _utils._is_valid_attr(args, 'input_dtype'):
+        elif oneutils.is_valid_attr(args, 'input_dtype'):
             circle_quantizer_cmd.append(getattr(args, 'input_dtype'))
-        if _utils._is_valid_attr(args, 'quantized_dtype'):
+        if oneutils.is_valid_attr(args, 'quantized_dtype'):
             circle_quantizer_cmd.append(getattr(args, 'quantized_dtype'))
-        if _utils._is_valid_attr(args, 'granularity'):
+        if oneutils.is_valid_attr(args, 'granularity'):
             circle_quantizer_cmd.append(getattr(args, 'granularity'))
-        if _utils._is_valid_attr(args, 'quant_config'):
+        if oneutils.is_valid_attr(args, 'quant_config'):
             # NOTE --config conflicts with --config option in onecc, so
             # we use quant_config for one-quantize
             circle_quantizer_cmd.append('--config')
             circle_quantizer_cmd.append(getattr(args, 'quant_config'))
         # input and output path
-        if _utils._is_valid_attr(args, 'input_path'):
+        if oneutils.is_valid_attr(args, 'input_path'):
             circle_quantizer_cmd.append(getattr(args, 'input_path'))
         tmp_weights_fake_quant_path = os.path.join(
             tmpdir,
@@ -360,13 +361,13 @@ def _quantize(args):
                 args.input_path))[0]) + '.weights_fake_quant.circle'
         circle_quantizer_cmd.append(tmp_weights_fake_quant_path)
         # profiling
-        if _utils._is_valid_attr(args, 'generate_profile_data'):
+        if oneutils.is_valid_attr(args, 'generate_profile_data'):
             circle_quantizer_cmd.append('--generate_profile_data')
 
         f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
 
         # run circle-quantizer
-        _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+        oneutils.run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
 
         tmp_minmax_recorded_path = os.path.join(
             tmpdir,
@@ -389,50 +390,50 @@ def _quantize(args):
         ## make a second command to quantize the model using the embedded information
         circle_quantizer_cmd = [circle_quantizer_path]
         # verbose
-        if _utils._is_valid_attr(args, 'verbose'):
+        if oneutils.is_valid_attr(args, 'verbose'):
             circle_quantizer_cmd.append('--verbose')
         # quantize_dequantize_weights
         circle_quantizer_cmd.append('--quantize_with_minmax')
         # Use input_model_dtype if it exists. Use input_dtype otherwise.
-        if _utils._is_valid_attr(args, 'input_model_dtype'):
+        if oneutils.is_valid_attr(args, 'input_model_dtype'):
             circle_quantizer_cmd.append(getattr(args, 'input_model_dtype'))
-        elif _utils._is_valid_attr(args, 'input_dtype'):
+        elif oneutils.is_valid_attr(args, 'input_dtype'):
             circle_quantizer_cmd.append(getattr(args, 'input_dtype'))
-        if _utils._is_valid_attr(args, 'quantized_dtype'):
+        if oneutils.is_valid_attr(args, 'quantized_dtype'):
             circle_quantizer_cmd.append(getattr(args, 'quantized_dtype'))
-        if _utils._is_valid_attr(args, 'granularity'):
+        if oneutils.is_valid_attr(args, 'granularity'):
             circle_quantizer_cmd.append(getattr(args, 'granularity'))
-        if _utils._is_valid_attr(args, 'TF-style_maxpool'):
+        if oneutils.is_valid_attr(args, 'TF-style_maxpool'):
             circle_quantizer_cmd.append('--TF-style_maxpool')
-        if _utils._is_valid_attr(args, 'input_type'):
+        if oneutils.is_valid_attr(args, 'input_type'):
             circle_quantizer_cmd.append('--input_type')
             circle_quantizer_cmd.append(getattr(args, 'input_type'))
-        if _utils._is_valid_attr(args, 'output_type'):
+        if oneutils.is_valid_attr(args, 'output_type'):
             circle_quantizer_cmd.append('--output_type')
             circle_quantizer_cmd.append(getattr(args, 'output_type'))
-        if _utils._is_valid_attr(args, 'quant_config'):
+        if oneutils.is_valid_attr(args, 'quant_config'):
             # NOTE --config conflicts with --config option in onecc, so
             # we use quant_config for one-quantize
             circle_quantizer_cmd.append('--config')
             circle_quantizer_cmd.append(getattr(args, 'quant_config'))
         # input and output path
         circle_quantizer_cmd.append(tmp_minmax_recorded_path)
-        if _utils._is_valid_attr(args, 'output_path'):
+        if oneutils.is_valid_attr(args, 'output_path'):
             circle_quantizer_cmd.append(getattr(args, 'output_path'))
         # profiling
-        if _utils._is_valid_attr(args, 'generate_profile_data'):
+        if oneutils.is_valid_attr(args, 'generate_profile_data'):
             circle_quantizer_cmd.append('--generate_profile_data')
 
         f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
 
         # run circle-quantizer
-        _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+        oneutils.run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
 
         # evaluate
-        if _utils._is_valid_attr(args, 'evaluate_result'):
+        if oneutils.is_valid_attr(args, 'evaluate_result'):
             circle_eval_diff_path = os.path.join(dir_path, 'circle-eval-diff')
             quant_model = ""
-            if _utils._is_valid_attr(args, 'output_path'):
+            if oneutils.is_valid_attr(args, 'output_path'):
                 quant_model = getattr(args, 'output_path')
             tmp_fake_quant_model = os.path.join(
                 tmpdir,
@@ -473,13 +474,13 @@ def _write_qparam(args):
         # make a command to write qparams to the tensors
         circle_quantizer_cmd = [circle_quantizer_path]
         # verbose
-        if _utils._is_valid_attr(args, 'verbose'):
+        if oneutils.is_valid_attr(args, 'verbose'):
             circle_quantizer_cmd.append('--verbose')
-        if _utils._is_valid_attr(args, 'tensor_name'):
+        if oneutils.is_valid_attr(args, 'tensor_name'):
             tensor_name = getattr(args, 'tensor_name')
-        if _utils._is_valid_attr(args, 'scale'):
+        if oneutils.is_valid_attr(args, 'scale'):
             scale = getattr(args, 'scale')
-        if _utils._is_valid_attr(args, 'zero_point'):
+        if oneutils.is_valid_attr(args, 'zero_point'):
             zero_point = getattr(args, 'zero_point')
         for (t, s, zp) in zip(tensor_name, scale, zero_point):
             circle_quantizer_cmd.append('--force_quantparam')
@@ -487,15 +488,15 @@ def _write_qparam(args):
             circle_quantizer_cmd.append(str(s))
             circle_quantizer_cmd.append(str(zp))
         # input and output path
-        if _utils._is_valid_attr(args, 'input_path'):
+        if oneutils.is_valid_attr(args, 'input_path'):
             circle_quantizer_cmd.append(getattr(args, 'input_path'))
-        if _utils._is_valid_attr(args, 'output_path'):
+        if oneutils.is_valid_attr(args, 'output_path'):
             circle_quantizer_cmd.append(getattr(args, 'output_path'))
 
         f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
 
         # run circle-quantizer
-        _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+        oneutils.run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
 
 
 def _copy_qparam(args):
@@ -510,26 +511,26 @@ def _copy_qparam(args):
         # make a command to write qparams to the tensors
         circle_quantizer_cmd = [circle_quantizer_path]
         # verbose
-        if _utils._is_valid_attr(args, 'verbose'):
+        if oneutils.is_valid_attr(args, 'verbose'):
             circle_quantizer_cmd.append('--verbose')
-        if _utils._is_valid_attr(args, 'src_tensor_name'):
+        if oneutils.is_valid_attr(args, 'src_tensor_name'):
             src_tensor_name = getattr(args, 'src_tensor_name')
-        if _utils._is_valid_attr(args, 'dst_tensor_name'):
+        if oneutils.is_valid_attr(args, 'dst_tensor_name'):
             dst_tensor_name = getattr(args, 'dst_tensor_name')
         for (src, dst) in zip(src_tensor_name, dst_tensor_name):
             circle_quantizer_cmd.append('--copy_quantparam')
             circle_quantizer_cmd.append(src)
             circle_quantizer_cmd.append(dst)
         # input and output path
-        if _utils._is_valid_attr(args, 'input_path'):
+        if oneutils.is_valid_attr(args, 'input_path'):
             circle_quantizer_cmd.append(getattr(args, 'input_path'))
-        if _utils._is_valid_attr(args, 'output_path'):
+        if oneutils.is_valid_attr(args, 'output_path'):
             circle_quantizer_cmd.append(getattr(args, 'output_path'))
 
         f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
 
         # run circle-quantizer
-        _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+        oneutils.run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
 
 
 def _fake_quantize(args):
@@ -556,7 +557,7 @@ def main():
     args = _parse_arg(parser)
 
     # parse configuration file
-    _utils._parse_cfg(args, 'one-quantize')
+    oneutils.parse_cfg(args.config, 'one-quantize', args)
 
     # set default values
     _set_default_values(args)
@@ -569,4 +570,4 @@ def main():
 
 
 if __name__ == '__main__':
-    _utils._safemain(main, __file__)
+    oneutils.safemain(main, __file__)
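
An outline sketch of the two circle-quantizer invocations that _quantize above builds when neither force_quantparam, copy_quantparam, nor fake_quantize is requested. File names are hypothetical and the dtypes are the defaults set in _set_default_values; the first pass fake-quantizes the weights, the second consumes the min/max-recorded intermediate model via --quantize_with_minmax, and circle-eval-diff runs only when evaluate_result is set.

steps = [
    ['circle-quantizer', '--quantize_dequantize_weights',
     'float32', 'uint8', 'layer',
     'model.circle', 'model.weights_fake_quant.circle'],
    ['circle-quantizer', '--quantize_with_minmax',
     'float32', 'uint8', 'layer',
     'model.minmax_recorded.circle', 'model.q.circle'],
]
for cmd in steps:
    print(' '.join(cmd))   # each command line is also appended to the *.log file
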
index a5ba636..4cc7740 100644 (file)
@@ -27,7 +27,7 @@ import sys
 
 from onelib.CfgRunner import CfgRunner
 from onelib.WorkflowRunner import WorkflowRunner
-import utils as _utils
+import onelib.utils as oneutils
 
 # TODO Find better way to suppress traceback on error
 sys.tracebacklimit = 0
@@ -53,7 +53,7 @@ def _call_driver(driver_name, options):
     dir_path = os.path.dirname(os.path.realpath(__file__))
     driver_path = os.path.join(dir_path, driver_name)
     cmd = [driver_path] + options
-    _utils._run(cmd)
+    oneutils.run(cmd)
 
 
 def _check_subtool_exists():
@@ -71,9 +71,9 @@ def _get_parser():
     onecc_desc = 'Run ONE driver via several commands or configuration file'
     parser = argparse.ArgumentParser(description=onecc_desc, usage=onecc_usage)
 
-    _utils._add_default_arg(parser)
+    oneutils.add_default_arg(parser)
 
-    opt_name_list = _utils._get_optimization_list(get_name=True)
+    opt_name_list = oneutils.get_optimization_list(get_name=True)
     opt_name_list = ['-' + s for s in opt_name_list]
     if not opt_name_list:
         opt_help_message = '(No available optimization options)'
@@ -106,7 +106,7 @@ def _parse_arg(parser):
     args = parser.parse_args()
     # print version
     if args.version:
-        _utils._print_version_and_exit(__file__)
+        oneutils.print_version_and_exit(__file__)
 
     return args
 
@@ -114,13 +114,13 @@ def _parse_arg(parser):
 def _verify_arg(parser, args):
     """verify given arguments"""
     # check if required arguments are given
-    if not _utils._is_valid_attr(args, 'config') and not _utils._is_valid_attr(
+    if not oneutils.is_valid_attr(args, 'config') and not oneutils.is_valid_attr(
             args, 'workflow'):
         parser.error('-C/--config or -W/--workflow argument is required')
     # check if given optimization option exists
-    opt_name_list = _utils._get_optimization_list(get_name=True)
-    opt_name_list = [_utils._remove_prefix(s, 'O') for s in opt_name_list]
-    if _utils._is_valid_attr(args, 'O'):
+    opt_name_list = oneutils.get_optimization_list(get_name=True)
+    opt_name_list = [oneutils.remove_prefix(s, 'O') for s in opt_name_list]
+    if oneutils.is_valid_attr(args, 'O'):
         if ' ' in getattr(args, 'O'):
             parser.error('Not allowed to have space in the optimization name')
         if not getattr(args, 'O') in opt_name_list:
@@ -147,16 +147,16 @@ def main():
     _verify_arg(parser, args)
 
     bin_dir = os.path.dirname(os.path.realpath(__file__))
-    if _utils._is_valid_attr(args, 'config'):
+    if oneutils.is_valid_attr(args, 'config'):
         runner = CfgRunner(args.config)
         runner.detect_import_drivers(bin_dir)
-        if _utils._is_valid_attr(args, 'O'):
+        if oneutils.is_valid_attr(args, 'O'):
             runner.add_opt(getattr(args, 'O'))
         runner.run(bin_dir)
-    elif _utils._is_valid_attr(args, 'workflow'):
+    elif oneutils.is_valid_attr(args, 'workflow'):
         runner = WorkflowRunner(args.workflow)
         runner.run(bin_dir)
 
 
 if __name__ == '__main__':
-    _utils._safemain(main, __file__)
+    oneutils.safemain(main, __file__)
index 6f6a4e2..c9968b4 100644 (file)
@@ -1,3 +1,7 @@
+; set environment variables
+[Environment]
+ONECC_ENV="ONECC"
+
 ; To activate a step (or task),
 ; set True for the step in [onecc] section and fill options in the corresponding section
 [onecc]
@@ -20,6 +24,10 @@ one-codegen=False
 one-profile=False
 ; infer
 one-infer=False
+; group option
+; multiple group options are allowed
+include=O1
+# include=O1 O2 OMY_OPT
 
 [one-import-tf]
 # mandatory
index c66e5b4..fe1c41a 100644 (file)
@@ -18,7 +18,7 @@ import configparser
 import os
 import warnings
 
-import utils as oneutils
+import onelib.utils as oneutils
 
 
 def _simple_warning(message, category, filename, lineno, file=None, line=None):
@@ -46,6 +46,15 @@ class CfgRunner:
         self.import_drivers = [
             'one-import-bcq', 'one-import-onnx', 'one-import-tf', 'one-import-tflite'
         ]
+        # parse group option
+        GROUP_OPTION_KEY = 'include'
+        if self.cfgparser.has_option('onecc', GROUP_OPTION_KEY):
+            groups = self.cfgparser['onecc'][GROUP_OPTION_KEY].split()
+            for o in groups:
+                if o == 'O' or not o.startswith('O'):
+                    raise ValueError('Invalid group option')
+                # add_opt receives the group name without the leading 'O'
+                self.add_opt(o[1:])
 
     def _verify_cfg(self, cfgparser):
         if not cfgparser.has_section('onecc'):
@@ -67,8 +76,8 @@ class CfgRunner:
         # make option names case sensitive
         self.optparser.optionxform = str
         opt_book = dict(
-            zip(oneutils._get_optimization_list(get_name=True),
-                oneutils._get_optimization_list()))
+            zip(oneutils.get_optimization_list(get_name=True),
+                oneutils.get_optimization_list()))
         parsed = self.optparser.read(opt_book['O' + opt])
         if not parsed:
             raise FileNotFoundError('Not found given optimization configuration file')
@@ -80,9 +89,15 @@ class CfgRunner:
         self.opt = opt
 
     def detect_import_drivers(self, dir):
-        self.import_drivers = list(oneutils._detect_one_import_drivers(dir).keys())
+        self.import_drivers = list(oneutils.detect_one_import_drivers(dir).keys())
 
     def run(self, working_dir, verbose=False):
+        # set environment
+        CFG_ENV_SECTION = 'Environment'
+        if self.cfgparser.has_section(CFG_ENV_SECTION):
+            for key in self.cfgparser[CFG_ENV_SECTION]:
+                os.environ[key] = self.cfgparser[CFG_ENV_SECTION][key]
+
         section_to_run = []
         for d in self.import_drivers + self.driver_sequence:
             if self._is_available(d):
@@ -96,4 +111,4 @@ class CfgRunner:
                 options.append('--verbose')
             driver_path = os.path.join(working_dir, section)
             cmd = [driver_path] + options
-            oneutils._run(cmd)
+            oneutils.run(cmd)
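
A minimal sketch, using in-memory cfg text with made-up values, of the two behaviors added to CfgRunner above: keys under [Environment] are exported through os.environ before any driver runs, and the include key of [onecc] is split into group options that must start with 'O' and are registered without that prefix.

import configparser
import os

cfg = configparser.ConfigParser()
cfg.optionxform = str        # keep key case for this sketch
cfg.read_string("""
[Environment]
ONECC_ENV=ONECC

[onecc]
one-optimize=True
include=O1 OMY_OPT
""")

for key in cfg['Environment']:
    os.environ[key] = cfg['Environment'][key]

groups = cfg['onecc']['include'].split()
for o in groups:
    if o == 'O' or not o.startswith('O'):
        raise ValueError('Invalid group option')
print(os.environ['ONECC_ENV'], [o[1:] for o in groups])   # ONECC ['1', 'MY_OPT']
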
diff --git a/compiler/one-cmds/onelib/Command.py b/compiler/one-cmds/onelib/Command.py
new file mode 100644 (file)
index 0000000..35a9567
--- /dev/null
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import onelib.utils as oneutils
+
+
+class Command:
+    def __init__(self, driver, args, log_file):
+        self.cmd = [driver]
+        self.driver = driver
+        self.args = args
+        self.log_file = log_file
+
+    # Add option if attrs are valid
+    # Option values are collected from self.args
+    def add_option_with_valid_args(self, option, attrs):
+        for attr in attrs:
+            if not oneutils.is_valid_attr(self.args, attr):
+                return self
+        self.cmd.append(option)
+        for attr in attrs:
+            self.cmd.append(getattr(self.args, attr))
+        return self
+
+    # Add option and values without any condition
+    def add_option_with_values(self, option, values):
+        self.cmd.append(option)
+        for value in values:
+            self.cmd.append(value)
+        return self
+
+    # Add option with no argument (ex: --verbose) if attr is valid
+    def add_noarg_option_if_valid_arg(self, option, attr):
+        if oneutils.is_valid_attr(self.args, attr):
+            self.cmd.append(option)
+        return self
+
+    # Run cmd and save logs
+    def run(self):
+        self.log_file.write((' '.join(self.cmd) + '\n').encode())
+        oneutils.run(self.cmd, err_prefix=self.driver, logfile=self.log_file)
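
A usage sketch of the new Command builder above. The driver name, attribute values, and log file are made up, and onelib is assumed to be importable; each add_* call appends to the same command list and returns self, so the calls chain.

import argparse
from onelib.Command import Command

args = argparse.Namespace(verbose=True, input_path='in.circle')
with open('demo.log', 'wb') as f:
    cmd = Command('circle2circle', args, f) \
        .add_noarg_option_if_valid_arg('--verbose', 'verbose') \
        .add_option_with_valid_args('--input_path', ['input_path']) \
        .add_option_with_values('--output_path', ['out.circle'])
    print(cmd.cmd)
    # ['circle2circle', '--verbose', '--input_path', 'in.circle',
    #  '--output_path', 'out.circle']
    # cmd.run() would also write this command line to demo.log and execute it
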
index 0482dd9..52bd253 100644 (file)
@@ -20,7 +20,7 @@ import os
 from onelib.OptionBuilder import OptionBuilder
 from onelib.TopologicalSortHelper import TopologicalSortHelper
 from onelib.CfgRunner import CfgRunner
-import utils as oneutils
+import onelib.utils as oneutils
 
 
 class WorkflowRunner:
@@ -124,7 +124,7 @@ class WorkflowRunner:
                     # get the absolute path of the caller
                     driver_path = os.path.join(working_dir, driver_name)
                     cmd = [driver_path] + options
-                    oneutils._run(cmd)
+                    oneutils.run(cmd)
             elif self.CFG_REFERENCE_K in workflow:
                 cfg_path = workflow[self.CFG_REFERENCE_K]['path']
                 runner = CfgRunner(cfg_path)
index 7dd79b6..0f338fc 100644 (file)
 
 class CONSTANT:
     __slots__ = ()  # This prevents access via __dict__.
+
+    # Basic optimization passes
+    # These passes do not change the execution result of the model
+    O1 = (
+        # Constant folding
+        'fold_add_v2',
+        'fold_cast',
+        'fold_densify',
+        'fold_dequantize',
+        'fold_dwconv',
+        'fold_fully_connected',
+        'fold_gather',
+        'fold_sparse_to_dense',
+
+        # Operator fusion
+        'fuse_add_with_tconv',
+        'fuse_add_with_fully_connected',
+        'fuse_batchnorm_with_conv',
+        'fuse_batchnorm_with_dwconv',
+        'fuse_batchnorm_with_tconv',
+        'fuse_activation_function',
+        'fuse_instnorm',
+        'fuse_prelu',
+        'fuse_mean_with_mean',
+        'fuse_transpose_with_mean',
+        'transform_min_max_to_relu6',
+        'transform_min_relu_to_relu6',
+
+        # Remove redundant operators
+        'remove_redundant_reshape',
+        'remove_redundant_transpose',
+        'remove_unnecessary_reshape',
+        'remove_unnecessary_slice',
+        'remove_unnecessary_strided_slice',
+        'remove_unnecessary_split',
+
+        # Canonicalization
+        # (passes to help further optimization)
+        'resolve_customop_add',
+        'resolve_customop_batchmatmul',
+        'resolve_customop_matmul',
+        'resolve_customop_max_pool_with_argmax',
+        'resolve_customop_splitv',
+        'substitute_pack_to_reshape',
+        'substitute_padv2_to_pad',
+        'substitute_splitv_to_split',
+        'substitute_squeeze_to_reshape',
+        'substitute_strided_slice_to_reshape',
+        'substitute_transpose_to_reshape',
+        'forward_reshape_to_unaryop',
+        'forward_transpose_op',
+        'replace_non_const_fc_with_batch_matmul',  # For quantization
+    )
+
     OPTIMIZATION_OPTS = (
         # (OPTION_NAME, HELP_MESSAGE)
         ('convert_nchw_to_nhwc',
@@ -32,9 +86,11 @@ class CONSTANT:
         ('fold_densify', 'fold Densify op with sparse constant input'),
         ('fold_dequantize', 'fold Dequantize op'),
         ('fold_dwconv', 'fold Depthwise Convolution op with constant inputs'),
+        ('fold_fully_connected', 'fold FullyConnected op with constant inputs'),
         ('fold_gather', 'fold Gather op'),
         ('fold_sparse_to_dense', 'fold SparseToDense op'),
         ('forward_reshape_to_unaryop', 'Forward Reshape op'),
+        ('forward_transpose_op', 'Forward Transpose op'),
         ('fuse_add_with_tconv', 'fuse Add op to Transposed'),
         ('fuse_add_with_fully_connected', 'fuse Add op to FullyConnected op'),
         ('fuse_batchnorm_with_conv', 'fuse BatchNorm op to Convolution op'),
@@ -52,6 +108,7 @@ class CONSTANT:
          ' So, use it only when the impact is known to be acceptable.'),
         ('fuse_activation_function', 'fuse Activation function to a preceding operator'),
         ('fuse_instnorm', 'fuse ops to InstanceNorm operator'),
+        ('fuse_prelu', 'fuse ops to PReLU operator'),
         ('replace_cw_mul_add_with_depthwise_conv',
          'replace channel-wise Mul/Add with DepthwiseConv2D'),
         ('remove_fakequant', 'remove FakeQuant ops'),
@@ -85,7 +142,8 @@ class CONSTANT:
         ('substitute_transpose_to_reshape',
          'convert certain condition Transpose to Reshape'),
         ('transform_min_max_to_relu6', 'transform Minimum-Maximum pattern to Relu6 op'),
-        ('transform_min_relu_to_relu6', 'transform Minimum(6)-Relu pattern to Relu6 op'))
+        ('transform_min_relu_to_relu6', 'transform Minimum(6)-Relu pattern to Relu6 op'),
+        ('unroll_unidirseqlstm', 'unroll UnidirectionalSequenceLSTM op'))
 
 
 CONSTANT = CONSTANT()
diff --git a/compiler/one-cmds/onelib/export_constant.py b/compiler/one-cmds/onelib/export_constant.py
new file mode 100644 (file)
index 0000000..7a2de1e
--- /dev/null
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from constant import CONSTANT
+
+import argparse
+import configparser
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='Export CONSTANT value with given file format.')
+    parser.add_argument(
+        '-c', '--constant', type=str, required=True, help='Constant name to export')
+    parser.add_argument(
+        '-f',
+        '--format',
+        type=str,
+        required=True,
+        choices=['cfg', 'txt'],
+        help=
+        'File format to export. The created cfg file contains CONSTANT under the one-optimize section.'
+    )
+    parser.add_argument(
+        '--exclusive',
+        action='store_true',
+        help='Exports the rest of the options except for the given constant')
+    parser.add_argument(
+        '-o', '--output_path', type=str, required=True, help='Path to output')
+
+    args = parser.parse_args()
+
+    if not hasattr(CONSTANT, args.constant):
+        raise NameError('Not found given constant name')
+
+    if args.exclusive:
+        constant_to_exclude = getattr(CONSTANT, args.constant)
+        constant_to_export = []
+        for opt in CONSTANT.OPTIMIZATION_OPTS:
+            if opt[0] in constant_to_exclude:
+                continue
+            constant_to_export.append(opt[0])
+    else:
+        constant_to_export = getattr(CONSTANT, args.constant)
+
+    if args.format == 'cfg':
+        SECTION_TO_EXPORT = 'one-optimize'
+        config = configparser.ConfigParser()
+        config[SECTION_TO_EXPORT] = dict()
+        for constant in constant_to_export:
+            config[SECTION_TO_EXPORT][constant] = 'True'
+
+        with open(args.output_path, 'w') as f:
+            config.write(f)
+
+    if args.format == 'txt':
+        with open(args.output_path, 'w') as f:
+            for constant in constant_to_export:
+                f.write(f"{constant}\n")
+
+
+if __name__ == '__main__':
+    main()
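
A sketch of the cfg output that export_constant.py above produces for the O1 group. Only a few pass names from CONSTANT.O1 are shown and the output path is made up; every exported pass becomes a 'True'-valued key in a one-optimize section, which one-optimize can then consume as a group option.

import configparser

O1_EXCERPT = ('fold_cast', 'fuse_instnorm', 'remove_redundant_reshape')

config = configparser.ConfigParser()
config['one-optimize'] = {name: 'True' for name in O1_EXCERPT}
with open('O1.cfg', 'w') as f:
    config.write(f)
# O1.cfg now reads:
# [one-optimize]
# fold_cast = True
# fuse_instnorm = True
# remove_redundant_reshape = True
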
index 0015e83..068b549 100644 (file)
@@ -20,7 +20,7 @@ import sys
 import onelib.constant as _constant
 
 
-def _is_valid_attr(args, attr):
+def is_valid_attr(args, attr):
     return hasattr(args, attr) and getattr(args, attr)
 
 
@@ -28,45 +28,45 @@ def make_tf2tfliteV2_cmd(args, driver_path, input_path, output_path):
     """make a command for running tf2tfliteV2.py"""
     cmd = [sys.executable, os.path.expanduser(driver_path)]
     # verbose
-    if _is_valid_attr(args, 'verbose'):
+    if is_valid_attr(args, 'verbose'):
         cmd.append('--verbose')
     # model_format
-    if _is_valid_attr(args, 'model_format_cmd'):
+    if is_valid_attr(args, 'model_format_cmd'):
         cmd.append(getattr(args, 'model_format_cmd'))
-    elif _is_valid_attr(args, 'model_format'):
+    elif is_valid_attr(args, 'model_format'):
         cmd.append('--' + getattr(args, 'model_format'))
     else:
         cmd.append('--graph_def')  # default value
     # converter version
-    if _is_valid_attr(args, 'converter_version_cmd'):
+    if is_valid_attr(args, 'converter_version_cmd'):
         cmd.append(getattr(args, 'converter_version_cmd'))
-    elif _is_valid_attr(args, 'converter_version'):
+    elif is_valid_attr(args, 'converter_version'):
         cmd.append('--' + getattr(args, 'converter_version'))
     else:
         cmd.append('--v1')  # default value
     # input_path
-    if _is_valid_attr(args, 'input_path'):
+    if is_valid_attr(args, 'input_path'):
         cmd.append('--input_path')
         cmd.append(os.path.expanduser(input_path))
     # output_path
-    if _is_valid_attr(args, 'output_path'):
+    if is_valid_attr(args, 'output_path'):
         cmd.append('--output_path')
         cmd.append(os.path.expanduser(output_path))
     # input_arrays
-    if _is_valid_attr(args, 'input_arrays'):
+    if is_valid_attr(args, 'input_arrays'):
         cmd.append('--input_arrays')
         cmd.append(getattr(args, 'input_arrays'))
     # input_shapes
-    if _is_valid_attr(args, 'input_shapes'):
+    if is_valid_attr(args, 'input_shapes'):
         cmd.append('--input_shapes')
         cmd.append(getattr(args, 'input_shapes'))
     # output_arrays
-    if _is_valid_attr(args, 'output_arrays'):
+    if is_valid_attr(args, 'output_arrays'):
         cmd.append('--output_arrays')
         cmd.append(getattr(args, 'output_arrays'))
 
     # experimental options
-    if _is_valid_attr(args, 'experimental_disable_batchmatmul_unfold'):
+    if is_valid_attr(args, 'experimental_disable_batchmatmul_unfold'):
         cmd.append('--experimental_disable_batchmatmul_unfold')
 
     return cmd
@@ -82,12 +82,12 @@ def make_circle2circle_cmd(args, driver_path, input_path, output_path):
     """make a command for running circle2circle"""
     cmd = [os.path.expanduser(c) for c in [driver_path, input_path, output_path]]
     # profiling
-    if _is_valid_attr(args, 'generate_profile_data'):
+    if is_valid_attr(args, 'generate_profile_data'):
         cmd.append('--generate_profile_data')
     # optimization pass(only true/false options)
     # TODO support options whose number of arguments is more than zero
     for opt in _constant.CONSTANT.OPTIMIZATION_OPTS:
-        if _is_valid_attr(args, opt[0]):
+        if is_valid_attr(args, opt[0]):
             # ./driver --opt[0]
             if type(getattr(args, opt[0])) is bool:
                 cmd.append('--' + opt[0])
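
A compact sketch, with a made-up namespace and an excerpt of the option list, of the loop above in make_circle2circle_cmd: a boolean optimization attribute becomes a bare '--<option>' flag only when it is present and True.

import argparse

OPTS_EXCERPT = ('fold_cast', 'fuse_prelu', 'forward_transpose_op')

def sketch_circle2circle_cmd(args, driver, input_path, output_path):
    cmd = [driver, input_path, output_path]
    for opt in OPTS_EXCERPT:
        value = getattr(args, opt, None)
        if value and type(value) is bool:
            cmd.append('--' + opt)
    return cmd

args = argparse.Namespace(fold_cast=True, fuse_prelu=False)
print(sketch_circle2circle_cmd(args, 'circle2circle', 'in.circle', 'out.circle'))
# ['circle2circle', 'in.circle', 'out.circle', '--fold_cast']
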
diff --git a/compiler/one-cmds/onelib/utils.py b/compiler/one-cmds/onelib/utils.py
new file mode 100644 (file)
index 0000000..3d37f9d
--- /dev/null
@@ -0,0 +1,273 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import configparser
+import glob
+import importlib
+import ntpath
+import os
+import subprocess
+import sys
+
+from typing import Union
+
+import onelib.constant as _constant
+
+
+def add_default_arg(parser):
+    # version
+    parser.add_argument(
+        '-v',
+        '--version',
+        action='store_true',
+        help='show program\'s version number and exit')
+
+    # verbose
+    parser.add_argument(
+        '-V',
+        '--verbose',
+        action='store_true',
+        help='output additional information to stdout or stderr')
+
+    # configuration file
+    parser.add_argument('-C', '--config', type=str, help='run with configuration file')
+    # section name that you want to run in configuration file
+    parser.add_argument('-S', '--section', type=str, help=argparse.SUPPRESS)
+
+
+def add_default_arg_no_CS(parser):
+    """
+    This adds -v -V args only (no -C nor -S)
+    """
+    # version
+    parser.add_argument(
+        '-v',
+        '--version',
+        action='store_true',
+        help='show program\'s version number and exit')
+
+    # verbose
+    parser.add_argument(
+        '-V',
+        '--verbose',
+        action='store_true',
+        help='output additional information to stdout or stderr')
+
+
+def is_accumulated_arg(arg, driver):
+    if driver == "one-quantize":
+        accumulables = [
+            "tensor_name", "scale", "zero_point", "src_tensor_name", "dst_tensor_name"
+        ]
+        if arg in accumulables:
+            return True
+
+    return False
+
+
+def is_valid_attr(args, attr):
+    return hasattr(args, attr) and getattr(args, attr)
+
+
+def parse_cfg_and_overwrite(config_path, section, args):
+    """
+    parse given section of configuration file and set the values of args.
+    Even if the values parsed from the configuration file already exist in args,
+    the values are overwritten.
+    """
+    if config_path == None:
+        # DO NOTHING
+        return
+    config = configparser.ConfigParser()
+    # make option names case sensitive
+    config.optionxform = str
+    parsed = config.read(config_path)
+    if not parsed:
+        raise FileNotFoundError('Not found given configuration file')
+    if not config.has_section(section):
+        raise AssertionError('configuration file doesn\'t have \'' + section +
+                             '\' section')
+    for key in config[section]:
+        setattr(args, key, config[section][key])
+    # TODO support accumulated arguments
+
+
+def parse_cfg(config_path: Union[str, None], section_to_parse: str, args):
+    """
+    parse the configuration file and store the information in args
+    
+    :param config_path: path to configuration file
+    :param section_to_parse: section name to parse
+    :param args: object to store the parsed information
+    """
+    if config_path is None:
+        return
+
+    parser = configparser.ConfigParser()
+    parser.optionxform = str
+    parser.read(config_path)
+
+    if not parser.has_section(section_to_parse):
+        raise AssertionError('configuration file must have \'' + section_to_parse +
+                             '\' section')
+
+    for key in parser[section_to_parse]:
+        if is_accumulated_arg(key, section_to_parse):
+            if not is_valid_attr(args, key):
+                setattr(args, key, [parser[section_to_parse][key]])
+            else:
+                getattr(args, key).append(parser[section_to_parse][key])
+            continue
+        if hasattr(args, key) and getattr(args, key):
+            continue
+        setattr(args, key, parser[section_to_parse][key])
+
+
+def print_version_and_exit(file_path):
+    """print version of the file located in the file_path"""
+    script_path = os.path.realpath(file_path)
+    dir_path = os.path.dirname(script_path)
+    script_name = os.path.splitext(os.path.basename(script_path))[0]
+    # run one-version
+    subprocess.call([os.path.join(dir_path, 'one-version'), script_name])
+    sys.exit()
+
+
+def safemain(main, mainpath):
+    """execute given method and print with program name for all uncaught exceptions"""
+    try:
+        main()
+    except Exception as e:
+        prog_name = os.path.basename(mainpath)
+        print(f"{prog_name}: {type(e).__name__}: " + str(e), file=sys.stderr)
+        sys.exit(255)
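+
+# Usage sketch for safemain (illustrative only; `main` is assumed to be the
+# driver's entry function):
+#   safemain(main, __file__)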
+
+
+def run(cmd, err_prefix=None, logfile=None):
+    """Execute command in subprocess
+
+    Args:
+        cmd: command to be executed in subprocess
+        err_prefix: prefix to be put before every stderr line
+        logfile: file stream to which both stdout and stderr lines will be written
+    """
+    with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as p:
+        import select
+        inputs = set([p.stdout, p.stderr])
+        while inputs:
+            readable, _, _ = select.select(inputs, [], [])
+            for x in readable:
+                line = x.readline()
+                if len(line) == 0:
+                    inputs.discard(x)
+                    continue
+                if x == p.stdout:
+                    out = sys.stdout
+                if x == p.stderr:
+                    out = sys.stderr
+                    if err_prefix:
+                        line = f"{err_prefix}: ".encode() + line
+                out.buffer.write(line)
+                out.buffer.flush()
+                if logfile is not None:
+                    logfile.write(line)
+    if p.returncode != 0:
+        sys.exit(p.returncode)
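+
+# Usage sketch for run (illustrative only; the driver path and log file name are
+# assumptions). Note that the log file must be opened in binary mode because
+# run() writes raw bytes:
+#   with open('one-quantize.log', 'wb') as f:
+#       run(['./one-quantize', '--version'], err_prefix='one-quantize', logfile=f)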
+
+
+def remove_prefix(s, prefix):
+    if s.startswith(prefix):
+        return s[len(prefix):]
+    return s
+
+
+def remove_suffix(s, suffix):
+    if s.endswith(suffix):
+        return s[:-len(suffix)]
+    return s
+
+
+def get_optimization_list(get_name=False):
+    """
+    returns a list of optimization cfg files. If `get_name` is True,
+    only the basename without extension is returned rather than the full file path.
+
+    [one hierarchy]
+    one
+    ├── backends
+    ├── bin
+    ├── doc
+    ├── include
+    ├── lib
+    ├── optimization
+    └── test
+
+    Optimization options must be placed in `optimization` folder
+    """
+    dir_path = os.path.dirname(os.path.realpath(__file__))
+
+    # optimization folder
+    files = [
+        f for f in glob.glob(dir_path + '/../../optimization/O*.cfg', recursive=True)
+    ]
+    # exclude if the name has space
+    files = [s for s in files if ' ' not in s]
+
+    opt_list = []
+    for cand in files:
+        if os.path.isfile(cand) and os.access(cand, os.R_OK):
+            opt_list.append(cand)
+
+    if get_name:
+        # NOTE the name includes prefix 'O'
+        # e.g. O1, O2, ONCHW not just 1, 2, NCHW
+        opt_list = [ntpath.basename(f) for f in opt_list]
+        opt_list = [remove_suffix(s, '.cfg') for s in opt_list]
+
+    return opt_list
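+
+# Usage sketch for get_optimization_list (illustrative only; the returned paths
+# depend on the local installation):
+#   get_optimization_list()              # ['/path/to/one/optimization/O1.cfg', ...]
+#   get_optimization_list(get_name=True) # ['O1', ...]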
+
+
+def detect_one_import_drivers(search_path):
+    """Looks for import drivers in given directory
+
+    Args:
+        search_path: path to the directory in which to search for import drivers
+
+    Returns:
+        dict: each entry corresponds to a single detected driver;
+              the key is a config section name, the value is a driver name
+    """
+    import_drivers_dict = {}
+    for module_name in os.listdir(search_path):
+        full_path = os.path.join(search_path, module_name)
+        if not os.path.isfile(full_path):
+            continue
+        if module_name.find("one-import-") != 0:
+            continue
+        module_loader = importlib.machinery.SourceFileLoader(module_name, full_path)
+        module_spec = importlib.util.spec_from_loader(module_name, module_loader)
+        module = importlib.util.module_from_spec(module_spec)
+        try:
+            module_loader.exec_module(module)
+            if hasattr(module, "get_driver_cfg_section"):
+                section = module.get_driver_cfg_section()
+                import_drivers_dict[section] = module_name
+        except Exception:
+            # drivers that fail to load are silently skipped
+            pass
+    return import_drivers_dict
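+
+# Usage sketch for detect_one_import_drivers (illustrative only; the search path
+# and the resulting section/driver names are assumptions):
+#   drivers = detect_one_import_drivers('/path/to/one/bin')
+#   # e.g. {'one-import-tf': 'one-import-tf', 'one-import-onnx': 'one-import-onnx'}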
index 17f55ec..8c006c1 100644 (file)
@@ -111,3 +111,42 @@ add_subdirectory(onnx-operations)
 if(ENABLE_ONE_IMPORT_PYTORCH)
   add_subdirectory(pytorch-operations)
 endif(ENABLE_ONE_IMPORT_PYTORCH)
+
+# Generate group option list for tests
+get_filename_component(ONE_CMDS_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
+set(ONE_PYTHON_DIR "onelib")
+set(CONSTANT_EXPORTING_SCRIPT "${ONE_CMDS_DIR}/${ONE_PYTHON_DIR}/export_constant.py")
+set(O1_OPTION "O1")
+set(O1_TXT_FILE "${O1_OPTION}.list")
+set(O1_TXT_FILE_BIN "${CMAKE_CURRENT_BINARY_DIR}/${O1_TXT_FILE}")
+set(NON_O1_TXT_FILE "non-${O1_OPTION}.list")
+set(NON_O1_TXT_FILE_BIN "${CMAKE_CURRENT_BINARY_DIR}/${NON_O1_TXT_FILE}")
+
+add_custom_command(OUTPUT ${O1_TXT_FILE_BIN}
+  COMMAND ${PYTHON_EXECUTABLE} ${CONSTANT_EXPORTING_SCRIPT} --constant ${O1_OPTION}
+                                    --format txt --output_path ${O1_TXT_FILE_BIN}
+  DEPENDS ${CONSTANT_EXPORTING_SCRIPT}
+  COMMENT "Generate ${O1_TXT_FILE}"
+)
+
+add_custom_command(OUTPUT ${NON_O1_TXT_FILE_BIN}
+  COMMAND ${PYTHON_EXECUTABLE} ${CONSTANT_EXPORTING_SCRIPT} --constant ${O1_OPTION}
+                                    --format txt --output_path ${NON_O1_TXT_FILE_BIN}
+                                    --exclusive
+  DEPENDS ${CONSTANT_EXPORTING_SCRIPT}
+  COMMENT "Generate ${NON_O1_TXT_FILE}"
+)
+
+add_custom_target("O1_txt_target" ALL DEPENDS ${O1_TXT_FILE_BIN} ${NON_O1_TXT_FILE_BIN})
+
+install(FILES ${O1_TXT_FILE_BIN}
+        PERMISSIONS OWNER_WRITE OWNER_READ
+                    GROUP_READ
+                    WORLD_READ
+        DESTINATION test)
+
+install(FILES ${NON_O1_TXT_FILE_BIN}
+        PERMISSIONS OWNER_WRITE OWNER_READ
+                    GROUP_READ
+                    WORLD_READ
+        DESTINATION test)
index 30f48e1..e6c6ee7 100644 (file)
@@ -30,8 +30,11 @@ trap trap_err_onexit ERR
 configfile="one-build_001.cfg"
 outputfile="inception_v3.opt.circle"
 
+rm -f ${filename}.log
+rm -f ${outputfile}
+
 # run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 1485df9..c29d422 100644 (file)
@@ -30,8 +30,11 @@ trap trap_err_onexit ERR
 configfile="one-build_002.cfg"
 outputfile="inception_v3_pkg"
 
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
 # run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index fd9585b..6337b50 100644 (file)
@@ -30,10 +30,11 @@ trap trap_err_onexit ERR
 configfile="one-build_003.cfg"
 outputfile="inception_v3.quantized.circle"
 
-rm -rf ${outputfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
 
 # run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 3e72978..5406355 100644 (file)
@@ -31,13 +31,14 @@ trap trap_err_onexit ERR
 configfile="one-build_004.cfg"
 outputfile="sample.tvn"
 
-rm -rf ${outputfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
 
 # copy dummy-compile to bin folder
 cp dummy-compile ../bin/dummy-compile
 
 # run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 3a69cc3..f003be5 100644 (file)
@@ -31,13 +31,14 @@ trap trap_err_onexit ERR
 configfile="one-build_005.cfg"
 outputfile="sample.tvn"
 
-rm -rf ${outputfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
 
 # copy dummy-compile to bin folder
 cp dummy-compile ../bin/dummy-compile
 
 # run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index ff389a8..0777015 100644 (file)
@@ -31,13 +31,14 @@ trap trap_err_onexit ERR
 configfile="one-build_006.cfg"
 outputfile="sample.tvn"
 
-rm -rf ${outputfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
 
 # copy dummy-compile to bin folder
 cp dummy-compile ../bin/dummy-compile
 
 # run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 4ad68b5..4fd6a2a 100644 (file)
@@ -30,10 +30,11 @@ trap trap_err_onexit ERR
 configfile="one-build_007.cfg"
 outputfile="inception_v3_pkg"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 2f8c4c2..d19b95f 100644 (file)
@@ -31,13 +31,14 @@ trap trap_err_onexit ERR
 configfile="one-build_008.cfg"
 outputfile="test_onnx_model.bin"
 
-rm -rf ${outputfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
 
 # copy dummy-compile to bin folder
 cp dummy-compile ../bin/dummy-compile
 
 # run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 56c0505..ae53519 100644 (file)
@@ -31,13 +31,14 @@ trap trap_err_onexit ERR
 configfile="one-build_009.cfg"
 outputfile="onnx_conv2d_conv2d.bin"
 
-rm -rf ${outputfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
 
 # copy dummy-compile to bin folder
 cp dummy-compile ../bin/dummy-compile
 
 # run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index ec31639..b76e81c 100644 (file)
@@ -31,11 +31,12 @@ configfile="one-build_010.cfg"
 outputfile="inception_v3.alt.circle"
 intermfile="inception_v3.alt.tflite"
 
-rm -rf ${outputfile}
-rm -rf ${intermfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
+rm -f ${intermfile}
 
 # run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 29b4213..efd7143 100644 (file)
@@ -31,11 +31,12 @@ configfile="one-build_011.cfg"
 outputfile="test_onnx_model.circle"
 intermfile="test_onnx_model.tflite"
 
-rm -rf ${outputfile}
-rm -rf ${intermfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
+rm -f ${intermfile}
 
 # run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 445a948..5f123cd 100644 (file)
@@ -30,10 +30,11 @@ trap trap_err_onexit ERR
 configfile="one-build_012.cfg"
 outputfile="inception_v3.list.quantized.circle"
 
-rm -rf ${outputfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
 
 # run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 1b405fb..9a71f02 100644 (file)
@@ -30,10 +30,11 @@ trap trap_err_onexit ERR
 configfile="one-build_013.cfg"
 outputfile="inception_v3.dir.quantized.circle"
 
-rm -rf ${outputfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
 
 # run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index b3acbf5..10e6cc6 100644 (file)
@@ -55,7 +55,8 @@ trap trap_err_onexit ERR
 configfile="one-build_014.cfg"
 outputfile="inception_v3.opt.circle"
 
-rm -rf ${outputfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
 
 if [ ! -d "../optimization" ]; then
   mkdir -p ../optimization
@@ -69,7 +70,7 @@ LUCI_LOG=5 one-build -C ${configfile} -OONE-BUILD_014 > ${filename}.log 2>&1
 
 clean_envir
 
-if grep -q "MakeBatchNormGammaPositivePass" "${filename}.log"; then
+if grep -q "MakeBatchNormGammaPositivePass" "${filename}.log"; then
     echo "${filename_ext} SUCCESS"
     exit 0
 fi
index 15550d8..dece9c0 100644 (file)
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 configfile="one-build_neg_002.cfg"
 
+rm -f ${filename}.log
+
 # run test
 one-build -C ${configfile} > ${filename}.log 2>&1
 
index bcbd2f9..c793d13 100644 (file)
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 configfile="one-build_neg_003.cfg"
 
+rm -f ${filename}.log
+
 # run test
 one-build -C ${configfile} > ${filename}.log 2>&1
 
index aebdfe5..b0d4e61 100644 (file)
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 configfile="one-build_neg_004.cfg"
 
+rm -f ${filename}.log
+
 # run test
 one-build -C ${configfile} > ${filename}.log 2>&1
 
index b4e43d0..9b25111 100644 (file)
@@ -33,6 +33,7 @@ intermfile="inception_v3.alt.tflite"
 
 rm -rf ${outputfile}
 rm -rf ${intermfile}
+rm -f ${filename}.log
 
 # run test
 one-build -C ${configfile} > ${filename}.log 2>&1
index 8723df2..d228f8f 100644 (file)
@@ -33,6 +33,7 @@ intermfile="test_onnx_model.tflite"
 
 rm -rf ${outputfile}
 rm -rf ${intermfile}
+rm -f ${filename}.log
 
 # run test
 one-build -C ${configfile} > ${filename}.log 2>&1
index 5c5d9af..59a5d25 100644 (file)
@@ -62,6 +62,8 @@ touch ../optimization/OONE_BUILD_NEG_007.cfg
 
 configfile=".."
 
+rm -f ${filename}.log
+
 # run test
 one-build -C ${configfile} -OONE_BUILD_NEG_007 > ${filename}.log 2>&1
 
index 8ed2871..2b41222 100644 (file)
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 configfile=".."
 
+rm -f ${filename}.log
+
 # run test
 one-build -C ${configfile} -OONE_BUILD_NEG_008 > ${filename}.log 2>&1
 
index 8d9c831..5e3698d 100644 (file)
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 configfile=".."
 
+rm -f ${filename}.log
+
 # run test
 one-build -C ${configfile} "-O SPACE OPTION" > ${filename}.log 2>&1
 
index 80d149e..dbc4ad7 100644 (file)
@@ -25,6 +25,8 @@ trap_err_onexit()
 
 trap trap_err_onexit ERR
 
+rm -f ${filename}.log
+
 # copy help-compile to bin folder
 cp help-compile ../bin/help-compile
 
index 8af539d..95e7b7b 100644 (file)
@@ -31,6 +31,7 @@ trap trap_err_onexit ERR
 outputfile="sample.tvn"
 
 rm -rf ${outputfile}
+rm -f ${filename}.log
 
 # copy dummy-compile to bin folder
 cp dummy-compile ../bin/dummy-compile
index 889c389..f283ec3 100644 (file)
@@ -31,6 +31,7 @@ trap trap_err_onexit ERR
 outputfile="sample.tvn"
 
 rm -rf ${outputfile}
+rm -f ${filename}.log
 
 # copy dummy-compile to bin folder
 cp dummy-compile ../bin/dummy-compile
index 384689f..485f591 100644 (file)
@@ -27,6 +27,8 @@ trap_err_onexit()
 
 trap trap_err_onexit ERR
 
+rm -f ${filename}.log
+
 # run test
 one-codegen -h > ${filename}.log 2>&1
 
index fd5d0cb..27db028 100644 (file)
@@ -32,6 +32,8 @@ trap_err_onexit()
 
 trap trap_err_onexit ERR
 
+rm -f ${filename}.log
+
 # run test
 one-codegen > ${filename}.log 2>&1
 
index fcca10e..a414eed 100644 (file)
@@ -28,6 +28,7 @@ trap trap_err_onexit ERR
 inputfile="./bcq.pb"
 outputfile="./bcq.circle"
 
+rm -f ${filename}.log
 rm -rf $outputfile
 
 # run test
@@ -35,7 +36,7 @@ one-import-bcq \
 --input_path ${inputfile} \
 --output_path ${outputfile} \
 --input_arrays Placeholder \
---output_arrays MatMul > /dev/null 2>&1
+--output_arrays MatMul > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 6d0d9b2..cb8fe9e 100644 (file)
@@ -36,7 +36,7 @@ inputfile="./bcq.pb"
 outputfile="./bcq.circle"
 
 rm -rf ${outputfile}
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-import-bcq \
index b50c72c..25c772f 100644 (file)
@@ -36,7 +36,7 @@ inputfile="./bcq.pb"
 outputfile="./bcq.circle"
 
 rm -rf ${outputfile}
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-import-bcq \
index 0f75af8..20ef67a 100644 (file)
@@ -36,7 +36,7 @@ inputfile="./bcq_null.pb"
 outputfile="./bcq.circle"
 
 rm -rf ${outputfile}
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-import-bcq \
index 62bfa44..44a8ae1 100644 (file)
@@ -36,7 +36,7 @@ inputfile="./while_3.pbtxt"
 outputfile="./bcq.circle"
 
 rm -rf ${outputfile}
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-import-bcq \
index 0e6d52c..550804f 100644 (file)
@@ -35,7 +35,7 @@ trap trap_err_onexit ERR
 inputfile="./bcq.pb"
 outputfile="."
 
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-import-bcq \
index df7116b..7b872eb 100644 (file)
@@ -35,7 +35,7 @@ trap trap_err_onexit ERR
 inputfile="./bcq.pb"
 outputfile="./bcq.circle"
 
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-import-bcq \
index 521238b..50d47c0 100644 (file)
@@ -35,7 +35,7 @@ trap trap_err_onexit ERR
 inputfile="./bcq.pb"
 outputfile="./bcq.circle"
 
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-import-bcq \
index 7729caf..2edd33d 100644 (file)
@@ -35,7 +35,7 @@ trap trap_err_onexit ERR
 inputfile="./bcq.pb"
 outputfile="./bcq.circle"
 
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-import-bcq \
index e48c86d..72c1a87 100644 (file)
@@ -35,7 +35,7 @@ trap trap_err_onexit ERR
 inputfile="./bcq.pb"
 outputfile="./bcq.circle"
 
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-import-bcq \
index 6119b68..39a9607 100644 (file)
@@ -29,12 +29,12 @@ inputfile="./test_onnx_model.onnx"
 outputfile="./test_onnx_model.circle"
 
 rm -rf ${outputfile}
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-import-onnx \
 --input_path ${inputfile} \
---output_path ${outputfile} > ${outputfile}.log 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index a6a38ee..0430a8e 100644 (file)
@@ -28,7 +28,7 @@ trap_err_onexit()
 trap trap_err_onexit ERR
 
 inputfile="./reshape_matmul.onnx"
-outputfile="./reshape_matmul.circle"
+outputfile="./reshape_matmul.one-import-onnx_002.circle"
 
 rm -rf ${outputfile}
 rm -rf ${outputfile}.log
@@ -42,7 +42,7 @@ if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
 fi
 
-circle-operator --code reshape_matmul.circle > ${outputfile}.log 2>&1
+circle-operator --code ${outputfile} > ${outputfile}.log 2>&1
 
 if ! grep -q "FULLY_CONNECTED" "${outputfile}.log"; then
   trap_err_onexit
@@ -61,7 +61,7 @@ if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
 fi
 
-circle-operator --code reshape_matmul.circle > ${outputfile}.log 2>&1
+circle-operator --code ${outputfile} > ${outputfile}.log 2>&1
 
 if ! grep -q "BATCH_MATMUL" "${outputfile}.log"; then
   trap_err_onexit
index 35c9c3c..2647337 100644 (file)
@@ -29,13 +29,14 @@ inputfile="./inception_v3.pb"
 outputfile="./inception_v3.circle"
 
 # Note: Do not remove output circle file as it's used for quantize tests
+rm -f ${filename}.log
 
 # run test
 one-import tf \
 --input_path ${inputfile} \
 --output_path ${outputfile} \
 --input_arrays input --input_shapes "1,299,299,3" \
---output_arrays InceptionV3/Predictions/Reshape_1 > /dev/null 2>&1
+--output_arrays InceptionV3/Predictions/Reshape_1 > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 8d6ae2c..e7ede7b 100644 (file)
@@ -9,7 +9,7 @@ one-codegen=False
 
 [one-import-tf]
 input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3_cfg.circle
 input_arrays=input
 input_shapes=1,299,299,3
 output_arrays=InceptionV3/Predictions/Reshape_1
index 232becf..24f673c 100644 (file)
@@ -28,13 +28,18 @@ trap_err_onexit()
 trap trap_err_onexit ERR
 
 configfile="one-import_002.cfg"
-outputfile="inception_v3_cmd.circle"
+outputfile_cmd="inception_v3_cmd.circle"
+outputfile_cfg="inception_v3_cfg.circle"
+
+rm -f ${filename}.log
+rm -f ${outputfile_cmd}
+rm -f ${outputfile_cfg}
 
 # run test
 one-import tf -C ${configfile} \
---output_path=inception_v3_cmd.circle > /dev/null 2>&1
+--output_path=${outputfile_cmd} > ${filename}.log 2>&1
 
-if [[ ! -s "${outputfile}" ]]; then
+if [[ ! -s "${outputfile_cmd}" ]]; then
   trap_err_onexit
 fi
 
index cd10698..92e4b15 100644 (file)
@@ -30,10 +30,11 @@ trap trap_err_onexit ERR
 configfile="one-import_003.cfg"
 outputfile="test_saved_model.circle"
 
+rm -f ${filename}.log
 rm -f ${outputfile}
 
 # run test
-one-import tf -C ${configfile} > /dev/null 2>&1
+one-import tf -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index fb30d40..42c1692 100644 (file)
@@ -30,10 +30,11 @@ trap trap_err_onexit ERR
 configfile="one-import_004.cfg"
 outputfile="test_keras_model.circle"
 
+rm -f ${filename}.log
 rm -f ${outputfile}
 
 # run test
-one-import tf -C ${configfile} > /dev/null 2>&1
+one-import tf -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 385afb5..75122d6 100644 (file)
@@ -31,6 +31,7 @@ configfile="one-import_005.cfg"
 outputfile="test_onnx_model.circle"
 
 rm -f ${outputfile}
+rm -f ${filename}.log
 
 # run test
 one-import onnx -C ${configfile} > ${filename}.log 2>&1
index 88c6da5..a9be173 100644 (file)
@@ -31,6 +31,7 @@ inputfile="test_onnx_model.onnx"
 outputfile="test_onnx_model.circle"
 
 rm -f ${outputfile}
+rm -f ${filename}.log
 
 # run test
 one-import onnx -i ${inputfile} -o ${outputfile} > ${filename}.log 2>&1
index 3986ee4..82a3e33 100644 (file)
@@ -35,7 +35,8 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.tflite"
 outputfile="./inception_v3.circle"
 
-rm -rf ${outputfile}
+# do not remove output file
+# rm -rf ${outputfile}
 rm -rf ${filename}.log
 
 # run test
index df83033..9561cea 100644 (file)
@@ -43,7 +43,8 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.pb"
 outputfile="./inception_v3.circle"
 
-rm -rf ${outputfile}
+# do not remove output file
+# rm -rf ${outputfile}
 rm -rf ${filename}.log
 
 # run test
index 99badad..8626eb9 100644 (file)
@@ -35,7 +35,8 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.pb"
 outputfile="./inception_v3.circle"
 
-rm -rf ${outputfile}
+# do not remove output file
+# rm -rf ${outputfile}
 rm -rf ${filename}.log
 
 # run test
index 5e0b9a5..f73826f 100644 (file)
@@ -33,7 +33,7 @@ trap_err_onexit()
 trap trap_err_onexit ERR
 
 inputfile="./inception_v3.pb"
-outputfile="./inception_v3.circle"
+outputfile="./inception_v3.imp_neg_005.circle"
 
 rm -rf ${outputfile}
 rm -rf ${filename}.log
index 3fb5c7d..985bd1c 100644 (file)
@@ -33,7 +33,7 @@ trap_err_onexit()
 trap trap_err_onexit ERR
 
 inputfile="./inception_v3.pb"
-outputfile="./inception_v3.circle"
+outputfile="./inception_v3.imp_neg_006.circle"
 
 rm -rf ${outputfile}
 rm -rf ${filename}.log
index 09e9ed8..2552157 100644 (file)
@@ -33,7 +33,7 @@ trap_err_onexit()
 trap trap_err_onexit ERR
 
 inputfile="./inception_v3.pb"
-outputfile="./inception_v3.circle"
+outputfile="./inception_v3.imp_neg_007.circle"
 
 rm -rf ${outputfile}
 rm -rf ${filename}.log
index 7ad678b..d62899d 100644 (file)
@@ -33,7 +33,7 @@ trap_err_onexit()
 trap trap_err_onexit ERR
 
 inputfile="./inception_v3.pb"
-outputfile="./inception_v3.circle"
+outputfile="./inception_v3.imp_neg_008.circle"
 
 rm -rf ${outputfile}
 rm -rf ${filename}.log
index ab039ee..11512fd 100644 (file)
@@ -33,7 +33,7 @@ trap_err_onexit()
 trap trap_err_onexit ERR
 
 inputfile="./inception_v3.pb"
-outputfile="./inception_v3.circle"
+outputfile="./inception_v3.imp_neg_010.circle"
 
 rm -rf ${outputfile}
 rm -rf ${filename}.log
index e7b5695..e8f1bc7 100644 (file)
@@ -29,8 +29,10 @@ trap trap_err_onexit ERR
 # copy help-infer to bin folder
 cp help-infer ../bin/help-infer
 
+rm -f ${filename}.log
+
 # run test
-one-infer -b help -- -h > ${filename}.log
+one-infer -d help-infer -- -h > ${filename}.log
 
 rm -rf ../bin/help-infer
 
index 22070de..6d22fb3 100644 (file)
@@ -35,6 +35,8 @@ fi
 # copy dummy-infer to bin folder
 cp dummy-infer ../bin/dummy-infer
 
+rm -f ${filename}.log
+
 # run test
 one-infer -d dummy-infer -- ${inputfile} > ${filename}.log
 
index e2aa459..b8fbe93 100644 (file)
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# print one-infer's help message
+
 filename_ext="$(basename -- $0)"
 filename="${filename_ext%.*}"
 
 trap_err_onexit()
 {
   echo "${filename_ext} FAILED"
-  rm -rf ../bin/dummy-infer
   exit 255
 }
 
 trap trap_err_onexit ERR
 
-inputfile="sample.tvn"
-
-if [[ ! -s "${inputfile}" ]]; then
-  touch ${inputfile}
-fi
-
-# copy dummy-infer to bin folder
-cp dummy-infer ../bin/dummy-infer
+rm -f ${filename}.log
 
 # run test
-one-infer -b dummy -- ${inputfile} > ${filename}.log
-
-rm -rf ../bin/dummy-infer
+one-infer -h > ${filename}.log
 
-if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+if grep -q "command line tool to infer model" "${filename}.log"; then
   echo "${filename_ext} SUCCESS"
   exit 0
 fi
diff --git a/compiler/one-cmds/tests/one-infer_004.cfg b/compiler/one-cmds/tests/one-infer_004.cfg
new file mode 100644 (file)
index 0000000..fd5353b
--- /dev/null
@@ -0,0 +1,3 @@
+[one-infer]
+driver=dummy-infer
+command=sample.tvn
index a4cb76c..249728c 100644 (file)
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# print one-infer's help message
+# one-infer with configuration input
 
 filename_ext="$(basename -- $0)"
 filename="${filename_ext%.*}"
@@ -22,15 +22,30 @@ filename="${filename_ext%.*}"
 trap_err_onexit()
 {
   echo "${filename_ext} FAILED"
+  rm -rf ../bin/dummy-infer
   exit 255
 }
 
 trap trap_err_onexit ERR
 
+configfile="one-infer_004.cfg"
+inputfile="sample.tvn"
+
+if [[ ! -s "${inputfile}" ]]; then
+  touch ${inputfile}
+fi
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+rm -f ${filename}.log
+
 # run test
-one-infer -h > ${filename}.log
+one-infer -C ${configfile} > ${filename}.log
+
+rm -rf ../bin/dummy-infer
 
-if grep -q "command line tool to infer model" "${filename}.log"; then
+if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
   echo "${filename_ext} SUCCESS"
   exit 0
 fi
diff --git a/compiler/one-cmds/tests/one-infer_005.cfg b/compiler/one-cmds/tests/one-infer_005.cfg
deleted file mode 100644 (file)
index aca6878..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-[one-infer]
-backend=dummy
-command=sample.tvn
index a44dd0e..7a921fb 100644 (file)
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# one-infer with configuration input
+# one-infer with post process script
 
 filename_ext="$(basename -- $0)"
 filename="${filename_ext%.*}"
@@ -28,7 +28,6 @@ trap_err_onexit()
 
 trap trap_err_onexit ERR
 
-configfile="one-infer_005.cfg"
 inputfile="sample.tvn"
 
 if [[ ! -s "${inputfile}" ]]; then
@@ -38,14 +37,19 @@ fi
 # copy dummy-infer to bin folder
 cp dummy-infer ../bin/dummy-infer
 
+rm -f ${filename}.log
+
 # run test
-one-infer -C ${configfile} > ${filename}.log
+one-infer -d dummy-infer --post-process "./one-infer-test-post-process.py TOKEN" -- ${inputfile} > ${filename}.log 2>&1
+return_code=$?
 
 rm -rf ../bin/dummy-infer
 
 if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
-  echo "${filename_ext} SUCCESS"
-  exit 0
+  if [ "$return_code" -eq "0" ]; then
+    echo "${filename_ext} SUCCESS"
+    exit 0
+  fi
 fi
 
 trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-infer_006.test b/compiler/one-cmds/tests/one-infer_006.test
deleted file mode 100644 (file)
index 2612133..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/bin/bash
-
-# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# one-infer with post process script
-
-filename_ext="$(basename -- $0)"
-filename="${filename_ext%.*}"
-
-trap_err_onexit()
-{
-  echo "${filename_ext} FAILED"
-  rm -rf ../bin/dummy-infer
-  exit 255
-}
-
-trap trap_err_onexit ERR
-
-inputfile="sample.tvn"
-
-if [[ ! -s "${inputfile}" ]]; then
-  touch ${inputfile}
-fi
-
-# copy dummy-infer to bin folder
-cp dummy-infer ../bin/dummy-infer
-
-# run test
-one-infer -b dummy --post-process "./one-infer-test-post-process.py TOKEN" -- ${inputfile} > ${filename}.log 2>&1
-return_code=$?
-
-rm -rf ../bin/dummy-infer
-
-if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
-  if [ "$return_code" -eq "0" ]; then
-    echo "${filename_ext} SUCCESS"
-    exit 0
-  fi
-fi
-
-trap_err_onexit
index 62e7211..15df58a 100644 (file)
@@ -21,7 +21,7 @@ filename="${filename_ext%.*}"
 
 trap_err_onexit()
 {
-  if grep -q "error: the following arguments are required: {-d/--driver | -b/--backend}" "${filename}.log"; then
+  if grep -q "error: the following arguments are required: -d/--driver" "${filename}.log"; then
     echo "${filename_ext} SUCCESS"
     exit 0
   fi
@@ -32,6 +32,8 @@ trap_err_onexit()
 
 trap trap_err_onexit ERR
 
+rm -f ${filename}.log
+
 # run test
 one-infer > ${filename}.log 2>&1
 
index fa88876..1fd16f2 100644 (file)
@@ -33,8 +33,10 @@ trap_err_onexit()
 
 trap trap_err_onexit ERR
 
+rm -f ${filename}.log
+
 # run test
-one-infer -d ${driver_name} -- -h> ${filename}.log 2>&1
+one-infer -d ${driver_name} -- -h > ${filename}.log 2>&1
 
 echo "${filename_ext} FAILED"
 exit 255
index a000552..1de4d68 100644 (file)
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# passed backend is not found
+# one-infer with invalid post process script
 
 filename_ext="$(basename -- $0)"
 filename="${filename_ext%.*}"
-backend_name="neg"
 
 trap_err_onexit()
 {
-  if grep -q "FileNotFoundError: ${backend_name}-infer not found" "${filename}.log"; then
-    echo "${filename_ext} SUCCESS"
-    exit 0
+  return_code=$?
+  if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+    # Case of succeed of inference driver but error after it
+    if [ "$return_code" -ne "0" ]; then
+      echo "${filename_ext} SUCCESS"
+      exit 0
+    fi
   fi
 
   echo "${filename_ext} FAILED"
+  rm -rf ../bin/dummy-infer
   exit 255
 }
 
 trap trap_err_onexit ERR
 
+inputfile="sample.tvn"
+
+if [[ ! -s "${inputfile}" ]]; then
+  touch ${inputfile}
+fi
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+rm -f ${filename}.log
+
 # run test
-one-infer -b ${backend_name} -- -h> ${filename}.log 2>&1
+one-infer -d dummy-infer --post-process "./one-infer-test-post-process.py" -- ${inputfile} > ${filename}.log 2>&1
 
+rm -rf ../bin/dummy-infer
 echo "${filename_ext} FAILED"
 exit 255
diff --git a/compiler/one-cmds/tests/one-infer_neg_004.test b/compiler/one-cmds/tests/one-infer_neg_004.test
deleted file mode 100644 (file)
index b9130d0..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/bin/bash
-
-# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# both -b and -d option drivers are given as argument
-
-filename_ext="$(basename -- $0)"
-filename="${filename_ext%.*}"
-backend_name="neg"
-driver_name="neg2"
-
-trap_err_onexit()
-{
-  if grep -q "\-d and -b options are mutually exclusive. Please use only one of them" "${filename}.log"; then
-    echo "${filename_ext} SUCCESS"
-    exit 0
-  fi
-
-  echo "${filename_ext} FAILED"
-  exit 255
-}
-
-trap trap_err_onexit ERR
-
-# run test
-one-infer -d ${driver_name} -b ${backend_name} -- -h> ${filename}.log 2>&1
-
-echo "${filename_ext} FAILED"
-exit 255
diff --git a/compiler/one-cmds/tests/one-infer_neg_005.test b/compiler/one-cmds/tests/one-infer_neg_005.test
deleted file mode 100644 (file)
index 9074deb..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/bin/bash
-
-# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# one-infer with invalid post process script
-
-filename_ext="$(basename -- $0)"
-filename="${filename_ext%.*}"
-
-trap_err_onexit()
-{
-  return_code=$?
-  if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
-    # Case of succeed of inference driver but error after it
-    if [ "$return_code" -ne "0" ]; then
-      echo "${filename_ext} SUCCESS"
-      exit 0
-    fi
-  fi
-
-  echo "${filename_ext} FAILED"
-  rm -rf ../bin/dummy-infer
-  exit 255
-}
-
-trap trap_err_onexit ERR
-
-inputfile="sample.tvn"
-
-if [[ ! -s "${inputfile}" ]]; then
-  touch ${inputfile}
-fi
-
-# copy dummy-infer to bin folder
-cp dummy-infer ../bin/dummy-infer
-
-# run test
-one-infer -b dummy --post-process "./one-infer-test-post-process.py" -- ${inputfile} > ${filename}.log 2>&1
-
-rm -rf ../bin/dummy-infer
-echo "${filename_ext} FAILED"
-exit 255
index 4152fa3..94c9062 100644 (file)
@@ -28,21 +28,13 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.circle"
 outputfile="./inception_v3-opt.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test > /dev/null 2>&1
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
-
 # run test
 one-optimize --resolve_customop_add \
 --input_path ${inputfile} \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 58f792b..f0a8336 100644 (file)
@@ -28,22 +28,14 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.circle"
 outputfile="./inception_v3-opt.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test > /dev/null 2>&1
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
-
 # run test
 one-optimize --resolve_customop_add \
 --change_outputs InceptionV3/Logits/SpatialSqueeze1 \
 --input_path ${inputfile} \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-optimize_003.test b/compiler/one-cmds/tests/one-optimize_003.test
new file mode 100644 (file)
index 0000000..11a9fb4
--- /dev/null
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./UnidirSeqLSTM.tflite"
+intermfile="./UnidirSeqLSTM.circle"
+outputfile="./UnidirSeqLSTM-opt.circle"
+
+rm -f ${intermfile}
+rm -f ${outputfile}
+rm -f ${intermfile}.log
+rm -f ${outputfile}.log
+
+# run test
+one-import-tflite \
+--input_path ${inputfile} \
+--output_path ${intermfile} > /dev/null 2>&1
+
+one-optimize --unroll_unidirseqlstm \
+--input_path ${intermfile} \
+--output_path ${outputfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+# check UNIDIRECTIONAL_SEQUENCE_LSTM exist
+circle-operator --code ${intermfile} > ${intermfile}.log 2>&1
+if ! grep -q "UNIDIRECTIONAL_SEQUENCE_LSTM" "${intermfile}.log"; then
+  trap_err_onexit
+fi
+
+# check UNIDIRECTIONAL_SEQUENCE_LSTM absent
+circle-operator --code ${outputfile} > ${outputfile}.log 2>&1
+if grep -q "UNIDIRECTIONAL_SEQUENCE_LSTM" "${outputfile}.log"; then
+  trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
index c67e3d4..f88b4f8 100644 (file)
@@ -36,7 +36,7 @@ inputfile="./inception_v3.pb"
 outputfile="./inception_v3-opt.circle"
 
 rm -rf ${outputfile}
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-optimize --resolve_customop_add \
index a1ef702..3f37e62 100644 (file)
@@ -36,7 +36,7 @@ inputfile="./inception_v3.circletxt"
 outputfile="./inception_v3-opt.circle"
 
 rm -rf ${outputfile}
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-optimize --resolve_customop_add \
index 668a6c2..9d6483c 100644 (file)
@@ -34,14 +34,7 @@ trap trap_err_onexit ERR
 
 inputfile="./inception_v3.circle"
 
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test >> /dev/null
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
+rm -f ${filename}.log
 
 # run test
 one-optimize --resolve_customop_add \
index 858d3c7..b05d0fe 100644 (file)
@@ -28,21 +28,13 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.circle"
 outputfolder="nnpack"
 
+rm -f ${filename}.log
 rm -rf ${outputfolder}
 
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test > /dev/null 2>&1
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
-
 # run test
 one-pack \
 -i ${inputfile} \
--o ${outputfolder} > /dev/null 2>&1
+-o ${outputfolder} > ${filename}.log 2>&1
 
 if [[ ! -d "${outputfolder}" ]]; then
   trap_err_onexit
index a6fba07..ddd2ae0 100644 (file)
@@ -30,6 +30,7 @@ inputfile="${testmodel}.circle"
 partfile="${testmodel}.part"
 outputfile="${testmodel}.conn.json"
 
+rm -f ${filename}.log
 rm -rf  ${testmodel}.000*
 rm -rf  ${testmodel}.conn.*
 rm -rf  ${testmodel}.*.log
@@ -37,7 +38,7 @@ rm -rf  ${testmodel}.*.log
 # run test
 one-partition \
 --input_file ${inputfile} \
---part_file ${partfile} > /dev/null 2>&1
+--part_file ${partfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index d54a94f..b594eba 100644 (file)
@@ -26,6 +26,11 @@ trap_err_onexit()
     echo "${filename_ext} SUCCESS"
     exit 0
   fi
+  # for debug build test
+  if grep -1 "std::runtime_error" "${filename}.log"; then
+    echo "${filename_ext} SUCCESS"
+    exit 0
+  fi
 
   echo "${filename_ext} FAILED"
   exit 255
index 823a731..b4bdc72 100644 (file)
@@ -29,6 +29,8 @@ trap trap_err_onexit ERR
 # copy help-profile to bin folder
 cp help-profile ../bin/help-profile
 
+rm -f ${filename}.log
+
 # run test
 one-profile -b help -- -h > ${filename}.log
 
index 1365f9d..3b85dee 100644 (file)
@@ -37,6 +37,8 @@ fi
 # copy dummy-profile to bin folder
 cp dummy-profile ../bin/dummy-profile
 
+rm -f ${filename}.log
+
 # run test
 one-profile -b dummy ${inputfile} > ${filename}.log
 
index 39e2f06..ad98bb4 100644 (file)
@@ -27,6 +27,8 @@ trap_err_onexit()
 
 trap trap_err_onexit ERR
 
+rm -f ${filename}.log
+
 # run test
 one-profile -h > ${filename}.log
 
index 0c9e15f..7b77caa 100644 (file)
@@ -38,6 +38,8 @@ fi
 # copy dummy-profile to bin folder
 cp dummy-profile ../bin/dummy-profile
 
+rm -f ${filename}.log
+
 # run test
 one-profile -C ${configfile} > ${filename}.log
 
index 0de1e3b..f5b2dce 100644 (file)
@@ -32,6 +32,8 @@ trap_err_onexit()
 
 trap trap_err_onexit ERR
 
+rm -f ${filename}.log
+
 # run test
 one-profile > ${filename}.log 2>&1
 
index 60a9de2..96804bf 100644 (file)
@@ -28,24 +28,16 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.circle"
 outputfile="./inception_v3.quantized.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test > /dev/null 2>&1
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
-
 # run test
 one-quantize \
 --input_dtype float32 \
 --quantized_dtype uint8 \
 --input_path ./inception_v3.circle \
 --input_data ./inception_v3_test_data.h5 \
---output_path ./inception_v3.quantized.circle > /dev/null 2>&1
+--output_path ./inception_v3.quantized.circle > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 08a8305..0ddaad1 100644 (file)
@@ -28,23 +28,15 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.circle"
 outputfile="./inception_v3.random.quantized.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test > /dev/null 2>&1
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
-
 # run test without input data
 one-quantize \
 --input_dtype float32 \
 --quantized_dtype uint8 \
---input_path ./inception_v3.circle \
---output_path ./inception_v3.random.quantized.circle > /dev/null 2>&1
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index a387113..b96263e 100644 (file)
@@ -28,16 +28,17 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.circle"
 outputfile="./inception_v3.list.quantized.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test with list-format input data (datalist.txt)
 one-quantize \
 --input_dtype float32 \
 --quantized_dtype uint8 \
---input_path ./inception_v3.circle \
+--input_path ${inputfile} \
 --input_data ./datalist.txt \
 --input_data_format list \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index a489e8c..afb4080 100644 (file)
@@ -28,6 +28,7 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.circle"
 outputfile="./inception_v3.directory.quantized.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test with directory-format input data (raw_files)
@@ -37,7 +38,7 @@ one-quantize \
 --input_path ${inputfile} \
 --input_data ./raw_files \
 --input_data_format directory \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 8449df6..e5403e2 100644 (file)
@@ -28,6 +28,7 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.mat.q8.circle"
 outputfile="./inception_v3.one-quantize_005.q8.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test with force_quantparam option
@@ -37,7 +38,7 @@ one-quantize \
 --scale 2.3 \
 --zero_point 33 \
 --input_path ${inputfile} \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 92b9ebe..2521a96 100644 (file)
@@ -28,6 +28,7 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.mat.q8.circle"
 outputfile="./inception_v3.one-quantize_006.q8.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test with force_quantparam option (multi tensors)
@@ -40,7 +41,7 @@ one-quantize \
 --scale 2.3 \
 --zero_point 33 \
 --input_path ${inputfile} \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 34ae92d..771b857 100644 (file)
@@ -28,17 +28,9 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.circle"
 outputfile="./inception_v3.random.quantized.q16.iq8.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test > /dev/null 2>&1
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
-
 # run test without input data
 one-quantize \
 --input_dtype float32 \
@@ -46,7 +38,7 @@ one-quantize \
 --granularity channel \
 --input_type uint8 \
 --input_path ${inputfile} \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index aff6bcf..fcc4141 100644 (file)
@@ -28,17 +28,9 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.circle"
 outputfile="./inception_v3.random.quantized.q16.oq8.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test > /dev/null 2>&1
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
-
 # run test without input data
 one-quantize \
 --input_dtype float32 \
@@ -46,7 +38,7 @@ one-quantize \
 --granularity channel \
 --output_type uint8 \
 --input_path ${inputfile} \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index aa06703..0c13292 100644 (file)
@@ -28,17 +28,9 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.circle"
 outputfile="./inception_v3.random.quantized.mixed.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test > /dev/null 2>&1
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
-
 # run test without input data
 one-quantize \
 --input_dtype float32 \
@@ -46,7 +38,7 @@ one-quantize \
 --granularity channel \
 --quant_config one-quantize_009.qconf.json \
 --input_path ${inputfile} \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 1095ba0..83d5f37 100644 (file)
@@ -39,17 +39,9 @@ inputfile="./inception_v3.circle"
 outputfile="./inception_v3.one-quantize_010.q.circle"
 datafile="./inception_v3_test_data.h5"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test > /dev/null 2>&1
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
-
 # run test
 one-quantize \
 --input_dtype float32 \
index 34d7f57..88abe4e 100644 (file)
@@ -39,6 +39,7 @@ inputfile="./inception_v3.circle"
 outputfile="./inception_v3.one-quantize_011.q.circle"
 datafile="./inception_v3_test_data.h5"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test
index fba18ac..db3bb17 100644 (file)
@@ -28,6 +28,7 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.circle"
 outputfile="./inception_v3.one-quantize_012.q.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test without input data
@@ -37,7 +38,7 @@ one-quantize \
 --granularity channel \
 --quant_config one-quantize_012.qconf.json \
 --input_path ${inputfile} \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index fd443d6..0d985ff 100644 (file)
@@ -31,6 +31,7 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.circle"
 outputfile="./inception_v3.one-quantize_013.q.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test without input data
@@ -39,7 +40,7 @@ one-quantize \
 --input_dtype float32 \
 --quant_config one-quantize_013.qconf.json \
 --input_path ${inputfile} \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 518c328..40b79fa 100644 (file)
@@ -41,6 +41,7 @@ inputfile="./inception_v3.circle"
 outputfile="./inception_v3.one-quantize_014.q.circle"
 datadir="./raw_files/"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test
index bb45b57..a069601 100644 (file)
@@ -30,6 +30,7 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.mat.q8.circle"
 outputfile="./inception_v3.one-quantize_015.fq.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test
diff --git a/compiler/one-cmds/tests/one-quantize_016.test b/compiler/one-cmds/tests/one-quantize_016.test
new file mode 100644 (file)
index 0000000..8bf0dd0
--- /dev/null
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+# TODO Resolve circledump not found
+# https://github.com/Samsung/ONE/issues/10550
+if ! command -v circledump &> /dev/null
+then
+  echo "${filename_ext} SKIPPED"
+  exit 0
+fi
+
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./reshape_matmul.circle"
+outputfile="./reshape_matmul.one-quantize_016.circle"
+
+rm -f ${filename}.log
+rm -f ${filename}.first.cdump
+rm -f ${filename}.second.cdump
+rm -f ${outputfile}
+
+# run test with different input_type
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--granularity channel \
+--input_type uint8,int16 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+circledump ${outputfile} | grep serving_default_l.1:0$ > ${filename}.first.cdump
+circledump ${outputfile} | grep serving_default_r.1:0$ > ${filename}.second.cdump
+
+# check dtype of the first input (uint8)
+if ! grep -q "UINT8" "${filename}.first.cdump"; then
+  trap_err_onexit
+fi
+
+# check dtype of the second input (int16)
+if ! grep -q "INT16" "${filename}.second.cdump"; then
+  trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
index 4e42faa..dd2617f 100644 (file)
@@ -37,19 +37,10 @@ inputdata="./inception_v3_test_data.h5"
 outputfile="./inception_v3.quantized.circle"
 
 rm -rf ${outputfile}
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # test begin
 
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test >> /dev/null
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
-
 one-quantize \
 --input_dtype float64 \
 --quantized_dtype uint8 \
index 3c0d676..c5ad693 100644 (file)
@@ -37,16 +37,7 @@ inputdata="./inception_v3_test_data.h5"
 outputfile="./inception_v3.quantized.circle"
 
 rm -rf ${outputfile}
-rm -rf ${outputfile}.log
-
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test >> /dev/null
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
+rm -f ${filename}.log
 
 # run test
 one-quantize \
index 156ed84..e8b9648 100644 (file)
@@ -36,16 +36,7 @@ inputfile="./inception_v3.circle"
 outputfile="./inception_v3.quantized.circle"
 
 rm -rf ${outputfile}
-rm -rf ${outputfile}.log
-
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test >> /dev/null
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
+rm -f ${filename}.log
 
 # run test
 one-quantize \
index 5a78a81..eb50564 100644 (file)
@@ -36,16 +36,7 @@ inputfile="./inception_v3.circle"
 inputdata="./inception_v3_test_data.h5"
 outputfile="."
 
-rm -rf ${outputfile}.log
-
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test >> /dev/null
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
+rm -f ${filename}.log
 
 # run test
 one-quantize \
index adcaf66..6f5f489 100644 (file)
@@ -36,7 +36,7 @@ inputfile="./while_3.pbtxt"
 inputdata="./inception_v3_test_data.h5"
 outputfile="./inception_v3.quantized.circle"
 
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-quantize \
index 42890a8..f5c965f 100644 (file)
@@ -36,7 +36,7 @@ inputfile="./inception_v2.circle"
 inputdata="./inception_v3_test_data.h5"
 outputfile="./inception_v3.quantized.circle"
 
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-quantize \
index f096dd6..50b5495 100644 (file)
@@ -36,16 +36,7 @@ inputfile="./inception_v3.circle"
 inputdata="./inception_v3.circle"
 outputfile="./inception_v3.quantized.circle"
 
-rm -rf ${outputfile}.log
-
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test >> /dev/null
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
+rm -f ${filename}.log
 
 # run test
 one-quantize \
index 70752cf..f2675f4 100644 (file)
@@ -36,16 +36,7 @@ inputfile="./inception_v3.circle"
 inputdata="./inception_v3_test_data.h5"
 outputfile="./inception_v3.quantized.circle"
 
-rm -rf ${outputfile}.log
-
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test >> /dev/null
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
+rm -f ${filename}.log
 
 # run test
 one-quantize \
index 595dbdc..2190160 100644 (file)
@@ -36,16 +36,7 @@ inputfile="./inception_v3.circle"
 inputdata="./inception_v3_test_data.h5"
 outputfile="./inception_v3.quantized.circle"
 
-rm -rf ${outputfile}.log
-
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test >> /dev/null
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
+rm -f ${filename}.log
 
 # run test
 one-quantize \
index 6236caf..bd7c0dd 100644 (file)
@@ -36,16 +36,7 @@ inputfile="./inception_v3.circle"
 inputdata="./inception_v3_test_data.h5"
 outputfile="./inception_v3.quantized.circle"
 
-rm -rf ${outputfile}.log
-
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test >> /dev/null
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
+rm -f ${filename}.log
 
 # run test
 one-quantize \
index cf6a3c0..14552f0 100644 (file)
@@ -36,16 +36,7 @@ inputfile="./inception_v3.circle"
 inputdata="./inception_v3_test_data.h5"
 outputfile="./inception_v3.quantized.circle"
 
-rm -rf ${outputfile}.log
-
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test >> /dev/null
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
+rm -f ${filename}.log
 
 # run test
 one-quantize \
index 646c9d3..963783c 100644 (file)
@@ -36,16 +36,7 @@ inputfile="./inception_v3.circle"
 inputdata="./inception_v3_test_data.h5"
 outputfile="./inception_v3.quantized.circle"
 
-rm -rf ${outputfile}.log
-
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test >> /dev/null
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
+rm -f ${filename}.log
 
 # run test
 one-quantize \
index 768c032..4b81c87 100644 (file)
@@ -36,16 +36,7 @@ inputfile="./inception_v3.circle"
 inputdata="./inception_v3_test_data.h5"
 outputfile="./inception_v3.quantized.circle"
 
-rm -rf ${outputfile}.log
-
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
-  /bin/bash one-import_001.test >> /dev/null
-  return_code=$?
-  if [[ ${return_code} != 0 ]]; then
-    trap_err_onexit
-  fi
-fi
+rm -f ${filename}.log
 
 # run test
 one-quantize \
index af1f8bb..9115370 100644 (file)
@@ -36,7 +36,7 @@ inputfile="./inception_v3.circle"
 inputdata="./datalist.txt"
 outputfile="./inception_v3.quantized.circle"
 
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-quantize \
index 73864cc..1acb20f 100644 (file)
@@ -36,7 +36,7 @@ inputfile="./inception_v3.circle"
 inputdata="./inception_v3_test_data.h5"
 outputfile="./inception_v3.quantized.circle"
 
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-quantize \
index 4090b15..b419ff5 100644 (file)
@@ -36,7 +36,7 @@ inputfile="./inception_v3.circle"
 inputdata="./datalist.txt"
 outputfile="./inception_v3.quantized.circle"
 
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-quantize \
index b666ace..5326f18 100644 (file)
@@ -36,7 +36,7 @@ inputfile="./inception_v3.circle"
 inputdata="./inception_v3_test_data.h5"
 outputfile="./inception_v3.quantized.circle"
 
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-quantize \
index 6937caf..6470efc 100644 (file)
@@ -35,7 +35,7 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.mat.q8.circle"
 outputfile="./inception_v3.neg_018.q8.circle"
 
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-quantize \
index e182edf..9f6e35f 100644 (file)
@@ -35,7 +35,7 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.circle"
 outputfile="./inception_v3.quantized.neg_019.circle"
 
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-quantize \
index 27b11c3..46a4f9d 100644 (file)
@@ -35,7 +35,7 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.circle"
 outputfile="./inception_v3.quantized.neg_020.circle"
 
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
 
 # run test
 one-quantize \
diff --git a/compiler/one-cmds/tests/one-quantize_neg_021.test b/compiler/one-cmds/tests/one-quantize_neg_021.test
new file mode 100644 (file)
index 0000000..31a3182
--- /dev/null
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Wrong number of input_type in one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  if grep -q "Invalid number of input dtype" "${filename}.log"; then
+    echo "${filename_ext} SUCCESS"
+    exit 0
+  fi
+
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./reshape_matmul.circle"
+outputfile="./reshape_matmul.quantized.neg_021.circle"
+
+rm -f ${filename}.log
+
+# run test with wrong number of input_type
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--granularity channel \
+--input_type uint8,int16,uint8 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
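The negative tests added in this commit (one-quantize_neg_021 above is typical) all follow the same failure-path pattern: the command under test is expected to exit non-zero, the trap ... ERR handler fires and greps the captured log for the expected diagnostic, and only then does the script report SUCCESS; if control reaches the end of the script, the tool unexpectedly succeeded and the test reports FAILED. A minimal sketch of that skeleton, with some-tool and the grepped message as placeholders rather than anything taken from the suite:

#!/bin/bash

filename_ext="$(basename -- $0)"
filename="${filename_ext%.*}"

trap_err_onexit()
{
  # SUCCESS only when the tool failed *and* left the expected diagnostic in its log
  if grep -q "expected error message" "${filename}.log"; then
    echo "${filename_ext} SUCCESS"
    exit 0
  fi
  echo "${filename_ext} FAILED"
  exit 255
}

trap trap_err_onexit ERR

rm -f ${filename}.log

# placeholder command; it must exit non-zero for the ERR trap to fire
some-tool --bad-option > ${filename}.log 2>&1

# reaching this point means the tool did not fail as expected
echo "${filename_ext} FAILED"
exit 255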
index 4776ea8..f331010 100644 (file)
@@ -9,12 +9,12 @@ one-codegen=False
 
 [one-import-tf]
 input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_001.circle
 input_arrays=input
 input_shapes=1,299,299,3
 output_arrays=InceptionV3/Predictions/Reshape_1
 converter_version=v2
 
 [one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_001.circle
 output_path=inception_v3.opt.circle
index ece0aa2..73fbdd6 100644 (file)
@@ -30,8 +30,11 @@ trap trap_err_onexit ERR
 configfile="onecc_001.cfg"
 outputfile="inception_v3.opt.circle"
 
+rm -f ${filename}.log
+rm -f ${outputfile}
+
 # run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index fedaf11..0338ccb 100644 (file)
@@ -9,14 +9,14 @@ one-codegen=False
 
 [one-import-tf]
 input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_002.circle
 input_arrays=input
 input_shapes=1,299,299,3
 output_arrays=InceptionV3/Predictions/Reshape_1
 converter_version=v2
 
 [one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_002.circle
 output_path=inception_v3.opt.circle
 
 [one-pack]
index 4075b79..f154f66 100644 (file)
@@ -30,8 +30,11 @@ trap trap_err_onexit ERR
 configfile="onecc_002.cfg"
 outputfile="inception_v3_pkg"
 
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
 # run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index d0b3a0c..da5c73e 100644 (file)
@@ -9,13 +9,13 @@ one-codegen=False
 
 [one-import-tf]
 input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_003.circle
 input_arrays=input
 input_shapes=1,299,299,3
 output_arrays=InceptionV3/Predictions/Reshape_1
 converter_version=v1
 
 [one-quantize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_003.circle
 output_path=inception_v3.quantized.circle
 input_data=inception_v3_test_data.h5
index 9ee6877..140a99f 100644 (file)
@@ -30,10 +30,11 @@ trap trap_err_onexit ERR
 configfile="onecc_003.cfg"
 outputfile="inception_v3.quantized.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 614ba13..e155430 100644 (file)
@@ -9,7 +9,7 @@ one-codegen=True
 
 [one-import-tf]
 input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_004.circle
 input_arrays=input
 input_shapes=1,299,299,3
 output_arrays=InceptionV3/Predictions/Reshape_1
@@ -17,4 +17,4 @@ converter_version=v1
 
 [one-codegen]
 backend=dummy
-command=-o sample.tvn inception_v3.circle
+command=-o sample.tvn inception_v3.onecc_004.circle
index 2eaffa6..b532d19 100644 (file)
@@ -31,13 +31,14 @@ trap trap_err_onexit ERR
 configfile="onecc_004.cfg"
 outputfile="sample.tvn"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # copy dummy-compile to bin folder
 cp dummy-compile ../bin/dummy-compile
 
 # run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 83a8230..ff4ed84 100644 (file)
@@ -9,10 +9,10 @@ one-codegen=True
 
 [one-import-tflite]
 input_path=inception_v3.tflite
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_005.circle
 
 [one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_005.circle
 output_path=inception_v3.opt.circle
 
 [one-codegen]
index a98c3e4..48c81e5 100644 (file)
@@ -31,13 +31,14 @@ trap trap_err_onexit ERR
 configfile="onecc_005.cfg"
 outputfile="sample.tvn"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # copy dummy-compile to bin folder
 cp dummy-compile ../bin/dummy-compile
 
 # run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index a7c8938..dd58e6b 100644 (file)
@@ -9,14 +9,14 @@ one-codegen=True
 
 [one-import-tf]
 input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_006.circle
 input_arrays=input
 input_shapes=1,299,299,3
 output_arrays=InceptionV3/Predictions/Reshape_1
 converter_version=v1
 
 [one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_006.circle
 output_path=inception_v3.opt.circle
 
 [one-quantize]
index 096c861..451d34f 100644 (file)
@@ -31,13 +31,14 @@ trap trap_err_onexit ERR
 configfile="onecc_006.cfg"
 outputfile="sample.tvn"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # copy dummy-compile to bin folder
 cp dummy-compile ../bin/dummy-compile
 
 # run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 8775f8e..2d3ecac 100644 (file)
@@ -9,14 +9,14 @@ one-codegen=False
 
 [one-import-tf]
 input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_007.circle
 input_arrays=input
 input_shapes=1,299,299,3
 output_arrays=InceptionV3/Predictions/Reshape_1
 converter_version=v1
 
 [one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_007.circle
 output_path=inception_v3.opt.circle
 
 [one-quantize]
index 883a9a7..ee3c262 100644 (file)
@@ -30,10 +30,11 @@ trap trap_err_onexit ERR
 configfile="onecc_007.cfg"
 outputfile="inception_v3_pkg"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 5bd7831..f2d25f1 100644 (file)
@@ -31,13 +31,14 @@ trap trap_err_onexit ERR
 configfile="onecc_008.cfg"
 outputfile="test_onnx_model.bin"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # copy dummy-compile to bin folder
 cp dummy-compile ../bin/dummy-compile
 
 # run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 921ebeb..0b4537e 100644 (file)
@@ -31,13 +31,14 @@ trap trap_err_onexit ERR
 configfile="onecc_009.cfg"
 outputfile="onnx_conv2d_conv2d.bin"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # copy dummy-compile to bin folder
 cp dummy-compile ../bin/dummy-compile
 
 # run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 87d7436..85192ed 100644 (file)
@@ -31,11 +31,12 @@ configfile="onecc_010.cfg"
 outputfile="inception_v3.alt.circle"
 intermfile="inception_v3.alt.tflite"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 rm -rf ${intermfile}
 
 # run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index ff8c109..eeb59b4 100644 (file)
@@ -31,11 +31,12 @@ configfile="onecc_011.cfg"
 outputfile="test_onnx_model.circle"
 intermfile="test_onnx_model.tflite"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 rm -rf ${intermfile}
 
 # run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 498b602..92f61a1 100644 (file)
@@ -9,14 +9,14 @@ one-codegen=False
 
 [one-import-tf]
 input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_012.circle
 input_arrays=input
 input_shapes=1,299,299,3
 output_arrays=InceptionV3/Predictions/Reshape_1
 converter_version=v1
 
 [one-quantize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_012.circle
 output_path=inception_v3.list.quantized.circle
 input_data=datalist.txt
 input_data_format=list
index 2c71e30..f5abe94 100644 (file)
@@ -30,10 +30,11 @@ trap trap_err_onexit ERR
 configfile="onecc_012.cfg"
 outputfile="inception_v3.list.quantized.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index f909ab7..4ed6871 100644 (file)
@@ -1,6 +1,6 @@
 [one-import-tf]
 input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_013.circle
 input_arrays=input
 input_shapes=1,299,299,3
 output_arrays=InceptionV3/Predictions/Reshape_1
index bcb2720..bca0cbe 100644 (file)
@@ -28,12 +28,13 @@ trap_err_onexit()
 trap trap_err_onexit ERR
 
 configfile="onecc_013.cfg"
-outputfile="inception_v3.circle"
+outputfile="inception_v3.onecc_13.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test
-onecc import tf -C ${configfile} > /dev/null 2>&1
+onecc import tf -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index c9a37f1..8478be7 100644 (file)
@@ -1,6 +1,6 @@
 [one-import-tflite]
 input_path=inception_v3.tflite
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_014.circle
 input_arrays=input
 input_shapes=1,299,299,3
 output_arrays=InceptionV3/Predictions/Reshape_1
index 9d42cc3..3e93a69 100644 (file)
@@ -28,12 +28,13 @@ trap_err_onexit()
 trap trap_err_onexit ERR
 
 configfile="onecc_014.cfg"
-outputfile="inception_v3.circle"
+outputfile="inception_v3.onecc_014.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test
-onecc import tflite -C ${configfile} > /dev/null 2>&1
+onecc import tflite -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index d74ba32..d92bf78 100644 (file)
@@ -30,10 +30,11 @@ trap trap_err_onexit ERR
 configfile="onecc_015.cfg"
 outputfile="bcq.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test
-onecc import bcq -C ${configfile} > /dev/null 2>&1
+onecc import bcq -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 582cd06..09e5143 100644 (file)
@@ -30,10 +30,11 @@ trap trap_err_onexit ERR
 configfile="onecc_016.cfg"
 outputfile="test_onnx_model.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test
-onecc import onnx -C ${configfile} > /dev/null 2>&1
+onecc import onnx -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index d996a28..359a6e7 100644 (file)
@@ -35,10 +35,11 @@ if [[ ! -s "${inputfile}" ]]; then
   trap_err_onexit
 fi
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test
-onecc optimize -i ${inputfile} -o ${outputfile} > /dev/null 2>&1
+onecc optimize -i ${inputfile} -o ${outputfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index ed97603..cd2ac59 100644 (file)
@@ -36,10 +36,11 @@ if [[ ! -s "${inputfile}" && ! -s "${inputdata}" ]]; then
   trap_err_onexit
 fi
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test
-onecc quantize -i ${inputfile} -o ${outputfile} -d ${inputdata} > /dev/null 2>&1
+onecc quantize -i ${inputfile} -o ${outputfile} -d ${inputdata} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 9d66e3a..23e0e3f 100644 (file)
@@ -35,10 +35,11 @@ if [[ ! -s "${inputfile}" ]]; then
   trap_err_onexit
 fi
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test
-onecc pack -i ${inputfile} -o ${outputfile} > /dev/null 2>&1
+onecc pack -i ${inputfile} -o ${outputfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 9afa910..d914e9b 100644 (file)
@@ -38,13 +38,14 @@ if [[ ! -f "${inputfile}" ]]; then
   trap_err_onexit
 fi
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # copy dummy-compile to bin folder
 cp dummy-compile ../bin/dummy-compile
 
 # run test
-onecc codegen -b dummy -o ${outputfile} ${inputfile} > /dev/null 2>&1
+onecc codegen -b dummy -o ${outputfile} ${inputfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 906ed18..85e9aa7 100644 (file)
@@ -33,8 +33,10 @@ configfile="onecc_021.cfg"
 # copy dummy-profile to bin folder
 cp dummy-profile ../bin/dummy-profile
 
+rm -f ${filename}.log
+
 # run test
-onecc -C ${configfile} > ${filename}.log
+onecc -C ${configfile} > ${filename}.log 2>&1
 
 rm -rf ../bin/dummy-profile
 
index 3aaa26f..6f5d565 100644 (file)
@@ -30,10 +30,11 @@ trap trap_err_onexit ERR
 configfile="onecc_022.cfg"
 outputfile="inception_v3.onecc_022.q8.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 50b3b1c..9448f40 100644 (file)
@@ -30,10 +30,11 @@ trap trap_err_onexit ERR
 configfile="onecc_023.cfg"
 outputfile="inception_v3.onecc_023.q16.iq8.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 7b4b1a8..053758f 100644 (file)
@@ -10,13 +10,13 @@ one-codegen=False
 
 [one-import-tf]
 input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_024.circle
 input_arrays=input
 input_shapes=1,299,299,3
 output_arrays=InceptionV3/Predictions/Reshape_1
 converter_version=v1
 
 [one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_024.circle
 output_path=inception_v3.opt.circle
 make_batchnorm_gamma_positive=False
index 1f5daa1..d1e1d92 100644 (file)
@@ -55,6 +55,7 @@ trap trap_err_onexit ERR
 configfile="onecc_024.cfg"
 outputfile="inception_v3.opt.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 if [ ! -d "../optimization" ]; then
@@ -69,7 +70,7 @@ LUCI_LOG=5 onecc -C ${configfile} -OONECC_024 > ${filename}.log 2>&1
 
 clean_envir
 
-if grep -q "MakeBatchNormGammaPositivePass" "${filename}.log"; then
+if grep -q "MakeBatchNormGammaPositivePass" "${filename}.log"; then
     echo "${filename_ext} SUCCESS"
     exit 0
 fi
index 4776ea8..02e54b0 100644 (file)
@@ -9,12 +9,12 @@ one-codegen=False
 
 [one-import-tf]
 input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_025.circle
 input_arrays=input
 input_shapes=1,299,299,3
 output_arrays=InceptionV3/Predictions/Reshape_1
 converter_version=v2
 
 [one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_025.circle
 output_path=inception_v3.opt.circle
index 396f40c..fff9440 100644 (file)
@@ -30,8 +30,11 @@ trap trap_err_onexit ERR
 configfile="onecc_001.cfg"
 outputfile="inception_v3.opt.circle"
 
+rm -f ${filename}.log
+rm -f ${outputfile}
+
 # run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 84cfa41..cd09e11 100644 (file)
@@ -38,6 +38,7 @@ trap trap_err_onexit ERR
 configfile="onecc_026.cfg"
 outputfile="inception_v3.onecc_026.q.circle"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test
index d3f6b5e..1b9a2b9 100644 (file)
@@ -11,5 +11,5 @@ one-profile=False
 one-infer=True
 
 [one-infer]
-backend=dummy
+driver=dummy-infer
 command=test_onnx_model.bin
index e727359..54f0d25 100644 (file)
@@ -33,8 +33,10 @@ configfile="onecc_027.cfg"
 # copy dummy-infer to bin folder
 cp dummy-infer ../bin/dummy-infer
 
+rm -f ${filename}.log
+
 # run test
-onecc -C ${configfile} > ${filename}.log
+onecc -C ${configfile} > ${filename}.log 2>&1
 
 rm -rf ../bin/dummy-infer
 
index 10ce158..ea23579 100644 (file)
@@ -30,6 +30,7 @@ trap trap_err_onexit ERR
 workflowfile="onecc_028.workflow.json"
 outputfile="inception_v3_pkg"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test
index 84bfd01..9836482 100644 (file)
@@ -12,7 +12,7 @@
             "one-cmd": "one-import-tf",
             "commands": {
                 "input_path": "inception_v3.pb",
-                "output_path": "inception_v3.circle",
+                "output_path": "inception_v3.onecc_028.circle",
                 "input_arrays": "input",
                 "input_shapes": "1,299,299,3",
                 "output_arrays": "InceptionV3/Predictions/Reshape_1",
@@ -22,7 +22,7 @@
         "OPTIMIZE": {
             "one-cmd": "one-optimize",
             "commands": {
-                "input_path": "inception_v3.circle",
+                "input_path": "inception_v3.onecc_028.circle",
                 "output_path": "inception_v3.opt.circle"
             }
         },
index 9bab1a1..9fb0ec3 100644 (file)
@@ -31,6 +31,7 @@ workflowfile="onecc_029.workflow.json"
 outputfile="inception_v3.quantized.circle"
 
 rm -rf ${outputfile}
+rm -f ${filename}.log
 
 # run test
 onecc -W ${workflowfile} > ${filename}.log 2>&1
index 65c9ea6..826888d 100644 (file)
@@ -11,7 +11,7 @@
             "one-cmd": "one-import-tf",
             "commands": {
                 "input_path": "inception_v3.pb",
-                "output_path": "inception_v3.circle",
+                "output_path": "inception_v3.onecc_029.circle",
                 "input_arrays": "input",
                 "input_shapes": "1,299,299,3",
                 "output_arrays": "InceptionV3/Predictions/Reshape_1",
@@ -21,7 +21,7 @@
         "QUANTIZE": {
             "one-cmd": "one-quantize",
             "commands": {
-                "input_path": "inception_v3.circle",
+                "input_path": "inception_v3.onecc_029.circle",
                 "output_path": "inception_v3.quantized.circle",
                 "input_data": "inception_v3_test_data.h5"
             }
index c0aa56a..88f0ab8 100644 (file)
@@ -32,6 +32,7 @@ workflowfile="onecc_030.workflow.json"
 outputfile="sample.tvn"
 
 rm -rf ${outputfile}
+rm -f ${filename}.log
 
 # copy dummy-compile to bin folder
 cp dummy-compile ../bin/dummy-compile
index 111a1b0..e4a2467 100644 (file)
@@ -11,7 +11,7 @@
             "one-cmd": "one-import-tf",
             "commands": {
                 "input_path": "inception_v3.pb",
-                "output_path": "inception_v3.circle",
+                "output_path": "inception_v3.onecc_030.circle",
                 "input_arrays": "input",
                 "input_shapes": "1,299,299,3",
                 "output_arrays": "InceptionV3/Predictions/Reshape_1",
@@ -22,7 +22,7 @@
             "one-cmd": "one-codegen",
             "commands": {
                 "backend": "dummy",
-                "command": "-o sample.tvn inception_v3.circle"
+                "command": "-o sample.tvn inception_v3.onecc_030.circle"
             }
         }
     }
index 7a1c670..33c9664 100644 (file)
@@ -32,6 +32,7 @@ workflowfile="onecc_031.workflow.json"
 outputfile="sample.tvn"
 
 rm -rf ${outputfile}
+rm -f ${filename}.log
 
 # copy dummy-compile to bin folder
 cp dummy-compile ../bin/dummy-compile
index 83d52b9..7018ca6 100644 (file)
             "one-cmd": "one-import-tflite",
             "commands": {
                 "input_path": "inception_v3.tflite",
-                "output_path": "inception_v3.circle"
+                "output_path": "inception_v3.onecc_031.circle"
             }
         },
         "optimize": {
             "one-cmd": "one-optimize",
             "commands": {
-                "input_path": "inception_v3.circle",
+                "input_path": "inception_v3.onecc_031.circle",
                 "output_path": "inception_v3.opt.circle"
             }
         },
index 89b6c41..5884f57 100644 (file)
@@ -32,6 +32,7 @@ workflowfile="onecc_032.workflow.json"
 outputfile="sample.tvn"
 
 rm -rf ${outputfile}
+rm -f ${filename}.log
 
 # copy dummy-compile to bin folder
 cp dummy-compile ../bin/dummy-compile
index 08d3f0f..7a794c8 100644 (file)
             "one-cmd": "one-import-tflite",
             "commands": {
                 "input_path": "inception_v3.tflite",
-                "output_path": "inception_v3.circle"
+                "output_path": "inception_v3.onecc_032.circle"
             }
         },
         "optimize": {
             "one-cmd": "one-optimize",
             "commands": {
-                "input_path": "inception_v3.circle",
+                "input_path": "inception_v3.onecc_032.circle",
                 "output_path": "inception_v3.opt.circle"
             }
         },
         "quantize": {
             "one-cmd": "one-quantize",
             "commands": {
-                "input_path": "inception_v3.circle",
+                "input_path": "inception_v3.onecc_032.circle",
                 "output_path": "inception_v3.quantized.circle",
                 "input_data": "inception_v3_test_data.h5"
             }
index 635582f..b2655fe 100644 (file)
@@ -31,6 +31,7 @@ workflowfile="onecc_033.workflow.json"
 outputfile="inception_v3_pkg"
 
 rm -rf ${outputfile}
+rm -f ${filename}.log
 
 # run test
 onecc -W ${workflowfile} > ${filename}.log 2>&1
index 01233ff..2edd5a8 100644 (file)
             "one-cmd": "one-import-tflite",
             "commands": {
                 "input_path": "inception_v3.tflite",
-                "output_path": "inception_v3.circle"
+                "output_path": "inception_v3.onecc_033.circle"
             }
         },
         "optimize": {
             "one-cmd": "one-optimize",
             "commands": {
-                "input_path": "inception_v3.circle",
+                "input_path": "inception_v3.onecc_033.circle",
                 "output_path": "inception_v3.opt.circle"
             }
         },
         "quantize": {
             "one-cmd": "one-quantize",
             "commands": {
-                "input_path": "inception_v3.circle",
+                "input_path": "inception_v3.onecc_033.circle",
                 "output_path": "inception_v3.quantized.circle",
                 "input_data": "inception_v3_test_data.h5"
             }
index e766548..9c88dff 100644 (file)
@@ -32,6 +32,7 @@ workflowfile="onecc_034.workflow.json"
 outputfile="onnx_conv2d_conv2d.bin"
 
 rm -rf ${outputfile}
+rm -f ${filename}.log
 
 # copy dummy-compile to bin folder
 cp dummy-compile ../bin/dummy-compile
index 762cdd3..851da65 100644 (file)
@@ -33,6 +33,7 @@ intermfile="inception_v3.alt.tflite"
 
 rm -rf ${outputfile}
 rm -rf ${intermfile}
+rm -f ${filename}.log
 
 # run test
 onecc -W ${workflowfile} > ${filename}.log 2>&1
index 865255e..00f4641 100644 (file)
@@ -33,6 +33,7 @@ intermfile="test_onnx_model.tflite"
 
 rm -rf ${outputfile}
 rm -rf ${intermfile}
+rm -f ${filename}.log
 
 # run test
 onecc -W ${workflowfile} > ${filename}.log 2>&1
index 52ea9e4..596bbfd 100644 (file)
@@ -31,6 +31,7 @@ workflowfile="onecc_037.workflow.json"
 outputfile="inception_v3.opt.circle"
 
 rm -rf ${outputfile}
+rm -f ${filename}.log
 
 # run test
 onecc -W ${workflowfile} > ${filename}.log 2>&1
index 3317fb2..ebd6b34 100644 (file)
@@ -11,7 +11,7 @@
             "one-cmd": "one-import-tf",
             "commands": {
                 "input_path": "inception_v3.pb",
-                "output_path": "inception_v3.circle",
+                "output_path": "inception_v3.onecc_037.circle",
                 "input_arrays": "input",
                 "input_shapes": "1,299,299,3",
                 "output_arrays": "InceptionV3/Predictions/Reshape_1",
@@ -21,7 +21,7 @@
         "OPTIMIZE": {
             "one-cmd": "one-optimize",
             "commands": {
-                "input_path": "inception_v3.circle",
+                "input_path": "inception_v3.onecc_037.circle",
                 "output_path": "inception_v3.opt.circle"
             }
         }
index 6b8f7cf..41f6333 100644 (file)
@@ -31,6 +31,7 @@ workflowfile="onecc_038.workflow.json"
 outputfile="inception_v3.list.quantized.circle"
 
 rm -rf ${outputfile}
+rm -f ${filename}.log
 
 # run test
 onecc -W ${workflowfile} > ${filename}.log 2>&1
index 5ac515d..d31045e 100644 (file)
@@ -11,7 +11,7 @@
             "one-cmd": "one-import-tf",
             "commands": {
                 "input_path": "inception_v3.pb",
-                "output_path": "inception_v3.circle",
+                "output_path": "inception_v3.onecc_038.circle",
                 "input_arrays": "input",
                 "input_shapes": "1,299,299,3",
                 "output_arrays": "InceptionV3/Predictions/Reshape_1",
@@ -21,7 +21,7 @@
         "QUANTIZE": {
             "one-cmd": "one-quantize",
             "commands": {
-                "input_path": "inception_v3.circle",
+                "input_path": "inception_v3.onecc_038.circle",
                 "output_path": "inception_v3.list.quantized.circle",
                 "input_data": "datalist.txt",
                 "input_data_format": "list"
index 7db9d90..b922636 100644 (file)
@@ -41,6 +41,7 @@ workflowfile="onecc_039.workflow.json"
 outputfile="inception_v3.onecc_039.q.circle"
 
 rm -rf ${outputfile}
+rm -f ${filename}.log
 
 # run test
 onecc -W ${workflowfile} > ${filename}.log 2>&1
index 4776ea8..b9f39fd 100644 (file)
@@ -9,12 +9,12 @@ one-codegen=False
 
 [one-import-tf]
 input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_040.circle
 input_arrays=input
 input_shapes=1,299,299,3
 output_arrays=InceptionV3/Predictions/Reshape_1
 converter_version=v2
 
 [one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_040.circle
 output_path=inception_v3.opt.circle
index 2f75677..4d23025 100644 (file)
@@ -31,6 +31,7 @@ workflowfile="onecc_040.workflow.json"
 outputfile="inception_v3.opt.circle"
 
 rm -rf ${outputfile}
+rm -f ${filename}.log
 
 # run test
 onecc -W ${workflowfile} > ${filename}.log 2>&1
index 791dd12..c504e69 100644 (file)
@@ -45,6 +45,7 @@ outputfile2="inception_v3.opt.circle"
 cp dummy-inferV2 ../bin/dummy-inferV2
 
 rm -rf ${outputfile1} ${outputfile2}
+rm -f ${filename}.log
 
 # run test
 onecc -W ${workflowfile} > ${filename}.log 2>&1
index 7dfc1c6..e19494c 100644 (file)
@@ -42,7 +42,7 @@
             "one-cmd": "one-import-tf",
             "commands": {
                 "input_path": "inception_v3.pb",
-                "output_path": "inception_v3.circle",
+                "output_path": "inception_v3.onecc_041.circle",
                 "input_arrays": "input",
                 "input_shapes": "1,299,299,3",
                 "output_arrays": "InceptionV3/Predictions/Reshape_1",
@@ -52,7 +52,7 @@
         "OPTIMIZE": {
             "one-cmd": "one-optimize",
             "commands": {
-                "input_path": "inception_v3.circle",
+                "input_path": "inception_v3.onecc_041.circle",
                 "output_path": "inception_v3.opt.circle"
             }
         }
diff --git a/compiler/one-cmds/tests/onecc_042.cfg b/compiler/one-cmds/tests/onecc_042.cfg
new file mode 100644 (file)
index 0000000..988c768
--- /dev/null
@@ -0,0 +1,9 @@
+[Environment]
+SPM_SIZE=256KB
+
+[onecc]
+one-codegen=True
+
+[one-codegen]
+backend=dummyEnv
+command=dummy_env.bin
diff --git a/compiler/one-cmds/tests/onecc_042.test b/compiler/one-cmds/tests/onecc_042.test
new file mode 100644 (file)
index 0000000..c8a15fa
--- /dev/null
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-codegen with Environment section
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+outputfile="dummy_env.bin"
+
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  rm -rf ../bin/dummyEnv-compile
+  rm -rf ${outputfile}
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_042.cfg"
+
+rm -rf ${outputfile}
+rm -rf ${filename}.log
+
+# copy dummyEnv-compile to bin folder
+cp dummyEnv-compile ../bin/dummyEnv-compile
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+if grep -q "SPM_SIZE=256KB" "${outputfile}"; then
+  echo "${filename_ext} SUCCESS"
+  rm -rf ../bin/dummyEnv-compile
+  rm -rf ${outputfile}
+  exit 0
+fi
+
+trap_err_onexit
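For context on onecc_042: the new [Environment] section in the cfg is expected to be exported into the environment of the one-codegen backend process, and the test checks this indirectly by grepping the backend's output file for SPM_SIZE=256KB. The dummyEnv-compile fixture itself is presumably added elsewhere in this commit; an illustrative stand-in that would satisfy that grep (its interface here is an assumption inferred from the test, not the actual helper) could look like:

#!/bin/bash
# Hypothetical stand-in for dummyEnv-compile: dump the inherited environment
# into the requested output file so the caller can grep for SPM_SIZE=256KB.
# Usage: dummyEnv-compile <output-file>
outfile="${1:?output file argument required}"
env > "${outfile}"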
diff --git a/compiler/one-cmds/tests/onecc_043.cfg b/compiler/one-cmds/tests/onecc_043.cfg
new file mode 100644 (file)
index 0000000..7d5ee87
--- /dev/null
@@ -0,0 +1,14 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+include=O1
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.onecc_043.opt.circle
diff --git a/compiler/one-cmds/tests/onecc_043.test b/compiler/one-cmds/tests/onecc_043.test
new file mode 100644 (file)
index 0000000..f99039e
--- /dev/null
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test for "O1=True" option in onecc config file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_043.cfg"
+outputfile="inception_v3.onecc_043.opt.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+# O1.list is dynamically created from onelib/export_constant.py
+readarray -t O1_OPTS < "O1.list"
+readarray -t NO_O1_OPTS < "non-O1.list"
+
+for opt in "${O1_OPTS[@]}"
+do
+  if ! grep -q ${opt} ${outputfile}.log; then
+    trap_err_onexit
+  fi
+done
+
+for no_opt in "${NO_O1_OPTS[@]}"
+do
+  if grep -q ${no_opt} ${outputfile}.log; then
+    trap_err_onexit
+  fi
+done
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_044.cfg b/compiler/one-cmds/tests/onecc_044.cfg
new file mode 100644 (file)
index 0000000..f7cdde8
--- /dev/null
@@ -0,0 +1,20 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+include=O1
+
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
+
+[one-optimize]
+input_path=test_onnx_model.circle
+output_path=test_onnx_model.onecc_044.opt.circle
+convert_nchw_to_nhwc=True
+fold_add_v2=False
diff --git a/compiler/one-cmds/tests/onecc_044.test b/compiler/one-cmds/tests/onecc_044.test
new file mode 100644 (file)
index 0000000..1706cf9
--- /dev/null
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test for "O1=True" option with other options
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_044.cfg"
+outputfile="test_onnx_model.onecc_044.opt.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+readarray -t OPTS < "O1.list"
+readarray -t NO_OPTS < "non-O1.list"
+
+OPTS+=("convert_nchw_to_nhwc")
+for i in "${!NO_OPTS[@]}"; do
+  if [[ ${NO_OPTS[i]} = "convert_nchw_to_nhwc" ]]; then
+    unset 'NO_OPTS[i]'
+  fi
+done
+
+NO_OPTS+=("fold_add_v2")
+for i in "${!OPTS[@]}"; do
+  if [[ ${OPTS[i]} = "fold_add_v2" ]]; then
+    unset 'OPTS[i]'
+  fi
+done
+
+for opt in "${OPTS[@]}"
+do
+  if ! grep -q ${opt} ${outputfile}.log; then
+    trap_err_onexit
+  fi
+done
+
+for no_opt in "${NO_OPTS[@]}"
+do
+  if grep -q ${no_opt} ${outputfile}.log; then
+    trap_err_onexit
+  fi
+done
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_045.cfg b/compiler/one-cmds/tests/onecc_045.cfg
new file mode 100644 (file)
index 0000000..d0ee39f
--- /dev/null
@@ -0,0 +1,13 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+
+[one-quantize]
+input_path=reshape_matmul.circle
+output_path=reshape_matmul.onecc_045.q.circle
+input_type=uint8,int16
diff --git a/compiler/one-cmds/tests/onecc_045.test b/compiler/one-cmds/tests/onecc_045.test
new file mode 100644 (file)
index 0000000..74e21f6
--- /dev/null
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+# TODO Resolve circledump not found
+# https://github.com/Samsung/ONE/issues/10550
+if ! command -v circledump &> /dev/null
+then
+  echo "${filename_ext} SKIPPED"
+  exit 0
+fi
+
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./reshape_matmul.circle"
+configfile="onecc_045.cfg"
+outputfile="reshape_matmul.onecc_045.q.circle"
+
+rm -f ${filename}.log
+rm -f ${filename}.first.cdump
+rm -f ${filename}.second.cdump
+rm -f ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+circledump ${outputfile} | grep serving_default_l.1:0$ > ${filename}.first.cdump
+circledump ${outputfile} | grep serving_default_r.1:0$ > ${filename}.second.cdump
+
+# check dtype of the first input (uint8)
+if ! grep -q "UINT8" "${filename}.first.cdump"; then
+  trap_err_onexit
+fi
+
+# check dtype of the second input (int16)
+if ! grep -q "INT16" "${filename}.second.cdump"; then
+  trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
index 0456a42..fe83e35 100644 (file)
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 configfile="onecc_neg_001.cfg"
 
+rm -f ${filename}.log
+
 # run test
 onecc -C ${configfile} > ${filename}.log 2>&1
 
index 46347ea..e597a39 100644 (file)
@@ -9,12 +9,12 @@ one-codegen=False
 
 [one-import-tf]
 input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_neg_002.circle
 input_arrays=input
 input_shapes=1,299,299,3
 output_arrays=InceptionV3/Predictions/Reshape_1
 converter_version=v2
 
 [one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_neg_002.circle
 output_path=inception_v3.opt.circle
index 7c81bab..04918ab 100644 (file)
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 configfile="onecc_neg_002.cfg"
 
+rm -f ${filename}.log
+
 # run test
 onecc -C ${configfile} > ${filename}.log 2>&1
 
index fa027cb..e4718ef 100644 (file)
@@ -1,13 +1,13 @@
 [one-import-tf]
 input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_neg_003.circle
 input_arrays=input
 input_shapes=1,299,299,3
 output_arrays=InceptionV3/Predictions/Reshape_1
 converter_version=v2
 
 [one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_neg_003.circle
 output_path=inception_v3.opt.circle
 
 [one-pack]
index b36bf8c..4c64c5a 100644 (file)
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 configfile="onecc_neg_003.cfg"
 
+rm -f ${filename}.log
+
 # run test
 onecc -C ${configfile} > ${filename}.log 2>&1
 
index dac8cb7..53e8191 100644 (file)
@@ -9,14 +9,14 @@ one-codegen=False
 
 [one-import-tf]
 input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_neg_004.circle
 input_arrays=input
 input_shapes=1,299,299,3
 output_arrays=InceptionV3/Predictions/Reshape_1
 converter_version=v2
 
 [one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_neg_004.circle
 output_path=inception_v3.opt.circle
 
 [one-optimize]
index 5efb7c6..30fb345 100644 (file)
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 configfile="onecc_neg_004.cfg"
 
+rm -f ${filename}.log
+
 # run test
 onecc -C ${configfile} > ${filename}.log 2>&1
 
index 684d940..1d22485 100644 (file)
@@ -33,6 +33,7 @@ intermfile="inception_v3.alt.tflite"
 
 rm -rf ${outputfile}
 rm -rf ${intermfile}
+rm -f ${filename}.log
 
 # run test
 onecc -C ${configfile} > ${filename}.log 2>&1
index 27b4f50..194a7ea 100644 (file)
@@ -33,6 +33,7 @@ intermfile="test_onnx_model.tflite"
 
 rm -rf ${outputfile}
 rm -rf ${intermfile}
+rm -f ${filename}.log
 
 # run test
 onecc -C ${configfile} > ${filename}.log 2>&1
index bc32f23..264a74e 100644 (file)
@@ -32,6 +32,8 @@ trap_err_onexit()
 
 trap trap_err_onexit ERR
 
+rm -f ${filename}.log
+
 # run test
 onecc wronginput > ${filename}.log 2>&1
 
index ff39302..67b2c6f 100644 (file)
@@ -32,6 +32,8 @@ trap_err_onexit()
 
 trap trap_err_onexit ERR
 
+rm -f ${filename}.log
+
 # run test
 onecc > ${filename}.log 2>&1
 
index 54dd129..26ad7da 100644 (file)
@@ -62,6 +62,8 @@ touch ../optimization/OONECC_NEG_009.cfg
 
 configfile=".."
 
+rm -f ${filename}.log
+
 # run test
 onecc -C ${configfile} -OONECC_NEG_009 > ${filename}.log 2>&1
 
index ddad5e6..9860590 100644 (file)
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 configfile=".."
 
+rm -f ${filename}.log
+
 # run test
 onecc -C ${configfile} -OONECC_NEG_010 > ${filename}.log 2>&1
 
index 3f043a7..5df6fc8 100644 (file)
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 configfile="onecc_neg_011.cfg"
 
+rm -f ${filename}.log
+
 # run test
 onecc -C ${configfile} > ${filename}.log 2>&1
 
index fdc73ef..7324091 100644 (file)
@@ -11,5 +11,4 @@ one-infer=True
 
 [one-infer]
 driver=dummy-infer
-backend=dummy
 command="dummy arguments"
index 9feca5f..45ed13d 100644 (file)
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Check driver and backend option is mutually exclusive
+# Check the case when the driver does not exist
 
 filename_ext="$(basename -- $0)"
 filename="${filename_ext%.*}"
 
 trap_err_onexit()
 {
-  if grep -q "\-d and -b options are mutually exclusive" "${filename}.log"; then
+  if grep -q "dummy-infer not found" "${filename}.log"; then
     echo "${filename_ext} SUCCESS"
     exit 0
   fi
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 configfile="onecc_neg_012.cfg"
 
+rm -f ${filename}.log
+
 # run test
 onecc -C ${configfile} > ${filename}.log 2>&1
 
index 0dd8a0f..95ac3c9 100644 (file)
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 workflowfile="onecc_neg_013.workflow.json"
 
+rm -f ${filename}.log
+
 # run test
 onecc -W ${workflowfile} > ${filename}.log 2>&1
 
index 2ed5dcb..704acfa 100644 (file)
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 workflowfile="onecc_neg_014.workflow.json"
 
+rm -f ${filename}.log
+
 # run test
 onecc -W ${workflowfile} > ${filename}.log 2>&1
 
index 079ba67..d15a2f3 100644 (file)
@@ -35,6 +35,8 @@ trap trap_err_onexit ERR
 
 workflowfile="onecc_neg_015.workflow.json"
 
+rm -f ${filename}.log
+
 # run test
 onecc -W ${workflowfile} > ${filename}.log 2>&1
 
index c52763f..23964e9 100644 (file)
@@ -35,6 +35,8 @@ trap trap_err_onexit ERR
 
 workflowfile="onecc_neg_016.workflow.json"
 
+rm -f ${filename}.log
+
 # run test
 onecc -W ${workflowfile} > ${filename}.log 2>&1
 
index 2f173d2..a345d62 100644 (file)
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 workflowfile="onecc_neg_017.workflow.json"
 
+rm -f ${filename}.log
+
 # run test
 onecc -W ${workflowfile} > ${filename}.log 2>&1
 
index bc2297e..b70fae4 100644 (file)
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 workflowfile="onecc_neg_018.workflow.json"
 
+rm -f ${filename}.log
+
 # run test
 onecc -W ${workflowfile} > ${filename}.log 2>&1
 
index 58cb88e..e0754d3 100644 (file)
@@ -13,7 +13,7 @@
             "one-cmd": "one-import-tf",
             "commands": {
                 "input_path": "inception_v3.pb",
-                "output_path": "inception_v3.circle",
+                "output_path": "inception_v3.onecc_neg_018.circle",
                 "input_arrays": "input",
                 "input_shapes": "1,299,299,3",
                 "output_arrays": "InceptionV3/Predictions/Reshape_1",
index 11ef3a9..430438d 100644 (file)
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 workflowfile="onecc_neg_019.workflow.json"
 
+rm -f ${filename}.log
+
 # run test
 onecc -W ${workflowfile} > ${filename}.log 2>&1
 
index aedeeec..995c6bf 100644 (file)
@@ -10,7 +10,7 @@
             "one-cmddddddddd": "one-import-tf",
             "commands": {
                 "input_path": "inception_v3.pb",
-                "output_path": "inception_v3.circle",
+                "output_path": "inception_v3.onecc_neg_019.circle",
                 "input_arrays": "input",
                 "input_shapes": "1,299,299,3",
                 "output_arrays": "InceptionV3/Predictions/Reshape_1",
index 7f5073d..b86a231 100644 (file)
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 workflowfile="onecc_neg_020.workflow.json"
 
+rm -f ${filename}.log
+
 # run test
 onecc -W ${workflowfile} > ${filename}.log 2>&1
 
index d3446d3..89f0d59 100644 (file)
@@ -10,7 +10,7 @@
             "one-cmd": "one-import-tf",
             "commandssssssssss": {
                 "input_path": "inception_v3.pb",
-                "output_path": "inception_v3.circle",
+                "output_path": "inception_v3.onecc_neg_020.circle",
                 "input_arrays": "input",
                 "input_shapes": "1,299,299,3",
                 "output_arrays": "InceptionV3/Predictions/Reshape_1",
index e9d4baa..ef023b1 100644 (file)
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 workflowfile="onecc_neg_021.workflow.json"
 
+rm -f ${filename}.log
+
 # run test
 onecc -W ${workflowfile} > ${filename}.log 2>&1
 
index 6d21111..c326d41 100644 (file)
@@ -14,7 +14,7 @@
             "one-cmd": "one-import-tf",
             "commands": {
                 "input_path": "inception_v3.pb",
-                "output_path": "inception_v3.circle",
+                "output_path": "inception_v3.onecc_neg_021.circle",
                 "input_arrays": "input",
                 "input_shapes": "1,299,299,3",
                 "output_arrays": "InceptionV3/Predictions/Reshape_1",
@@ -33,7 +33,7 @@
             "one-cmd": "one-import-tf",
             "commands": {
                 "input_path": "inception_v3.pb",
-                "output_path": "inception_v3.circle",
+                "output_path": "inception_v3.onecc_neg_021.circle",
                 "input_arrays": "input",
                 "input_shapes": "1,299,299,3",
                 "output_arrays": "InceptionV3/Predictions/Reshape_1",
index 5400717..002908a 100644 (file)
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 workflowfile="onecc_neg_022.workflow.json"
 
+rm -f ${filename}.log
+
 # run test
 onecc -W ${workflowfile} > ${filename}.log 2>&1
 
index 2e056ac..1f45081 100644 (file)
@@ -45,7 +45,7 @@
             "one-cmd": "one-import-tf",
             "commands": {
                 "input_path": "inception_v3.pb",
-                "output_path": "inception_v3.circle",
+                "output_path": "inception_v3.onecc_neg_022.circle",
                 "input_arrays": "input",
                 "input_shapes": "1,299,299,3",
                 "output_arrays": "InceptionV3/Predictions/Reshape_1",
@@ -55,7 +55,7 @@
         "OPTIMIZE": {
             "one-cmd": "one-optimize",
             "commands": {
-                "input_path": "inception_v3.circle",
+                "input_path": "inception_v3.onecc_neg_022.circle",
                 "output_path": "inception_v3.opt.circle"
             }
         }
index 09717e8..436c7c3 100644 (file)
@@ -34,6 +34,8 @@ trap trap_err_onexit ERR
 
 workflowfile="onecc_neg_023.workflow.json"
 
+rm -f ${filename}.log
+
 # run test
 onecc -W ${workflowfile} > ${filename}.log 2>&1
 
index 056e704..2d763f0 100644 (file)
@@ -11,7 +11,7 @@
             "one-cmd": "one-import-tf",
             "commands": {
                 "input_path": "inception_v3.pb",
-                "output_path": "inception_v3.circle",
+                "output_path": "inception_v3.onecc_neg_023.circle",
                 "input_arrays": "input",
                 "input_shapes": "1,299,299,3",
                 "output_arrays": "InceptionV3/Predictions/Reshape_1",
@@ -21,7 +21,7 @@
         "OPTIMIZE": {
             "one-cmd": "one-optimize",
             "commands": {
-                "input_path": "inception_v3.circle",
+                "input_path": "inception_v3.onecc_neg_023.circle",
                 "output_path": "inception_v3.opt.circle",
                 "change_outputs": "non_existing_node_name"
             }
diff --git a/compiler/one-cmds/tests/onecc_neg_024.cfg b/compiler/one-cmds/tests/onecc_neg_024.cfg
new file mode 100644 (file)
index 0000000..0e9ebc6
--- /dev/null
@@ -0,0 +1,20 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+include=O # invalid (too short group option)
+
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
+
+[one-optimize]
+input_path=test_onnx_model.circle
+output_path=test_onnx_model.onecc_neg_024.opt.circle
+convert_nchw_to_nhwc=True
+fold_add_v2=False
diff --git a/compiler/one-cmds/tests/onecc_neg_024.test b/compiler/one-cmds/tests/onecc_neg_024.test
new file mode 100644 (file)
index 0000000..16952ba
--- /dev/null
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# invalid group option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  if grep -q "Invalid group option" "${filename}.log"; then
+    echo "${filename_ext} SUCCESS"
+    exit 0
+  fi
+
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_024.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_025.cfg b/compiler/one-cmds/tests/onecc_neg_025.cfg
new file mode 100644 (file)
index 0000000..e41c6e6
--- /dev/null
@@ -0,0 +1,20 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+include=A1 # invalid (must start with 'O')
+
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
+
+[one-optimize]
+input_path=test_onnx_model.circle
+output_path=test_onnx_model.onecc_neg_025.opt.circle
+convert_nchw_to_nhwc=True
+fold_add_v2=False
diff --git a/compiler/one-cmds/tests/onecc_neg_025.test b/compiler/one-cmds/tests/onecc_neg_025.test
new file mode 100644 (file)
index 0000000..4ddc310
--- /dev/null
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# invalid group option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  if grep -q "Invalid group option" "${filename}.log"; then
+    echo "${filename_ext} SUCCESS"
+    exit 0
+  fi
+
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_025.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_026.cfg b/compiler/one-cmds/tests/onecc_neg_026.cfg
new file mode 100644 (file)
index 0000000..2efddb1
--- /dev/null
@@ -0,0 +1,13 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+
+[one-quantize]
+input_path=reshape_matmul.circle
+output_path=reshape_matmul.onecc_neg_026.q.circle
+input_type=uint8,int16,uint8
diff --git a/compiler/one-cmds/tests/onecc_neg_026.test b/compiler/one-cmds/tests/onecc_neg_026.test
new file mode 100644 (file)
index 0000000..f90c1ec
--- /dev/null
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Wrong number of input_type in one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  if grep -q "Invalid number of input dtype" "${filename}.log"; then
+    echo "${filename_ext} SUCCESS"
+    exit 0
+  fi
+
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./reshape_matmul.circle"
+configfile="onecc_neg_026.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
index c171cfe..97bc6eb 100644 (file)
@@ -21,34 +21,34 @@ pushd $SCRIPT_PATH > /dev/null
 
 if [[ ! -s "inception_v3.pb" ]]; then
     rm -rf inception_v3_2018_04_27.tgz
-    wget https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_v3_2018_04_27.tgz
+    wget -nv https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_v3_2018_04_27.tgz
     tar zxvf inception_v3_2018_04_27.tgz
 fi
 
 if [[ ! -s "while_3.pbtxt" ]]; then
     rm -rf while_3.zip
-    wget https://github.com/Samsung/ONE/files/5095630/while_3.zip
+    wget -nv https://github.com/Samsung/ONE/files/5095630/while_3.zip
     unzip while_3.zip
     # https://github.com/Samsung/ONE/issues/4155#issuecomment-689320297
 fi
 
 if [[ ! -s "mobilenet_test_data.h5" ]]; then
     rm -rf mobilenet_test_data.zip
-    wget https://github.com/Samsung/ONE/files/5139460/mobilenet_test_data.zip
+    wget -nv https://github.com/Samsung/ONE/files/5139460/mobilenet_test_data.zip
     unzip mobilenet_test_data.zip
     # https://github.com/Samsung/ONE/issues/4155#issuecomment-689321538
 fi
 
 if [[ ! -s "bcq.pb" ]]; then
     rm -rf bcq.pb.zip
-    wget https://github.com/Samsung/ONE/files/5153842/bcq.pb.zip
+    wget -nv https://github.com/Samsung/ONE/files/5153842/bcq.pb.zip
     unzip bcq.pb.zip
     # https://github.com/Samsung/ONE/issues/4155#issuecomment-689324597
 fi
 
 if [[ ! -s "img_files" ]]; then
     rm -rf img_files.zip
-    wget https://github.com/Samsung/ONE/files/5499172/img_files.zip
+    wget -nv https://github.com/Samsung/ONE/files/5499172/img_files.zip
     unzip img_files.zip
     # https://github.com/Samsung/ONE/issues/3213#issuecomment-722757499
 fi
@@ -65,46 +65,61 @@ fi
 
 if [[ ! -d "test_saved_model" ]]; then
     rm -rf test_saved_model.zip
-    wget https://github.com/Samsung/ONE/files/5516226/test_saved_model.zip
+    wget -nv https://github.com/Samsung/ONE/files/5516226/test_saved_model.zip
     unzip test_saved_model.zip
     # https://github.com/Samsung/ONE/issues/4268#issuecomment-724578237
 fi
 
 if [[ ! -s "test_keras_model.h5" ]]; then
     rm -rf test_keras_model.zip
-    wget https://github.com/Samsung/ONE/files/5520777/test_keras_model.zip
+    wget -nv https://github.com/Samsung/ONE/files/5520777/test_keras_model.zip
     unzip test_keras_model.zip
     # https://github.com/Samsung/ONE/issues/4268#issuecomment-725025805
 fi
 
 if [[ ! -s "test_onnx_model.onnx" ]]; then
     rm -rf test_onnx_model.zip
-    wget https://github.com/Samsung/ONE/files/5768243/test_onnx_model.zip
+    wget -nv https://github.com/Samsung/ONE/files/5768243/test_onnx_model.zip
     unzip test_onnx_model.zip
     # https://github.com/Samsung/ONE/issues/5548#issuecomment-754373360
 fi
 
 if [[ ! -s "onnx_conv2d_conv2d.onnx" ]]; then
     rm -rf onnx_conv2d_conv2d.zip
-    wget https://github.com/Samsung/ONE/files/5774648/onnx_conv2d_conv2d.zip
+    wget -nv https://github.com/Samsung/ONE/files/5774648/onnx_conv2d_conv2d.zip
     unzip onnx_conv2d_conv2d.zip
     # https://github.com/Samsung/ONE/issues/5577#issuecomment-755078444
 fi
 
 if [[ ! -s "reshape_matmul.onnx" ]]; then
     rm -rf reshape_matmul.zip
-    wget https://github.com/Samsung/ONE/files/9082878/reshape_matmul.zip
+    wget -nv https://github.com/Samsung/ONE/files/9082878/reshape_matmul.zip
     unzip reshape_matmul.zip
     # https://github.com/Samsung/ONE/issues/9405#issuecomment-1180198137
 fi
 
+# prepare 'reshape_matmul.circle' file used for tests
+if [[ ! -s "reshape_matmul.circle" ]]; then
+    ../bin/one-import onnx \
+    --experimental_disable_batchmatmul_unfold \
+    -i reshape_matmul.onnx \
+    -o reshape_matmul.circle
+fi
+
 if [[ ! -s "Net_InstanceNorm_003.part" ]]; then
     rm -rf Net_InstanceNorm_003.zip
-    wget https://github.com/Samsung/ONE/files/8608844/Net_InstanceNorm_003.zip
+    wget -nv https://github.com/Samsung/ONE/files/8608844/Net_InstanceNorm_003.zip
     unzip Net_InstanceNorm_003.zip
     # https://github.com/Samsung/ONE/issues/8570#issuecomment-1115804257
 fi
 
+if [[ ! -s "UnidirSeqLSTM.tflite" ]]; then
+    rm -rf UnidirSeqLSTM.zip
+    wget -nv https://github.com/Samsung/ONE/files/10055255/UnidirSeqLSTM.zip
+    unzip UnidirSeqLSTM.zip
+    # https://github.com/Samsung/ONE/issues/9940#issuecomment-1293282484
+fi
+
 function files_missing() {
     condition="test "
 
@@ -124,7 +139,7 @@ declare -a TEST_RECCURENT_MODELS=(\
 
 if files_missing "${TEST_RECCURENT_MODELS[@]}"; then
     rm -rf test_onnx_recurrent_models.zip
-    wget https://github.com/Samsung/ONE/files/8067909/test_onnx_recurrent_models.zip
+    wget -nv https://github.com/Samsung/ONE/files/8067909/test_onnx_recurrent_models.zip
     unzip test_onnx_recurrent_models.zip
     # https://github.com/Samsung/ONE/issues/8395#issuecomment-1040072097
 fi
@@ -133,7 +148,7 @@ declare -a NEG_TEST_RECCURENT_MODELS=("rnn_variable.onnx" "lstm_variable.onnx")
 
 if files_missing "${NEG_TEST_RECCURENT_MODELS[@]}"; then
     rm -rf neg_test_onnx_recurrent_models.zip
-    wget https://github.com/Samsung/ONE/files/8137183/neg_test_onnx_recurrent_models.zip
+    wget -nv https://github.com/Samsung/ONE/files/8137183/neg_test_onnx_recurrent_models.zip
     unzip neg_test_onnx_recurrent_models.zip
     # https://github.com/Samsung/ONE/issues/8395#issuecomment-1050364375
 fi
index 887d81b..ceefcf7 100644 (file)
@@ -27,12 +27,13 @@ trap trap_err_onexit ERR
 
 outputfile="./output_testdata.h5"
 
+rm -f ${filename}.log
 rm -rf ${outputfile}
 
 # run test
 rawdata2hdf5 \
 --data_list datalist.txt \
---output_path ./output_testdata.h5 >> /dev/null
+--output_path ${outputfile} > ${filename}.log 2>&1
 
 if [[ ! -s "${outputfile}" ]]; then
   trap_err_onexit
index 45ad88f..fb78037 100644 (file)
@@ -34,6 +34,7 @@ inputfile="./wronglist.txt"
 outputfile="./output_testdata.h5"
 
 rm -rf ${inputfile}
+rm -f ${filename}.log
 
 touch ${inputfile}
 echo "non-existing-file.data" >> ${inputfile}
index 6bd078f..7a9e231 100644 (file)
@@ -33,6 +33,8 @@ trap trap_err_onexit ERR
 inputfile="./inception_v3.circle"
 outputfile="./output_testdata.h5"
 
+rm -f ${filename}.log
+
 # run test
 rawdata2hdf5 \
 --data_list ${inputfile} \
index 64559c2..c69f935 100644 (file)
@@ -32,6 +32,8 @@ trap trap_err_onexit ERR
 
 outputfile="./output_testdata.h5"
 
+rm -f ${filename}.log
+
 # run test
 rawdata2hdf5 \
 --output_path ${outputfile} > ${filename}.log 2>&1
index 277bc0e..df06201 100644 (file)
@@ -32,6 +32,8 @@ trap trap_err_onexit ERR
 
 outputfile="./non_existing_dir/output_testdata.h5"
 
+rm -f ${filename}.log
+
 # run test
 rawdata2hdf5 \
 --data_list datalist.txt \
diff --git a/compiler/one-cmds/utils.py b/compiler/one-cmds/utils.py
deleted file mode 100644 (file)
index d204447..0000000
+++ /dev/null
@@ -1,315 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import argparse
-import configparser
-import glob
-import importlib
-import ntpath
-import os
-import subprocess
-import sys
-
-import onelib.constant as _constant
-
-
-def _add_default_arg(parser):
-    # version
-    parser.add_argument(
-        '-v',
-        '--version',
-        action='store_true',
-        help='show program\'s version number and exit')
-
-    # verbose
-    parser.add_argument(
-        '-V',
-        '--verbose',
-        action='store_true',
-        help='output additional information to stdout or stderr')
-
-    # configuration file
-    parser.add_argument('-C', '--config', type=str, help='run with configuation file')
-    # section name that you want to run in configuration file
-    parser.add_argument('-S', '--section', type=str, help=argparse.SUPPRESS)
-
-
-def _add_default_arg_no_CS(parser):
-    """
-    This adds -v -V args only (no -C nor -S)
-    """
-    # version
-    parser.add_argument(
-        '-v',
-        '--version',
-        action='store_true',
-        help='show program\'s version number and exit')
-
-    # verbose
-    parser.add_argument(
-        '-V',
-        '--verbose',
-        action='store_true',
-        help='output additional information to stdout or stderr')
-
-
-def is_accumulated_arg(arg, driver):
-    if driver == "one-quantize":
-        accumulables = [
-            "tensor_name", "scale", "zero_point", "src_tensor_name", "dst_tensor_name"
-        ]
-        if arg in accumulables:
-            return True
-
-    return False
-
-
-def _is_valid_attr(args, attr):
-    return hasattr(args, attr) and getattr(args, attr)
-
-
-class Command:
-    def __init__(self, driver, args, log_file):
-        self.cmd = [driver]
-        self.driver = driver
-        self.args = args
-        self.log_file = log_file
-
-    # Add option if attrs are valid
-    # Option values are collected from self.args
-    def add_option_with_valid_args(self, option, attrs):
-        for attr in attrs:
-            if not _is_valid_attr(self.args, attr):
-                return self
-        self.cmd.append(option)
-        for attr in attrs:
-            self.cmd.append(getattr(self.args, attr))
-        return self
-
-    # Add option and values without any condition
-    def add_option_with_values(self, option, values):
-        self.cmd.append(option)
-        for value in values:
-            self.cmd.append(value)
-        return self
-
-    # Add option with no argument (ex: --verbose) if attr is valid
-    def add_noarg_option_if_valid_arg(self, option, attr):
-        if _is_valid_attr(self.args, attr):
-            self.cmd.append(option)
-        return self
-
-    # Run cmd and save logs
-    def run(self):
-        self.log_file.write((' '.join(self.cmd) + '\n').encode())
-        _run(self.cmd, err_prefix=self.driver, logfile=self.log_file)
-
-
-def _parse_cfg_and_overwrite(config_path, section, args):
-    """
-    parse given section of configuration file and set the values of args.
-    Even if the values parsed from the configuration file already exist in args,
-    the values are overwritten.
-    """
-    if config_path == None:
-        # DO NOTHING
-        return
-    config = configparser.ConfigParser()
-    # make option names case sensitive
-    config.optionxform = str
-    parsed = config.read(config_path)
-    if not parsed:
-        raise FileNotFoundError('Not found given configuration file')
-    if not config.has_section(section):
-        raise AssertionError('configuration file doesn\'t have \'' + section +
-                             '\' section')
-    for key in config[section]:
-        setattr(args, key, config[section][key])
-    # TODO support accumulated arguments
-
-
-def _parse_cfg(args, driver_name):
-    """parse configuration file. If the option is directly given to the command line,
-       the option is processed prior to the configuration file.
-       That is, if the values parsed from the configuration file already exist in args,
-       the values are ignored."""
-    if _is_valid_attr(args, 'config'):
-        config = configparser.ConfigParser()
-        config.optionxform = str
-        config.read(args.config)
-        # if section is given, verify given section
-        if _is_valid_attr(args, 'section'):
-            if not config.has_section(args.section):
-                raise AssertionError('configuration file must have \'' + driver_name +
-                                     '\' section')
-            for key in config[args.section]:
-                if is_accumulated_arg(key, driver_name):
-                    if not _is_valid_attr(args, key):
-                        setattr(args, key, [config[args.section][key]])
-                    else:
-                        getattr(args, key).append(config[args.section][key])
-                    continue
-                if not _is_valid_attr(args, key):
-                    setattr(args, key, config[args.section][key])
-        # if section is not given, section name is same with its driver name
-        else:
-            if not config.has_section(driver_name):
-                raise AssertionError('configuration file must have \'' + driver_name +
-                                     '\' section')
-            secton_to_run = driver_name
-            for key in config[secton_to_run]:
-                if is_accumulated_arg(key, driver_name):
-                    if not _is_valid_attr(args, key):
-                        setattr(args, key, [config[secton_to_run][key]])
-                    else:
-                        getattr(args, key).append(config[secton_to_run][key])
-                    continue
-                if not _is_valid_attr(args, key):
-                    setattr(args, key, config[secton_to_run][key])
-
-
-def _print_version_and_exit(file_path):
-    """print version of the file located in the file_path"""
-    script_path = os.path.realpath(file_path)
-    dir_path = os.path.dirname(script_path)
-    script_name = os.path.splitext(os.path.basename(script_path))[0]
-    # run one-version
-    subprocess.call([os.path.join(dir_path, 'one-version'), script_name])
-    sys.exit()
-
-
-def _safemain(main, mainpath):
-    """execute given method and print with program name for all uncaught exceptions"""
-    try:
-        main()
-    except Exception as e:
-        prog_name = os.path.basename(mainpath)
-        print(f"{prog_name}: {type(e).__name__}: " + str(e), file=sys.stderr)
-        sys.exit(255)
-
-
-def _run(cmd, err_prefix=None, logfile=None):
-    """Execute command in subprocess
-
-    Args:
-        cmd: command to be executed in subprocess
-        err_prefix: prefix to be put before every stderr lines
-        logfile: file stream to which both of stdout and stderr lines will be written
-    """
-    with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as p:
-        import select
-        inputs = set([p.stdout, p.stderr])
-        while inputs:
-            readable, _, _ = select.select(inputs, [], [])
-            for x in readable:
-                line = x.readline()
-                if len(line) == 0:
-                    inputs.discard(x)
-                    continue
-                if x == p.stdout:
-                    out = sys.stdout
-                if x == p.stderr:
-                    out = sys.stderr
-                    if err_prefix:
-                        line = f"{err_prefix}: ".encode() + line
-                out.buffer.write(line)
-                out.buffer.flush()
-                if logfile != None:
-                    logfile.write(line)
-    if p.returncode != 0:
-        sys.exit(p.returncode)
-
-
-def _remove_prefix(str, prefix):
-    if str.startswith(prefix):
-        return str[len(prefix):]
-    return str
-
-
-def _remove_suffix(str, suffix):
-    if str.endswith(suffix):
-        return str[:-len(suffix)]
-    return str
-
-
-def _get_optimization_list(get_name=False):
-    """
-    returns a list of optimization. If `get_name` is True,
-    only basename without extension is returned rather than full file path.
-
-    [one hierarchy]
-    one
-    ├── backends
-    ├── bin
-    ├── doc
-    ├── include
-    ├── lib
-    ├── optimization
-    └── test
-
-    Optimization options must be placed in `optimization` folder
-    """
-    dir_path = os.path.dirname(os.path.realpath(__file__))
-
-    # optimization folder
-    files = [f for f in glob.glob(dir_path + '/../optimization/O*.cfg', recursive=True)]
-    # exclude if the name has space
-    files = [s for s in files if not ' ' in s]
-
-    opt_list = []
-    for cand in files:
-        base = ntpath.basename(cand)
-        if os.path.isfile(cand) and os.access(cand, os.R_OK):
-            opt_list.append(cand)
-
-    if get_name == True:
-        # NOTE the name includes prefix 'O'
-        # e.g. O1, O2, ONCHW not just 1, 2, NCHW
-        opt_list = [ntpath.basename(f) for f in opt_list]
-        opt_list = [_remove_suffix(s, '.cfg') for s in opt_list]
-
-    return opt_list
-
-
-def _detect_one_import_drivers(search_path):
-    """Looks for import drivers in given directory
-
-    Args:
-        search_path: path to the directory where to search import drivers
-
-    Returns:
-    dict: each entry is related to single detected driver,
-          key is a config section name, value is a driver name
-
-    """
-    import_drivers_dict = {}
-    for module_name in os.listdir(search_path):
-        full_path = os.path.join(search_path, module_name)
-        if not os.path.isfile(full_path):
-            continue
-        if module_name.find("one-import-") != 0:
-            continue
-        module_loader = importlib.machinery.SourceFileLoader(module_name, full_path)
-        module_spec = importlib.util.spec_from_loader(module_name, module_loader)
-        module = importlib.util.module_from_spec(module_spec)
-        try:
-            module_loader.exec_module(module)
-            if hasattr(module, "get_driver_cfg_section"):
-                section = module.get_driver_cfg_section()
-                import_drivers_dict[section] = module_name
-        except:
-            pass
-    return import_drivers_dict
diff --git a/compiler/onecc-docker/README.md b/compiler/onecc-docker/README.md
new file mode 100644 (file)
index 0000000..3d7aa89
--- /dev/null
@@ -0,0 +1,36 @@
+# onecc-docker
+
+_onecc-docker_ makes ONE tools available on platforms beyond the officially supported Ubuntu releases.
+
+## Description
+
+For now, ONE tools support only Ubuntu 18.04 (and 20.04, unofficially).
+This makes it difficult for people on other environments to use the tools without Ubuntu 18.04.
+
+To overcome this limitation, _onecc-docker_ runs the tools inside a Docker container so that users on a wider range of systems can use them.
+
+This tool has the following objectives:
+
+- Make ONE tools usable on otherwise unsupported OS versions
+- Install and run ONE tools quickly and with little overhead via Docker
+
+## Requirements
+
+- Any Linux distribution
+- Docker
+    - The current user must be able to run Docker commands without `sudo`.
+      - _onecc-docker_ requires the current user ID to be included in the `docker` group, because it runs Docker-related commands without `sudo` privileges.
+        - See "[Post-installation steps for Linux](https://docs.docker.com/engine/install/linux-postinstall/)"
+- Python 3.8
+  - requests
+
+## Note
+
+_onecc-docker_ is currently in incubation stage.
+
+The onecc-docker debian package should be built together with the one-compiler debian package when the
+ONE compiler project is built. To that end, the onecc-docker debian files should be configured in the
+./infra/debian/compiler directory. However, the code is still being worked on, so it is kept in this
+location temporarily.
+
+TODO: Merge this debian directory into ./infra/debian/compiler.
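
A minimal usage sketch, assuming the `onecc-docker` package is installed and the current user is in the `docker` group; arguments after the optional token are forwarded to `onecc` inside the container, and `GITHUB_TOKEN` is a placeholder for the caller's own token:

```
# Pass a GitHub token to avoid API rate limiting when resolving the latest release.
onecc-docker -t "${GITHUB_TOKEN}" -C onecc.cfg
# Without a token (may hit the GitHub API rate limit on shared machines).
onecc-docker -C onecc.cfg
```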
diff --git a/compiler/onecc-docker/debian/changelog b/compiler/onecc-docker/debian/changelog
new file mode 100644 (file)
index 0000000..501d0ec
--- /dev/null
@@ -0,0 +1,6 @@
+onecc-docker (0.1.0) bionic; urgency=medium
+
+  * Introduce onecc-docker
+
+ -- Seunghui Lee <dltmdgml456654@gmail.com>  Wed, 23 Sep 2022 12:00:00 +0900
+
diff --git a/compiler/onecc-docker/debian/compat b/compiler/onecc-docker/debian/compat
new file mode 100644 (file)
index 0000000..ec63514
--- /dev/null
@@ -0,0 +1 @@
+9
diff --git a/compiler/onecc-docker/debian/control b/compiler/onecc-docker/debian/control
new file mode 100644 (file)
index 0000000..4687d10
--- /dev/null
@@ -0,0 +1,13 @@
+Source: onecc-docker
+Section: devel
+Priority: extra
+Maintainer: Neural Network Acceleration Solution Developers <nnfw@samsung.com>
+Build-Depends: debhelper (>=9)
+Standards-Version: 4.5.1
+Homepage: https://github.com/Samsung/ONE
+
+Package: onecc-docker
+Architecture: amd64
+Multi-Arch: foreign
+Depends: ${misc:Depends}, ${shlibs:Depends}, python3.8
+Description: On-device Neural Engine docker package
diff --git a/compiler/onecc-docker/debian/copyright b/compiler/onecc-docker/debian/copyright
new file mode 100644 (file)
index 0000000..837bb7d
--- /dev/null
@@ -0,0 +1,3 @@
+Files: *
+License: Proprietary
+Copyright (c) <2022> <Samsung Electronics Co.,Ltd.>
diff --git a/compiler/onecc-docker/debian/onecc-docker.install b/compiler/onecc-docker/debian/onecc-docker.install
new file mode 100644 (file)
index 0000000..4036253
--- /dev/null
@@ -0,0 +1,2 @@
+compiler/onecc-docker/onecc-docker /usr/share/one/bin/
+compiler/onecc-docker/docker/Dockerfile /usr/share/one/bin/docker/
diff --git a/compiler/onecc-docker/debian/onecc-docker.links b/compiler/onecc-docker/debian/onecc-docker.links
new file mode 100644 (file)
index 0000000..2374663
--- /dev/null
@@ -0,0 +1 @@
+/usr/share/one/bin/onecc-docker /usr/bin/onecc-docker
diff --git a/compiler/onecc-docker/debian/rules b/compiler/onecc-docker/debian/rules
new file mode 100644 (file)
index 0000000..cfd26cf
--- /dev/null
@@ -0,0 +1,8 @@
+#!/usr/bin/make -f
+
+%:
+       dh $@
+
+override_dh_fixperms:
+       dh_fixperms
+       chmod +x debian/onecc-docker/usr/share/one/bin/onecc-docker
diff --git a/compiler/onecc-docker/docker/Dockerfile b/compiler/onecc-docker/docker/Dockerfile
new file mode 100644 (file)
index 0000000..9752985
--- /dev/null
@@ -0,0 +1,26 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved 
+# 
+# Licensed under the Apache License, Version 2.0 (the "License"); 
+# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at 
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0 
+# 
+# Unless required by applicable law or agreed to in writing, software 
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and 
+# limitations under the License 
+
+FROM ubuntu:18.04 
+
+ARG VERSION
+
+RUN apt-get update && apt-get install -qqy --no-install-recommends \ 
+    wget \
+    ca-certificates \ 
+    && wget https://github.com/Samsung/ONE/releases/download/${VERSION}/one-compiler_${VERSION}_amd64.deb \ 
+    && apt-get install -y ./one-compiler_${VERSION}_amd64.deb \ 
+    && rm -rf /var/lib/apt/lists/* 
+ENTRYPOINT ["onecc"]
diff --git a/compiler/onecc-docker/onecc-docker b/compiler/onecc-docker/onecc-docker
new file mode 100644 (file)
index 0000000..ae3b31c
--- /dev/null
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import subprocess
+import json
+import requests
+import os
+import argparse
+
+
+def _run(cmd, is_shell=False):
+    result = subprocess.Popen(
+        cmd, shell=is_shell, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+    stdout, stderr = result.communicate()
+    stdout = stdout.decode('utf-8')
+    stderr = stderr.decode('utf-8')
+
+    if result.returncode:
+        print(stderr, end='')
+        exit(result.returncode)
+    else:
+        return stdout
+
+
+def _image_exists(name):
+    cmd = ['docker', 'images', '-q', name]
+    lines = _run(cmd).splitlines()
+    return lines
+
+
+def main():
+    script_path = os.path.dirname(os.path.realpath(__file__))
+    dockerfile_path = os.path.join(script_path, 'docker')
+
+    onecc_docker_usage = 'onecc-docker [-h] [-t TOKEN] [COMMAND <args>]'
+    onecc_docker_desc = 'Run onecc via docker'
+    parser = argparse.ArgumentParser(
+        usage=onecc_docker_usage, description=onecc_docker_desc)
+    parser.add_argument(
+        "-t",
+        "--token",
+        help=
+        "Token for authentication to GitHub. This is a workaround for Rate limit exceeded error"
+    )
+
+    args, onecc_arguments = parser.parse_known_args()
+    authorization_token = args.token
+
+    LATEST_URL = "https://api.github.com/repos/Samsung/ONE/releases/latest"
+    headers = {}
+    if authorization_token:
+        headers = {"Authorization": "Bearer {}".format(authorization_token)}
+    try:
+        response = requests.get(LATEST_URL, headers=headers)
+        response.raise_for_status()
+    except requests.exceptions.RequestException as e:
+        raise SystemExit('onecc-docker: error: {}'.format(e))
+
+    versions_str = response.content
+    versions_json = json.loads(versions_str)
+    recent_version = versions_json["tag_name"]
+
+    image_name = f"onecc:{recent_version}"
+    build_arg = f"VERSION={recent_version}"
+
+    if not _image_exists(image_name):
+        build_cmd = [
+            "docker", "build", "-t", image_name, "--build-arg", build_arg, dockerfile_path
+        ]
+        print('Building Docker image ...')
+        _run(build_cmd)
+        print('Docker image successfully built.')
+
+    container_name = f"onecc_{recent_version.replace('.','_')}"
+    user_cmd = ' '.join(onecc_arguments)
+
+    run_cmd = [
+        "docker", "run", "--rm", "-u", "$(id -u):$(id -g)", "--name", contianer_name,
+        "-v", "${HOME}:${HOME}", "-e", "HOME=${HOME}", "-w", "${PWD}", image_name,
+        user_cmd
+    ]
+
+    cmd = ' '.join(run_cmd)
+    output = _run(cmd, is_shell=True)
+    print(output, end='')
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except Exception as e:
+        prog_name = os.path.basename(__file__)
+        print(f"{prog_name}: {type(e).__name__}: " + str(e), file=sys.stderr)
+        sys.exit(255)
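
The wrapper above is roughly equivalent to the following manual Docker commands; a sketch assuming the resolved release tag is, say, 1.22.0 and that it is run from the onecc-docker source directory (where the `docker/` Dockerfile lives):

```
VERSION=1.22.0
# Build the image once per release; the Dockerfile installs the matching one-compiler .deb.
docker build -t "onecc:${VERSION}" --build-arg "VERSION=${VERSION}" ./docker
# Run onecc inside the container as the current user, with $HOME and $PWD mapped in
# (here forwarding onecc's version flag as an example).
docker run --rm -u "$(id -u):$(id -g)" -v "${HOME}:${HOME}" -e "HOME=${HOME}" -w "${PWD}" "onecc:${VERSION}" --version
```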
diff --git a/compiler/pics/CMakeLists.txt b/compiler/pics/CMakeLists.txt
new file mode 100644 (file)
index 0000000..80e6a1c
--- /dev/null
@@ -0,0 +1,33 @@
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
+if(NOT FlatBuffers_FOUND)
+  message(STATUS "Configure pics: FAILED (missing FlatBuffers)")
+  return()
+endif(NOT FlatBuffers_FOUND)
+
+unset(PICS_DEPS)
+
+###
+### Generate python interface for circle schema
+###
+set(CIRCLE_SCHEMA_PYTHON_DIR "${CMAKE_CURRENT_BINARY_DIR}/circle")
+
+get_target_property(SCHEMA_BIN_PATH mio_circle04 BINARY_DIR)
+
+add_custom_command(
+  OUTPUT ${CIRCLE_SCHEMA_PYTHON_DIR}
+  COMMAND "$<TARGET_FILE:flatbuffers::flatc>" --python
+          -o "${CMAKE_CURRENT_BINARY_DIR}" "${SCHEMA_BIN_PATH}/schema.fbs"
+  DEPENDS flatbuffers::flatc
+  COMMENT "Generate python interface for circle schema"
+)
+
+list(APPEND PICS_DEPS "${CIRCLE_SCHEMA_PYTHON_DIR}")
+
+# This enforces CMake to generate all the dependencies during "build" phase
+add_custom_target(pics ALL DEPENDS ${PICS_DEPS})
+
+install(DIRECTORY ${CIRCLE_SCHEMA_PYTHON_DIR}
+        FILE_PERMISSIONS OWNER_WRITE OWNER_READ
+                          GROUP_READ
+                          WORLD_READ
+        DESTINATION bin)
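
The custom command above boils down to a single flatc invocation; a hand-run sketch, assuming FlatBuffers 2.0's `flatc` is on PATH and `schema.fbs` is the circle schema copied into the mio_circle04 binary directory:

```
# Generate the Python binding package (a 'circle' directory) under ./out from the circle schema.
flatc --python -o ./out ./schema.fbs
```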
diff --git a/compiler/pics/README.md b/compiler/pics/README.md
new file mode 100644 (file)
index 0000000..248d1b8
--- /dev/null
@@ -0,0 +1,16 @@
+# pics
+
+_pics_ is a FlatBuffers Python interface for the circle schema.
+
+## How to use pics in your module?
+
+Add the lines below to your module's `CMakeLists.txt`. They create a symbolic link to the `circle` directory under your module's binary directory.
+
+```
+get_target_property(PICS_BIN_PATH pics BINARY_DIR)
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/circle
+                   COMMAND ${CMAKE_COMMAND} -E create_symlink
+                   ${PICS_BIN_PATH}/circle ${CMAKE_CURRENT_BINARY_DIR}/circle)
+
+# Add dependency to ${CMAKE_CURRENT_BINARY_DIR}/circle
+```
index 96dfc86..53233cd 100644 (file)
@@ -7,12 +7,6 @@ unset(QUANTIZATION_VALUE_TEST_WITH_PARAM)
 unset(QUANTIZATION_CONFIG_VALUE_TEST)
 unset(QUANTIZATION_CONFIG_VALUE_TEST_WITH_PARAM)
 
-nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
-if(NOT FlatBuffers_FOUND)
-  message(STATUS "Build pota-quantization-value-test: FAILED (missing FlatBuffers)")
-  return()
-endif(NOT FlatBuffers_FOUND)
-
 macro(addTest NAME GRANULARITY DTYPE)
   list(APPEND QUANTIZATION_VALUE_TEST ${NAME})
   list(APPEND QUANTIZATION_VALUE_TEST_WITH_PARAM ${NAME} ${GRANULARITY} ${DTYPE})
@@ -31,12 +25,16 @@ include("test.local.lst" OPTIONAL)
 unset(TEST_DEPS)
 
 get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
-get_target_property(SCHEMA_BIN_PATH mio_circle04 BINARY_DIR)
 
 configure_file("${CMAKE_CURRENT_SOURCE_DIR}/gen_h5_explicit_inputs.py"
                "${CMAKE_CURRENT_BINARY_DIR}/gen_h5_explicit_inputs.py" COPYONLY)
 
-set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_8_0")
+# TODO Run both 2.8.0 and 2.10.1 test for jammy
+if(ONE_UBUNTU_CODENAME_JAMMY)
+  set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_10_1")
+else(ONE_UBUNTU_CODENAME_JAMMY)
+  set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_8_0")
+endif(ONE_UBUNTU_CODENAME_JAMMY)
 
 ###
 ### Generate test.config
@@ -56,21 +54,13 @@ add_custom_command(
   COMMENT "Generate test configuration"
 )
 
-###
-### Generate python interface for circle schema
-###
-set(CIRCLE_SCHEMA_PYTHON_DIR "${CMAKE_CURRENT_BINARY_DIR}/circle")
-
-add_custom_command(
-  OUTPUT ${CIRCLE_SCHEMA_PYTHON_DIR}
-  COMMAND ${CMAKE_COMMAND} -E remove_directory "${CIRCLE_SCHEMA_PYTHON_DIR}"
-  COMMAND "$<TARGET_FILE:flatbuffers::flatc>" --python
-  -o "${CMAKE_CURRENT_BINARY_DIR}" "${SCHEMA_BIN_PATH}/schema.fbs"
-  DEPENDS flatbuffers::flatc
-  COMMENT "Generate python interface for circle schema"
-)
+# Import pics module
+get_target_property(PICS_BIN_PATH pics BINARY_DIR)
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/circle
+                   COMMAND ${CMAKE_COMMAND} -E create_symlink
+                   ${PICS_BIN_PATH}/circle ${CMAKE_CURRENT_BINARY_DIR}/circle)
 
-list(APPEND TEST_DEPS "${TEST_CONFIG}" "${CIRCLE_SCHEMA_PYTHON_DIR}")
+list(APPEND TEST_DEPS "${TEST_CONFIG}" "${CMAKE_CURRENT_BINARY_DIR}/circle")
 
 # This enforces CMake to generate all the dependencies during "build" phase
 add_custom_target(pota_quantization_value_test_deps ALL DEPENDS ${TEST_DEPS})
@@ -109,6 +99,14 @@ add_test(
 )
 
 add_test(
+  NAME pota_parallel_record_minmax_test
+  COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_parallel_record_minmax.sh"
+          "${TEST_CONFIG}"
+          "${ARTIFACTS_BIN_PATH}"
+          ${QUANTIZATION_VALUE_TEST_WITH_PARAM}
+)
+
+add_test(
   NAME pota_quantization_test_with_config
   COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_quantization_with_config.sh"
           "${TEST_CONFIG}"
@@ -118,4 +116,5 @@ add_test(
 
 set_tests_properties(pota_record_minmax_test PROPERTIES DEPENDS pota_fake_wquant_test)
 set_tests_properties(pota_quantization_test PROPERTIES DEPENDS pota_record_minmax_test)
+set_tests_properties(pota_parallel_record_minmax_test PROPERTIES DEPENDS pota_record_minmax_test)
 set_tests_properties(pota_quantization_test_with_config PROPERTIES DEPENDS pota_fake_wquant_test_with_config)
index 5ce8dfb..e8acc81 100644 (file)
@@ -3,3 +3,4 @@ require("circle-quantizer")
 require("circle-tensordump")
 require("common-artifacts")
 require("mio-circle04")
+require("pics")
diff --git a/compiler/pota-quantization-value-test/test_parallel_record_minmax.sh b/compiler/pota-quantization-value-test/test_parallel_record_minmax.sh
new file mode 100755 (executable)
index 0000000..0af2c01
--- /dev/null
@@ -0,0 +1,95 @@
+#!/bin/bash
+
+# This script tests the parallel behavior of record-minmax
+#
+# HOW TO USE
+#
+# ./test_parallel_record_minmax.sh <path/to/test.config> <path/to/work_dir> <TEST 1> <TEST 2> ...
+# test.config : set ${RECORD_MINMAX_PATH} and ${CIRCLE_TENSORDUMP_PATH}
+# work_dir : build directory of pota-quantization-value-test (ex: build/compiler/pota-quantization-value-test)
+
+SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+COMPARE_SCRIPT_PATH="${SOURCE_PATH}/compare_tensors.py"
+CONFIG_PATH="$1"; shift
+BIN_PATH=$(dirname "${CONFIG_PATH}")
+TEST_INPUT_PATH="${SOURCE_PATH}/test_inputs"
+GEN_SCRIPT_PATH="${BIN_PATH}/gen_h5_explicit_inputs.py"
+WORKDIR="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found RECORD-MINMAX: ${RECORD_MINMAX_PATH}"
+echo "-- Found CIRCLE_TENSORDUMP: ${CIRCLE_TENSORDUMP_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+while [ "$1" != "" ]; do
+  MODELNAME=$1; shift
+  GRANULARITY=$1; shift
+  DTYPE=$1; shift
+  TESTCASE="${MODELNAME}.${GRANULARITY}.${DTYPE}"
+
+  TESTED+=("${TESTCASE}")
+
+  TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+  TEST_RESULT_FILE="${BIN_PATH}/${TESTCASE}"
+
+  PASSED_TAG="${TEST_RESULT_FILE}.parallel_record_minmax.passed"
+  rm -f "${PASSED_TAG}"
+
+  cat > "${TEST_RESULT_FILE}_parallel_record_minmax.log" <(
+    exec 2>&1
+    set -ex
+    # Generate h5 input data
+    source "${VIRTUALENV}/bin/activate"
+    "${VIRTUALENV}/bin/python" "${GEN_SCRIPT_PATH}" \
+      --model "${WORKDIR}/${MODELNAME}.circle" \
+      --input "${TEST_INPUT_PATH}/${MODELNAME}/${GRANULARITY}/${DTYPE}" \
+      --output "${TESTCASE_FILE}.input.h5"
+    if [[ $? -ne 0 ]]; then
+      echo "FAILED TO GENERATE INPUT"
+      continue
+    fi
+    # Run parallel record-minmax
+    "${RECORD_MINMAX_PATH}" \
+      --input_model "${TEST_RESULT_FILE}.fake_quantized.circle" \
+      --input_data "${TESTCASE_FILE}.input.h5" \
+      --output_model "${TEST_RESULT_FILE}.parallel_minmax_recorded.circle" \
+      --num_threads 4
+    # Dump min/max values (circle-tensordump)
+    "${CIRCLE_TENSORDUMP_PATH}" \
+      "${TEST_RESULT_FILE}.parallel_minmax_recorded.circle" \
+      --tensors_to_hdf5 "${TEST_RESULT_FILE}.parallel_minmax_recorded.circle.h5"
+    # Compare result
+    "${VIRTUALENV}/bin/python" "${COMPARE_SCRIPT_PATH}" \
+      --input_h5 "${TEST_RESULT_FILE}.parallel_minmax_recorded.circle.h5" \
+      --expect_dir "${SOURCE_PATH}/expected_outputs/${MODELNAME}/${GRANULARITY}/${DTYPE}/record_minmax" \
+      --mode record_minmax
+    if [[ $? -eq 0 ]]; then
+      touch "${PASSED_TAG}"
+    fi
+  )
+
+  if [[ -f "${PASSED_TAG}" ]]; then
+    PASSED+=("$TESTCASE")
+  else
+    FAILED+=("$TESTCASE")
+  fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+  echo "FAILED"
+  for TEST in "${FAILED[@]}"
+  do
+    echo "- ${TEST}"
+  done
+  exit 255
+fi
+
+echo "PASSED"
+exit 0
index 6363614..63fe33d 100644 (file)
@@ -44,3 +44,14 @@ add_test(
           "${NNCC_OVERLAY_DIR}/venv_2_8_0"
           ${RECORD_MINMAX_CONVERSION_TEST}
 )
+
+if(ONE_UBUNTU_CODENAME_JAMMY)
+  add_test(
+    NAME record_minmax_conversion_210_test
+    COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh"
+            "${TEST_CONFIG}"
+            "${ARTIFACTS_BIN_PATH}"
+            "${NNCC_OVERLAY_DIR}/venv_2_10_1"
+            ${RECORD_MINMAX_CONVERSION_TEST}
+  )
+endif(ONE_UBUNTU_CODENAME_JAMMY)
diff --git a/compiler/record-minmax-thread-safety-test/CMakeLists.txt b/compiler/record-minmax-thread-safety-test/CMakeLists.txt
new file mode 100644 (file)
index 0000000..ba0f082
--- /dev/null
@@ -0,0 +1,73 @@
+if(NOT ENABLE_TEST)
+    return()
+endif(NOT ENABLE_TEST)
+
+# Build record-minmax-for-thread-test if target arch is 64bit
+# Thread sanitizer is only available on 64bit machine
+# (https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual#supported-platforms)
+if(NOT "${CMAKE_SIZEOF_VOID_P}" STREQUAL "8")
+    return()
+endif(NOT "${CMAKE_SIZEOF_VOID_P}" STREQUAL "8")
+
+unset(RECORD_MINMAX_THREAD_SAFETY_TEST)
+
+macro(addTest NAME)
+    list(APPEND RECORD_MINMAX_THREAD_SAFETY_TEST ${NAME})
+endmacro(addTest)
+
+# Read "test.lst"
+include("test.lst")
+# Read "test.local.lst" if exists
+include("test.local.lst" OPTIONAL)
+
+unset(TEST_DEPS)
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+###
+### Generate test.config
+###
+set(TEST_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/test.config")
+
+add_custom_command(
+        OUTPUT "${MICRO_ARM_BUILD_DEPENDENCY}"
+        COMMAND "${CMAKE_COMMAND}" "${CMAKE_CURRENT_SOURCE_DIR}/standalone" ${CMAKE_ARM_OPTIONS}
+        WORKING_DIRECTORY "${MICRO_ARM_BUILD_DIR}"
+        DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/standalone/CMakeLists.txt"
+        VERBATIM
+)
+set(RECORD_MINMAX_PATH "$<TARGET_FILE:record-minmax-for-thread-test>")
+
+add_custom_command(
+        OUTPUT ${TEST_CONFIG}
+        COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG}
+        COMMAND ${CMAKE_COMMAND} -E echo 'RECORD_MINMAX_PATH=\"$<TARGET_FILE:record-minmax-for-thread-test>\"' >> ${TEST_CONFIG}
+        DEPENDS record-minmax-for-thread-test
+        COMMENT "Generate test configuration"
+)
+
+list(APPEND TEST_DEPS "${TEST_CONFIG}")
+
+# This enforces CMake to generate all the dependencies during "build" phase
+add_custom_target(record_minmax_thread_safety_test_deps ALL DEPENDS ${TEST_DEPS})
+
+# Run tests
+add_test(
+        NAME record_minmax_thread_safety_test
+        COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh"
+        "${TEST_CONFIG}"
+        "${ARTIFACTS_BIN_PATH}"
+        "${NNCC_OVERLAY_DIR}/venv_2_8_0"
+        ${RECORD_MINMAX_THREAD_SAFETY_TEST}
+)
+
+if(ONE_UBUNTU_CODENAME_JAMMY)
+    add_test(
+            NAME record_minmax_thread_safety_210_test
+            COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh"
+            "${TEST_CONFIG}"
+            "${ARTIFACTS_BIN_PATH}"
+            "${NNCC_OVERLAY_DIR}/venv_2_10_1"
+            ${RECORD_MINMAX_THREAD_SAFETY_TEST}
+    )
+endif(ONE_UBUNTU_CODENAME_JAMMY)
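
A sketch of running the tests registered above from the build tree, assuming the project was configured with tests enabled on a 64-bit host (ThreadSanitizer is only available there):

```
# From the nncc build directory; runs the thread-safety test(s) registered above.
ctest -R record_minmax_thread_safety --output-on-failure
```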
diff --git a/compiler/record-minmax-thread-safety-test/gen_h5_random_inputs.py b/compiler/record-minmax-thread-safety-test/gen_h5_random_inputs.py
new file mode 100644 (file)
index 0000000..d57289a
--- /dev/null
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+import h5py as h5
+import numpy as np
+import tensorflow as tf
+import argparse
+
+#
+# This script generates a pack of random input data (.h5) expected by the input tflite model
+#
+# Basic usage:
+#   gen_h5_random_inputs.py --model <path/to/tflite/model> --num_data <number/of/data> --output <path/to/output/data>
+#   ex: gen_h5_random_inputs.py --model add.tflite --num_data 3 --output add.tflite.input.h5
+#   (This will create add.tflite.input.h5 composed of three random inputs in the same directory as the model)
+parser = argparse.ArgumentParser()
+parser.add_argument('--model', type=str, required=True)
+parser.add_argument('--num_data', type=int, required=True)
+parser.add_argument('--output', type=str, required=True)
+args = parser.parse_args()
+
+model = args.model
+
+num_data = args.num_data
+
+output_path = args.output
+
+# Build TFLite interpreter. (to get the information of model input)
+interpreter = tf.lite.Interpreter(model)
+input_details = interpreter.get_input_details()
+
+# Create h5 file
+h5_file = h5.File(output_path, 'w')
+group = h5_file.create_group("value")
+group.attrs['desc'] = "Input data for " + model
+
+# Generate random data
+for i in range(num_data):
+    sample = group.create_group(str(i))
+    sample.attrs['desc'] = "Input data " + str(i)
+
+    for j in range(len(input_details)):
+        input_detail = input_details[j]
+        print(input_detail["dtype"])
+        if input_detail["dtype"] == np.bool_:
+            # Generate random bool [0, 1]
+            input_data = np.array(
+                np.random.random_integers(0, 1, input_detail["shape"]),
+                input_detail["dtype"])
+        elif input_detail["dtype"] == np.float32:
+            # Generate random input [-5, 5)
+            input_data = np.array(10 * np.random.random_sample(input_detail["shape"]) - 5,
+                                  input_detail["dtype"])
+        sample.create_dataset(str(j), data=input_data)
+
+h5_file.close()
diff --git a/compiler/record-minmax-thread-safety-test/requires.cmake b/compiler/record-minmax-thread-safety-test/requires.cmake
new file mode 100644 (file)
index 0000000..9105c3e
--- /dev/null
@@ -0,0 +1,2 @@
+require("common-artifacts")
+require("record-minmax")
diff --git a/compiler/record-minmax-thread-safety-test/test.lst b/compiler/record-minmax-thread-safety-test/test.lst
new file mode 100644 (file)
index 0000000..771c3bd
--- /dev/null
@@ -0,0 +1,16 @@
+addTest(Add_000)
+addTest(AveragePool2D_000)
+addTest(Concatenation_000)
+addTest(Conv2D_000)
+addTest(Conv2D_001)
+addTest(Conv2D_002)
+addTest(DepthwiseConv2D_000)
+addTest(FullyConnected_000)
+addTest(FullyConnected_001)
+addTest(MaxPool2D_000)
+addTest(Mul_000)
+addTest(Pad_000)
+addTest(Reshape_000)
+addTest(Reshape_001)
+addTest(Reshape_002)
+addTest(Softmax_000)
diff --git a/compiler/record-minmax-thread-safety-test/testall.sh b/compiler/record-minmax-thread-safety-test/testall.sh
new file mode 100755 (executable)
index 0000000..4b47b3e
--- /dev/null
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+# This script tests the thread safety of record-minmax's parallel recording (with ThreadSanitizer)
+#
+# HOW TO USE
+#
+# ./testall.sh <path/to/test.config> <path/to/work_dir> <TEST 1> <TEST 2> ...
+# test.config : set ${RECORD_MINMAX_PATH}
+# work_dir : build directory of record-minmax-thread-safety-test (ex: build/compiler/record-minmax-thread-safety-test)
+
+GEN_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+GEN_SCRIPT_PATH="${GEN_SOURCE_PATH}/gen_h5_random_inputs.py"
+CONFIG_PATH="$1"; shift
+BIN_PATH=$(dirname "$CONFIG_PATH")
+WORKDIR="$1"; shift
+VIRTUALENV="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found RECORD-MINMAX: ${RECORD_MINMAX_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+for TESTCASE in "$@"; do
+  TESTED+=("${TESTCASE}")
+
+  TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+
+  PASSED_TAG="${BIN_PATH}/${TESTCASE}.passed"
+  rm -f "${PASSED_TAG}"
+
+  cat > "${BIN_PATH}/${TESTCASE}.log" <(
+    exec 2>&1
+    set -ex
+    # Generate h5 input data
+    source "${VIRTUALENV}/bin/activate"
+    "${VIRTUALENV}/bin/python" "${GEN_SCRIPT_PATH}" \
+    --model "${TESTCASE_FILE}.tflite" \
+    --num_data 8 \
+    --output "${BIN_PATH}/${TESTCASE}.tflite.input.h5"
+    if [[ $? -ne 0 ]]; then
+      echo "FAILED TO GENERATE INPUT"
+      continue
+    fi
+    # Run record-minmax in parallel mode
+    "${RECORD_MINMAX_PATH}" \
+      --input_model "${TESTCASE_FILE}.circle" \
+      --input_data "${BIN_PATH}/${TESTCASE}.tflite.input.h5" \
+      --output_model "${BIN_PATH}/${TESTCASE}.out.circle" \
+      --num_threads 4
+    if [[ $? -ne 0 ]]; then
+      echo "FAILED TO GENERATE CIRCLE OUTPUT"
+      continue
+    fi
+  )
+
+  if ! grep -q "ThreadSanitizer: data race" "${BIN_PATH}/${TESTCASE}.log"; then
+    touch "${PASSED_TAG}"
+  fi
+
+  if [[ -f "${PASSED_TAG}" ]]; then
+    PASSED+=("$TESTCASE")
+  else
+    FAILED+=("$TESTCASE")
+  fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+  echo "FAILED"
+  for TEST in "${FAILED[@]}"
+  do
+    echo "- ${TEST}"
+  done
+  exit 255
+fi
+
+echo "PASSED"
+exit 0
index b9c08f4..391e35b 100644 (file)
@@ -11,9 +11,11 @@ target_link_libraries(record-minmax luci_import)
 target_link_libraries(record-minmax luci_env)
 target_link_libraries(record-minmax luci_export)
 target_link_libraries(record-minmax luci_interpreter)
+target_link_libraries(record-minmax luci_log)
 target_link_libraries(record-minmax dio_hdf5)
 target_link_libraries(record-minmax vconone)
 target_link_libraries(record-minmax nncc_coverage)
+target_link_libraries(record-minmax nncc_common)
 
 install(TARGETS record-minmax DESTINATION bin)
 
@@ -21,6 +23,31 @@ if(NOT ENABLE_TEST)
   return()
 endif(NOT ENABLE_TEST)
 
+# Build record-minmax-for-thread-test if target arch is 64bit
+# Thread sanitizer is only available on 64bit machine
+# (https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual#supported-platforms)
+if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "8")
+  # create record-minmax-for-thread-test target
+  # Note: record-minmax-for-thread-test is built with -fsanitize=thread so that thread sanitizer can check memory bugs,
+  # record-minmax is built without the option for performance.
+  add_executable(record-minmax-for-thread-test ${DRIVER} ${SOURCES})
+  target_include_directories(record-minmax-for-thread-test PRIVATE include)
+
+  target_link_libraries(record-minmax-for-thread-test arser)
+  target_link_libraries(record-minmax-for-thread-test safemain)
+  target_link_libraries(record-minmax-for-thread-test luci_import)
+  target_link_libraries(record-minmax-for-thread-test luci_env)
+  target_link_libraries(record-minmax-for-thread-test luci_export)
+  target_link_libraries(record-minmax-for-thread-test luci_interpreter)
+  target_link_libraries(record-minmax-for-thread-test dio_hdf5)
+  target_link_libraries(record-minmax-for-thread-test vconone)
+  target_link_libraries(record-minmax-for-thread-test nncc_coverage)
+  target_link_libraries(record-minmax-for-thread-test luci_log)
+
+  target_compile_options(record-minmax-for-thread-test PUBLIC -fsanitize=thread)
+  target_link_libraries(record-minmax-for-thread-test -fsanitize=thread)
+endif("${CMAKE_SIZEOF_VOID_P}" STREQUAL "8")
+
 file(GLOB_RECURSE TESTS "tests/*.test.cpp")
 
 nnas_find_package(GTest REQUIRED)
index faa402f..d8ed95e 100644 (file)
@@ -21,6 +21,9 @@
 
 #include <luci/UserSettings.h>
 
+// TODO declare own log signature of record-minmax
+#include <luci/Log.h>
+
 void print_version(void)
 {
   std::cout << "record-minmax version " << vconone::get_string() << std::endl;
@@ -31,6 +34,8 @@ int entry(const int argc, char **argv)
 {
   using namespace record_minmax;
 
+  LOGGER(l);
+
   arser::Arser arser(
     "Embedding min/max values of activations to the circle model for post-training quantization");
 
@@ -50,6 +55,10 @@ int entry(const int argc, char **argv)
     .type(arser::DataType::FLOAT)
     .help("Record n'th percentile of min");
 
+  arser.add_argument("--num_threads")
+    .type(arser::DataType::INT32)
+    .help("Number of threads (default: 1)");
+
   arser.add_argument("--max_percentile")
     .type(arser::DataType::FLOAT)
     .help("Record n'th percentile of max");
@@ -92,10 +101,17 @@ int entry(const int argc, char **argv)
   float min_percentile = 1.0;
   float max_percentile = 99.0;
   std::string input_data_format("h5");
+  uint32_t num_threads = 1;
 
   if (arser["--min_percentile"])
     min_percentile = arser.get<float>("--min_percentile");
 
+  if (arser["--num_threads"])
+    num_threads = arser.get<int>("--num_threads");
+
+  if (num_threads < 1)
+    throw std::runtime_error("The number of threads must be greater than zero");
+
   if (arser["--max_percentile"])
     max_percentile = arser.get<float>("--max_percentile");
 
@@ -111,7 +127,13 @@ int entry(const int argc, char **argv)
   if (arser["--input_data_format"])
     input_data_format = arser.get<std::string>("--input_data_format");
 
-  RecordMinMax rmm;
+  RecordMinMax rmm(num_threads);
+
+  // TODO: support parallel record for profile with random data
+  if (num_threads > 1 and not arser["--input_data"])
+  {
+    throw std::runtime_error("Input data must be given for parallel recording");
+  }
 
   // Initialize interpreter and observer
   rmm.initialize(input_model_path);
@@ -120,10 +142,22 @@ int entry(const int argc, char **argv)
   {
     auto input_data_path = arser.get<std::string>("--input_data");
 
+    // TODO: support parallel record from file and dir input data format
+    if (num_threads > 1 and not(input_data_format == "h5") and not(input_data_format == "hdf5"))
+    {
+      throw std::runtime_error("Parallel recording is used only for h5 now");
+    }
+
     if (input_data_format == "h5" || input_data_format == "hdf5")
     {
       // Profile min/max while executing the H5 data
-      rmm.profileData(mode, input_data_path, min_percentile, max_percentile);
+      if (num_threads == 1)
+        rmm.profileData(mode, input_data_path, min_percentile, max_percentile);
+      else
+      {
+        INFO(l) << "Using parallel recording" << std::endl;
+        rmm.profileDataInParallel(mode, input_data_path, min_percentile, max_percentile);
+      }
     }
     // input_data is a text file having a file path in each line.
     // Each data file is composed of inputs of a model, concatenated in
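
With the new `--num_threads` flag wired in above, parallel recording is only taken for HDF5 input data; a minimal invocation sketch, assuming a fake-quantized circle model and a matching .h5 input file (the file names here are hypothetical):

```
# Record min/max with 4 worker threads; the driver rejects this mode if --input_data is omitted
# or if the input data format is not h5/hdf5.
record-minmax \
  --input_model model.fake_quantized.circle \
  --input_data model.input.h5 \
  --output_model model.minmax_recorded.circle \
  --num_threads 4
```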
index ce63438..c34ac8e 100644 (file)
@@ -43,6 +43,15 @@ public:
     vectors.max_vector.push_back(max);
   }
 
+  void appendMinMaxVector(const luci::CircleNode *node, const MinMaxVectors &minmax_vector)
+  {
+    MinMaxVectors &vectors = _minmax_map[node];
+    vectors.min_vector.insert(vectors.min_vector.end(), minmax_vector.min_vector.begin(),
+                              minmax_vector.min_vector.end());
+    vectors.max_vector.insert(vectors.max_vector.end(), minmax_vector.max_vector.begin(),
+                              minmax_vector.max_vector.end());
+  }
+
   const std::unordered_map<const luci::CircleNode *, MinMaxVectors> *getMap() const
   {
     return &_minmax_map;
index 5b993e4..d5da01c 100644 (file)
@@ -69,13 +69,13 @@ float getMovingAverage(const std::vector<float> &vector, const float alpha,
   assert(alpha >= 0.0 && alpha <= 1.0);
   assert(batch_size > 0);
 
-  auto getBatchMinOrMax = [&](int start_index) {
-    assert(start_index >= 0 && start_index < vector.size());
+  auto getBatchMinOrMax = [&](uint32_t start_index) {
+    assert(start_index < vector.size());
 
     float res = is_min ? std::numeric_limits<float>::max() : std::numeric_limits<float>::lowest();
-    for (int offset = 0; offset < batch_size; offset++)
+    for (uint32_t offset = 0; offset < batch_size; offset++)
     {
-      int index = start_index + offset;
+      uint32_t index = start_index + offset;
       if (index >= vector.size())
         break;
 
@@ -92,7 +92,7 @@ float getMovingAverage(const std::vector<float> &vector, const float alpha,
   };
 
   float curr_avg = getBatchMinOrMax(0);
-  for (size_t i = batch_size; i < vector.size(); i += batch_size)
+  for (uint32_t i = batch_size; i < vector.size(); i += batch_size)
   {
     curr_avg = curr_avg * alpha + getBatchMinOrMax(i) * (1.0 - alpha);
   }
index aaf1161..2a7b88f 100644 (file)
 #include "MinMaxObserver.h"
 
 #include <memory>
+#include <thread>
 
 namespace record_minmax
 {
 
+using Buffer = std::vector<char>;
+using Output = std::vector<Buffer>;
+using WholeOutput = std::vector<Output>;
+
 class RecordMinMax
 {
 public:
-  explicit RecordMinMax() = default;
+  explicit RecordMinMax(uint32_t num_threads) : _threads_size(num_threads)
+  {
+    assert(_threads_size > 0);
+  }
 
   ~RecordMinMax() = default;
 
@@ -39,6 +47,9 @@ public:
   void profileData(const std::string &mode, const std::string &input_data_path,
                    float min_percentile, float max_percentile);
 
+  void profileDataInParallel(const std::string &mode, const std::string &input_data_path,
+                             float min_percentile, float max_percentile);
+
   void profileRawData(const std::string &mode, const std::string &input_data_path,
                       float min_percentile, float max_percentile);
 
@@ -51,9 +62,18 @@ public:
   void saveModel(const std::string &output_model_path);
 
 private:
+  luci_interpreter::Interpreter *getInterpreter() const { return _interpreters[0].get(); }
+  MinMaxObserver *getObserver() const { return _observers[0].get(); }
+
+  WholeOutput importH5Data(const std::string &input_data_path);
+
   std::unique_ptr<luci::Module> _module;
-  std::unique_ptr<luci_interpreter::Interpreter> _interpreter;
-  std::unique_ptr<MinMaxObserver> _observer;
+
+  // Multiple interpreters are used for parallel execution
+  std::vector<std::unique_ptr<luci_interpreter::Interpreter>> _interpreters;
+  std::vector<std::unique_ptr<MinMaxObserver>> _observers;
+
+  uint32_t _threads_size = 0;
 };
 
 } // namespace record_minmax
index 6dbf98d..c3da83a 100644 (file)
@@ -22,6 +22,7 @@
 #include <luci/CircleExporter.h>
 #include <luci/CircleFileExpContract.h>
 #include <luci/IR/CircleQuantParam.h>
+#include <luci/Log.h>
 #include <dio_hdf5/HDF5Importer.h>
 
 #include <dirent.h>
@@ -39,6 +40,17 @@ using DataType = loco::DataType;
 namespace
 {
 
+// Max h5 file size for parallel recording in bytes = 1 GB
+const long h5_max_size_bytes = 1000000000;
+
+long getH5FileSize(const std::string &input_data_path)
+{
+  std::ifstream in_file(input_data_path, std::ios::binary);
+  in_file.seekg(0, std::ios::end);
+
+  return in_file.tellg();
+}
+
 uint32_t numElements(const luci::CircleNode *node)
 {
   uint32_t num_elements = 1;
@@ -70,6 +82,8 @@ void readDataFromFile(const std::string &filename, std::vector<char> &data, size
     throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
   if (fs.read(data.data(), data_size).fail())
     throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n");
+  if (fs.peek() != EOF)
+    throw std::runtime_error("Input tensor size mismatches with \"" + filename + "\".\n");
 }
 
 std::vector<uint8_t> genRandomBoolData(std::mt19937 &gen, uint32_t num_elements)
@@ -169,6 +183,8 @@ namespace record_minmax
 
 void RecordMinMax::initialize(const std::string &input_model_path)
 {
+  assert(_threads_size > 0);
+
   // Load model from the file
   std::ifstream fs(input_model_path, std::ifstream::binary);
   if (fs.fail())
@@ -199,12 +215,20 @@ void RecordMinMax::initialize(const std::string &input_model_path)
     throw std::runtime_error("Failed to load '" + input_model_path + "'");
   }
 
-  // Initialize interpreter
-  _interpreter = std::make_unique<luci_interpreter::Interpreter>(_module.get());
+  // Create and initialize interpreters and observers
+  _interpreters.resize(_threads_size);
+  _observers.resize(_threads_size);
 
-  _observer = std::make_unique<MinMaxObserver>();
+  for (uint32_t thread_idx = 0; thread_idx < _threads_size; ++thread_idx)
+  {
+    auto interpreter = std::make_unique<luci_interpreter::Interpreter>(_module.get());
+    auto observer = std::make_unique<MinMaxObserver>();
 
-  _interpreter->attachObserver(_observer.get());
+    interpreter->attachObserver(observer.get());
+
+    _observers[thread_idx] = std::move(observer);
+    _interpreters[thread_idx] = std::move(interpreter);
+  }
 }
 
 // input_data_path is a path to the directory
@@ -235,7 +259,7 @@ void RecordMinMax::profileRawDataDirectory(const std::string &mode,
     total_input_size += getTensorSize(input_node);
   }
 
-  while (entry = readdir(dp))
+  while ((entry = readdir(dp)))
   {
     // Skip if the entry is not a regular file
     if (entry->d_type != DT_REG)
@@ -256,12 +280,12 @@ void RecordMinMax::profileRawDataDirectory(const std::string &mode,
     {
       const auto *input_node = loco::must_cast<const luci::CircleInput *>(input);
       const auto input_size = getTensorSize(input_node);
-      _interpreter->writeInputTensor(input_node, input_data.data() + offset, input_size);
+      getInterpreter()->writeInputTensor(input_node, input_data.data() + offset, input_size);
 
       offset += input_size;
     }
 
-    _interpreter->interpret();
+    getInterpreter()->interpret();
 
     num_records++;
   }
@@ -273,7 +297,7 @@ void RecordMinMax::profileRawDataDirectory(const std::string &mode,
 
   std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
 
-  update_quantparam(_observer.get(), mode, min_percentile, max_percentile);
+  update_quantparam(getObserver(), mode, min_percentile, max_percentile);
 }
 
 // input_data_path is a text file which specifies the representative data
@@ -318,12 +342,12 @@ void RecordMinMax::profileRawData(const std::string &mode, const std::string &in
     {
       const auto *input_node = loco::must_cast<const luci::CircleInput *>(input);
       const auto input_size = getTensorSize(input_node);
-      _interpreter->writeInputTensor(input_node, input_data.data() + offset, input_size);
+      getInterpreter()->writeInputTensor(input_node, input_data.data() + offset, input_size);
 
       offset += input_size;
     }
 
-    _interpreter->interpret();
+    getInterpreter()->interpret();
 
     num_records++;
   }
@@ -333,7 +357,65 @@ void RecordMinMax::profileRawData(const std::string &mode, const std::string &in
 
   std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
 
-  update_quantparam(_observer.get(), mode, min_percentile, max_percentile);
+  update_quantparam(getObserver(), mode, min_percentile, max_percentile);
+}
+
+WholeOutput RecordMinMax::importH5Data(const std::string &input_data_path)
+{
+  try
+  {
+    dio::hdf5::HDF5Importer importer(input_data_path);
+    importer.importGroup("value");
+
+    bool is_raw_data = importer.isRawData();
+
+    const auto num_records = importer.numData();
+    if (num_records == 0)
+      throw std::runtime_error("The input data file does not contain any record.");
+
+    const auto input_nodes = loco::input_nodes(_module->graph());
+    const auto num_inputs = input_nodes.size();
+
+    WholeOutput whole_output(num_records);
+
+    // Read inputs to whole_output
+    for (int i = 0; i < num_records; ++i)
+    {
+      if (num_inputs != static_cast<uint32_t>(importer.numInputs(i)))
+        throw std::runtime_error("Wrong number of inputs.");
+
+      for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
+      {
+        const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+        assert(input_node->index() == input_idx);
+        checkInputDimension(input_node);
+        Buffer input_data(getTensorSize(input_node));
+
+        if (!is_raw_data)
+        {
+          DataType dtype;
+          Shape shape;
+          importer.readTensor(i, input_idx, &dtype, &shape, input_data.data());
+
+          // Check the type and the shape of the input data is valid
+          verifyTypeShape(input_node, dtype, shape);
+        }
+        else
+        {
+          // Skip type/shape check for raw data
+          importer.readTensor(i, input_idx, input_data.data());
+        }
+        whole_output[i].emplace_back(std::move(input_data));
+      }
+    }
+
+    return whole_output;
+  }
+  catch (const H5::Exception &e)
+  {
+    H5::Exception::printErrorStack();
+    throw std::runtime_error("HDF5 error occurred.");
+  }
 }
 
 void RecordMinMax::profileData(const std::string &mode, const std::string &input_data_path,
@@ -355,12 +437,12 @@ void RecordMinMax::profileData(const std::string &mode, const std::string &input
 
     for (int32_t record_idx = 0; record_idx < num_records; record_idx++)
     {
-      if (num_inputs != importer.numInputs(record_idx))
+      if (num_inputs != static_cast<uint32_t>(importer.numInputs(record_idx)))
         throw std::runtime_error("Wrong number of inputs.");
 
       std::cout << "Recording " << record_idx << "'th data" << std::endl;
 
-      for (int32_t input_idx = 0; input_idx < num_inputs; input_idx++)
+      for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
       {
         const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
         assert(input_node->index() == input_idx);
@@ -384,10 +466,10 @@ void RecordMinMax::profileData(const std::string &mode, const std::string &input
 
         // TODO: Input data is copied twice (file -> buffer (input_data) -> interpreter inputs)
         //       We can reduce the copy by directly writing data from file to interpreter inputs
-        _interpreter->writeInputTensor(input_node, input_data.data(), input_data.size());
+        getInterpreter()->writeInputTensor(input_node, input_data.data(), input_data.size());
       }
 
-      _interpreter->interpret();
+      getInterpreter()->interpret();
     }
 
     std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
@@ -398,7 +480,96 @@ void RecordMinMax::profileData(const std::string &mode, const std::string &input
     throw std::runtime_error("HDF5 error occurred.");
   }
 
-  update_quantparam(_observer.get(), mode, min_percentile, max_percentile);
+  update_quantparam(getObserver(), mode, min_percentile, max_percentile);
+}
+
+void RecordMinMax::profileDataInParallel(const std::string &mode,
+                                         const std::string &input_data_path, float min_percentile,
+                                         float max_percentile)
+{
+  LOGGER(l);
+
+  assert(_interpreters.size() == _threads_size);
+  assert(_observers.size() == _threads_size);
+
+  const long h5_file_size = getH5FileSize(input_data_path);
+
+  if (h5_file_size > h5_max_size_bytes)
+    throw std::runtime_error("H5 file size is too large for parallel recording");
+
+  WholeOutput whole_output;
+  try
+  {
+    whole_output = importH5Data(input_data_path);
+  }
+  catch (const std::bad_alloc &e)
+  {
+    throw std::runtime_error("Out of memory during h5 data load.");
+  }
+
+  const auto num_records = whole_output.size();
+  const auto input_nodes = loco::input_nodes(_module->graph());
+
+  // Start parallel part
+  INFO(l) << _threads_size << " concurrent threads are supported." << std::endl;
+
+  const auto run_threads = num_records < _threads_size ? num_records : _threads_size;
+
+  const auto records_batch = static_cast<uint32_t>(num_records / run_threads);
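+  // Each of the first (run_threads - 1) threads handles records_batch records;
+  // the last thread also takes the remainder of num_records.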
+
+  auto interpret_batch = [&whole_output, &input_nodes](int first_record, int last_record,
+                                                       luci_interpreter::Interpreter *interpreter) {
+    for (int record_index = first_record; record_index < last_record; ++record_index)
+    {
+      for (uint32_t input_idx = 0; input_idx < input_nodes.size(); input_idx++)
+      {
+        const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+
+        const auto &cur_input_data = whole_output[record_index][input_idx];
+        interpreter->writeInputTensor(input_node, cur_input_data.data(), cur_input_data.size());
+      }
+      interpreter->interpret();
+    }
+  };
+
+  std::vector<std::thread> threads;
+  for (uint32_t t = 0; t < run_threads; ++t)
+  {
+    if (t < run_threads - 1)
+    {
+      threads.emplace_back(interpret_batch, records_batch * t, records_batch * (t + 1),
+                           _interpreters[t].get());
+    }
+    else
+    {
+      threads.emplace_back(interpret_batch, records_batch * t, num_records, _interpreters[t].get());
+    }
+  }
+
+  for (uint32_t i = 0; i < run_threads; ++i)
+    threads.at(i).join();
+
+  // End parallel part
+
+  // Copy all min, max values to one observer
+  auto observer = std::make_unique<MinMaxObserver>();
+  auto main_min_max_map = const_cast<MinMaxMap *>(observer->minMaxData());
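+  // minMaxData() returns a const pointer; const_cast allows appending the
+  // per-thread results into this single map below.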
+
+  for (const auto &obs : _observers)
+  {
+    const auto cur_minmax_map = obs->minMaxData()->getMap();
+    for (auto &iter : *cur_minmax_map)
+    {
+      const auto node = iter.first;
+      const auto &minmax = iter.second;
+
+      main_min_max_map->appendMinMaxVector(node, minmax);
+    }
+  }
+
+  std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
+
+  update_quantparam(observer.get(), mode, min_percentile, max_percentile);
 }
 
 void RecordMinMax::profileDataWithRandomInputs(const std::string &mode, float min_percentile,
@@ -414,11 +585,11 @@ void RecordMinMax::profileDataWithRandomInputs(const std::string &mode, float mi
   std::mt19937 gen(rd());
   std::uniform_real_distribution<> dist(-5, 5);
 
-  for (int32_t record_idx = 0; record_idx < num_records; record_idx++)
+  for (uint32_t record_idx = 0; record_idx < num_records; record_idx++)
   {
     std::cout << "Recording " << record_idx << "'th data" << std::endl;
 
-    for (int32_t input_idx = 0; input_idx < num_inputs; input_idx++)
+    for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
     {
       const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
       assert(input_node->index() == input_idx);
@@ -442,35 +613,35 @@ void RecordMinMax::profileDataWithRandomInputs(const std::string &mode, float mi
 
         // TODO: Input data is copied twice (file -> buffer (input_data) -> interpreter inputs)
         //       We can reduce the copy by directly writing data from file to interpreter inputs
-        _interpreter->writeInputTensor(input_node, input_data.data(),
-                                       input_data.size() * sizeof(float));
+        getInterpreter()->writeInputTensor(input_node, input_data.data(),
+                                           input_data.size() * sizeof(float));
       }
       else if (input_node->dtype() == DataType::BOOL)
       {
         auto input_data = genRandomBoolData(gen, num_elements);
-        _interpreter->writeInputTensor(input_node, input_data.data(),
-                                       input_data.size() * sizeof(uint8_t));
+        getInterpreter()->writeInputTensor(input_node, input_data.data(),
+                                           input_data.size() * sizeof(uint8_t));
       }
       else if (input_node->dtype() == DataType::S32)
       {
         auto input_data = genRandomIntData<int32_t>(gen, num_elements, 0, 100);
-        _interpreter->writeInputTensor(input_node, input_data.data(),
-                                       input_data.size() * sizeof(int32_t));
+        getInterpreter()->writeInputTensor(input_node, input_data.data(),
+                                           input_data.size() * sizeof(int32_t));
       }
       else if (input_node->dtype() == DataType::S64)
       {
         auto input_data = genRandomIntData<int64_t>(gen, num_elements, 0, 100);
-        _interpreter->writeInputTensor(input_node, input_data.data(),
-                                       input_data.size() * sizeof(int64_t));
+        getInterpreter()->writeInputTensor(input_node, input_data.data(),
+                                           input_data.size() * sizeof(int64_t));
       }
     }
 
-    _interpreter->interpret();
+    getInterpreter()->interpret();
   }
 
   std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
 
-  update_quantparam(_observer.get(), mode, min_percentile, max_percentile);
+  update_quantparam(getObserver(), mode, min_percentile, max_percentile);
 }
 
 void RecordMinMax::saveModel(const std::string &output_model_path)
index 53a62ca..a180e24 100644 (file)
@@ -22,6 +22,7 @@
 
 #include <cassert>
 #include <stdexcept>
+#include <limits> // std::numeric_limits
 
 #include <fp16.h>
 
index 852018e..f0ba921 100644 (file)
@@ -140,7 +140,7 @@ add_custom_command(
   COMMAND ${CMAKE_COMMAND} -E echo 'RANDOMIZE_ACTION_PATH=\"$<TARGET_FILE:nnkit_randomize_action>\"' >> ${TEST_CONFIG}
   COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_EXPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_export_action>\"' >> ${TEST_CONFIG}
   COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_IMPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_import_action>\"' >> ${TEST_CONFIG}
-  COMMAND ${CMAKE_COMMAND} -E echo 'MODEL2NNPKG_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh\"' >> ${TEST_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E echo 'MODEL2NNPKG_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.py\"' >> ${TEST_CONFIG}
   COMMAND ${CMAKE_COMMAND} -E echo 'RUNTIME_LIBRARY_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/Product/out/\"' >> ${TEST_CONFIG}
   DEPENDS
     nnkit-run
index c80b00a..56ef070 100755 (executable)
@@ -102,7 +102,7 @@ while [[ $# -ne 0 ]]; do
       --post-arg "${WORKDIR}/${PREFIX}.expected.h5"
 
     # Generate nnpackage model
-    "${MODEL2NNPKG_PATH}" -o "${WORKDIR}" "${WORKDIR}/${PREFIX}.circle"
+    "${MODEL2NNPKG_PATH}" -o "${WORKDIR}" -m "${WORKDIR}/${PREFIX}.circle"
 
     # Copy h5 files into nnpackage
     mkdir -p "${WORKDIR}/${PREFIX}/metadata/tc"
index 4124058..0520ad0 100644 (file)
@@ -27,6 +27,8 @@
 
 #include <fcntl.h>
 
+#include <limits> // std::numeric_limits
+
 namespace
 {
 bool load_text(const cwrap::Fildes &fildes, tf2circle::CustomOpInfoDef &def)
index 7399a43..c50c17f 100644 (file)
@@ -27,6 +27,8 @@
 
 #include <fcntl.h>
 
+#include <limits> // std::numeric_limits
+
 namespace
 {
 bool load_text(const cwrap::Fildes &fildes, tf2tflite::CustomOpInfoDef &def)
index 97aa07f..ded4162 100644 (file)
@@ -76,7 +76,12 @@ list(APPEND TEST_DEPS "${TEST_RUNNER}")
 
 get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
 
-set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_8_0")
+# TODO Run both 2.8.0 and 2.10.1 tests for jammy
+if(ONE_UBUNTU_CODENAME_JAMMY)
+  set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_10_1")
+else()
+  set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_8_0")
+endif()
 
 ###
 ### Generate test.config
index 7c3aab0..c16e601 100644 (file)
@@ -36,15 +36,22 @@ circle::BuiltinOperator get_circle_builtin_code(tflite::BuiltinOperator tfl_bop)
 
 int8_t get_circle_builtin_code(int8_t tfl_bop_i8)
 {
-  tflite::BuiltinOperator tfl_bop = static_cast<tflite::BuiltinOperator>(tfl_bop_i8);
+  return get_circle_builtin_code(static_cast<int32_t>(tfl_bop_i8));
+}
+
+int32_t get_circle_builtin_code(int32_t tfl_bop_i32)
+{
+  tflite::BuiltinOperator tfl_bop = static_cast<tflite::BuiltinOperator>(tfl_bop_i32);
 
   switch (tfl_bop)
   {
 #define TFL_OPERATOR(OP)             \
   case tflite::BuiltinOperator_##OP: \
-    return static_cast<int8_t>(circle::BuiltinOperator_##OP);
+    return static_cast<int32_t>(circle::BuiltinOperator_##OP);
 #include "TFLOperator.lst"
 #undef TFL_OPERATOR
+    case tflite::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES:
+      return static_cast<int32_t>(circle::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES);
     default:
       throw std::runtime_error("tflite2circle: wrong op");
   }
index 5aeeb6e..f346b01 100644 (file)
@@ -31,6 +31,7 @@ namespace tflite2circle
 circle::BuiltinOperator get_circle_builtin_code(tflite::BuiltinOperator tfl_bop);
 
 int8_t get_circle_builtin_code(int8_t tfl_bop_i8);
+int32_t get_circle_builtin_code(int32_t tfl_bop_i32);
 
 /**
  * @brief Returns circle TensorType according to tflite.
index 942c846..72a29fc 100644 (file)
@@ -131,3 +131,23 @@ TFL_OPERATOR(SELECT_V2)
 TFL_OPERATOR(DENSIFY)
 TFL_OPERATOR(SEGMENT_SUM)
 TFL_OPERATOR(BATCH_MATMUL)
+// PLACEHOLDER_FOR_GREATER_OP_CODES = 127
+TFL_OPERATOR(CUMSUM)
+TFL_OPERATOR(CALL_ONCE)
+TFL_OPERATOR(BROADCAST_TO)
+TFL_OPERATOR(RFFT2D)
+TFL_OPERATOR(CONV_3D)
+TFL_OPERATOR(IMAG)
+TFL_OPERATOR(REAL)
+TFL_OPERATOR(COMPLEX_ABS)
+TFL_OPERATOR(HASHTABLE)
+TFL_OPERATOR(HASHTABLE_FIND)
+TFL_OPERATOR(HASHTABLE_IMPORT)
+TFL_OPERATOR(HASHTABLE_SIZE)
+TFL_OPERATOR(REDUCE_ALL)
+TFL_OPERATOR(CONV_3D_TRANSPOSE)
+TFL_OPERATOR(VAR_HANDLE)
+TFL_OPERATOR(READ_VARIABLE)
+TFL_OPERATOR(ASSIGN_VARIABLE)
+TFL_OPERATOR(BROADCAST_ARGS)
+TFL_OPERATOR(RANDOM_STANDARD_NORMAL)
index 93c33cd..38e8686 100644 (file)
@@ -1,5 +1,5 @@
 if (NOT VCONONE_VERSION)
-  set(VCONONE_VERSION 0x0000000000150001)
+  set(VCONONE_VERSION 0x0000000000160001)
   # NOTE order is [build patch minor major]
   # if VCONONE_VERSION is set with -D option, it will be cached
   # you may have to remove cache file if you remove -D option
diff --git a/compiler/visq-unittest/CMakeLists.txt b/compiler/visq-unittest/CMakeLists.txt
new file mode 100644 (file)
index 0000000..4eefa8d
--- /dev/null
@@ -0,0 +1,66 @@
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+unset(VISQ_TEST_DEPS)
+
+###
+### Copy test files
+###
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/test
+                   COMMAND ${CMAKE_COMMAND} -E copy_directory
+                   ${CMAKE_CURRENT_SOURCE_DIR}/test ${CMAKE_CURRENT_BINARY_DIR}/test)
+
+list(APPEND VISQ_TEST_DEPS ${CMAKE_CURRENT_BINARY_DIR}/test)
+
+###
+### Import visqlib module
+###
+get_target_property(VISQ_BIN_PATH visq BINARY_DIR)
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/visqlib
+                   COMMAND ${CMAKE_COMMAND} -E create_symlink
+                   ${VISQ_BIN_PATH}/visqlib ${CMAKE_CURRENT_BINARY_DIR}/visqlib)
+
+list(APPEND VISQ_TEST_DEPS ${CMAKE_CURRENT_BINARY_DIR}/visqlib)
+
+###
+### Import pics module
+###
+get_target_property(PICS_BIN_PATH pics BINARY_DIR)
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/circle
+                   COMMAND ${CMAKE_COMMAND} -E create_symlink
+                   ${PICS_BIN_PATH}/circle ${CMAKE_CURRENT_BINARY_DIR}/circle)
+
+list(APPEND VISQ_TEST_DEPS ${CMAKE_CURRENT_BINARY_DIR}/circle)
+
+###
+### Generate Resources.py
+###
+set(RESOURCE_FILE "${CMAKE_CURRENT_BINARY_DIR}/test/Resources.py")
+
+get_target_property(FP32_MODEL_DIR testDataGenerator BINARY_DIR)
+
+add_custom_command(
+  OUTPUT ${RESOURCE_FILE}
+  COMMAND ${CMAKE_COMMAND} -E echo 'fp32_model_dir=\"${FP32_MODEL_DIR}\"' >> ${RESOURCE_FILE}
+  COMMENT "Generate file to specify resource location"
+)
+
+list(APPEND VISQ_TEST_DEPS ${RESOURCE_FILE})
+
+add_custom_target(visq_unittest ALL DEPENDS ${VISQ_TEST_DEPS})
+
+# Use Python in venv to run unittest with pydot module
+add_test(
+  NAME visq_unittest
+  COMMAND ${NNCC_OVERLAY_DIR}/venv_2_8_0/bin/python -m unittest
+  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+)
+
+if(ONE_UBUNTU_CODENAME_JAMMY)
+  add_test(
+    NAME visq_210_unittest
+    COMMAND ${NNCC_OVERLAY_DIR}/venv_2_10_1/bin/python -m unittest
+    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+  )
+endif(ONE_UBUNTU_CODENAME_JAMMY)
diff --git a/compiler/visq-unittest/README.md b/compiler/visq-unittest/README.md
new file mode 100644 (file)
index 0000000..e90837b
--- /dev/null
@@ -0,0 +1,3 @@
+# visq-unittest
+
+_visq-unittest_ is a module to test visq
diff --git a/compiler/visq-unittest/requires.cmake b/compiler/visq-unittest/requires.cmake
new file mode 100644 (file)
index 0000000..bf7a41f
--- /dev/null
@@ -0,0 +1,3 @@
+require("pics")
+require("common-artifacts")
+require("visq")
diff --git a/compiler/visq-unittest/test/__init__.py b/compiler/visq-unittest/test/__init__.py
new file mode 100644 (file)
index 0000000..0c29109
--- /dev/null
@@ -0,0 +1 @@
+# DO NOT REMOVE THIS FILE
diff --git a/compiler/visq-unittest/test/testDotBuilder.py b/compiler/visq-unittest/test/testDotBuilder.py
new file mode 100644 (file)
index 0000000..b657d60
--- /dev/null
@@ -0,0 +1,44 @@
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Test visqlib.DotBuilder module"""
+
+import unittest
+import pydot
+from pathlib import Path
+
+from visqlib.DotBuilder import DotBuilder
+from test.Resources import fp32_model_dir
+
+
+class VisqDotBuilderTest(unittest.TestCase):
+    def test_dot_builder_wrong_input_file(self):
+        self.assertRaises(FileNotFoundError, DotBuilder, "wrong", "wrong", "wrong",
+                          "wrong")
+
+    def test_dot_builder(self):
+        test_colors = [{"b": 0, "e": 0.5, "c": "green"}, {"b": 0.5, "e": 1, "c": "red"}]
+        test_qerror_map = dict()
+        test_qerror_map["ofm"] = 0.1
+        builder = DotBuilder(fp32_model_dir + "/Add_000.circle", "Add_000.dot", "MPEIR",
+                             test_colors)
+        builder.save(test_qerror_map)
+
+        graph = pydot.graph_from_dot_file("Add_000.dot")[0]
+        # Why 1? 0 is output
+        ofm_node = graph.get_node("\"ofm\"")[1]
+        self.assertEqual("green", ofm_node.get_fillcolor())
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/compiler/visq-unittest/test/testPalette.py b/compiler/visq-unittest/test/testPalette.py
new file mode 100644 (file)
index 0000000..bf5fbb4
--- /dev/null
@@ -0,0 +1,42 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+'''Test visqlib.Palette module'''
+
+import unittest
+
+from visqlib.Palette import YLORRD9Palette
+
+
+class VisqPaletteTest(unittest.TestCase):
+    def test_ylorrd9(self):
+        min_test = [0.0, 0, -100, -100]
+        max_test = [1.0, 500, 100, -10]
+
+        for min_val, max_val in zip(min_test, max_test):
+            palette = YLORRD9Palette(qerror_min=min_val, qerror_max=max_val)
+            cs = palette.colorscheme()
+            self.assertEqual(9, len(cs))
+
+    def test_ylorrd9_wrong_minmax(self):
+        min_test = [0.0, 10]
+        max_test = [0.0, 0]
+
+        for min_val, max_val in zip(min_test, max_test):
+            # min must be less than max
+            self.assertRaises(
+                RuntimeError, YLORRD9Palette, qerror_min=min_val, qerror_max=max_val)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/compiler/visq-unittest/test/testQErrorComputer.py b/compiler/visq-unittest/test/testQErrorComputer.py
new file mode 100644 (file)
index 0000000..1c6b185
--- /dev/null
@@ -0,0 +1,150 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+'''Test visqlib.QErrorComputer module'''
+
+import unittest
+import tempfile
+import numpy as np
+import os
+
+from visqlib.QErrorComputer import MPEIRComputer
+from visqlib.QErrorComputer import MSEComputer
+from visqlib.QErrorComputer import TAEComputer
+
+
+class VisqQErrorComputerTest(unittest.TestCase):
+    def setUp(self):
+        "Called before running each test"
+        self.fp32_dir = tempfile.TemporaryDirectory()
+        self.fq_dir = tempfile.TemporaryDirectory()
+
+    def tearDown(self):
+        "Called after running each test"
+        self.fp32_dir.cleanup()
+        self.fq_dir.cleanup()
+
+    def _setUpSingleTensorData(self):
+        with open(self.fp32_dir.name + '/tensors.txt', 'w') as f:
+            f.write('test')
+        with open(self.fq_dir.name + '/tensors.txt', 'w') as f:
+            f.write('test')
+        os.mkdir(self.fp32_dir.name + '/0')
+        os.mkdir(self.fq_dir.name + '/0')
+        test_data = np.zeros(16)
+        np.save(self.fp32_dir.name + '/0/test.npy', test_data)
+        np.save(self.fq_dir.name + '/0/test.npy', test_data)
+
+    def _setUpTwoTensorData(self):
+        with open(self.fp32_dir.name + '/tensors.txt', 'w') as f:
+            f.write('test')
+        with open(self.fq_dir.name + '/tensors.txt', 'w') as f:
+            f.write('test')
+        os.mkdir(self.fp32_dir.name + '/0')
+        os.mkdir(self.fp32_dir.name + '/1')
+        os.mkdir(self.fq_dir.name + '/0')
+        os.mkdir(self.fq_dir.name + '/1')
+        test_data_one = np.ones(16)
+        test_data_zero = np.zeros(16)
+        np.save(self.fp32_dir.name + '/0/test.npy', test_data_one)
+        np.save(self.fp32_dir.name + '/1/test.npy', test_data_zero)
+        np.save(self.fq_dir.name + '/0/test.npy', test_data_zero)
+        np.save(self.fq_dir.name + '/1/test.npy', test_data_zero)
+        # Golden: (1 + 0) / 2 = 0.5 for MSE
+
+    def _setUpDifferentTensorData(self):
+        # Two fp32 data (test, test2)
+        # One fq data (test)
+        # NOTE When does this happen?
+        # This case can happen because visq ignores nodes that do not affect qerrors.
+        # For example, RESHAPE Op does not affect qerrors, so its fq data is not dumped,
+        # although it is listed in 'tensors.txt'.
+        with open(self.fp32_dir.name + '/tensors.txt', 'w') as f:
+            f.writelines(['test\n', 'test2'])
+        with open(self.fq_dir.name + '/tensors.txt', 'w') as f:
+            f.writelines(['test\n', 'test2'])
+        os.mkdir(self.fp32_dir.name + '/0')
+        os.mkdir(self.fq_dir.name + '/0')
+        test_data = np.zeros(16)
+        np.save(self.fp32_dir.name + '/0/test.npy', test_data)
+        np.save(self.fp32_dir.name + '/0/test2.npy', test_data)
+        np.save(self.fq_dir.name + '/0/test.npy', test_data)
+
+    def test_MPEIR(self):
+        self._setUpSingleTensorData()
+
+        computer = MPEIRComputer(self.fp32_dir.name, self.fq_dir.name)
+        qmap = computer.run()
+        self.assertAlmostEqual(0.0, qmap['test'])
+
+    def test_MPEIR_different_tensors(self):
+        self._setUpDifferentTensorData()
+
+        computer = MPEIRComputer(self.fp32_dir.name, self.fq_dir.name)
+        qmap = computer.run()
+        self.assertAlmostEqual(0.0, qmap['test'])
+
+    def test_MSE(self):
+        self._setUpSingleTensorData()
+
+        computer = MSEComputer(self.fp32_dir.name, self.fq_dir.name)
+        qmap, qmin, qmax = computer.run()
+        self.assertAlmostEqual(0.0, qmap['test'])
+        self.assertAlmostEqual(0.0, qmin)
+        self.assertAlmostEqual(0.0, qmax)
+
+    def test_MSE_two(self):
+        self._setUpTwoTensorData()
+
+        computer = MSEComputer(self.fp32_dir.name, self.fq_dir.name)
+        qmap, qmin, qmax = computer.run()
+        self.assertAlmostEqual(0.5, qmap['test'])
+        self.assertAlmostEqual(0.0, qmin)
+        self.assertAlmostEqual(1.0, qmax)
+
+    def test_MSE_different_tensors(self):
+        self._setUpDifferentTensorData()
+
+        computer = MSEComputer(self.fp32_dir.name, self.fq_dir.name)
+        qmap, qmin, qmax = computer.run()
+        self.assertAlmostEqual(0.0, qmap['test'])
+        self.assertAlmostEqual(0.0, qmin)
+        self.assertAlmostEqual(0.0, qmax)
+
+    def test_TAE(self):
+        self._setUpSingleTensorData()
+
+        computer = TAEComputer(self.fp32_dir.name, self.fq_dir.name)
+        qmap, qmin, qmax = computer.run()
+        self.assertAlmostEqual(0.0, qmap['test'])
+
+    def test_TAE_different_tensors(self):
+        self._setUpDifferentTensorData()
+
+        computer = TAEComputer(self.fp32_dir.name, self.fq_dir.name)
+        qmap, qmin, qmax = computer.run()
+        self.assertAlmostEqual(0.0, qmap['test'])
+        self.assertAlmostEqual(0.0, qmin)
+        self.assertAlmostEqual(0.0, qmax)
+
+    def test_TAE_two(self):
+        self._setUpTwoTensorData()
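+        # Golden: TAE is 16 for the first record (|1 - 0| summed over 16 elements)
+        # and 0 for the second, so qmin = 0, qmax = 16, and qmap['test'] averages to 8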
+        computer = TAEComputer(self.fp32_dir.name, self.fq_dir.name)
+        qmap, qmin, qmax = computer.run()
+        self.assertAlmostEqual(0.0, qmin)
+        self.assertAlmostEqual(8.0, qmap['test'])
+        self.assertAlmostEqual(16.0, qmax)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/compiler/visq-unittest/test/testUtil.py b/compiler/visq-unittest/test/testUtil.py
new file mode 100644 (file)
index 0000000..51f6eb9
--- /dev/null
@@ -0,0 +1,55 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+'''Test visqlib.Util module'''
+
+import unittest
+
+from visqlib.Util import to_filename
+from visqlib.Util import valid_attr
+from visqlib.Util import pretty_float
+
+
+class VisqUtilTest(unittest.TestCase):
+    def test_to_filename(self):
+        data = 'abc/d/e'
+        self.assertEqual('abc_d_e', to_filename(data))
+
+        long_data = 'x' * 300
+        self.assertEqual('x' * 255, to_filename(long_data))
+
+    def test_valid_attr(self):
+        class Test:
+            def __init__(self):
+                self.a = 'a'
+
+        test = Test()
+        self.assertTrue(valid_attr(test, 'a'))
+        self.assertFalse(valid_attr(test, 'b'))
+
+    def test_pretty_float(self):
+        test_configs = [0.123456, 12.3456, [0.123456], {'test': [0.123456]}]
+        three_digits_ans = [0.123, 12.346, [0.123], {'test': [0.123]}]
+        for test_data, ans in zip(test_configs, three_digits_ans):
+            res = pretty_float(test_data, ndigits=3)
+            self.assertEqual(res, ans)
+
+        test_configs = [0.123456, 12.3456, [0.123456], {'test': [0.123456]}]
+        four_digits_ans = [0.1235, 12.3456, [0.1235], {'test': [0.1235]}]
+        for test_data, ans in zip(test_configs, four_digits_ans):
+            res = pretty_float(test_data, ndigits=4)
+            self.assertEqual(res, ans)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/compiler/visq/CMakeLists.txt b/compiler/visq/CMakeLists.txt
new file mode 100644 (file)
index 0000000..0dd1b78
--- /dev/null
@@ -0,0 +1,67 @@
+unset(VISQ_DEPS)
+
+###
+### Set up visq executable
+###
+set(VISQ_FILE "visq")
+set(VISQ_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${VISQ_FILE}")
+set(VISQ_BIN "${CMAKE_CURRENT_BINARY_DIR}/${VISQ_FILE}")
+
+add_custom_command(OUTPUT ${VISQ_BIN}
+  COMMAND ${CMAKE_COMMAND} -E copy "${VISQ_SRC}" "${VISQ_BIN}"
+  DEPENDS ${VISQ_SRC}
+  COMMENT "Generate ${VISQ_BIN}"
+)
+
+list(APPEND VISQ_DEPS ${VISQ_BIN})
+
+###
+### Set up visqlib directory
+###
+set(VISQ_PYTHON_DIR "visqlib")
+set(VISQ_PYTHON_DIR_BIN "${CMAKE_CURRENT_BINARY_DIR}/${VISQ_PYTHON_DIR}")
+
+add_custom_command(OUTPUT ${VISQ_PYTHON_DIR_BIN}
+  COMMAND ${CMAKE_COMMAND} -E make_directory "${VISQ_PYTHON_DIR_BIN}"
+  COMMENT "Generate ${VISQ_PYTHON_DIR_BIN}"
+)
+
+list(APPEND VISQ_DEPS ${VISQ_PYTHON_DIR_BIN})
+
+###
+### Set up Python files
+###
+set(VISQ_PYTHON_FILES DumpFakeQuantFM.py
+                      DumpFP32FM.py
+                      Palette.py
+                      QErrorComputer.py
+                      DotBuilder.py
+                      Util.py)
+
+foreach(VISQ_PYTHON_FILE IN ITEMS ${VISQ_PYTHON_FILES})
+  set(VISQ_PYTHON_FILE_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${VISQ_PYTHON_DIR}/${VISQ_PYTHON_FILE}")
+  set(VISQ_PYTHON_FILE_BIN "${CMAKE_CURRENT_BINARY_DIR}/${VISQ_PYTHON_DIR}/${VISQ_PYTHON_FILE}")
+
+  add_custom_command(OUTPUT ${VISQ_PYTHON_FILE_BIN}
+    COMMAND ${CMAKE_COMMAND} -E copy "${VISQ_PYTHON_FILE_SRC}" "${VISQ_PYTHON_FILE_BIN}"
+    DEPENDS ${VISQ_PYTHON_FILE_SRC}
+    COMMENT "Generate ${VISQ_PYTHON_FILE_BIN}"
+  )
+
+  list(APPEND VISQ_DEPS ${VISQ_PYTHON_FILE_BIN})
+
+endforeach(VISQ_PYTHON_FILE)
+
+add_custom_target(visq ALL DEPENDS ${VISQ_DEPS})
+
+install(FILES ${VISQ_FILE}
+        PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+                    GROUP_READ GROUP_EXECUTE
+                    WORLD_READ WORLD_EXECUTE
+        DESTINATION bin)
+
+install(DIRECTORY ${VISQ_PYTHON_DIR}
+        FILE_PERMISSIONS OWNER_WRITE OWNER_READ
+                          GROUP_READ
+                          WORLD_READ
+        DESTINATION bin)
diff --git a/compiler/visq/README.md b/compiler/visq/README.md
new file mode 100644 (file)
index 0000000..0d0a838
--- /dev/null
@@ -0,0 +1,32 @@
+# visq
+
+_visq_ is a module to generate a json file used to visualize layer-wise quantization errors
+(https://github.com/Samsung/ONE/issues/9694).
+
+## Example
+```bash
+$ ./visq --fp32_circle sample.circle \
+  --q_circle sample.q.circle \
+  --data test.h5 \
+  --mpeir_output sample.mpeir.visq.json \
+  --mse_output sample.mse.visq.json \
+  --tae_output sample.tae.visq.json \
+  --dump_dot_graph
+```
+
+The above command will generate
+- `sample.mpeir.visq.json`: Json file that contains layer-wise mpeir.
+- `sample.mse.visq.json`: Json file that contains layer-wise mse.
+- `sample.tae.visq.json`: Json file that contains layer-wise tae.
+- `sample.mpeir.visq.json.dot`: Dot graph for layer-wise mpeir.
+- `sample.tae.visq.json.dot`: Dot graph for layer-wise tae.
+- `sample.mse.visq.json.dot`: Dot graph for layer-wise mse.
+
+## Quantization error metrics
+
+f: Result of fp32 model
+q: Result of quantized model
+
+- MPEIR: Mean Peak Error to Interval Ratio = Average(max(|f - q|) / (max(f) - min(f) + epsilon)), where epsilon = 1e-6
+- MSE: Mean Squared Error = Average(square(f - q))
+- TAE: Total Absolute Error = Sum(|f - q|)
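+
+For reference, below is a minimal NumPy sketch of the formulas above for a single
+record of one layer's activations (illustrative only; this is not the code visq
+uses internally, and the sample arrays are made-up values):
+
+```python
+import numpy as np
+
+def mpeir(f, q, epsilon=1e-6):
+    # Peak Error to Interval Ratio of a single record
+    return np.max(np.abs(f - q)) / (np.max(f) - np.min(f) + epsilon)
+
+def mse(f, q):
+    return np.mean(np.square(f - q))
+
+def tae(f, q):
+    return np.sum(np.abs(f - q))
+
+f = np.array([0.0, 1.0, 2.0, 4.0])  # fp32 activations of one layer
+q = np.array([0.0, 1.1, 1.9, 4.2])  # fake-quantized activations of the same layer
+print(mpeir(f, q), mse(f, q), tae(f, q))  # approx. 0.05, 0.015, 0.4
+```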
diff --git a/compiler/visq/requires.cmake b/compiler/visq/requires.cmake
new file mode 100644 (file)
index 0000000..fdf32c6
--- /dev/null
@@ -0,0 +1,2 @@
+require("dalgona")
+require("circle-quantizer")
diff --git a/compiler/visq/visq b/compiler/visq/visq
new file mode 100644 (file)
index 0000000..02f63ab
--- /dev/null
@@ -0,0 +1,381 @@
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python                                       # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@"                                     # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255                                                                            # '''
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import subprocess
+import tempfile
+import json
+import os
+import math
+import sys
+
+import h5py as h5
+import numpy as np
+
+from shutil import copyfile
+from pathlib import Path
+
+from visqlib.Palette import YLORRD9Palette
+from visqlib.QErrorComputer import MPEIRComputer, MSEComputer, TAEComputer
+from visqlib.Util import valid_attr, pretty_float
+from visqlib.DotBuilder import DotBuilder
+
+
+def _get_parser():
+    parser = argparse.ArgumentParser(
+        description='Command line tool to visualize layer-wise quantization errors')
+    parser.add_argument(
+        "-f",
+        "--fp32_circle",
+        type=str,
+        help="Path to the fp32 circle model.",
+        required=True)
+    parser.add_argument(
+        "-q",
+        "--q_circle",
+        type=str,
+        help="Path to the quantized circle model.",
+        required=True)
+    parser.add_argument(
+        "-d",
+        "--data",
+        type=str,
+        help=
+        "Path to the data used for inference. Random data will be used if this option is not given.",
+        required=False)
+    parser.add_argument(
+        "--mpeir_output",
+        type=str,
+        help="Path to the output json file (qerror metric = MPEIR).",
+        required=False)
+    parser.add_argument(
+        "--mse_output",
+        type=str,
+        help="Path to the output json file (qerror metric = MSE).",
+        required=False)
+    parser.add_argument(
+        "--tae_output",
+        type=str,
+        help="Path to the output json file (qerror metric = TAE).",
+        required=False)
+    parser.add_argument(
+        "--dump_dot_graph", action="store_true", help="Dump dot graph.", required=False)
+    parser.add_argument(
+        "-b",
+        "--batch_size",
+        type=int,
+        help="Batch size to process large datasets.",
+        required=False)
+
+    return parser
+
+
+def _verify_args(args):
+    """Verify the given arguments"""
+
+    valid_outputs = ['mpeir_output', 'mse_output', 'tae_output']
+
+    # Check if at least one output option is given
+    num_outputs = 0
+    for output_name in valid_outputs:
+        if valid_attr(args, output_name):
+            num_outputs += 1
+
+    if num_outputs == 0:
+        raise RuntimeError("At least one output should be given.")
+
+
+def _run_dalgona(model, data, analysis, save_dir):
+    dir_path = Path(__file__).parent.resolve()
+    dalgona_path = os.path.join(dir_path, 'dalgona')
+    cmd = [dalgona_path]
+    cmd += ['--input_model', str(model)]
+    cmd += ['--analysis', str(analysis)]
+    if data != None:
+        cmd += ['--input_data', str(data)]
+    cmd += ['--analysis_args', str(save_dir)]
+
+    try:
+        subprocess.run(cmd, capture_output=True, check=True)
+    except subprocess.CalledProcessError as e:
+        print('Error raised while running the below command')
+        print(' '.join(cmd))
+        print(e.output)
+        raise
+
+
+# Generate h5 file that contains a dataset of a single batch
+# This is for batch execution of visq
+def gen_batch_h5(inputs_data, inputs_path):
+    # Create h5 file
+    output_path = inputs_path + "/inputs.h5"
+    h5_file = h5.File(output_path, 'w')
+    group = h5_file.create_group("value")
+    group.attrs['desc'] = "Input data"
+
+    for i in range(len(inputs_data)):
+        sample = group.create_group(str(i))
+        for j in range(len(inputs_data[i])):
+            sample.create_dataset(str(j), data=inputs_data[i][j])
+
+    h5_file.close()
+    return output_path
+
+
+# Aggregate intermediate results for a given data
+def advance_on_data(fp32_model, fq_model, data, computers):
+
+    curr_dir = Path(__file__).parent.resolve()
+    dump_fp32_py = curr_dir / 'visqlib' / 'DumpFP32FM.py'
+    dump_fq_py = curr_dir / 'visqlib' / 'DumpFakeQuantFM.py'
+
+    with tempfile.TemporaryDirectory() as fp32_dir, \
+         tempfile.TemporaryDirectory() as fq_dir:
+
+        _run_dalgona(fp32_model, data, dump_fp32_py, fp32_dir)
+        copyfile(fp32_dir + '/tensors.txt', fq_dir + '/tensors.txt')
+        _run_dalgona(fq_model, data, dump_fq_py, fq_dir)
+
+        for metric_key in computers:
+            computers[metric_key][0].advance_on(fp32_dir, fq_dir)
+
+
+def _run_batch(fp32_model, fq_model, data, computers, batch_size):
+    with tempfile.TemporaryDirectory() as inputs_dir:
+        with h5.File(data, 'r') as f:
+            dataset = f['value']
+
+            inputs = []
+            for data_index in dataset:
+                cur_inputs = []
+                for input_index in dataset[data_index]:
+                    d = dataset[data_index][input_index][:]
+                    cur_inputs.append(np.array(d, np.float32))
+
+                inputs.append(cur_inputs)
+                if len(inputs) >= batch_size:
+                    input_path = gen_batch_h5(inputs, inputs_dir)
+                    advance_on_data(fp32_model, fq_model, input_path, computers)
+                    inputs = []
+
+            if len(inputs) > 0:
+                input_path = gen_batch_h5(inputs, inputs_dir)
+                advance_on_data(fp32_model, fq_model, input_path, computers)
+
+
+def _fake_quantize(input_model, output_model):
+    dir_path = Path(__file__).parent.resolve()
+    circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
+    cmd = [circle_quantizer_path]
+    cmd += ['--fake_quantize']
+    cmd += [str(input_model)]
+    cmd += [str(output_model)]
+
+    try:
+        subprocess.run(cmd, check=True)
+    except subprocess.CalledProcessError as e:
+        print('Error raised while running the below command')
+        print(' '.join(cmd))
+        print(e.output)
+        raise
+
+
+# Recursively visit items and check if there is Infinity or NaN
+def _check_float(item):
+    if isinstance(item, dict):
+        for v in item.values():
+            _check_float(v)
+    if isinstance(item, list):
+        for v in item:
+            _check_float(v)
+    if isinstance(item, float):
+        if item == -float('inf') or item == float('inf'):
+            raise RuntimeError('Infinite value detected. Value must be finite')
+        if math.isnan(item):
+            raise RuntimeError('NaN value detected. Value must be a number')
+
+
+def _build_json(model, metric, colorscheme, error):
+    # model: string
+    # metric: string
+    # colorscheme: list ['b': begin, 'e': end, 'c':color]
+    # error: dict {tensor_name:error}
+
+    meta = {}
+    meta["model"] = model
+    meta["metric"] = metric
+    meta["colorscheme"] = pretty_float(colorscheme)
+    result = {}
+    result["meta"] = meta
+    # Why list? To support multiple subgraphs
+    result["error"] = [pretty_float(error)]
+
+    # Invariants
+    _check_float(meta["colorscheme"])
+    _check_float(result["error"])
+    return result
+
+
+def _save_dot(circle_path: str, dot_path: str, metric: str, colors: list, qerror: dict):
+    # circle_path: Path to the circle model (required to build graph)
+    # dot_path: Path to the output dot file
+    # metric: Metric name (ex: MPEIR, MSE)
+    # colors: list [{'b': begin, 'e': end, 'c':color}, ..]
+    # qerror: dict {tensor_name (str) -> qerror (float)}
+    builder = DotBuilder(
+        circle_path=circle_path, dot_path=dot_path, metric=metric, colors=colors)
+
+    builder.save(qerror)
+
+
+def run_on_data_batchwise(fp32_model, q_model, data, dump_dot_graph, computers,
+                          batch_size):
+
+    with tempfile.TemporaryDirectory() as model_dir:
+        fq_model = model_dir + '/fq_model.circle'
+
+        # Step 1. Fake quantize quantized circle model
+        _fake_quantize(q_model, fq_model)
+
+        # process the whole dataset batch by batch
+        _run_batch(fp32_model, fq_model, data, computers, batch_size)
+
+        # compute the final results
+        for metric_key in computers:
+            cur_computer = computers[metric_key][0]
+            output = computers[metric_key][1]
+            if metric_key == 'MPEIR':
+                qerror_map = cur_computer.get_final_result()
+                q_min = 0.0
+                q_max = 1.0
+            elif metric_key == 'MSE' or metric_key == 'TAE':
+                qerror_map, q_min, q_max = cur_computer.get_final_result()
+
+            palette = YLORRD9Palette(qerror_min=q_min, qerror_max=q_max)
+            result = _build_json(
+                metric=metric_key,
+                model=Path(fp32_model).name,
+                colorscheme=palette.colorscheme(),
+                error=qerror_map)
+            with open(output, "w") as f:
+                json.dump(result, f)
+
+            if dump_dot_graph:
+                _save_dot(
+                    circle_path=fp32_model,
+                    dot_path=output + '.dot',
+                    metric=metric_key,
+                    colors=palette.colorscheme(),
+                    qerror=qerror_map)
+
+
+def run_on_data(fp32_model, q_model, data, dump_dot_graph, computers):
+    curr_dir = Path(__file__).parent.resolve()
+    dump_fp32_py = curr_dir / 'visqlib' / 'DumpFP32FM.py'
+    dump_fq_py = curr_dir / 'visqlib' / 'DumpFakeQuantFM.py'
+
+    with tempfile.TemporaryDirectory() as model_dir, \
+         tempfile.TemporaryDirectory() as fp32_dir, \
+         tempfile.TemporaryDirectory() as fq_dir:
+        fq_model = model_dir + '/fq_model.circle'
+
+        # Step 1. Fake quantize quantized circle model
+        _fake_quantize(q_model, fq_model)
+
+        # Step 2. Run dalgona to dump intermediate FMs in FP32 model
+        _run_dalgona(fp32_model, data, dump_fp32_py, fp32_dir)
+
+        # Copy list of dumped tensors
+        copyfile(fp32_dir + '/tensors.txt', fq_dir + '/tensors.txt')
+
+        # Step 3. Run dalgona to dump intermediate FMs in fq model
+        _run_dalgona(fq_model, data, dump_fq_py, fq_dir)
+
+        # Step 4. Read results and compute qerror
+        for metric_key in computers:
+            cur_computer = computers[metric_key][0]
+            output = computers[metric_key][1]
+            cur_computer.advance_on(fp32_dir, fq_dir)
+            if metric_key == 'MPEIR':
+                qerror_map = cur_computer.get_final_result()
+                q_min = 0.0
+                q_max = 1.0
+            elif metric_key == 'MSE' or metric_key == 'TAE':
+                qerror_map, q_min, q_max = cur_computer.get_final_result()
+
+            palette = YLORRD9Palette(qerror_min=q_min, qerror_max=q_max)
+            result = _build_json(
+                metric=metric_key,
+                model=Path(fp32_model).name,
+                colorscheme=palette.colorscheme(),
+                error=qerror_map)
+            with open(output, "w") as f:
+                json.dump(result, f)
+
+            if dump_dot_graph:
+                _save_dot(
+                    circle_path=fp32_model,
+                    dot_path=output + '.dot',
+                    metric=metric_key,
+                    colors=palette.colorscheme(),
+                    qerror=qerror_map)
+
+
+def main():
+    # parse arguments
+    parser = _get_parser()
+    args = parser.parse_args()
+    _verify_args(args)
+
+    fp32_model = args.fp32_circle
+    q_model = args.q_circle
+    data = None
+    if valid_attr(args, 'data'):
+        data = args.data
+    dump_dot_graph = args.dump_dot_graph
+    batch_size = None
+    if valid_attr(args, 'batch_size'):
+        batch_size = args.batch_size
+
+    computers = {}
+    if args.mpeir_output:
+        computers['MPEIR'] = (MPEIRComputer(None, None), args.mpeir_output)
+
+    if args.mse_output:
+        computers['MSE'] = (MSEComputer(None, None), args.mse_output)
+
+    if args.tae_output:
+        computers['TAE'] = (TAEComputer(None, None), args.tae_output)
+
+    if batch_size == None:
+        run_on_data(fp32_model, q_model, data, dump_dot_graph, computers)
+    else:
+        run_on_data_batchwise(fp32_model, q_model, data, dump_dot_graph, computers,
+                              batch_size)
+
+
+if __name__ == '__main__':
+    try:
+        main()
+    except Exception as e:
+        prog_name = os.path.basename(__file__)
+        print(f"{prog_name}: {type(e).__name__}: " + str(e), file=sys.stderr)
+        sys.exit(255)
diff --git a/compiler/visq/visqlib/DotBuilder.py b/compiler/visq/visqlib/DotBuilder.py
new file mode 100644 (file)
index 0000000..a6afb96
--- /dev/null
@@ -0,0 +1,165 @@
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pydot
+import math
+
+from circle import Model
+
+from pathlib import Path
+
+
+# Return the name of the tensor
+def _tensor_name(graph, tid):
+    return graph.Tensors(tid).Name().decode('utf-8')
+
+
+# Return double-quoted string
+def _quote(string: str):
+    return '"' + string + '"'
+
+
+# Class to build dot graph from qerror_map
+class DotBuilder:
+    def __init__(self, circle_path: str, dot_path: str, metric: str, colors: str):
+        '''
+        circle_path: Path to the fp32 circle model (required to build graph)
+        dot_path: Path to the saved dot file
+        metric: Metric name (ex: MPEIR, MSE)
+        colors: List of color slots [{'b': begin, 'e': end, 'c':color}, ..]
+        '''
+        with open(circle_path, 'rb') as f:
+            self._model = Model.Model.GetRootAsModel(f.read())
+
+        if self._model.SubgraphsLength() != 1:
+            raise RuntimeError("Only one subgraph is supported")
+
+        self._name = Path(circle_path).name
+        self._dot_path = dot_path
+        self._metric = metric
+        self._colors = colors
+
+    # Return color (RGB) for the given qerror
+    def _get_color(self, qerror: float):
+        # Find a slot where qerror is in the range of [begin, end]
+        for slot in self._colors:
+            begin = slot['b']
+            end = slot['e']
+            if (qerror > begin or math.isclose(
+                    qerror, begin)) and (qerror < end or math.isclose(qerror, end)):
+                return slot['c']
+
+        # Use the first color if qerror is smaller than the first begin
+        if qerror < self._colors[0]['b']:
+            return self._colors[0]['c']
+
+        # Use the last color if qerror is larger than the last end
+        if qerror > self._colors[-1]['e']:
+            return self._colors[-1]['c']
+
+        raise RuntimeError("Color ID not found. QError: " + str(qerror))
+
+    # Generate a pydot.Node object which represents the color table
+    def _gen_color_table(self):
+        color_table = "< <table>"
+        for slot in self._colors:
+            begin = slot['b']
+            end = slot['e']
+            color = slot['c']
+            color_table += "<tr> <td bgcolor=\""
+            color_table += color
+            color_table += "\">"
+            color_table += self._metric + ": {:.4f}".format(
+                begin) + " ~ " + "{:.4f}".format(end)
+            color_table += "</td> </tr>"
+        color_table += "</table> >"
+        return pydot.Node("color_table", shape='none', label=color_table)
+
+    # Save dot graph to self._dot_path
+    def save(self, qerror_map: dict):
+        '''
+        qerror_map: Dictionary of {op_name (str) -> qerror (float)}
+        '''
+        # Build graph
+        DOT = pydot.Dot(self._name, graph_type="digraph")
+
+        # Add color table
+        DOT.add_node(self._gen_color_table())
+
+        # Dictionary from output tensor name to Op name {str -> str}
+        # This dict is for handling Ops with multiple output tensors.
+        # We use the first output tensor's name as the Op name, following
+        # the implementation of luci IR
+        output_to_op = dict()
+
+        graph = self._model.Subgraphs(0)
+
+        # Add Input nodes
+        for i in range(graph.InputsLength()):
+            name = _tensor_name(graph, graph.Inputs(i))
+            output_to_op[name] = name
+            DOT.add_node(pydot.Node(_quote(name)))
+
+        # Add Output nodes
+        for i in range(graph.OutputsLength()):
+            name = _tensor_name(graph, graph.Outputs(i))
+            output_to_op[name] = name
+            DOT.add_node(pydot.Node(_quote(name)))
+
+        # Add Edges
+        for i in range(graph.OperatorsLength()):
+            op = graph.Operators(i)
+            # Skip Ops that have no output tensor (their name cannot be derived)
+            if op.OutputsLength() == 0:
+                continue
+            # Name of the first output tensor
+            op_name = _tensor_name(graph, op.Outputs(0))
+
+            if op_name in qerror_map:
+                qerror = qerror_map[op_name]
+                node = pydot.Node(
+                    _quote(op_name),
+                    style="filled",
+                    fillcolor=self._get_color(qerror),
+                    xlabel=self._metric + ": {:.4f}".format(qerror))
+            else:
+                # qerror_map does not have qerror info for the op. Color gray.
+                # When does this happen? visq does not collect qerror info for some Ops.
+                # For example, Reshape Op does not change values, so its qerror
+                # info is not collected.
+                node = pydot.Node(_quote(op_name), style="filled", fillcolor='gray')
+
+            DOT.add_node(node)
+
+            for output_idx in range(op.OutputsLength()):
+                output_name = _tensor_name(graph, op.Outputs(output_idx))
+                # Set Op name as the first output tensor name (op_name)
+                output_to_op[output_name] = op_name
+
+            for j in range(op.InputsLength()):
+                op_input = op.Inputs(j)
+
+                # Optional input case (ex: For TConv with no bias, bias is -1)
+                if op_input == -1:
+                    continue
+
+                op_input_name = _tensor_name(graph, op_input)
+                if op_input_name not in output_to_op:
+                    continue
+
+                # Use the saved name to handle multiple outputs
+                op_input_name = output_to_op[op_input_name]
+                DOT.add_edge(pydot.Edge(_quote(op_input_name), _quote(op_name)))
+
+        DOT.write(self._dot_path)
diff --git a/compiler/visq/visqlib/DumpFP32FM.py b/compiler/visq/visqlib/DumpFP32FM.py
new file mode 100644 (file)
index 0000000..d5b7545
--- /dev/null
@@ -0,0 +1,54 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Script that dumps FM of FP32 model
+# NOTE This script runs on dalgona
+
+import numpy as np
+
+from pathlib import Path
+from Util import to_filename
+
+
+# Dump FP32 model's intermediate FM data and their names
+#
+# Before
+# self._dir/
+#
+# After
+# self._dir/
+#  tensors.txt
+#  <TENSOR_NAME>.npy
+# NOTE TENSOR_NAME is transformed by to_filename
+class DumpFP32FM:
+    def StartAnalysis(self, args):
+        self._dir = Path(args)
+        self._num_data = 0
+        self._tensor_names = set()
+
+    def EndNetworkExecution(self, outputs):
+        self._num_data += 1
+
+    def DefaultOpPost(self, name, opcode, inputs, output):
+        # Save intermediate FM into tensor_name.npy
+        data_path = self._dir / str(self._num_data)
+        data_path.mkdir(parents=False, exist_ok=True)
+        np.save(str(data_path / to_filename(name)), output['data'])
+        self._tensor_names.add(name)
+
+    def EndAnalysis(self):
+        # Save tensor names line by line
+        with open(self._dir / 'tensors.txt', 'w') as f:
+            for name in self._tensor_names:
+                f.write("%s\n" % name)
diff --git a/compiler/visq/visqlib/DumpFakeQuantFM.py b/compiler/visq/visqlib/DumpFakeQuantFM.py
new file mode 100644 (file)
index 0000000..0248428
--- /dev/null
@@ -0,0 +1,66 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Script that dumps dequantized FM
+# NOTE This script runs on dalgona
+
+import numpy as np
+
+from pathlib import Path
+from Util import to_filename
+
+# Fake-quantized Op has the postfix of fq_postfix
+# TODO Remove coupling with fake quantization codes
+fq_postfix = '_FQ_Quantize_FQ_Dequantize'
+
+
+# Return the original name before fake quantization
+# Return None if name is not from fake quantization (Dequantize Op in original model)
+# TODO Handle the case when the original node's name contains fq_postfix
+def _name_before_fq(name):
+    if not name.endswith(fq_postfix):
+        return None
+
+    return name[0:name.find(fq_postfix)]
+
+
+# Dump fake-quantized model's intermediate FM data according to tensors.txt
+#
+# Before
+# self._dir/
+#  tensors.txt
+#
+# After
+# self._dir/
+#  tensors.txt
+#  <TENSOR_NAME>.npy
+# NOTE TENSOR_NAME is transformed by to_filename
+class DumpFakeQuantFM:
+    def StartAnalysis(self, args):
+        self._dir = Path(args)
+        self._num_data = 0
+        with open(self._dir / 'tensors.txt') as f:
+            self._target_tensors = set([line.rstrip() for line in f])
+
+    def EndNetworkExecution(self, outputs: list):
+        self._num_data += 1
+
+    # TODO Use DequantizePost when dalgona supports it
+    def DefaultOpPost(self, name, opcode, inputs, output):
+        if opcode == 'Dequantize':
+            orig_name = _name_before_fq(name)
+            if orig_name in self._target_tensors:
+                data_path = self._dir / str(self._num_data)
+                data_path.mkdir(parents=False, exist_ok=True)
+                np.save(str(data_path / to_filename(orig_name)), output['data'])
diff --git a/compiler/visq/visqlib/Palette.py b/compiler/visq/visqlib/Palette.py
new file mode 100644 (file)
index 0000000..9df7223
--- /dev/null
@@ -0,0 +1,70 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Class to save colorscheme
+class Palette:
+    # Child class must implement __init__ to fill the below members
+    def __init__(self):
+        # Element of self._slots has [lower bound, upper bound] of qerrors to decide a color
+        self._slots = []
+        # Element of self._colors has rgb values in string format
+        self._colors = []
+        raise NotImplementedError('Child class must implement __init__')
+
+    # Return color scheme as a list of objects
+    # Each object has the following attributes
+    # b: begin qerror
+    # e: end qerror
+    # c: color (in RGB string)
+    def colorscheme(self):
+        cs = []
+        for slot, color in zip(self._slots, self._colors):
+            cs.append({"b": slot[0], "e": slot[1], "c": color})
+        return cs
+
+
+# Ranges of slots are defined by qerror_min/qerror_max
+# Each slot has a uniform range
+# For example, if qerror_min = 0.0, qerror_max = 1.0, number of colors = 10
+# Ranges of slots will be as follows.
+# [0.0, 0.1], [0.1, 0.2], [0.2, 0.3] ... [0.8, 0.9], [0.9, 1.0]
+class UniformPalette(Palette):
+    def __init__(self, qerror_min, qerror_max, colors):
+        self._colors = colors
+        self._slots = []
+        qerror_range = qerror_max - qerror_min
+        num_colors = len(self._colors)
+        for i in range(num_colors):
+            lower_bound = qerror_min + i * (qerror_range / num_colors)
+            upper_bound = qerror_min + (i + 1) * (qerror_range / num_colors)
+
+            self._slots.append([lower_bound, upper_bound])
+
+        # Invariant
+        assert len(self._slots) == num_colors
+
+
+# Palette for ylorrd9 colorscheme
+class YLORRD9Palette(UniformPalette):
+    def __init__(self, qerror_min, qerror_max):
+        if qerror_min >= qerror_max:
+            raise RuntimeError('min must be less than max')
+
+        # From https://colorbrewer2.org/#type=sequential&scheme=YlOrRd&n=9
+        colors = [
+            "#ffffcc", "#ffeda0", "#fed976", "#feb24c", "#fd8d3c", "#fc4e2a", "#e31a1c",
+            "#bd0026", "#800026"
+        ]
+        super().__init__(qerror_min, qerror_max, colors)
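
A minimal usage sketch of the palette classes above; the qerror bounds are arbitrary example values, and the import assumes `visqlib` is on the Python path:

```python
# Sketch: build a 9-color palette over an example qerror range [0.0, 0.5].
from visqlib.Palette import YLORRD9Palette

palette = YLORRD9Palette(qerror_min=0.0, qerror_max=0.5)
for slot in palette.colorscheme():
    # Each slot maps a [b, e] qerror interval to a color string c
    print(slot["b"], slot["e"], slot["c"])
```
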
diff --git a/compiler/visq/visqlib/QErrorComputer.py b/compiler/visq/visqlib/QErrorComputer.py
new file mode 100644 (file)
index 0000000..c60556f
--- /dev/null
@@ -0,0 +1,200 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import glob
+import numpy as np
+
+from pathlib import Path
+from visqlib.Util import to_filename
+
+
+class QErrorComputer:
+    def __init__(self, fp32_dir, fq_dir):
+        self._fp32_dir = fp32_dir
+        self._fq_dir = fq_dir
+        self.qerror_map = dict()
+        self._num_processed_data = 0
+
+    def collect_data_path(self, fp32_dir, fq_dir):
+        # Assumption: FM data are saved as follows
+        #
+        # fp32_dir/
+        #   tensors.txt
+        #   <DATA_INDEX>/
+        #     <TENSOR_NAME>.npy
+        #
+        # fq_dir/
+        #   tensors.txt
+        #   <DATA_INDEX>/
+        #     <TENSOR_NAME>.npy
+        self._num_data = len(list(filter(os.path.isdir, glob.glob(fp32_dir + '/*'))))
+        if self._num_data != len(list(filter(os.path.isdir, glob.glob(fq_dir + '/*')))):
+            raise RuntimeError("Number of data mismatches")
+
+        self._num_processed_data += self._num_data
+
+        self._filename_to_tensor = dict()
+        with open(Path(fp32_dir) / 'tensors.txt') as f:
+            tensors = set([line.rstrip() for line in f])
+            for tensor in tensors:
+                # Check if filename is unique
+                # If this assertion fails, the name-finding logic needs to be fixed
+                assert to_filename(tensor) not in self._filename_to_tensor
+                self._filename_to_tensor[to_filename(tensor)] = tensor
+
+        # Save paths to fp32 data and fq data for each tensor
+        # dict
+        # {
+        #   <tensor_name>: (fp32_path, fq_path),
+        #   <tensor_name>: (fp32_path, fq_path),
+        #   ...
+        # }
+        data_paths = dict()
+        for data_idx in range(self._num_data):
+            fp32_results = glob.glob(fp32_dir + '/' + str(data_idx) + '/*.npy')
+            for fp32_data_path in fp32_results:
+                fp32_path = Path(fp32_data_path)
+                fq_data_path = fq_dir + '/' + str(data_idx) + '/' + fp32_path.with_suffix(
+                    '.npy').name
+                fq_path = Path(fq_data_path)
+                filename = fp32_path.stem
+                tensor_name = self._filename_to_tensor[filename]
+
+                # Only save the tensors which have both fp32 data and fq data
+                if fq_path.is_file() and fp32_path.is_file():
+                    if tensor_name in data_paths:
+                        data_paths[tensor_name].append((fp32_data_path, fq_data_path))
+                    else:
+                        data_paths[tensor_name] = [(fp32_data_path, fq_data_path)]
+
+        return data_paths
+
+    def run(self):
+        '''Return qerror map (dict: tensor_name(string) -> qerror(float)).'''
+        raise NotImplementedError  # Child must implement this
+
+
+class MPEIRComputer(QErrorComputer):
+    def __init__(self, fp32_dir, fq_dir):
+        super().__init__(fp32_dir, fq_dir)
+
+    # Incrementally compute Qerror while traversing all data in fp32_dir and fq_dir
+    def advance_on(self, fp32_dir, fq_dir):
+        data_paths = self.collect_data_path(fp32_dir, fq_dir)
+        for tensor_name, data_path in data_paths.items():
+            for (fp32_data_path, fq_data_path) in data_path:
+                fp32_data = np.load(fp32_data_path)
+                fq_data = np.load(fq_data_path)
+
+                diff = np.absolute(fp32_data - fq_data).reshape(-1)
+
+                fp32_min = np.min(fp32_data.reshape(-1))
+                fp32_max = np.max(fp32_data.reshape(-1))
+
+                # Peak Error-to-Interval Ratio (PEIR)
+                # NOTE: PEIR is an analogue of PSNR (Peak Signal to Noise Ratio)
+                PEAK_ERROR = np.max(diff)
+                INTERVAL = fp32_max - fp32_min
+
+                # If INTERVAL is 0, PEIR becomes NaN.
+                # To prevent this, relaxed PEIR with epsilon(10^(-6)) is used.
+                rPEIR = PEAK_ERROR / (INTERVAL + 0.000001)
+
+                if tensor_name in self.qerror_map:
+                    self.qerror_map[tensor_name] += rPEIR
+                else:
+                    self.qerror_map[tensor_name] = rPEIR
+
+    def get_final_result(self):
+        qerror_map = dict()
+        for tensor_name, acc in self.qerror_map.items():
+            qerror_map[tensor_name] = acc / self._num_processed_data
+
+        return qerror_map
+
+    def run(self):
+        self.advance_on(self._fp32_dir, self._fq_dir)
+        return self.get_final_result()
+
+
+class MSEComputer(QErrorComputer):
+    def __init__(self, fp32_dir, fq_dir):
+        super().__init__(fp32_dir, fq_dir)
+        self.qerror_min = float('inf')
+        self.qerror_max = -self.qerror_min
+
+    # Incrementally compute Qerror while traversing all data in fp32_dir and fq_dir
+    def advance_on(self, fp32_dir, fq_dir):
+        data_paths = self.collect_data_path(fp32_dir, fq_dir)
+        for tensor_name, data_path in data_paths.items():
+            for (fp32_data_path, fq_data_path) in data_path:
+                fp32_data = np.load(fp32_data_path)
+                fq_data = np.load(fq_data_path)
+
+                MSE = np.square(fp32_data - fq_data).mean()
+
+                if tensor_name in self.qerror_map:
+                    self.qerror_map[tensor_name] += MSE
+                else:
+                    self.qerror_map[tensor_name] = MSE
+
+                self.qerror_min = min(MSE, self.qerror_min)
+                self.qerror_max = max(MSE, self.qerror_max)
+
+    def get_final_result(self):
+        qerror_map = dict()
+        for tensor_name, acc in self.qerror_map.items():
+            qerror_map[tensor_name] = acc / self._num_processed_data
+
+        return qerror_map, self.qerror_min, self.qerror_max
+
+    def run(self):
+        self.advance_on(self._fp32_dir, self._fq_dir)
+        return self.get_final_result()
+
+
+class TAEComputer(QErrorComputer):  # Total Absolute Error (TAE)
+    def __init__(self, fp32_dir, fq_dir):
+        super().__init__(fp32_dir, fq_dir)
+        self.total_error = 0
+        self.qerror_min = float('inf')
+        self.qerror_max = -self.qerror_min
+
+    def advance_on(self, fp32_dir, fq_dir):
+        data_paths = self.collect_data_path(fp32_dir, fq_dir)
+        for tensor_name, data_path in data_paths.items():
+            for (fp32_data_path, fq_data_path) in data_path:
+                fp32_data = np.load(fp32_data_path)
+                fq_data = np.load(fq_data_path)
+
+                total_error = np.sum(np.abs(fp32_data - fq_data))
+
+                if tensor_name in self.qerror_map:
+                    self.qerror_map[tensor_name] += total_error
+                else:
+                    self.qerror_map[tensor_name] = total_error
+
+                self.qerror_min = min(total_error, self.qerror_min)
+                self.qerror_max = max(total_error, self.qerror_max)
+
+    def get_final_result(self):
+        qerror_map = dict()
+        for tensor_name, acc in self.qerror_map.items():
+            qerror_map[tensor_name] = acc / self._num_processed_data
+        return qerror_map, self.qerror_min, self.qerror_max
+
+    def run(self):
+        self.advance_on(self._fp32_dir, self._fq_dir)
+        return self.get_final_result()
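
A usage sketch of the computers above, assuming `fp32_dir` and `fq_dir` follow the dump layout described in `collect_data_path` (the directory names here are examples):

```python
# Sketch: compute per-tensor quantization errors from dumped FM data.
from visqlib.QErrorComputer import MPEIRComputer, MSEComputer

fp32_dir = './fp32_dump'  # example path: tensors.txt + <DATA_INDEX>/<TENSOR_NAME>.npy
fq_dir = './fq_dump'      # example path: same layout for the fake-quantized model

mpeir_map = MPEIRComputer(fp32_dir, fq_dir).run()                # tensor_name -> mean PEIR
mse_map, mse_min, mse_max = MSEComputer(fp32_dir, fq_dir).run()  # plus min/max for coloring
```
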
diff --git a/compiler/visq/visqlib/Util.py b/compiler/visq/visqlib/Util.py
new file mode 100644 (file)
index 0000000..f5b6c9a
--- /dev/null
@@ -0,0 +1,37 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Convert a tensor name into one compatible with the Linux file system
+# '/' is replaced with '_'
+# Names longer than 255 characters are truncated to their last 255 characters
+def to_filename(tensor_name):
+    assert isinstance(tensor_name, str)
+    return tensor_name.replace('/', '_')[-255:]
+
+
+# Check if args has attribute attr and its value is truthy
+def valid_attr(args, attr):
+    return hasattr(args, attr) and getattr(args, attr)
+
+
+# Recursively visit items and round floats with ndigits
+def pretty_float(item, ndigits=4):
+    if isinstance(item, dict):
+        return {k: pretty_float(v, ndigits) for k, v in item.items()}
+    if isinstance(item, list):
+        return [pretty_float(x, ndigits) for x in item]
+    if isinstance(item, float):
+        return round(item, ndigits)
+    return item
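
A short sketch of the helpers above; the tensor name and values are examples:

```python
from visqlib.Util import to_filename, pretty_float

print(to_filename('block1/conv/Relu'))         # block1_conv_Relu
print(pretty_float({'mpeir': [0.123456789]}))  # {'mpeir': [0.1235]}
```
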
index 9b3cd4f..ce328b6 100644 (file)
@@ -33,4 +33,4 @@ target_link_libraries(${TEST_CKER} nnfw_coverage)
 target_link_libraries(${TEST_CKER} gtest gtest_main ${LIB_PTHREAD})
 
 add_test(${TEST_CKER} ${TEST_CKER})
-install(TARGETS ${TEST_CKER} DESTINATION unittest_standalone)
+install(TARGETS ${TEST_CKER} DESTINATION unittest)
index c487581..c8c2fd9 100644 (file)
@@ -112,6 +112,39 @@ inline void Dequantize(const Shape &input_shape, const int8_t *input_data,
   }
 }
 
+inline void Dequantize(const Shape &input_shape, const int16_t *input_data,
+                       const Shape &output_shape, float *output_data, const float scale,
+                       const int32_t zero_point)
+{
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+  int i = 0;
+#ifdef USE_NEON
+  const float32x4_t scale_dup = vdupq_n_f32(static_cast<float>(scale));
+  const float32x4_t zero_times_scale_dup = vdupq_n_f32(static_cast<float>(-zero_point * scale));
+  for (; i <= flat_size - 8; i += 8)
+  {
+    const int16x4_t input_s16_low = vld1_s16(input_data + i);
+    const int16x4_t input_s16_high = vld1_s16(input_data + i + 4);
+    const int32x4_t val_low = vmovl_s16(input_s16_low);
+    const int32x4_t val_high = vmovl_s16(input_s16_high);
+
+    float32x4_t result_low, result_high;
+    ScaleWithNewZeroPoint(val_low, scale_dup, zero_times_scale_dup, &result_low);
+    ScaleWithNewZeroPoint(val_high, scale_dup, zero_times_scale_dup, &result_high);
+
+    vst1q_f32(output_data + i, result_low);
+    vst1q_f32(output_data + i + 4, result_high);
+  }
+#endif // NEON
+  for (; i < flat_size; ++i)
+  {
+    const int32_t val = input_data[i];
+    const float result = static_cast<float>(scale * (val - zero_point));
+    output_data[i] = result;
+  }
+}
+
 } // namespace cker
 } // namespace nnfw
 
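
The scalar tail above is the reference formula; the same int16 dequantization can be sketched in numpy (scale and zero point are example values):

```python
import numpy as np

# Reference dequantization: real = scale * (q - zero_point)
scale, zero_point = 0.05, 3
q = np.array([-7, 0, 3, 100], dtype=np.int16)
real = scale * (q.astype(np.float32) - zero_point)
print(real)  # approximately [-0.5, -0.15, 0.0, 4.85]
```
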
index 3d3e59e..e990772 100644 (file)
@@ -29,12 +29,39 @@ namespace nnfw
 namespace cker
 {
 
+/**
+ * @brief Internal scalar_logistic_op operation struct
+ *
+ * @note  Recent Eigen3 scalar_logistic_op returns an invalid value on ARM32 when
+ *        the input value is float 88 (expected: 1, actual: 0).
+ *        As a workaround, we use the old version of the scalar_logistic_op internal struct.
+ *        TODO Remove this workaround
+ */
+template <typename T> struct scalar_logistic_op
+{
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T &x) const
+  {
+    const T one = T(1);
+    return one / (one + Eigen::numext::exp(-x));
+  }
+
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet &x) const
+  {
+    const Packet one = Eigen::internal::pset1<Packet>(T(1));
+    return pdiv(one, padd(one, pexp(pnegate(x))));
+  }
+};
+
 inline void Logistic(const Shape &input_shape, const float *input_data, const Shape &output_shape,
                      float *output_data)
 {
   auto input_map = MapAsVector(input_data, input_shape);
   auto output_map = MapAsVector(output_data, output_shape);
-  output_map.array() = input_map.array().unaryExpr(Eigen::internal::scalar_logistic_op<float>());
+
+  // Use the old version of scalar_logistic_op
+  output_map.array() = input_map.array().unaryExpr(nnfw::cker::scalar_logistic_op<float>());
 }
 
 } // namespace cker
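
The workaround keeps the standard logistic definition, logistic(x) = 1 / (1 + exp(-x)). A quick numpy check of the value mentioned in the note (input 88 saturates to 1 in float32, which is what the workaround preserves on ARM32):

```python
import numpy as np

# logistic(88) saturates to 1.0 in float32 arithmetic.
x = np.float32(88.0)
y = np.float32(1.0) / (np.float32(1.0) + np.exp(-x))
print(y)  # 1.0
```
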
index 8e5fc22..7292a19 100644 (file)
@@ -18,6 +18,7 @@
 #ifndef __NNFW_CKER_QUANTIZE_H__
 #define __NNFW_CKER_QUANTIZE_H__
 
+#include "cker/operation/Round.h"
 #include "cker/Shape.h"
 #include "cker/Types.h"
 #include "cker/Utils.h"
@@ -45,6 +46,164 @@ inline void Quantize(const Shape &input_shape, const InputT *input_data, const S
   }
 }
 
+template <>
+inline void Quantize(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+                     int8_t *output_data, const float scale, const int32_t zero_point)
+{
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  static constexpr int32_t min_val = std::numeric_limits<int8_t>::min();
+  static constexpr int32_t max_val = std::numeric_limits<int8_t>::max();
+
+  int i = 0;
+#ifdef USE_NEON
+  const float32x4_t reverse_scale_dup = vdupq_n_f32(1.0f / scale);
+  const int32x4_t zero_point_dup = vdupq_n_s32(zero_point);
+  const int32x4_t min_val_dup = vdupq_n_s32(min_val);
+  const int32x4_t max_val_dup = vdupq_n_s32(max_val);
+
+  for (; i <= flat_size - 8; i += 8)
+  {
+    const float *src_data_ptr = input_data + i;
+    float32x4_t input_val_0 = vld1q_f32(src_data_ptr);
+    float32x4_t input_val_1 = vld1q_f32(src_data_ptr + 4);
+
+    input_val_0 = vmulq_f32(input_val_0, reverse_scale_dup);
+    input_val_1 = vmulq_f32(input_val_1, reverse_scale_dup);
+
+    int32x4_t casted_val_0 = RoundToNearest(input_val_0);
+    int32x4_t casted_val_1 = RoundToNearest(input_val_1);
+
+    casted_val_0 = vaddq_s32(casted_val_0, zero_point_dup);
+    casted_val_1 = vaddq_s32(casted_val_1, zero_point_dup);
+
+    // Clamp the values to fit the target type's range.
+    casted_val_0 = vmaxq_s32(casted_val_0, min_val_dup);
+    casted_val_1 = vmaxq_s32(casted_val_1, min_val_dup);
+    casted_val_0 = vminq_s32(casted_val_0, max_val_dup);
+    casted_val_1 = vminq_s32(casted_val_1, max_val_dup);
+
+    const int16x4_t narrowed_val_0 = vmovn_s32(casted_val_0);
+    const int16x4_t narrowed_val_1 = vmovn_s32(casted_val_1);
+    const int16x8_t combined_val = vcombine_s16(narrowed_val_0, narrowed_val_1);
+    const int8x8_t combined_val_narrowed = vmovn_s16(combined_val);
+    vst1_s8(output_data + i, combined_val_narrowed);
+  }
+#endif // NEON
+
+  for (; i < flat_size; ++i)
+  {
+    const float val = input_data[i];
+    const int32_t unclamped = static_cast<int32_t>(round(val / scale)) + zero_point;
+    const int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
+    output_data[i] = clamped;
+  }
+}
+
+template <>
+inline void Quantize(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+                     uint8_t *output_data, const float scale, const int32_t zero_point)
+{
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  static constexpr int32_t min_val = std::numeric_limits<uint8_t>::min();
+  static constexpr int32_t max_val = std::numeric_limits<uint8_t>::max();
+
+  int i = 0;
+#ifdef USE_NEON
+  const float32x4_t reverse_scale_dup = vdupq_n_f32(1.0f / scale);
+  const int32x4_t zero_point_dup = vdupq_n_s32(zero_point);
+  const int32x4_t min_val_dup = vdupq_n_s32(min_val);
+  const int32x4_t max_val_dup = vdupq_n_s32(max_val);
+
+  for (; i <= flat_size - 8; i += 8)
+  {
+    const float *src_data_ptr = input_data + i;
+    float32x4_t input_val_0 = vld1q_f32(src_data_ptr);
+    float32x4_t input_val_1 = vld1q_f32(src_data_ptr + 4);
+
+    input_val_0 = vmulq_f32(input_val_0, reverse_scale_dup);
+    input_val_1 = vmulq_f32(input_val_1, reverse_scale_dup);
+
+    int32x4_t casted_val_0 = RoundToNearest(input_val_0);
+    int32x4_t casted_val_1 = RoundToNearest(input_val_1);
+
+    casted_val_0 = vaddq_s32(casted_val_0, zero_point_dup);
+    casted_val_1 = vaddq_s32(casted_val_1, zero_point_dup);
+
+    // Clamp the values to fit the target type's range.
+    casted_val_0 = vmaxq_s32(casted_val_0, min_val_dup);
+    casted_val_1 = vmaxq_s32(casted_val_1, min_val_dup);
+    casted_val_0 = vminq_s32(casted_val_0, max_val_dup);
+    casted_val_1 = vminq_s32(casted_val_1, max_val_dup);
+
+    const uint16x4_t narrowed_val_0 = vqmovun_s32(casted_val_0);
+    const uint16x4_t narrowed_val_1 = vqmovun_s32(casted_val_1);
+    const uint16x8_t combined_val = vcombine_u16(narrowed_val_0, narrowed_val_1);
+    const uint8x8_t combined_val_narrowed = vmovn_u16(combined_val);
+    vst1_u8(output_data + i, combined_val_narrowed);
+  }
+#endif // NEON
+
+  for (; i < flat_size; ++i)
+  {
+    const float val = input_data[i];
+    const int32_t unclamped = static_cast<int32_t>(round(val / scale)) + zero_point;
+    const int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
+    output_data[i] = clamped;
+  }
+}
+
+template <>
+inline void Quantize(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+                     int16_t *output_data, const float scale, const int32_t zero_point)
+{
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  static constexpr int32_t min_val = std::numeric_limits<int16_t>::min();
+  static constexpr int32_t max_val = std::numeric_limits<int16_t>::max();
+
+  int i = 0;
+#ifdef USE_NEON
+  const float32x4_t reverse_scale_dup = vdupq_n_f32(1.0f / scale);
+  const int32x4_t zero_point_dup = vdupq_n_s32(zero_point);
+  const int32x4_t min_val_dup = vdupq_n_s32(min_val);
+  const int32x4_t max_val_dup = vdupq_n_s32(max_val);
+
+  for (; i <= flat_size - 8; i += 8)
+  {
+    const float *src_data_ptr = input_data + i;
+    float32x4_t input_val_0 = vld1q_f32(src_data_ptr);
+    float32x4_t input_val_1 = vld1q_f32(src_data_ptr + 4);
+
+    input_val_0 = vmulq_f32(input_val_0, reverse_scale_dup);
+    input_val_1 = vmulq_f32(input_val_1, reverse_scale_dup);
+
+    int32x4_t casted_val_0 = RoundToNearest(input_val_0);
+    int32x4_t casted_val_1 = RoundToNearest(input_val_1);
+
+    casted_val_0 = vaddq_s32(casted_val_0, zero_point_dup);
+    casted_val_1 = vaddq_s32(casted_val_1, zero_point_dup);
+
+    // Clamp the values to fit the target type's range.
+    casted_val_0 = vmaxq_s32(casted_val_0, min_val_dup);
+    casted_val_1 = vmaxq_s32(casted_val_1, min_val_dup);
+    casted_val_0 = vminq_s32(casted_val_0, max_val_dup);
+    casted_val_1 = vminq_s32(casted_val_1, max_val_dup);
+
+    const int16x4_t narrowed_val_0 = vmovn_s32(casted_val_0);
+    const int16x4_t narrowed_val_1 = vmovn_s32(casted_val_1);
+    vst1_s16(output_data + i, narrowed_val_0);
+    vst1_s16(output_data + i + 4, narrowed_val_1);
+  }
+#endif // NEON
+
+  for (; i < flat_size; ++i)
+  {
+    const float val = input_data[i];
+    const int32_t unclamped = static_cast<int32_t>(round(val / scale)) + zero_point;
+    const int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
+    output_data[i] = clamped;
+  }
+}
+
 inline void Quantize(const int32_t *multiplier, const int32_t *shift, int32_t channel_size,
                      int32_t total_size, int32_t output_zp, int32_t output_min, int32_t output_max,
                      int32_t *scratch, int8_t *output)
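
For reference, the scalar paths above implement the usual affine quantization, q = clamp(round(x / scale) + zero_point, min, max). A numpy sketch for the int8 case with example scale and zero point (np.rint rounds halves to even, while the scalar C++ path uses round(); they differ only at exact .5 boundaries):

```python
import numpy as np

# Affine quantization to int8: q = clamp(round(x / scale) + zero_point, -128, 127).
scale, zero_point = 0.1, 5
x = np.array([-20.0, -0.26, 0.0, 0.24, 20.0], dtype=np.float32)
q = np.clip(np.rint(x / scale) + zero_point, -128, 127).astype(np.int8)
print(q)  # [-128    2    5    7  127]
```
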
index f54f2e6..02a9eac 100644 (file)
@@ -312,12 +312,12 @@ public:
     }
 
     // Calculate mean by dividing output_data by the number of aggregated elements.
-    U num_elements_in_axis = 1;
+    size_t num_elements_in_axis = 1;
     for (int idx = 0; idx < num_resolved_axis; ++idx)
     {
       size_t current = static_cast<size_t>(input_shape.Dims(resolved_axis_data()[idx]));
       // Overflow prevention.
-      if (current > static_cast<size_t>(std::numeric_limits<U>::max() / num_elements_in_axis))
+      if (current > static_cast<size_t>(std::numeric_limits<size_t>::max() / num_elements_in_axis))
       {
         return false;
       }
@@ -330,7 +330,7 @@ public:
       if (compute_sum)
       {
         // TODO(b/116341117): Eliminate float and do this completely in 8bit.
-        const float bias = -input_zero_point * scale * num_elements_in_axis + 0.5f;
+        const float bias = -input_zero_point * scale * num_elements_in_axis;
         for (size_t idx = 0; idx < num_outputs; ++idx)
         {
           const U value =
@@ -340,7 +340,7 @@ public:
       }
       else
       {
-        const float bias = -input_zero_point * scale + 0.5f;
+        const float bias = -input_zero_point * scale;
         for (size_t idx = 0; idx < num_outputs; ++idx)
         {
           float float_mean =
index a04a741..d677145 100644 (file)
@@ -19,6 +19,7 @@
 #define __NNFW_CKER_ROUND_H__
 
 #include "cker/Shape.h"
+#include "cker/Utils.h"
 
 #include <cmath>
 
@@ -41,6 +42,26 @@ inline float RoundToNearest(float value)
   }
 }
 
+#ifdef USE_NEON
+
+inline int32x4_t RoundToNearest(const float32x4_t input)
+{
+#if defined(__aarch64__) || defined(__SSSE3__)
+  // Note: vcvtnq_s32_f32 is not available in ARMv7
+  return vcvtnq_s32_f32(input);
+#else
+  static const float32x4_t zero_val_dup = vdupq_n_f32(0.0f);
+  static const float32x4_t point5_val_dup = vdupq_n_f32(0.5f);
+  static const float32x4_t minus_point5_val_dup = vdupq_n_f32(-0.5f);
+
+  const uint32x4_t mask = vcltq_f32(input, zero_val_dup);
+  const float32x4_t round = vbslq_f32(mask, minus_point5_val_dup, point5_val_dup);
+  return vcvtq_s32_f32(vaddq_f32(input, round));
+#endif // defined(__aarch64__) || defined(__SSSE3__)
+}
+
+#endif // NEON
+
 inline void Round(const Shape &input_shape, const float *input_data, const Shape &output_shape,
                   float *output_data)
 {
index 409e5f7..c49ea1f 100644 (file)
@@ -21,7 +21,7 @@ copyright = '2020, Samsung Research & contributors'
 author = 'Samsung Research & contributors'
 
 # The full version, including alpha/beta/rc tags
-release = '1.21.0'
+release = '1.22.0'
 
 # -- General configuration ---------------------------------------------------
 
index 29e300b..7a1fa1c 100644 (file)
@@ -45,6 +45,9 @@ pylint \
 python3 \
 python3-pip \
 python3-venv \
+python3.8 \
+python3.8-dev \
+python3.8-venv \
 scons \
 software-properties-common \
 unzip \
@@ -177,6 +180,13 @@ First `make` will run above steps (1), (2) and (3). Second `make` will run (4).
 
 ### Test
 
+Prerequisite for testing on an ARM32 device:
+```
+# numpy is required for value matching on the ARM32 target device
+sudo apt-get install python3-pip
+python3 -m pip install numpy
+```
+
 You can also run the unit tests on an ARM32 Ubuntu device with the cross-build results.
 First you need to run the tests on the host to prepare files that are currently
 complicated to generate on the target device.
index 57b2b78..5479a34 100644 (file)
@@ -2,7 +2,7 @@
 
 This document describes how to build runtime with GBS for Tizen AARCH64.
 As a real example, we'll also describe how to prepare Tizen on Raspberry Pi 4
-and show you how to run our test package runner `nnpackage_run`.
+and show you how to run our test package runner `onert_run`.
 
 For ARM32, the process is mostly the same, with only a few changes.
 
@@ -70,7 +70,7 @@ $ cd ONE
 $ gbs -c infra/nnfw/config/gbs.conf build --include-all -A aarch64 --define 'test_build 1'
 ```
 - `-A aarch64` is to set architecture to AARCH64. Use `arm32` for ARM32 target.
-- `--define 'test_build 1'` is to enable test build so that we can use `nnpackage_run`
+- `--define 'test_build 1'` is to enable test build so that we can use `onert_run`
 
 Now take a cup of coffee.
 
@@ -300,7 +300,7 @@ Assume `mobilenet_v2_1.4_224` nnpackage is already copied to
 `/opt/usr/home/owner/media/models` folder with `sdb` command.
 
 ```
-sh-3.2# BACKENDS="cpu" Product/out/bin/nnpackage_run \
+sh-3.2# BACKENDS="cpu" Product/out/bin/onert_run \
 --nnpackage /opt/usr/home/owner/media/models/mobilenet_v2_1.4_224
 
 Package Filename /opt/usr/home/owner/media/models/mobilenet_v2_1.4_224
index bf524d7..f83ec5b 100644 (file)
@@ -70,13 +70,13 @@ Unfortunately, the debug build on the x86_64 architecture currently has an error
 
 ```
 $ export BUILD_TYPE=release
-$ make -f Makefile.template install
+$ make -f Makefile.template
 ```
 
 Or you can simply do something like this:
 
 ```
-$ BUILD_TYPE=release make -f Makefile.template install
+$ BUILD_TYPE=release make -f Makefile.template
 ```
 
 The build method described here is a `native build` in which the build environment and execution environment are the same. So, this command creates a runtime binary targeting the current build architecture, probably x86_64, as the execution environment. You can find the build output in the ./Product folder as follows:
@@ -84,12 +84,8 @@ The build method described here is a `native build` in which the build environme
 ```
 $ tree -L 2 ./Product
 ./Product
-├── obj -> /home/sjlee/star/one/Product/x86_64-linux.release/obj
 ├── out -> /home/sjlee/star/one/Product/x86_64-linux.release/out
 └── x86_64-linux.release
-    ├── BUILD
-    ├── CONFIGURE
-    ├── INSTALL
     ├── obj
     └── out
 
@@ -98,17 +94,16 @@ $ tree -L 2 ./Product
 $ tree -L 3 ./Product/out
 ./Product/out
 ├── bin
-│   ├── nnapi_test
-│   ├── nnpackage_run
+│   ├── onert_run
 │   ├── tflite_comparator
 │   └── tflite_run
 ├── include
 │   ├── nnfw
-│   │   ├── NeuralNetworks.h
 │   │   ├── NeuralNetworksEx.h
 │   │   ├── NeuralNetworksExtensions.h
-│   │   ├── nnfw.h
-│   │   └── nnfw_experimental.h
+│   │   ├── NeuralNetworks.h
+│   │   ├── nnfw_experimental.h
+│   │   └── nnfw.h
 │   └── onert
 │       ├── backend
 │       ├── compiler
@@ -117,55 +112,59 @@ $ tree -L 3 ./Product/out
 │       └── util
 ├── lib
 │   ├── libbackend_cpu.so
-│   ├── libcircle_loader.so
+│   ├── libbackend_ruy.so
 │   ├── libneuralnetworks.so
 │   ├── libnnfw-dev.so
-│   ├── libonert_core.so
-│   └── libtflite_loader.so
+│   └── libonert_core.so
+├── nnapi-gtest
+│   ├── nnapi_gtest
+│   ├── nnapi_gtest.skip
+│   ├── nnapi_gtest.skip.noarch.interp
+│   └── nnapi_gtest.skip.x86_64-linux.cpu
 ├── test
-│   ├── FillFrom_runner
 │   ├── command
 │   │   ├── nnpkg-test
 │   │   ├── prepare-model
 │   │   ├── unittest
 │   │   └── verify-tflite
+│   ├── FillFrom_runner
 │   ├── list
 │   │   ├── benchmark_nnpkg_model_list.txt
-│   │   ├── frameworktest_list.aarch64.acl_cl.txt
-│   │   ├── frameworktest_list.aarch64.acl_neon.txt
-│   │   ├── frameworktest_list.aarch64.cpu.txt
-│   │   ├── frameworktest_list.armv7l.acl_cl.txt
-│   │   ├── frameworktest_list.armv7l.acl_neon.txt
-│   │   ├── frameworktest_list.armv7l.cpu.txt
-│   │   ├── frameworktest_list.noarch.interp.txt
-│   │   ├── frameworktest_list.x86_64.cpu.txt
 │   │   ├── nnpkg_test_list.armv7l-linux.acl_cl
 │   │   ├── nnpkg_test_list.armv7l-linux.acl_neon
 │   │   ├── nnpkg_test_list.armv7l-linux.cpu
 │   │   ├── nnpkg_test_list.noarch.interp
-│   │   ├── tflite_loader_list.aarch64.txt
-│   │   └── tflite_loader_list.armv7l.txt
+│   │   ├── tflite_comparator.aarch64.acl_cl.list
+│   │   ├── tflite_comparator.aarch64.acl_neon.list
+│   │   ├── tflite_comparator.aarch64.cpu.list
+│   │   ├── tflite_comparator.armv7l.acl_cl.list
+│   │   ├── tflite_comparator.armv7l.acl_neon.list
+│   │   ├── tflite_comparator.armv7l.cpu.list
+│   │   ├── tflite_comparator.noarch.interp.list
+│   │   └── tflite_comparator.x86_64.cpu.list
 │   ├── models
-│   │   ├── nnfw_api_gtest
 │   │   ├── run_test.sh
 │   │   └── tflite
 │   ├── nnpkgs
 │   │   └── FillFrom
 │   └── onert-test
-├── unittest
-│   ├── nnapi_gtest
-│   ├── nnapi_gtest.skip
-│   ├── nnapi_gtest.skip.noarch.interp
-│   └── nnapi_gtest.skip.x86_64-linux.cpu
-└── unittest_standalone
+└── unittest
+    ├── ndarray_test
     ├── nnfw_api_gtest
-    ├── test_compute
-    ├── test_onert
-    ├── test_onert_backend_cpu_common
+    ├── nnfw_api_gtest_models
+    │   ├── add
+    │   ├── add_invalid_manifest
+    │   ├── add_no_manifest
+    │   ├── if_dynamic
+    │   ├── mobilenet_v1_1.0_224
+    │   └── while_dynamic
+    ├── nnfw_lib_misc_test
+    ├── test_cker
+    ├── test_onert_core
     ├── test_onert_frontend_nnapi
     └── tflite_test
 
-20 directories, 47 files
+26 directories, 46 files
 
 ```
 
@@ -185,24 +184,8 @@ inception_v3.tflite
 The result of running the inception_v3 model using runtime is as follows. Please consider that this is a test that simply checks execution latency without considering the accuracy of the model.
 
 ```
-$ USE_NNAPI=1 ./Product/out/bin/tflite_run ./inception_v3.tflite
-nnapi function 'ANeuralNetworksModel_create' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksModel_addOperand' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksModel_setOperandValue' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksModel_addOperation' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksModel_identifyInputsAndOutputs' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksModel_finish' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksCompilation_create' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksCompilation_finish' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-input tensor indices = [317,]
-nnapi function 'ANeuralNetworksExecution_create' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksExecution_setInput' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksExecution_setOutput' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksExecution_startCompute' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksEvent_wait' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksEvent_free' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksExecution_free' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-output tensor indices = [316(max:905),]
+$ ./Product/out/bin/onert_run --modelfile ./inception_v3.tflite
+Model Filename ./inception_v3.tflite
 ===================================
 MODEL_LOAD   takes 1.108 ms
 PREPARE      takes 0.190 ms
@@ -212,10 +195,8 @@ EXECUTE      takes 183.895 ms
 - MIN      :  183.895 ms
 - GEOMEAN  :  183.895 ms
 ===================================
-nnapi function 'ANeuralNetworksCompilation_free' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksModel_free' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
 ```
-Here, `USE_NNAPI=1` means that **ONE** runtime is used for model inference. If omitted, the model will be executed using Tensorflow lite, the basic framework for verification. From the previous build result, you can see that it is the path to the directory where `libneuralnetworks.so` and `libonert_core.so` are located.
+If you use `tflite_run` instead of `onert_run`, the model will be executed using TensorFlow Lite, the baseline framework used for verification. From the previous build result, you can see the directory where `tflite_run` and `onert_run` are located.
 
 If you come here without any problems, you have all of the basic environments for runtime development.
 
index fe7b140..7c68554 100644 (file)
@@ -34,8 +34,9 @@ This section explains the steps to create a pull request (PR).
 1. Create a commit
 
    It is time to create a commit for submission once you are convinced that your contribution is
-   ready to go. Please include signed-off message at the end of commit message. If not, your pull
-   request will be **rejected** by CI.
+   ready to go. Please include a
+   [signed-off message](https://github.com/Samsung/ONE/wiki/ONE-Developer's-Certificate-of-Origin)
+   at the end of the commit message. If not, your pull request will be **rejected** by CI.
 
 1. Check code format locally
 
index 4c8fe1d..3e27057 100644 (file)
@@ -89,8 +89,8 @@ normal build and cross build as follows.
 ```
 $ export ROOTFS_DIR=xxx
 ...
-$ make all install                        # do normal build
-$ TARGET_ARCH=aarch64 make all install    # do cross build
+$ make                         # do normal build
+$ TARGET_ARCH=aarch64 make     # do cross build
 ```
 
 ### Run test
index 32c64f8..91481a9 100644 (file)
@@ -106,11 +106,11 @@ Give `TARGET_ARCH` variable to set the target architecture.
 If you prepared `ROOTFS_DIR` in an alternative folder, you should also pass it to the makefile.
 
 ```
-$ CROSS_BUILD=1 TARGET_ARCH=armv7l make all install
+$ CROSS_BUILD=1 TARGET_ARCH=armv7l make
 
 # If ROOTFS_DIR is in alternative folder
 $ ROOTFS_DIR=/path/to/your/rootfs/arm \
-CROSS_BUILD=1 TARGET_ARCH=armv7l make all install
+CROSS_BUILD=1 TARGET_ARCH=armv7l make
 ```
 
 You can also omit the `CROSS_BUILD=1` option if you explicitly pass `ROOTFS_DIR`. In that case, if
@@ -121,8 +121,8 @@ normal build and cross build as follows.
 ```
 $ export ROOTFS_DIR=xxx
 ...
-$ make all install    # do normal build
-$ TARGET_ARCH=armv7l make all install    # do cross build
+$ make                       # do normal build
+$ TARGET_ARCH=armv7l make    # do cross build
 ```
 
 ### Run test
@@ -151,12 +151,10 @@ $ ssh odroid
 sjlee@odroid's password:
 ...
 $ cd ~/one/Product
-$ ln ${PWD}/armv7l-linux.debug/obj obj
 $ ln ${PWD}/armv7l-linux.debug/out out
 $ cd ..
 $ ls -la Product
 drwxrwxr-x  5 sjlee sjlee 4096 Jun  4 20:55 armv7l-linux.debug
-lrwxrwxrwx  1 sjlee sjlee   51 Jun  4 20:54 obj -> /home/sjlee/one/Product/armv7l-linux.debug/obj
 lrwxrwxrwx  1 sjlee sjlee   51 Jun  4 20:55 out -> /home/sjlee/one/Product/armv7l-linux.debug/out
 ```
 
index 32e1b83..a839777 100644 (file)
@@ -18,7 +18,7 @@ ONE runtime has many ways to use specific backend during inference
 - Same as `nnfw_set_available_backends`
 - Example
 ```bash
-BACKENDS=cpu ./Product/out/bin/nnpackage_run ...
+BACKENDS=cpu ./Product/out/bin/onert_run ...
 ```
 
 ### 2. OP_BACKEND_[OP_TYPE]
@@ -27,7 +27,7 @@ BACKENDS=cpu ./Product/out/bin/nnpackage_run ...
 - Example
   - Execute `Conv2D` operator on ruy backend and others on cpu backend
 ```bash
-OP_BACKEND_Conv2D=ruy BACKENDS="cpu;ruy" ./Product/out/bin/nnpackage_run ...
+OP_BACKEND_Conv2D=ruy BACKENDS="cpu;ruy" ./Product/out/bin/onert_run ...
 ```
 
 ### 3. OP_BACKEND_MAP
@@ -36,5 +36,5 @@ OP_BACKEND_Conv2D=ruy BACKENDS="cpu;ruy" ./Product/out/bin/nnpackage_run ...
 - Example
   - Execute `operator 10` on `acl_cl` backend and others on `acl_neon` backend
 ```bash
-OP_BACKEND_MAP="10=acl_cl" BACKENDS="acl_neon;acl_cl" ./Product/out/bin/nnpackage_run ...
+OP_BACKEND_MAP="10=acl_cl" BACKENDS="acl_neon;acl_cl" ./Product/out/bin/onert_run ...
 ```
diff --git a/docs/release/1.22/index.rst b/docs/release/1.22/index.rst
new file mode 100644 (file)
index 0000000..e3c330d
--- /dev/null
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+   sphinx-quickstart on Fri Mar 24 14:03:12 2023.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+1.22
+====
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+   ./release-note-1.22.0.md
diff --git a/docs/release/1.22/release-note-1.22.0.md b/docs/release/1.22/release-note-1.22.0.md
new file mode 100644 (file)
index 0000000..2e6dee7
--- /dev/null
@@ -0,0 +1,12 @@
+# Release Note 1.22.0
+
+## ONE Compiler
+
+- Introduce new optimization options: `unroll_unidirseqlstm`, `forward_transpose_op`, `fold_fully_connected`, `fuse_prelu`
+- Support more Ops for fake quantization: `Depth2Space`, `Space2Depth`, `Pack`, `Unpack`, `Abs`
+- Support more Ops for quantization: `Abs`, `ReduceProd`
+- Introduce _visq_ tool for quantization error visualization
+- Introduce _Environment_ section into configuration file
+- Improve speed of `convert_nchw_to_nhwc` option
+- Support `Add`, `Mul` of index-type (int32, int64) tensors in _one-quantize_
+- Support Ubuntu 20.04
index f71eb5d..5671ae0 100644 (file)
@@ -51,8 +51,11 @@ function(ExternalSource_Download PREFIX)
     foreach(retry_count RANGE 5)
       message(STATUS "(Trial Count : ${retry_count})")
 
+      # For external mirror server
+      envoption(EXTERNAL_SERVER_USERPWD "")
       file(DOWNLOAD ${URL} "${DOWNLOAD_PATH}"
                     STATUS status
+                    USERPWD "${EXTERNAL_SERVER_USERPWD}"
                     LOG log)
 
       list(GET status 0 status_code)
@@ -72,7 +75,7 @@ function(ExternalSource_Download PREFIX)
       if(retry_count EQUAL 5)
         message(FATAL_ERROR "Download ${PREFIX} from ${URL} - failed")
       endif()
-      
+
       # Retry after 10 seconds when download fails
       execute_process(COMMAND sleep 10)
     endforeach()
index 890055f..0240611 100644 (file)
@@ -51,6 +51,10 @@ endif()
 
 if("${TARGET_ARCH}" STREQUAL "x86_64")
   set(TARGET_ARCH_BASE ${TARGET_ARCH})
+elseif("${TARGET_ARCH}" STREQUAL "armv8-m")
+  set(TARGET_ARCH_BASE "arm")
+elseif("${TARGET_ARCH}" STREQUAL "armv7-r")
+  set(TARGET_ARCH_BASE "arm")
 elseif("${TARGET_ARCH}" STREQUAL "armv7em")
   set(TARGET_ARCH_BASE "arm")
 elseif("${TARGET_ARCH}" STREQUAL "armv7l")
diff --git a/infra/cmake/packages/CMSIS-NN-4.0.0/CMSIS-NNConfig.cmake b/infra/cmake/packages/CMSIS-NN-4.0.0/CMSIS-NNConfig.cmake
new file mode 100644 (file)
index 0000000..4c82af2
--- /dev/null
@@ -0,0 +1,14 @@
+function(_CMSIS_NN_import)
+    nnas_include(ExternalSourceTools)
+    nnas_include(OptionTools)
+
+    envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+    envoption(CMSIS_NN_4_0_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/CMSIS-NN/archive/refs/tags/v4.0.0.tar.gz)
+
+    ExternalSource_Download(CMSIS_NN DIRNAME CMSIS-NN-4.0.0 ${CMSIS_NN_4_0_0_URL})
+
+    set(CMSIS_NNSource_DIR ${CMSIS_NN_SOURCE_DIR} PARENT_SCOPE)
+    set(CMSIS_NNSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_CMSIS_NN_import)
+
+_CMSIS_NN_import()
diff --git a/infra/cmake/packages/CMSIS-NN-4.0.0/CMSIS-NNConfigVersion.cmake b/infra/cmake/packages/CMSIS-NN-4.0.0/CMSIS-NNConfigVersion.cmake
new file mode 100644 (file)
index 0000000..5fa88e6
--- /dev/null
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "4.0.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+    set(PACKAGE_VERSION_EXACT TRUE)
+    set(PACKAGE_VERSION_COMPATIBLE TRUE)
+    set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
index 872ff72..c844f4c 100644 (file)
@@ -30,14 +30,12 @@ _GTest_build()
 # Note: cmake supports GTest and does not find GTestConfig.cmake or GTest-config.cmake.
 # Refer to "https://cmake.org/cmake/help/v3.5/module/FindGTest.html"
 # find_package(GTest) creates options like GTEST_FOUND, not GTest_FOUND.
-if(GTEST_FOUND)
-  message(STATUS "Found GTest: true")
-else(GTEST_FOUND)
+if(NOT GTEST_FOUND)
   message(STATUS "GTEST_FOUND false: call find_package(GTest)")
   # Reset package config directory cache to prevent recursive find
   unset(GTest_DIR CACHE)
   find_package(GTest)
-endif(GTEST_FOUND)
+endif(NOT GTEST_FOUND)
 find_package(Threads)
 
 if(${GTEST_FOUND} AND TARGET Threads::Threads)
diff --git a/infra/cmake/packages/TensorFlowGpuConfig.cmake b/infra/cmake/packages/TensorFlowGpuConfig.cmake
deleted file mode 100644 (file)
index 7a7f786..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-# TensorFlowGpuConfig.cmake
-
-function(_Build_TfliteGpuDelagate_)
-  nnas_find_package(TensorFlowGpuSource REQUIRED)
-  nnas_find_package(AbseilSource REQUIRED)
-  nnas_find_package(Farmhash REQUIRED)
-  nnas_find_package(Fp16Source REQUIRED)
-
-  if(NOT TARGET TensorFlowGpu)
-    nnas_include(ExternalProjectTools)
-    add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLiteGpu" TensorFlowLiteGpu)
-  endif()
-  set(TENSORFLOWGPU_SOURCE_DIR ${TENSORFLOWGPU_SOURCE_DIR} PARENT_SCOPE)
-  set(TensorFlowGpu_DIR ${TensorFlowGpu_DIR} PARENT_SCOPE)
-endfunction(_Build_TfliteGpuDelagate_)
-
-if(BUILD_TENSORFLOW_LITE_GPU)
-  _Build_TfliteGpuDelagate_()
-  set(TensorFlowGpu_FOUND TRUE PARENT_SCOPE)
-else(BUILD_TENSORFLOW_LITE_GPU)
-  set(TensorFlowGpu_FOUND FALSE PARENT_SCOPE)
-endif(BUILD_TENSORFLOW_LITE_GPU)
diff --git a/infra/cmake/packages/TensorFlowGpuSource/patch_for_gpu_cl_build.patch b/infra/cmake/packages/TensorFlowGpuSource/patch_for_gpu_cl_build.patch
deleted file mode 100644 (file)
index bf423dc..0000000
+++ /dev/null
@@ -1,292 +0,0 @@
-diff --git a/tensorflow/lite/delegates/gpu/api.h b/tensorflow/lite/delegates/gpu/api.h
-index 7892d0ce..fae4fb69 100644
---- a/tensorflow/lite/delegates/gpu/api.h
-+++ b/tensorflow/lite/delegates/gpu/api.h
-@@ -43,11 +43,18 @@ limitations under the License.
- #include "tensorflow/lite/delegates/gpu/common/data_type.h"
- #include "tensorflow/lite/delegates/gpu/common/status.h"
- #include "tensorflow/lite/delegates/gpu/common/util.h"
-+
-+#ifdef TFLITE_GPU_LIB_FIX
- #include <vulkan/vulkan.h>
-+#endif
- #define GL_NO_PROTOTYPES
- #define EGL_NO_PROTOTYPES
-+
-+#ifdef TFLITE_GPU_LIB_FIX
- #include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h"
-+#endif
-+
- #undef GL_NO_PROTOTYPES
- #undef EGL_NO_PROTOTYPES
-@@ -80,6 +87,7 @@ enum class ObjectType {
-   VULKAN_TEXTURE
- };
-+#ifdef TFLITE_GPU_LIB_FIX
- struct OpenGlBuffer {
-   OpenGlBuffer() = default;
-   explicit OpenGlBuffer(GLuint new_id) : id(new_id) {}
-@@ -95,6 +103,7 @@ struct OpenGlTexture {
-   GLuint id = GL_INVALID_INDEX;
-   GLenum format = GL_INVALID_ENUM;
- };
-+#endif
- struct OpenClBuffer {
-   OpenClBuffer() = default;
-@@ -111,6 +120,7 @@ struct OpenClTexture {
-   // TODO(akulik): should it specify texture format?
- };
-+#ifdef TFLITE_GPU_LIB_FIX
- struct VulkanBuffer {
-   VulkanBuffer() = default;
-   explicit VulkanBuffer(VkBuffer buffer_, VkDeviceSize size_,
-@@ -143,6 +153,7 @@ struct VulkanMemory {
-   VkDeviceSize size;
-   VkDeviceSize offset;
- };
-+#endif
- struct CpuMemory {
-   CpuMemory() = default;
-@@ -228,10 +239,15 @@ bool IsValid(const TensorObjectDef& def);
- // @return the number of elements in a tensor object.
- uint32_t NumElements(const TensorObjectDef& def);
-+#ifdef TFLITE_GPU_LIB_FIX
- using TensorObject =
-     absl::variant<absl::monostate, OpenGlBuffer, OpenGlTexture, CpuMemory,
-                   OpenClBuffer, OpenClTexture, VulkanBuffer, VulkanTexture>;
--
-+#else
-+using TensorObject =
-+    absl::variant<absl::monostate, CpuMemory,
-+                  OpenClBuffer, OpenClTexture>;
-+#endif
- // @return true if object is set and corresponding values are defined.
- bool IsValid(const TensorObjectDef& def, const TensorObject& object);
-diff --git a/tensorflow/lite/delegates/gpu/cl/api.h b/tensorflow/lite/delegates/gpu/cl/api.h
-index 65671117..c339f3f0 100644
---- a/tensorflow/lite/delegates/gpu/cl/api.h
-+++ b/tensorflow/lite/delegates/gpu/cl/api.h
-@@ -20,7 +20,9 @@ limitations under the License.
- #define EGL_NO_PROTOTYPES
- #endif
-+#ifdef TFLITE_GPU_LIB_FIX
- #include <EGL/egl.h>
-+#endif
- #include <cstdint>
- #include <memory>
-@@ -115,9 +117,10 @@ struct InferenceEnvironmentOptions {
-   // It is the error to set egl_display, egl_context AND context at the same
-   // time. If egl_display and egl_context are set, they will be used to create
-   // GL-aware CL context.
-+#ifdef TFLITE_GPU_LIB_FIX
-   EGLDisplay egl_display = EGL_NO_DISPLAY;
-   EGLContext egl_context = EGL_NO_CONTEXT;
--
-+#endif //TFLITE_GPU_LIB_FIX
-   // Should contain data returned from
-   // InferenceEnvironment::GetSerializedBinaryCache method.
-   // Invalid or incompatible data will be discarded. Compiled binary may become
-@@ -125,7 +128,11 @@ struct InferenceEnvironmentOptions {
-   absl::Span<const uint8_t> serialized_binary_cache;
-   bool IsGlAware() const {
-+#ifdef TFLITE_GPU_LIB_FIX
-     return egl_context != EGL_NO_CONTEXT && egl_display != EGL_NO_DISPLAY;
-+#else //TFLITE_GPU_LIB_FIX
-+    return false;
-+#endif //TFLITE_GPU_LIB_FIX
-   }
- };
-diff --git a/tensorflow/lite/delegates/gpu/cl/arguments.h b/tensorflow/lite/delegates/gpu/cl/arguments.h
-index a5435c4f..e088355b 100644
---- a/tensorflow/lite/delegates/gpu/cl/arguments.h
-+++ b/tensorflow/lite/delegates/gpu/cl/arguments.h
-@@ -23,7 +23,9 @@ limitations under the License.
- #include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
- #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
- #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
-+#ifdef TFLITE_GPU_LIB_FIX
- #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
-+#endif
- #include "tensorflow/lite/delegates/gpu/cl/util.h"
- #include "tensorflow/lite/delegates/gpu/common/access_type.h"
- #include "tensorflow/lite/delegates/gpu/common/status.h"
-@@ -78,11 +80,12 @@ class Arguments : public ArgumentsBinder {
-   ~Arguments() override = default;
-  private:
-+#ifdef TFLITE_GPU_LIB_FIX
-   friend flatbuffers::Offset<data::Arguments> Encode(
-       const Arguments& args, flatbuffers::FlatBufferBuilder* builder);
-   friend absl::Status Decode(CLContext* context, const data::Arguments* fb_args,
-                              Arguments* args);
--
-+#endif
-   void AddBuffer(const std::string& name, const GPUBufferDescriptor& desc);
-   void AddImage2D(const std::string& name, const GPUImage2DDescriptor& desc);
-   void AddImage2DArray(const std::string& name,
-diff --git a/tensorflow/lite/delegates/gpu/cl/gpu_object.h b/tensorflow/lite/delegates/gpu/cl/gpu_object.h
-index abd77a44..ac1b7f00 100644
---- a/tensorflow/lite/delegates/gpu/cl/gpu_object.h
-+++ b/tensorflow/lite/delegates/gpu/cl/gpu_object.h
-@@ -23,7 +23,9 @@ limitations under the License.
- #include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
- #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
-+#ifdef TFLITE_GPU_LIB_FIX
- #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
-+#endif
- #include "tensorflow/lite/delegates/gpu/common/access_type.h"
- #include "tensorflow/lite/delegates/gpu/common/data_type.h"
- #include "tensorflow/lite/delegates/gpu/common/status.h"
-@@ -165,10 +167,12 @@ class GPUObjectDescriptor {
-   AccessType GetAccess() const { return access_type_; }
-  protected:
-+#ifdef TFLITE_GPU_LIB_FIX
-   friend flatbuffers::Offset<data::GPUObjectDescriptor> Encode(
-       const GPUObjectDescriptor& desc, flatbuffers::FlatBufferBuilder* builder);
-   friend void Decode(const data::GPUObjectDescriptor* fb_obj,
-                      GPUObjectDescriptor* obj);
-+#endif
-   mutable std::map<std::string, std::string> state_vars_;
-   AccessType access_type_;
- };
-diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
-index ca0c0319..f3cbc863 100644
---- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc
-+++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
-@@ -151,6 +151,7 @@ CLNode& CLNode::operator=(CLNode&& node) {
-   return *this;
- }
-+#ifdef TFLITE_GPU_LIB_FIX
- absl::Status InferenceContext::InitFromGraph(
-     const CreateInferenceInfo& create_info, const GraphFloat32& graph,
-     Environment* env, std::vector<uint8_t>* serialized_model) {
-@@ -239,6 +240,7 @@ absl::Status InferenceContext::RestoreDeserialized(
-   }
-   return absl::OkStatus();
- }
-+#endif
- absl::Status InferenceContext::InitFromGraphWithTransforms(
-     const CreateInferenceInfo& create_info, GraphFloat32* graph,
-diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.h b/tensorflow/lite/delegates/gpu/cl/inference_context.h
-index ec8055eb..871af9dd 100644
---- a/tensorflow/lite/delegates/gpu/cl/inference_context.h
-+++ b/tensorflow/lite/delegates/gpu/cl/inference_context.h
-@@ -31,7 +31,9 @@ limitations under the License.
- #include "tensorflow/lite/delegates/gpu/cl/model_hints.h"
- #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
- #include "tensorflow/lite/delegates/gpu/cl/precision.h"
-+#ifdef TFLITE_GPU_LIB_FIX
- #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
-+#endif
- #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
- #include "tensorflow/lite/delegates/gpu/common/model.h"
- #include "tensorflow/lite/delegates/gpu/common/status.h"
-@@ -100,12 +102,14 @@ class InferenceContext {
-  private:
-   enum TensorMemoryType { STRONG_SHAPE = 0, BUFFER = 1, VARIABLE = 2 };
-+#ifdef TFLITE_GPU_LIB_FIX
-   friend flatbuffers::Offset<data::InferenceContext> Encode(
-       const InferenceContext& inference,
-       flatbuffers::FlatBufferBuilder* builder);
-   friend absl::Status Decode(CLContext* context,
-                              const data::InferenceContext* fb_inference,
-                              InferenceContext* inference);
-+#endif
-   void CopyInAndOutIds(const GraphFloat32& graph);
-   absl::Status ConvertOperations(const DeviceInfo& device_info,
-diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
-index 57d8690c..8178e2de 100644
---- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
-+++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
-@@ -30,7 +30,9 @@ limitations under the License.
- #include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h"
- #include "tensorflow/lite/delegates/gpu/cl/precision.h"
- #include "tensorflow/lite/delegates/gpu/cl/program_cache.h"
-+#ifdef TFLITE_GPU_LIB_FIX
- #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
-+#endif
- #include "tensorflow/lite/delegates/gpu/cl/tensor.h"
- #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
- #include "tensorflow/lite/delegates/gpu/common/data_type.h"
-@@ -169,11 +171,12 @@ class GPUOperation {
-   bool check_src_channels_size_ = false;
-  protected:
-+#ifdef TFLITE_GPU_LIB_FIX
-   friend flatbuffers::Offset<data::GPUOperation> Encode(
-       const GPUOperation& op, flatbuffers::FlatBufferBuilder* builder);
-   friend absl::Status Decode(CLContext* context,
-                              const data::GPUOperation* fb_op, GPUOperation* op);
--
-+#endif
-   virtual absl::Status BindArguments(ArgumentsBinder* args) {
-     return absl::OkStatus();
-   }
-diff --git a/tensorflow/lite/delegates/gpu/cl/program_cache.cc b/tensorflow/lite/delegates/gpu/cl/program_cache.cc
-index 285aa06d..f636a909 100644
---- a/tensorflow/lite/delegates/gpu/cl/program_cache.cc
-+++ b/tensorflow/lite/delegates/gpu/cl/program_cache.cc
-@@ -18,9 +18,13 @@ limitations under the License.
- #include <cstdint>
- #include <string>
-+#ifdef TFLITE_GPU_LIB_FIX
- #include "flatbuffers/flatbuffers.h"  // from @flatbuffers
-+#endif
- #include "tensorflow/lite/delegates/gpu/cl/cl_program.h"
-+#ifdef TFLITE_GPU_LIB_FIX
- #include "tensorflow/lite/delegates/gpu/cl/compiled_program_cache_generated.h"
-+#endif
- #include "tensorflow/lite/delegates/gpu/cl/util.h"
- #include "tensorflow/lite/delegates/gpu/common/status.h"
- #include <farmhash.h>
-@@ -82,6 +86,7 @@ absl::Status ProgramCache::GetOrCreateCLKernel(const std::string& code,
-   return GetOrCreateCLKernel(code, function_name, {}, context, device, result);
- }
-+#ifdef TFLITE_GPU_LIB_FIX
- absl::Status ProgramCache::AddSerializedCache(
-     const CLContext& context, const CLDevice& device,
-     absl::Span<const uint8_t> serialized_cache) {
-@@ -143,6 +148,7 @@ absl::Status ProgramCache::GetSerializedCache(
-               builder.GetSize());
-   return absl::OkStatus();
- }
-+#endif
- }  // namespace cl
- }  // namespace gpu
-diff --git a/tensorflow/lite/delegates/gpu/common/types.h b/tensorflow/lite/delegates/gpu/common/types.h
-index 4ddb46f3..2b692f0b 100644
---- a/tensorflow/lite/delegates/gpu/common/types.h
-+++ b/tensorflow/lite/delegates/gpu/common/types.h
-@@ -34,9 +34,9 @@ class alignas(2) half {
-   HalfBits bits;
-   half() = default;
--
-+#ifdef TFLITE_GPU_LIB_FIX
-   half(const half& f) : bits(f.bits) {}
--
-+#endif
-   explicit half(float other) { bits = fp16_ieee_from_fp32_value(other); }
-   void operator=(float f) { *this = half(f); }
diff --git a/infra/cmake/packages/TensorFlowGpuSourceConfig.cmake b/infra/cmake/packages/TensorFlowGpuSourceConfig.cmake
deleted file mode 100644 (file)
index 369816a..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-#
-# Download Tensorflow 2.4.1, use gpu delegate codes only
-#
-
-function(_TensorFlowGpuSource_Import)
-  SET(PATCH_FILE_CHECK "20211014")
-  SET(DATE_STAMP_PATH "${NNAS_EXTERNALS_DIR}/TENSORFLOW_GPU.stamp")
-
-  set(PATCH_DONE FALSE)
-  if(EXISTS ${DATE_STAMP_PATH})
-    file(STRINGS ${DATE_STAMP_PATH} OBTAINED_CONTENT)
-    if(${OBTAINED_CONTENT} STREQUAL "${PATCH_FILE_CHECK}")
-      set(PATCH_DONE "TRUE")
-    endif()
-  endif()
-
-  if(${PATCH_DONE} STREQUAL "TRUE")
-    message(STATUS "Skip downloading TensorFlowGpuSource")
-    set(TENSORFLOWGPU_SOURCE_DIR "${NNAS_EXTERNALS_DIR}/TENSORFLOW_GPU" PARENT_SCOPE)
-    set(TensorFlowGpuSource_DIR "${TensorFlowGpuSource_DIR}" PARENT_SCOPE)
-    set(TensorFlowGpuSource_FOUND TRUE PARENT_SCOPE)
-    return()
-  else(${PATCH_DONE} STREQUAL "TRUE")
-    # PATCH_DONE FALSE
-    message(STATUS "TensorFlowGpuSource patch not found!")
-  endif(${PATCH_DONE} STREQUAL "TRUE")
-
-  # Download TFLite Source Code
-  nnas_include(ExternalSourceTools)
-  nnas_include(OptionTools)
-  envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
-  envoption(TENSORFLOW_2_4_1_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.4.1.tar.gz)
-  ExternalSource_Download(TFLITE_GPU_DELEGATE DIRNAME TENSORFLOW-2.4.1 ${TENSORFLOW_2_4_1_URL})
-
-  # Patch for non used codes on onert backend/gpu_cl
-  # ToDo: Do it more simpler
-  set(TENSORFLOWGPU_SOURCE_DIR "${NNAS_EXTERNALS_DIR}/TENSORFLOW_GPU")
-
-  # remove & copy gpu delegate source codes only
-  if(EXISTS ${TENSORFLOWGPU_SOURCE_DIR})
-    file(REMOVE_RECURSE "${TENSORFLOWGPU_SOURCE_DIR}")
-  endif()
-
-  file(MAKE_DIRECTORY "${TENSORFLOWGPU_SOURCE_DIR}")
-  execute_process(
-    WORKING_DIRECTORY "${TFLITE_GPU_DELEGATE_SOURCE_DIR}"
-    COMMAND bash -c "cp -r --parents ./tensorflow/lite/delegates/gpu ../TENSORFLOW_GPU"
-  )
-
-  # Create Stamp
-  set(_remove_path "${TENSORFLOWGPU_SOURCE_DIR}.stamp")
-  if(EXISTS ${_remove_path})
-    file(REMOVE ${_remove_path})
-  endif()
-  execute_process(
-    WORKING_DIRECTORY "${NNAS_EXTERNALS_DIR}/TENSORFLOW_GPU"
-    COMMAND bash -c "patch -p1 < ${CMAKE_CURRENT_LIST_DIR}/TensorFlowGpuSource/patch_for_gpu_cl_build.patch"
-  )
-  file(WRITE ${DATE_STAMP_PATH} "${PATCH_FILE_CHECK}")
-  set(TENSORFLOWGPU_SOURCE_DIR "${TENSORFLOWGPU_SOURCE_DIR}" PARENT_SCOPE)
-  set(TensorFlowGpuSource_DIR "${TensorFlowGpuSource_DIR}" PARENT_SCOPE)
-  set(TensorFlowGpuSource_FOUND TRUE PARENT_SCOPE)
-
-  execute_process(
-    WORKING_DIRECTORY "${NNAS_EXTERNALS_DIR}"
-    COMMAND bash -c "rm -rf ${TFLITE_GPU_DELEGATE_SOURCE_DIR}.stamp"
-    COMMAND bash -c "rm -rf ${TFLITE_GPU_DELEGATE_SOURCE_DIR}"
-  )
-endfunction(_TensorFlowGpuSource_Import)
-
-if(NOT TensorFlowGpuSource_FOUND)
-   _TensorFlowGpuSource_Import()
-else()
-  set(TensorFlowGpuSource_FOUND FALSE PARENT_SCOPE)
-endif(NOT TensorFlowGpuSource_FOUND)
diff --git a/infra/cmake/packages/TensorFlowLiteGpu/CMakeLists.txt b/infra/cmake/packages/TensorFlowLiteGpu/CMakeLists.txt
deleted file mode 100644 (file)
index c69e0bb..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-#
-# Tensorflow Lite GPU delegate library 2.4.1
-#
-
-set(LIB_TENSORFLOW_GPU_DELEGATE "TensorFlowGpu")
-
-#TENSORFLOWGPU_SOURCE_DIR
-set(REF_TENSORFLOW_SRC_BASE ${TENSORFLOWGPU_SOURCE_DIR})
-set(REF_TENSORFLOW_LITE_SRC_BASE ${REF_TENSORFLOW_SRC_BASE}/tensorflow/lite)
-set(REF_TENSORFLOW_LITE_GPU_DELEGATE_SRC_BASE "${REF_TENSORFLOW_LITE_SRC_BASE}/delegates/gpu")
-
-set(SRC_BASE "${REF_TENSORFLOW_LITE_GPU_DELEGATE_SRC_BASE}")
-file(GLOB GPU_CL_SRC_LIST   "${SRC_BASE}/cl/*.cc"
-                                    "${SRC_BASE}/cl/kernels/*.cc"
-                                    "${SRC_BASE}/cl/kernels/special/*.cc"
-                                    "${SRC_BASE}/cl/kernels/selectors/*.cc"
-                                    "${SRC_BASE}/cl/selectors/*.cc"
-                                    "${SRC_BASE}/common/*.cc"
-# Available, but not needed yet
-#                                    "${SRC_BASE}/common/default/*.cc"
-#                                    "${SRC_BASE}/common/memory_managements/*.cc"
-#                                    "${SRC_BASE}/common/transformations/*.cc"
-                                     )
-
-file(GLOB GPU_CL_HDRS_GLOB   "${SRC_BASE}/cl/*.h"
-                                    "${SRC_BASE}/cl/kernels/*.h"
-                                    "${SRC_BASE}/cl/kernels/special/*.h"
-                                    "${SRC_BASE}/cl/kernels/selectors/*.h"
-                                    "${SRC_BASE}/cl/selectors/*.h"
-                                    "${SRC_BASE}/common/*.h"
-                                    "${SRC_BASE}/common/default/*.h"
-                                    "${SRC_BASE}/common/memory_managements/*.h"
-                                    "${SRC_BASE}/common/transformations/*.h"
-                                    )
-list(APPEND GPU_CL_SRC_LIST "${_GPU_CL_HDRS_GLOB}")
-
-file(GLOB REMOVE_TEST_SRCS          "${SRC_BASE}/cl/*_test*.cc"
-                                    "${SRC_BASE}/cl/testing/*.cc"
-                                    "${SRC_BASE}/cl/kernels/*_test*.cc"
-                                    "${SRC_BASE}/common/*_test*.cc"
-                                    "${SRC_BASE}/common/transformations/*_test*.cc"
-                                    )
-# Not available
-file(GLOB REMOVE_SRCS               "${SRC_BASE}/cl/*gl*.cc"
-                                    "${SRC_BASE}/cl/gpu_api_delegate.cc"
-                                    "${SRC_BASE}/cl/serialization.cc"
-                                    "${SRC_BASE}/common/lstm_parser.cc"
-                                    "${SRC_BASE}/common/model_builder.cc"
-                                    "${SRC_BASE}/common/model_builder_helper.cc"
-                                    "${SRC_BASE}/common/object_reader.cc"
-                                    "${SRC_BASE}/common/quantization_util.cc"
-                                    "${SRC_BASE}/common/memory_management/*_test.cc"
-                                    )
-
-list(REMOVE_ITEM GPU_CL_SRC_LIST ${REMOVE_TEST_SRCS})
-list(REMOVE_ITEM GPU_CL_SRC_LIST ${REMOVE_SRCS})
-list(APPEND TFLITE_GPU_SRCS ${GPU_CL_SRC_LIST})
-
-add_library(${LIB_TENSORFLOW_GPU_DELEGATE} STATIC ${TFLITE_GPU_SRCS})
-target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${Fp16Source_DIR}/include")
-target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${TENSORFLOWGPU_SOURCE_DIR}")
-target_link_libraries(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE abseil farmhash fp16)
-
-add_library(tflitegpu_ignore_warnings INTERFACE)
-target_compile_options(tflitegpu_ignore_warnings INTERFACE -Wno-unused-parameter -Wno-sign-compare)
-target_link_libraries(${LIB_TENSORFLOW_GPU_DELEGATE} INTERFACE tflitegpu_ignore_warnings)
-
-# GL codes are not used on gpu_cl
-target_compile_options(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "-DCL_DELEGATE_NO_GL")
-
-# Applying PIC first, currently used on gpu_cl only
-set_target_properties(${LIB_TENSORFLOW_GPU_DELEGATE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
index 993a6ad..461da6f 100644 (file)
@@ -157,6 +157,10 @@ function check_python_files() {
   FILES_TO_CHECK_PYTHON=(`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '\.py$'`)
   # Exceptional case: one-cmds don't have '.py' extension: ignore non-python source (cmake, etc) and ignore shell script: one-prepare-venv
   FILES_TO_CHECK_PYTHON+=(`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '^compiler/one-cmds/[^(\./)]*$' | egrep -v '^compiler/one-cmds/one-prepare-venv$'`)
+  # Exceptional case: onecc-docker doesn't have a '.py' extension.
+  FILES_TO_CHECK_PYTHON+=(`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '^compiler/onecc-docker/onecc-docker$'`)
+  # Exceptional case: visq doesn't have a '.py' extension.
+  FILES_TO_CHECK_PYTHON+=(`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '^compiler/visq/visq$'`)
 
   for s in ${DIRECTORIES_NOT_TO_BE_TESTED[@]}; do
     skip=${s#'.'/}/
index df6377d..c841dc0 100644 (file)
@@ -66,13 +66,10 @@ done
 "${LCOV_PATH}" -e "${RAW_COVERAGE_INFO_PATH}" -o "${EXTRACTED_COVERAGE_INFO_PATH}" \
   "${CANDIDATES[@]}"
 
-
-opencl_files=($(find ./runtime/onert/backend/gpu_cl/open_cl/ \( -name "*.cc" -o -name "*.h" \) -exec realpath {} \; ))
-
 # Exclude test files from coverage report
 # Exclude flatbuffer generated files from coverage report
 "${LCOV_PATH}" -r "${EXTRACTED_COVERAGE_INFO_PATH}" -o "${EXCLUDED_COVERAGE_INFO_PATH}" \
-  '*.test.cpp' '*.test.cc' '*/test/*' '*/tests/*' '*_schema_generated.h' "${opencl_files[@]}"
+  '*.test.cpp' '*.test.cc' '*/test/*' '*/tests/*' '*_schema_generated.h'
 
 # Final coverage data
 cp -v ${EXCLUDED_COVERAGE_INFO_PATH} ${COVERAGE_INFO_PATH}
index ddca70a..ba5eea9 100644 (file)
@@ -1,3 +1,16 @@
+one (1.22.0) bionic; urgency=medium
+
+  * Introduce new optimization options: `unroll_unidirseqlstm`, `forward_transpose_op`, `fold_fully_connected`, `fuse_prelu`
+  * Support more Ops for fake quantization: `Depth2Space`, `Space2Depth`, `Pack`, `Unpack`, `Abs`
+  * Support more Ops for quantization: `Abs`, `ReduceProd`
+  * Introduce _visq_ tool for quantization error visualization
+  * Introduce _Environment_ section into configuration file
+  * Improve speed of `convert_nchw_to_nhwc` option
+  * Support `Add`, `Mul` of index-type (int32, int64) tensors in _one-quantize_
+  * Support ubuntu 20.04
+
+ -- seongwoo <mhs4670go@naver.com>  Fri, 24 Mar 2023 13:58:16 +0900
+
 one (1.21.0) bionic; urgency=medium
 
   * Support unrolling of LSTM and RNN Ops in `one-import-onnx` tool
index b7a681d..47f53ad 100644 (file)
@@ -1,6 +1,7 @@
 # {FILES_TO_INSTALL} {DEST_DIR}
 # bin
 usr/bin/circledump usr/share/one/bin/
+usr/bin/circle-opselector usr/share/one/bin/
 usr/bin/circle-tensordump usr/share/one/bin/
 usr/bin/tflchef usr/share/one/bin/
 usr/bin/tflchef-file usr/share/one/bin/
index 65e46d1..641bbe6 100644 (file)
@@ -2,12 +2,14 @@
 # bin
 usr/bin/circle2circle usr/share/one/bin/
 usr/bin/circle-eval-diff usr/share/one/bin/
+usr/bin/circle-interpreter usr/share/one/bin/
 usr/bin/circle-operator usr/share/one/bin/
 usr/bin/circle-partitioner usr/share/one/bin/
 usr/bin/circle-quantizer usr/share/one/bin/
+usr/bin/dalgona usr/share/one/bin/
 usr/bin/generate_bcq_metadata.py usr/share/one/bin/
 usr/bin/generate_bcq_output_arrays.py usr/share/one/bin/
-usr/bin/model2nnpkg.sh usr/share/one/bin/
+usr/bin/model2nnpkg usr/share/one/bin/
 usr/bin/onecc usr/share/one/bin/
 usr/bin/onecc.template.cfg usr/share/one/bin/
 usr/bin/one-build usr/share/one/bin/
@@ -32,14 +34,24 @@ usr/bin/onelib/CfgRunner.py usr/share/one/bin/onelib/
 usr/bin/onelib/OptionBuilder.py usr/share/one/bin/onelib/
 usr/bin/onelib/TopologicalSortHelper.py usr/share/one/bin/onelib/
 usr/bin/onelib/WorkflowRunner.py usr/share/one/bin/onelib/
+usr/bin/onelib/Command.py usr/share/one/bin/onelib/
+usr/bin/onelib/utils.py usr/share/one/bin/onelib/
+usr/bin/onelib/export_constant.py usr/share/one/bin/onelib/
 usr/bin/onnx_legalizer.py usr/share/one/bin/
 usr/bin/rawdata2hdf5 usr/share/one/bin/
 usr/bin/record-minmax usr/share/one/bin/
 usr/bin/tf2nnpkg usr/share/one/bin/
 usr/bin/tf2tfliteV2.py usr/share/one/bin/
 usr/bin/tflite2circle usr/share/one/bin/
-usr/bin/utils.py usr/share/one/bin/
+usr/bin/visq usr/share/one/bin/
+usr/bin/visqlib/DumpFakeQuantFM.py usr/share/one/bin/visqlib/
+usr/bin/visqlib/DumpFP32FM.py usr/share/one/bin/visqlib/
+usr/bin/visqlib/Palette.py usr/share/one/bin/visqlib/
+usr/bin/visqlib/QErrorComputer.py usr/share/one/bin/visqlib/
+usr/bin/visqlib/Util.py usr/share/one/bin/visqlib/
 # lib
 usr/lib/* usr/share/one/lib/
 # doc
 usr/doc/* usr/share/one/doc/
+# optimization
+usr/optimization/* usr/share/one/optimization/
index 145634d..447fb21 100755 (executable)
@@ -1,7 +1,7 @@
 #!/usr/bin/make -f
 export DH_VERBOSE = 1
 export NNAS_BUILD_PREFIX = build
-export PRESET = 20220323
+export PRESET = 20221125
 export _DESTDIR = debian/tmp/usr
 
 %:
@@ -14,9 +14,6 @@ override_dh_auto_install:
        cmake --build "$(NNAS_BUILD_PREFIX)/nncc" -- install
 
 override_dh_install:
-       install -t "$(_DESTDIR)/bin" -D "tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
        install -T -m 755 -D "infra/packaging/res/tf2nnpkg.${PRESET}" "$(_DESTDIR)/bin/tf2nnpkg"
        dh_install
 
-override_dh_builddeb:
-       dh_builddeb --destdir=$(NNAS_BUILD_PREFIX)
index 97170ee..a228196 100755 (executable)
@@ -3,7 +3,7 @@ DEBVER := $(shell dpkg-parsechangelog -SVersion)
 export DH_VERBOSE = 1
 export _DESTDIR = debian/tmp/
 export BUILD_TYPE=release
-export OPTIONS=-DBUILD_LOGGING=0 -DBUILD_TFLITE_COMPARATOR_TEST_TOOL=0 -DBUILD_NNPACKAGE_RUN=0 -DBUILD_TFLITE_RUN=0 -DBUILD_NNAPI_TEST=0 -DBUILD_RUNTIME_NNAPI_TEST=0 -DBUILD_TFLITE_BENCHMARK_MODEL=0 -DBUILD_TFLITE_VANILLA_RUN=0 -DBUILD_TENSORFLOW_LITE_2_8_0=0 -DBUILD_TENSORFLOW_LITE=0
+export OPTIONS=-DBUILD_LOGGING=0 -DBUILD_TFLITE_COMPARATOR_TEST_TOOL=0 -DBUILD_ONERT_RUN=0 -DBUILD_TFLITE_RUN=0 -DBUILD_RUNTIME_NNAPI_TEST=0 -DBUILD_TFLITE_VANILLA_RUN=0 -DBUILD_TENSORFLOW_LITE_2_8_0=0 -DBUILD_TENSORFLOW_LITE=0
 export DEBIAN_BUILD=1
 export INSTALL_PATH=debian/tmp/usr/
 %:
index f7ffc73..383fddc 100644 (file)
@@ -41,12 +41,12 @@ RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
 # Additonal tools
 RUN apt-get update && \
     DEBIAN_FRONTEND=noninteractive \
-    apt-get -qqy install doxygen graphviz wget zip unzip clang-format-8 python3 python3-pip python3-venv hdf5-tools pylint curl
-RUN apt-get update && apt-get -qqy install python3.8 python3.8-venv
+    apt-get -qqy install doxygen graphviz wget zip unzip clang-format-8 python3 python3-pip python3-venv python3-dev hdf5-tools pylint curl
+RUN apt-get update && apt-get -qqy install python3.8 python3.8-venv python3.8-dev
 RUN python3 -m pip install --upgrade pip
-RUN python3 -m pip install yapf==0.22.0 numpy
+RUN python3 -m pip install yapf==0.22.0 numpy flatbuffers
 RUN python3.8 -m pip install --upgrade pip
-RUN python3.8 -m pip install numpy
+RUN python3.8 -m pip install numpy flatbuffers
 
 # Install google test (source)
 RUN apt-get update && apt-get -qqy install libgtest-dev
@@ -131,5 +131,15 @@ ENV LC_ALL "C.UTF-8"
 # setup adb server
 EXPOSE 5037
 
+# Setup user to match host user, and give superuser permissions
+ARG USER_ID=1000
+ARG GROUP_ID=${USER_ID}
+RUN addgroup --gid ${GROUP_ID} ubuntu && adduser --disabled-password --gecos '' --uid ${USER_ID} --gid ${GROUP_ID} ubuntu && usermod -aG sudo ubuntu
+RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+RUN echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+
 # Clean archives (to reduce image size)
 RUN apt-get clean -y
+
+# Set user to the one we just created
+USER ${USER_ID}
index 1cdeffb..2848579 100644 (file)
@@ -34,9 +34,9 @@ RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
 # Additonal tools
 RUN apt-get update && \
     DEBIAN_FRONTEND=noninteractive \
-    apt-get -qqy install doxygen graphviz wget zip unzip clang-format-8 python3 python3-pip python3-venv hdf5-tools pylint curl
+    apt-get -qqy install doxygen graphviz wget zip unzip clang-format-8 python3 python3-pip python3-venv python3-dev hdf5-tools pylint curl
 RUN python3 -m pip install --upgrade pip
-RUN python3 -m pip install yapf==0.22.0 numpy
+RUN python3 -m pip install yapf==0.22.0 numpy flatbuffers
 
 # Install google test (source)
 RUN apt-get update && apt-get -qqy install libgtest-dev
@@ -48,5 +48,15 @@ RUN wget http://download.tizen.org/sdk/tizenstudio/official/binary/sdb_4.2.19_ub
 RUN unzip -d tmp sdb.zip && rm sdb.zip
 RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp/*
 
+# Setup user to match host user, and give superuser permissions
+ARG USER_ID=1000
+ARG GROUP_ID=${USER_ID}
+RUN addgroup --gid ${GROUP_ID} ubuntu && adduser --disabled-password --gecos '' --uid ${USER_ID} --gid ${GROUP_ID} ubuntu && usermod -aG sudo ubuntu
+RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+RUN echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+
 # Clean archives (to reduce image size)
 RUN apt-get clean -y
+
+# Set user to the one we just created
+USER ${USER_ID}
diff --git a/infra/docker/jammy/Dockerfile b/infra/docker/jammy/Dockerfile
new file mode 100644 (file)
index 0000000..aa500b0
--- /dev/null
@@ -0,0 +1,60 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM ubuntu:jammy
+
+ARG UBUNTU_MIRROR
+
+# Install 'add-apt-repository'
+RUN apt-get update && apt-get -qqy install software-properties-common
+
+# Build tool
+RUN apt-get update && apt-get -qqy install build-essential cmake scons git lcov g++-arm-linux-gnueabihf g++-aarch64-linux-gnu
+
+# Debian build tool
+RUN apt-get update && apt-get -qqy install fakeroot devscripts debhelper python3-all dh-python
+
+# Install extra dependencies (Caffe, nnkit)
+RUN apt-get update && apt-get -qqy install libboost-all-dev libgflags-dev libgoogle-glog-dev libatlas-base-dev libhdf5-dev
+
+# Install protocol buffer
+RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
+
+# Additional tools
+# TODO install clang-format (No official clang-format-8 package for ubuntu jammy)
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive \
+    apt-get -qqy install doxygen graphviz wget zip unzip python3 python3-pip python3-venv python3-dev hdf5-tools pylint curl
+RUN python3 -m pip install --upgrade pip
+RUN python3 -m pip install yapf==0.22.0 numpy flatbuffers
+
+# Install google test (source)
+RUN apt-get update && apt-get -qqy install libgtest-dev
+
+# TODO: Install gbs & sdb
+# gbs & sdb are not supported on ubuntu jammy yet
+
+# Setup user to match host user, and give superuser permissions
+ARG USER_ID=1000
+ARG GROUP_ID=${USER_ID}
+RUN apt-get update && apt-get -qqy install sudo
+RUN addgroup --gid ${GROUP_ID} ubuntu && adduser --disabled-password --gecos '' --uid ${USER_ID} --gid ${GROUP_ID} ubuntu && usermod -aG sudo ubuntu
+RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+RUN echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+
+# Clean archives (to reduce image size)
+RUN apt-get clean -y
+
+# Set user to the one we just created
+USER ${USER_ID}
index 0dc6fdf..af2adfc 100644 (file)
@@ -32,7 +32,7 @@ DOXYFILE_ENCODING      = UTF-8
 # title of most generated pages and in a few other places.
 # The default value is: My Project.
 
-PROJECT_NAME           = nnas
+PROJECT_NAME           = "ONE - On-device Neural Engine"
 
 # The PROJECT_NUMBER tag can be used to enter a project or revision number. This
 # could be handy for archiving the generated documentation or if some version
@@ -252,7 +252,7 @@ TCL_SUBST              =
 # members will be omitted, etc.
 # The default value is: NO.
 
-OPTIMIZE_OUTPUT_FOR_C  = NO
+OPTIMIZE_OUTPUT_FOR_C  = YES
 
 # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or
 # Python sources only. Doxygen will then generate output that is more tailored
@@ -623,13 +623,13 @@ STRICT_PROTO_MATCHING  = NO
 # list. This list is created by putting \todo commands in the documentation.
 # The default value is: YES.
 
-GENERATE_TODOLIST      = YES
+GENERATE_TODOLIST      = NO
 
 # The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
 # list. This list is created by putting \test commands in the documentation.
 # The default value is: YES.
 
-GENERATE_TESTLIST      = YES
+GENERATE_TESTLIST      = NO
 
 # The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
 # list. This list is created by putting \bug commands in the documentation.
@@ -642,7 +642,7 @@ GENERATE_BUGLIST       = YES
 # the documentation.
 # The default value is: YES.
 
-GENERATE_DEPRECATEDLIST= YES
+GENERATE_DEPRECATEDLIST= NO
 
 # The ENABLED_SECTIONS tag can be used to enable conditional documentation
 # sections, marked by \if <section_label> ... \endif and \cond <section_label>
@@ -790,7 +790,14 @@ WARN_LOGFILE           =
 # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
 # Note: If this tag is empty the current directory is searched.
 
-INPUT                  =
+INPUT                  = README.md \
+                         docs/howto/ \
+                         docs/overview/ \
+                         docs/runtime/ \
+                         compute/ \
+                         compiler/ \
+                         onert-micro/ \
+                         runtime/
 
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
@@ -873,23 +880,14 @@ RECURSIVE              = YES
 # Note that relative paths are relative to the directory from which doxygen is
 # run.
 
-EXCLUDE                = Product/ \
-                         build/ \
-                         doxygen/ \
-                         report/ \
-                         externals/ \
-                         packaging/ \
-                         runtimes/contrib/ \
-                         runtimes/pure_arm_compute/ \
-                         tests/ \
-                         tools/
+EXCLUDE                =
 
 # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
 # directories that are symbolic links (a Unix file system feature) are excluded
 # from the input.
 # The default value is: NO.
 
-EXCLUDE_SYMLINKS       = NO
+EXCLUDE_SYMLINKS       = YES
 
 # If the value of the INPUT tag contains directories, you can use the
 # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
@@ -898,7 +896,17 @@ EXCLUDE_SYMLINKS       = NO
 # Note that the wildcards are matched against the file with absolute path, so to
 # exclude all test directories for example use the pattern */test/*
 
-EXCLUDE_PATTERNS       =
+EXCLUDE_PATTERNS       = *.test.* \
+                         */test/* \
+                         */tests/* \
+                         */unittest/* \
+                         *_generated.* \
+                         */3rdparty/* \
+                         */contrib/* \
+                         */compiler/*/*.md \
+                         */compute/*/*.md \
+                         */runtime/*/*.md
+
 
 # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
 # (namespaces, classes, functions, etc.) that should be excluded from the
@@ -991,7 +999,7 @@ FILTER_SOURCE_PATTERNS =
 # (index.html). This can be useful if you have a project on for instance GitHub
 # and want to reuse the introduction page also for the doxygen output.
 
-USE_MDFILE_AS_MAINPAGE = docs/nnfw/roadmap.md
+USE_MDFILE_AS_MAINPAGE = README.md
 
 #---------------------------------------------------------------------------
 # Configuration options related to source browsing
@@ -1010,7 +1018,7 @@ SOURCE_BROWSER         = YES
 # classes and enums directly into the documentation.
 # The default value is: NO.
 
-INLINE_SOURCES         = NO
+INLINE_SOURCES         = YES
 
 # Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any
 # special comment blocks from generated source code fragments. Normal C, C++ and
@@ -1023,13 +1031,13 @@ STRIP_CODE_COMMENTS    = YES
 # function all documented functions referencing it will be listed.
 # The default value is: NO.
 
-REFERENCED_BY_RELATION = NO
+REFERENCED_BY_RELATION = YES
 
 # If the REFERENCES_RELATION tag is set to YES then for each documented function
 # all documented entities called/used by that function will be listed.
 # The default value is: NO.
 
-REFERENCES_RELATION    = NO
+REFERENCES_RELATION    = YES
 
 # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set
 # to YES then the hyperlinks from functions in REFERENCES_RELATION and
@@ -2265,7 +2273,7 @@ DOT_FONTPATH           =
 # The default value is: YES.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
-CLASS_GRAPH            = YES
+CLASS_GRAPH            = NO
 
 # If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a
 # graph for each documented class showing the direct and indirect implementation
@@ -2310,7 +2318,7 @@ UML_LIMIT_NUM_FIELDS   = 10
 # The default value is: NO.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
-TEMPLATE_RELATIONS     = NO
+TEMPLATE_RELATIONS     = YES
 
 # If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to
 # YES then doxygen will generate a graph for each documented file showing the
@@ -2319,7 +2327,7 @@ TEMPLATE_RELATIONS     = NO
 # The default value is: YES.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
-INCLUDE_GRAPH          = YES
+INCLUDE_GRAPH          = NO
 
 # If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are
 # set to YES then doxygen will generate a graph for each documented file showing
@@ -2328,7 +2336,7 @@ INCLUDE_GRAPH          = YES
 # The default value is: YES.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
-INCLUDED_BY_GRAPH      = YES
+INCLUDED_BY_GRAPH      = NO
 
 # If the CALL_GRAPH tag is set to YES then doxygen will generate a call
 # dependency graph for every global function or class method.
@@ -2340,7 +2348,7 @@ INCLUDED_BY_GRAPH      = YES
 # The default value is: NO.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
-CALL_GRAPH             = YES
+CALL_GRAPH             = NO
 
 # If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller
 # dependency graph for every global function or class method.
@@ -2352,7 +2360,7 @@ CALL_GRAPH             = YES
 # The default value is: NO.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
-CALLER_GRAPH           = YES
+CALLER_GRAPH           = NO
 
 # If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will graphical
 # hierarchy of all classes instead of a textual one.
@@ -2401,7 +2409,7 @@ INTERACTIVE_SVG        = NO
 # found. If left blank, it is assumed the dot tool can be found in the path.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
-DOT_PATH               = /usr/local/bin/dot
+DOT_PATH               =
 
 # The DOTFILE_DIRS tag can be used to specify one or more directories that
 # contain dot files that are included in the documentation (see the \dotfile
@@ -2450,7 +2458,7 @@ PLANTUML_INCLUDE_PATH  =
 # Minimum value: 0, maximum value: 10000, default value: 50.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
-DOT_GRAPH_MAX_NODES    = 50
+DOT_GRAPH_MAX_NODES    = 500
 
 # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs
 # generated by dot. A depth value of 3 means that only nodes reachable from the
index 768d797..bd53c33 100644 (file)
@@ -1,7 +1,4 @@
-# The libboost 1.74 uses IN_LIST operator, which requires the policy CMP0057, in a CMake file.
-# This policy requires ``cmake_minimum_required(VERSION 3.3)``.
-# Run "cmake --help-policy CMP0057" for policy details.
-cmake_minimum_required(VERSION 3.3)
+cmake_minimum_required(VERSION 3.10)
 
 project(nncc)
 
@@ -14,11 +11,6 @@ set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
 set(CMAKE_INSTALL_RPATH "$ORIGIN/../lib:$ORIGIN/")
 set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
 
-# This feature works with CMake 3.5.2 or later. However, using previous versions does not produce
-# an error. We are still officially using CMake 3.1.0, but put this code for the sake of semantic
-# support in various development tools.
-# Todo: Someday, CMake needs to be updated to 3.7.2 or later to take advantage of improvements
-#       such as `cmake-server`.
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
 set(NNAS_PROJECT_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../.." CACHE
index 22d96e7..97631fc 100644 (file)
@@ -4,6 +4,7 @@
 #
 
 BUILD_TYPE?=Debug
+BUILD_JOBS?=1
 
 CURRENT_DIR=$(shell pwd)
 BUILDFOLDER=build
@@ -27,6 +28,7 @@ ARM32_BUILD_ITEMS+=;hermes;hermes-std
 ARM32_BUILD_ITEMS+=;loco;locop;logo-core;logo
 ARM32_BUILD_ITEMS+=;safemain;mio-circle04;mio-tflite280
 ARM32_BUILD_ITEMS+=;dio-hdf5
+ARM32_BUILD_ITEMS+=;luci-compute
 ARM32_BUILD_ITEMS+=;foder;circle-verify;souschef;arser;vconone
 ARM32_BUILD_ITEMS+=;luci
 ARM32_BUILD_ITEMS+=;luci-interpreter
@@ -43,6 +45,7 @@ ARM32_HOST_ITEMS+=;oops
 ARM32_HOST_ITEMS+=;hermes;hermes-std
 ARM32_HOST_ITEMS+=;loco;locop;logo-core;logo
 ARM32_HOST_ITEMS+=;safemain;mio-circle04;mio-tflite280
+ARM32_HOST_ITEMS+=;luci-compute
 ARM32_HOST_ITEMS+=;foder;circle-verify;souschef;arser;vconone
 ARM32_HOST_ITEMS+=;luci
 ARM32_HOST_ITEMS+=;luci-interpreter
@@ -54,12 +57,12 @@ ARM32_HOST_ITEMS+=;common-artifacts
 ARM32_HOST_ITEMS+=;luci-eval-driver;luci-value-test
 
 
-_SPACE_:=
-_SPACE_+=
+_EMPTY_:=
+_SPACE_:=$(_EMPTY_) $(_EMPTY_)
 ARM32_BUILD_WHITELIST=$(subst $(_SPACE_),,$(ARM32_BUILD_ITEMS))
 ARM32_HOST_WHITELIST=$(subst $(_SPACE_),,$(ARM32_HOST_ITEMS))
 
-NNCC_CFG_OPTION+= -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_COVERAGE=OFF
+NNCC_CFG_OPTION+= -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_COVERAGE=OFF -DEXTERNALS_BUILD_THREADS=$(BUILD_JOBS)
 
 NNCC_CFG_STRICT= -DENABLE_STRICT_BUILD=ON
 
@@ -105,19 +108,22 @@ int_configure_arm32:
                $(NNCC_ARM32_DEBUG) $(NNCC_CFG_STRICT) \
                -DCMAKE_TOOLCHAIN_FILE=$(ARM32_TOOLCHAIN_FILE) \
                -DCMAKE_INSTALL_PREFIX="$(ARM32_INSTALL_FOLDER)" \
+               -DBUILD_ARM32_NEON=ON \
                -DENABLE_TEST=ON
 
+# TODO remove BUILD_ARM32_NEON=ON after a while, as the default is ON.
+#      explicitly added to prevent using a cached 'BUILD_ARM32_NEON=OFF'
 
 #
 # builds
 #
 int_build_arm32_host:
-       NNCC_WORKSPACE=$(BUILD_ARM32_HOST) ./nncc build -j1
+       NNCC_WORKSPACE=$(BUILD_ARM32_HOST) ./nncc build -j$(BUILD_JOBS)
 
 int_build_arm32:
        ROOTFS_DIR=$(ROOTFS_ARM) TARGET_ARCH=armv7l \
        BUILD_HOST_EXEC=$(CURRENT_DIR)/$(BUILD_ARM32_HOST) \
-       NNCC_WORKSPACE=$(BUILD_ARM32_FOLDER) ./nncc build -j1
+       NNCC_WORKSPACE=$(BUILD_ARM32_FOLDER) ./nncc build -j$(BUILD_JOBS)
 
 #
 # host test; run test in host to generate random input and expected outputs
index c800f33..87704db 100644 (file)
@@ -9,15 +9,16 @@ include("${CMAKE_CURRENT_LIST_DIR}/config_linux.cmake")
 
 # addition for arm-linux
 set(FLAGS_COMMON ${FLAGS_COMMON}
-    "-mcpu=cortex-a7"
+    "-march=armv7-a"
+    "-mtune=cortex-a8"
     "-mfloat-abi=hard"
-    "-ftree-vectorize"
     "-mfp16-format=ieee"
     )
 
 if(BUILD_ARM32_NEON)
   set(FLAGS_COMMON ${FLAGS_COMMON}
-      "-mfpu=neon-vfpv4"
+      "-mfpu=vfpv3-d16"
+      "-ftree-vectorize"
       )
 else(BUILD_ARM32_NEON)
   message(STATUS "ARMv7l: NEON is disabled")
index fb32957..2765c36 100644 (file)
@@ -11,7 +11,7 @@ DOCKER_PATH="$NNCC_PROJECT_PATH"
 
 export GIT_SSL_NO_VERIFY=1
 
-DOCKER_VOLUMES=" -v $HOST_PATH:$DOCKER_PATH"
+DOCKER_VOLUMES+=" -v $HOST_PATH:$DOCKER_PATH"
 
 DOCKER_ENV_VARS+=" -e http_proxy"
 DOCKER_ENV_VARS+=" -e no_proxy"
index 2a27eee..bb55a8f 100644 (file)
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.5.1)
+cmake_minimum_required(VERSION 3.10)
 
 project(nnfw)
 
@@ -64,11 +64,6 @@ endif()
 set(CMAKE_CXX_STANDARD 14)
 set(CMAKE_CXX_EXTENSIONS OFF)
 
-# This feature works with CMake 3.5.2 or later. However, using previous versions does not produce
-# an error. We are still officially using CMake 3.5.1, but put this code for the sake of semantic
-# support in various development tools.
-# Todo: Someday, CMake needs to be updated to 3.7.2 or later to take advantage of improvements
-#       such as `cmake-server`.
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
 # identify platform: HOST_PLATFORM, TARGET_PLATFORM and related
index 440f185..b2f4da5 100644 (file)
@@ -19,9 +19,7 @@ option(BUILD_RUNTIME_NNAPI_TEST "Build Runtime NN API Generated Test" ON)
 option(BUILD_RUNTIME_NNFW_API_TEST "Build Runtime NNFW API Tests" ON)
 option(BUILD_TFLITE_RUN "Build tflite-run" ON)
 option(BUILD_TFLITE_VANILLA_RUN "Build tflite-vanilla-run" OFF)
-option(BUILD_TFLITE_BENCHMARK_MODEL "Build tflite benchmark model" OFF)
-option(BUILD_NNAPI_TEST "Build nnapi_test" ON)
-option(BUILD_NNPACKAGE_RUN "Build nnpackge_run" ON)
+option(BUILD_ONERT_RUN "Build onert_run" ON)
 option(BUILD_TFLITE_LOADER "Build TensorFlow Lite loader" ON)
 option(BUILD_CIRCLE_LOADER "Build circle loader" ON)
 option(BUILD_TRIX_LOADER "Build trix loader" ON)
@@ -31,12 +29,11 @@ option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" ON)
 option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" ON)
 option(INSTALL_TEST_SCRIPTS "Install test scripts" ON)
 option(BUILD_GPU_CL "Build gpu_cl backend" OFF)
-option(BUILD_NPUD "Build NPU daemon" ON)
-option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" ON)
+option(BUILD_NPUD "Build NPU daemon" OFF)
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
 #
 # Default build configuration for contrib
 #
-option(BUILD_ANDROID_TFLITE "Enable android support for TensorFlow Lite" OFF)
 option(BUILD_ANDROID_BENCHMARK_APP "Enable Android Benchmark App" OFF)
 option(BUILD_BENCHMARK_ACL "Build ARM Compute Library Benchmarks" OFF)
 option(BUILD_DETECTION_APP "Build detection example app" OFF)
@@ -57,7 +54,6 @@ option(BUILD_TFLITE_ACCURACY "Build tflite accuracy tool" OFF)
 # Default external libraries source download and build configuration
 #
 option(DOWNLOAD_TENSORFLOW "Download Tensorflow source" ON)
-option(DOWNLOAD_TENSORFLOW_GPU "Download Tensorflow GPU delegate source" OFF)
 option(DOWNLOAD_ABSEIL "Download Abseil source" ON)
 option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" OFF)
 option(DOWNLOAD_EIGEN "Download Eigen source" ON)
@@ -74,7 +70,6 @@ option(DOWNLOAD_OOURAFFT "Download Ooura FFT source" ON)
 option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" ON)
 option(BUILD_BOOST "Build boost source" OFF)
 option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" ON)
-option(BUILD_TENSORFLOW_LITE_2_8_0 "Build TensorFlow Lite 2.8.0 from the downloaded source" OFF)
 option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" OFF)
 option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" ON)
 option(DEBUG_ARMCOMPUTE "Build ARM Compute as debug type" OFF)
index e95ccca..242dcd3 100644 (file)
@@ -1,14 +1,16 @@
 # aarch64 android cmake options
 #
-# NOTE BUILD_ANDROID_TFLITE(JNI lib) is disabled due to BuiltinOpResolver issue.
-# tensorflow-lite does not build BuiltinOpResolver but JNI lib need it
-# Related Issue : #1403
-option(BUILD_ANDROID_TFLITE "Enable android support for TensorFlow Lite" OFF)
 option(BUILD_ANDROID_BENCHMARK_APP "Enable Android Benchmark App" ON)
 option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
 # Need boost library
 option(DOWNLOAD_BOOST "Download boost source" ON)
 option(BUILD_BOOST "Build boost source" ON)
 option(BUILD_LOGGING "Build logging runtime" OFF)
-# Do not support npud
-option(BUILD_NPUD "Build NPU daemon" OFF)
+
+option(DOWNLOAD_OPENGL_HEADERS "Download Opengl_headers source" ON)
+option(DOWNLOAD_EGL_HEADERS "Download Egl_headers source" ON)
+option(DOWNLOAD_VULKAN "Download vulkan source" ON)
+option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
+option(DOWNLOAD_PYBIND11 "Download Pybind11 source" ON)
+option(BUILD_GPU_CL "Build gpu_cl backend" ON)
+option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" ON)
index ed6e35e..daaa4c4 100644 (file)
@@ -3,9 +3,14 @@
 #
 option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
 option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
+option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
 option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
 option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
 
 option(BUILD_LOGGING "Build logging runtime" OFF)
 option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
 option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
+
+option(BUILD_NPUD "Build NPU daemon" ON)
+# Do not allow using the CONFIG option on Tizen
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
index aa2d2f8..e28801f 100644 (file)
@@ -3,6 +3,7 @@
 #
 option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
 option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
+option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
 option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
 option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
 
@@ -11,6 +12,13 @@ option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
 option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
 
 option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
-option(DOWNLOAD_TENSORFLOW_GPU "Download Tensorflow GPU delegate source" ON)
+option(DOWNLOAD_OPENGL_HEADERS "Download Opengl_headers source" ON)
+option(DOWNLOAD_EGL_HEADERS "Download Egl_headers source" ON)
+option(DOWNLOAD_VULKAN "Download vulkan source" ON)
+
 option(BUILD_GPU_CL "Build gpu_cl backend" ON)
 option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" ON)
+
+option(BUILD_NPUD "Build NPU daemon" ON)
+# Do not allow using the CONFIG option on Tizen
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
index 325e7cc..c73a2be 100644 (file)
@@ -4,7 +4,9 @@
 option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
 option(BUILD_OPENCL_TOOL "Build OpenCL tool" ON)
 
+option(DOWNLOAD_OPENGL_HEADERS "Download Opengl_headers source" ON)
+option(DOWNLOAD_EGL_HEADERS "Download Egl_headers source" ON)
+option(DOWNLOAD_VULKAN "Download vulkan source" ON)
 option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
-option(DOWNLOAD_TENSORFLOW_GPU "Download Tensorflow GPU delegate source" ON)
 option(BUILD_GPU_CL "Build gpu_cl backend" ON)
 option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" ON)
index 9b487d9..c5f4a2c 100644 (file)
@@ -3,15 +3,22 @@
 #
 option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
 option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
+option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
 option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
 option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
 
 option(BUILD_LOGGING "Build logging runtime" OFF)
 option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
 option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
-option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
 
 option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
-option(DOWNLOAD_TENSORFLOW_GPU "Download Tensorflow GPU delegate source" ON)
+option(DOWNLOAD_OPENGL_HEADERS "Download Opengl_headers source" ON)
+option(DOWNLOAD_EGL_HEADERS "Download Egl_headers source" ON)
+option(DOWNLOAD_VULKAN "Download vulkan source" ON)
+
 option(BUILD_GPU_CL "Build gpu_cl backend" ON)
 option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" ON)
+
+option(BUILD_NPUD "Build NPU daemon" ON)
+# Do not allow using the CONFIG option on Tizen
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
index eea3722..24b4d95 100644 (file)
@@ -12,3 +12,7 @@ option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OF
 
 option(BUILD_XNNPACK "Build XNNPACK" OFF)
 option(DOWNLOAD_OPENCL_HEADERS "Download opencl headers" OFF)
+
+option(BUILD_NPUD "Build NPU daemon" ON)
+# Do not allow using the CONFIG option on Tizen
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
index f6a4efd..acc244a 100644 (file)
@@ -11,14 +11,25 @@ function(_ARMCompute_Import)
 
   message(STATUS "Search acl in ${ARMCompute_LIB_SEARCH_PATHS}")
 
-  if(NOT INCLUDE_DIR)
+  # ARMCompute v21.02 moves some headers into "src/",
+  # and we cannot build the armcompute-ex library without these headers.
+  # So we need to download and use the source code if our build root doesn't have headers in "src/" (tizen's devel package includes these headers).
+  # TODO Don't use headers in "src/"
+  find_path(HEADER_SRC_DIR NAMES src/core/CL/ICLKernel.h PATHS ${ARMCompute_INCLUDE_SEARCH_PATHS})
+  if(NOT INCLUDE_DIR OR NOT HEADER_SRC_DIR)
     nnas_find_package(ARMComputeSource QUIET)
     if (NOT ARMComputeSource_FOUND)
       set(ARMCompute_FOUND FALSE PARENT_SCOPE)
       return()
     endif()
-    set(INCLUDE_DIR ${ARMComputeSource_DIR} ${ARMComputeSource_DIR}/include)
-  endif(NOT INCLUDE_DIR)
+
+    # Clean if INCLUDE_DIR is NOT_FOUND
+    if(NOT INCLUDE_DIR)
+      unset(INCLUDE_DIR)
+    endif(NOT INCLUDE_DIR)
+
+    list(APPEND INCLUDE_DIR ${ARMComputeSource_DIR} ${ARMComputeSource_DIR}/include)
+  endif(NOT INCLUDE_DIR OR NOT HEADER_SRC_DIR)
 
   if(NOT CORE_LIBRARY)
     set(ARMCompute_FOUND FALSE PARENT_SCOPE)
index e71830a..1537882 100644 (file)
@@ -1,5 +1,5 @@
 function(_Eigen_import)
-  nnas_find_package(TensorFlowEigenSource EXACT 2.3.0 QUIET)
+  nnas_find_package(TensorFlowEigenSource EXACT 2.8.0 QUIET)
 
   if(NOT TensorFlowEigenSource_FOUND)
     set(Eigen_FOUND FALSE PARENT_SCOPE)
diff --git a/infra/nnfw/cmake/packages/GObject2.0Config.cmake b/infra/nnfw/cmake/packages/GObject2.0Config.cmake
new file mode 100644 (file)
index 0000000..f1bfb3a
--- /dev/null
@@ -0,0 +1,30 @@
+function(_GOBJECT_2_0_import)
+  nnfw_find_package(GLib2.0 REQUIRED)
+
+  find_library(GOBJECT_LIBRARIES
+    NAMES gobject-2.0)
+
+  # The gobject-2.0 library requires glib-2.0 and accesses its header files
+  # via the glib-2.0 include directory.
+  set(GOBJECT_INCLUDE_DIRS ${GLIB2.0_INCLUDE_DIRS})
+
+  set(GOBJECT_FOUND TRUE)
+
+  if(NOT GOBJECT_LIBRARIES)
+    set(GOBJECT_FOUND FALSE)
+  endif(NOT GOBJECT_LIBRARIES)
+
+  if(NOT GOBJECT_INCLUDE_DIRS)
+    set(GOBJECT_FOUND FALSE)
+  endif(NOT GOBJECT_INCLUDE_DIRS)
+
+  if(NOT GOBJECT_FOUND)
+    message(STATUS "Failed to find gobject-2.0")
+  endif(NOT GOBJECT_FOUND)
+
+  set(GOBJECT2.0_FOUND ${GOBJECT_FOUND} PARENT_SCOPE)
+  set(GOBJECT2.0_INCLUDE_DIRS ${GOBJECT_INCLUDE_DIRS} PARENT_SCOPE)
+  set(GOBJECT2.0_LIBRARIES ${GOBJECT_LIBRARIES} PARENT_SCOPE)
+endfunction(_GOBJECT_2_0_import)
+
+_GOBJECT_2_0_import()
index ab2b39e..d0f7b18 100644 (file)
@@ -44,7 +44,7 @@ if(${GTEST_FOUND} AND TARGET Threads::Threads)
       add_library(gmock INTERFACE)
       target_include_directories(gmock INTERFACE ${GMOCK_INCLUDE_DIR})
       target_link_libraries(gmock INTERFACE ${GMOCK_LIBRARIES} Threads::Threads)
-    endif(GMOCK_LIBRARIES)
+    endif(GMOCK_LIBRARIES AND GMOCK_INCLUDE_DIR)
   endif(NOT TARGET gmock)
 
   if(NOT TARGET gmock_main)
diff --git a/infra/nnfw/cmake/packages/Gio2.0Config.cmake b/infra/nnfw/cmake/packages/Gio2.0Config.cmake
new file mode 100644 (file)
index 0000000..26d3607
--- /dev/null
@@ -0,0 +1,32 @@
+function(_GIO_2_0_import)
+  nnfw_find_package(GLib2.0 REQUIRED)
+  nnfw_find_package(GObject2.0 REQUIRED)
+
+  find_library(GIO_LIBRARIES
+    NAMES gio-2.0)
+
+  # The gio-2.0 library requires glib-2.0 and accesses its header files
+  # via the glib-2.0 include directory.
+  set(GIO_INCLUDE_DIRS ${GLIB2.0_INCLUDE_DIRS} ${GOBJECT2.0_INCLUDE_DIRS})
+  set(GIO_LIBRARIES ${GIO_LIBRARIES} ${GOBJECT2.0_LIBRARIES})
+
+  set(GIO_FOUND TRUE)
+
+  if(NOT GIO_LIBRARIES)
+    set(GIO_FOUND FALSE)
+  endif(NOT GIO_LIBRARIES)
+
+  if(NOT GIO_INCLUDE_DIRS)
+    set(GIO_FOUND FALSE)
+  endif(NOT GIO_INCLUDE_DIRS)
+
+  if(NOT GIO_FOUND)
+    message(STATUS "Failed to find gio-2.0")
+  endif(NOT GIO_FOUND)
+
+  set(GIO2.0_FOUND ${GIO_FOUND} PARENT_SCOPE)
+  set(GIO2.0_INCLUDE_DIRS ${GIO_INCLUDE_DIRS} PARENT_SCOPE)
+  set(GIO2.0_LIBRARIES ${GIO_LIBRARIES} PARENT_SCOPE)
+endfunction(_GIO_2_0_import)
+
+_GIO_2_0_import()
diff --git a/infra/nnfw/cmake/packages/Giounix2.0Config.cmake b/infra/nnfw/cmake/packages/Giounix2.0Config.cmake
new file mode 100644 (file)
index 0000000..05f1874
--- /dev/null
@@ -0,0 +1,30 @@
+function(_GIO_UNIX_2_0_import)
+  nnfw_find_package(Gio2.0 REQUIRED)
+
+  find_path(GIO_UNIX_INCLUDE_DIR
+    NAMES gio/gunixfdlist.h
+    PATH_SUFFIXES gio-unix-2.0)
+
+  # The gio-unix-2.0 library requires gio-2.0 and links against the gio-2.0 library.
+  set(GIO_UNIX_LIBRARIES ${GIO2.0_LIBRARIES})
+
+  set(GIO_UNIX_FOUND TRUE)
+
+  if(NOT GIO_UNIX_LIBRARIES)
+    set(GIO_UNIX_FOUND FALSE)
+  endif(NOT GIO_UNIX_LIBRARIES)
+
+  if(NOT GIO_UNIX_INCLUDE_DIR)
+    set(GIO_UNIX_FOUND FALSE)
+  endif(NOT GIO_UNIX_INCLUDE_DIR)
+
+  if(NOT GIO_UNIX_FOUND)
+    message(STATUS "Failed to find gio-unix-2.0")
+  endif(NOT GIO_UNIX_FOUND)
+
+  set(GIO_UNIX_2.0_FOUND ${GIO_UNIX_FOUND} PARENT_SCOPE)
+  set(GIO_UNIX_2.0_INCLUDE_DIRS ${GIO_UNIX_INCLUDE_DIR} PARENT_SCOPE)
+  set(GIO_UNIX_2.0_LIBRARIES ${GIO_UNIX_LIBRARIES} PARENT_SCOPE)
+endfunction(_GIO_UNIX_2_0_import)
+
+_GIO_UNIX_2_0_import()
diff --git a/infra/nnfw/cmake/packages/TensorFlowEigen-1.13.1/TensorFlowEigenConfig.cmake b/infra/nnfw/cmake/packages/TensorFlowEigen-1.13.1/TensorFlowEigenConfig.cmake
deleted file mode 100644 (file)
index 253b290..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-function(_Eigen_import)
-  nnas_find_package(EigenSource QUIET)
-
-  if(NOT EigenSource_FOUND)
-    set(TensorFlowEigen_1_13_1_FOUND FALSE PARENT_SCOPE)
-    return()
-  endif(NOT EigenSource_FOUND)
-
-  if(NOT TARGET eigen-tf-1.13.1)
-    add_library(eigen-tf-1.13.1 INTERFACE)
-    target_include_directories(eigen-tf-1.13.1 SYSTEM INTERFACE "${EigenSource_DIR}")
-    # Add EIGEN_MPL2_ONLY to remove license issue posibility
-    target_compile_definitions(eigen-tf-1.13.1 INTERFACE EIGEN_MPL2_ONLY)
-  endif(NOT TARGET eigen-tf-1.13.1)
-
-  set(TensorFlowEigen_1_13_1_FOUND TRUE PARENT_SCOPE)
-endfunction(_Eigen_import)
-
-_Eigen_import()
diff --git a/infra/nnfw/cmake/packages/TensorFlowEigen-1.13.1/TensorFlowEigenConfigVersion.cmake b/infra/nnfw/cmake/packages/TensorFlowEigen-1.13.1/TensorFlowEigenConfigVersion.cmake
deleted file mode 100644 (file)
index ed79ecd..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-set(PACKAGE_VERSION "1.13.1")
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
-  set(PACKAGE_VERSION_EXACT TRUE)
-  set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/nnfw/cmake/packages/TensorFlowGpuConfig.cmake b/infra/nnfw/cmake/packages/TensorFlowGpuConfig.cmake
new file mode 100644 (file)
index 0000000..5d20dd3
--- /dev/null
@@ -0,0 +1,51 @@
+# TensorFlowGpuConfig.cmake
+macro(return_unless VAR)
+if(NOT ${VAR})
+  message("TensorFlowGpu: ${VAR} NOT TRUE")
+  set(TensorFlowGpu_FOUND FALSE PARENT_SCOPE)
+  return()
+endif(NOT ${VAR})
+endmacro(return_unless)
+
+function(_Build_TfliteGpuDelagate_)
+  nnas_find_package(TensorFlowSource EXACT 2.8.0 QUIET)
+  return_unless(TensorFlowSource_FOUND)
+
+  nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.8.0 QUIET)
+  return_unless(TensorFlowGEMMLowpSource_FOUND)
+
+  nnas_find_package(TensorFlowEigenSource EXACT 2.8.0 QUIET)
+  return_unless(TensorFlowEigenSource_FOUND)
+
+  nnas_find_package(AbseilSource REQUIRED)
+  return_unless(AbseilSource_FOUND)
+
+  nnas_find_package(Farmhash REQUIRED)
+  return_unless(Farmhash_FOUND)
+
+  nnas_find_package(Fp16Source REQUIRED)
+  return_unless(Fp16Source_FOUND)
+
+  nnas_find_package(VulkanSource QUIET)
+  return_unless(VulkanSource_FOUND)
+
+  nnas_find_package(Opengl_HeadersSource QUIET)
+  return_unless(Opengl_HeadersSource_FOUND)
+
+  nnas_find_package(Egl_HeadersSource QUIET)
+  return_unless(Egl_HeadersSource_FOUND)
+
+  if(NOT TARGET TensorFlowGpu)
+    nnas_include(ExternalProjectTools)
+    add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLiteGpu" TensorFlowLiteGpu)
+  endif()
+  set(TensorFlowSource_DIR ${TensorFlowSource_DIR} PARENT_SCOPE)
+  set(TensorFlowGpu_DIR ${TensorFlowGpu_DIR} PARENT_SCOPE)
+endfunction(_Build_TfliteGpuDelagate_)
+
+if(BUILD_TENSORFLOW_LITE_GPU)
+  _Build_TfliteGpuDelagate_()
+  set(TensorFlowGpu_FOUND TRUE PARENT_SCOPE)
+else(BUILD_TENSORFLOW_LITE_GPU)
+  set(TensorFlowGpu_FOUND FALSE PARENT_SCOPE)
+endif(BUILD_TENSORFLOW_LITE_GPU)
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt b/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt
deleted file mode 100644 (file)
index f872b88..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-set(TENSORFLOW_LITE_BASE ${TensorFlowSource_DIR}/tensorflow/lite)
-
-#
-# Tensorflow Lite library
-#
-file(GLOB TFLITE_CORE_SRCS "${TENSORFLOW_LITE_BASE}/*.c" "${TENSORFLOW_LITE_BASE}/*.cc" "${TENSORFLOW_LITE_BASE}/core/*.cc")
-file(GLOB TFLITE_CORE_TESTS "${TENSORFLOW_LITE_BASE}/*test*.cc")
-list(REMOVE_ITEM TFLITE_CORE_SRCS ${TFLITE_CORE_TESTS})
-
-file(GLOB_RECURSE TFLITE_KERNEL_SRCS "${TENSORFLOW_LITE_BASE}/kernels/*.cc")
-file(GLOB_RECURSE TFLITE_KERNEL_TESTS "${TENSORFLOW_LITE_BASE}/kernels/*test*.cc")
-list(REMOVE_ITEM TFLITE_KERNEL_SRCS ${TFLITE_KERNEL_TESTS})
-
-file(GLOB TFLITE_LIB_SRCS "${TENSORFLOW_LITE_BASE}/c/*.c" "${TENSORFLOW_LITE_BASE}/c/*.cc")
-file(GLOB TFLITE_LIB_TESTS "${TENSORFLOW_LITE_BASE}/c/*test*.cc")
-list(REMOVE_ITEM TFLITE_LIB_SRCS ${TFLITE_LIB_TESTS})
-
-file(GLOB TFLITE_API_SRCS "${TENSORFLOW_LITE_BASE}/core/api/*.c" "${TENSORFLOW_LITE_BASE}/core/api/*.cc")
-file(GLOB TFLITE_API_TESTS "${TENSORFLOW_LITE_BASE}/core/api/*test*.cc")
-list(REMOVE_ITEM TFLITE_API_SRCS ${TFLITE_API_TESTS})
-
-file(GLOB TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/*.cc")
-file(GLOB TFLITE_PROFILING_TESTS "${TENSORFLOW_LITE_BASE}/profiling/*test*.cc")
-list(REMOVE_ITEM TFLITE_PROFILING_SRCS ${TFLITE_PROFILING_TESTS})
-
-# We will use our own summarizer
-list(REMOVE_ITEM TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/profile_summarizer.cc")
-list(APPEND TFLITE_SRCS ${TFLITE_CORE_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_KERNEL_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_LIB_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_API_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_PROFILING_SRCS})
-
-list(APPEND TFLITE_SRCS "${FarmhashSource_DIR}/src/farmhash.cc")
-
-# externals for spectrogram
-list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg.c")
-list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg2d.c")
-
-list(APPEND TFLITE_INCLUDES "${TensorFlowSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${AbseilSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${GEMMLowpSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${FarmhashSource_DIR}/src")
-
-if(NEON2SSESource_FOUND)
-  list(APPEND TFLITE_INCLUDES "${NEON2SSESource_DIR}")
-endif(NEON2SSESource_FOUND)
-
-add_library(tensorflow-lite STATIC ${TFLITE_SRCS})
-target_include_directories(tensorflow-lite SYSTEM PUBLIC ${TFLITE_INCLUDES})
-target_compile_definitions(tensorflow-lite PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK")
-set_property(TARGET tensorflow-lite PROPERTY POSITION_INDEPENDENT_CODE ON)
-target_link_libraries(tensorflow-lite eigen-tf-1.13.1 flatbuffers::flatbuffers ${LIB_PTHREAD} dl)
-
-# Define TF_LITE_DISABLE_X86_NEON for debug build
-# If we upgrade NEON2SSE version, we can remove below line
-if(NEON2SSESource_FOUND)
-  target_compile_definitions(tensorflow-lite PRIVATE $<$<CONFIG:Debug>:TF_LITE_DISABLE_X86_NEON>)
-endif(NEON2SSESource_FOUND)
-
-if(ANDROID)
-  target_link_libraries(tensorflow-lite log)
-  target_include_directories(tensorflow-lite PUBLIC "${NDK_DIR}/..")
-endif()
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfig.cmake b/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfig.cmake
deleted file mode 100644 (file)
index b03d593..0000000
+++ /dev/null
@@ -1,79 +0,0 @@
-# NOTE This line prevents multiple definitions of tensorflow-lite target
-if(TARGET tensorflow-lite)
-  set(TensorFlowLite_FOUND TRUE)
-  return()
-endif(TARGET tensorflow-lite)
-
-if(BUILD_TENSORFLOW_LITE)
-  macro(return_unless VAR)
-    if(NOT ${VAR})
-      set(TensorFlowLite_FOUND PARENT_SCOPE)
-      return()
-    endif(NOT ${VAR})
-  endmacro(return_unless)
-
-  # Required packages
-  nnas_find_package(Abseil QUIET)
-  return_unless(Abseil_FOUND)
-  nnfw_find_package(TensorFlowEigen EXACT 1.13.1 QUIET)
-  return_unless(TensorFlowEigen_1_13_1_FOUND)
-  nnas_find_package(FarmhashSource QUIET)
-  return_unless(FarmhashSource_FOUND)
-  nnfw_find_package(FlatBuffers QUIET)
-  return_unless(FlatBuffers_FOUND)
-  nnas_find_package(GEMMLowpSource QUIET)
-  return_unless(GEMMLowpSource_FOUND)
-  nnas_find_package(TensorFlowSource EXACT 1.13.1 QUIET)
-  return_unless(TensorFlowSource_FOUND)
-  nnas_find_package(OouraFFTSource QUIET)
-  return_unless(OouraFFTSource_FOUND)
-
-  # Optional packages
-  nnas_find_package(NEON2SSESource QUIET)
-
-  nnas_include(ExternalProjectTools)
-  add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite" tflite)
-
-  set(TensorFlowLite_FOUND TRUE)
-  return()
-endif(BUILD_TENSORFLOW_LITE)
-
-# Use pre-built TensorFlow Lite
-find_path(TFLITE_INCLUDE_DIR NAMES  tensorflow/lite/interpreter.h)
-find_library(TFLITE_LIB NAMES       tensorflow-lite)
-
-if(NOT TFLITE_INCLUDE_DIR)
-  # Tizen install TensorFlow Lite 1.13.1 headers in /usr/include/tensorflow1
-  find_path(TFLITE_INCLUDE_DIR NAMES tensorflow/lite/interpreter.h PATHS "/usr/include/tensorflow1")
-  if(NOT TFLITE_INCLUDE_DIR)
-    set(TensorFlowLite_FOUND FALSE)
-    return()
-  endif(NOT TFLITE_INCLUDE_DIR)
-endif(NOT TFLITE_INCLUDE_DIR)
-
-if(NOT TFLITE_LIB)
-  set(TensorFlowLite_FOUND FALSE)
-  return()
-endif(NOT TFLITE_LIB)
-
-message(STATUS "Found TensorFlow Lite: TRUE (include: ${TFLITE_INCLUDE_DIR}, lib: ${TFLITE_LIB}")
-
-# TODO Use IMPORTED target
-add_library(tensorflow-lite INTERFACE)
-target_include_directories(tensorflow-lite SYSTEM INTERFACE ${TFLITE_INCLUDE_DIR})
-target_link_libraries(tensorflow-lite INTERFACE ${TFLITE_LIB})
-find_package(Flatbuffers)
-if(Flatbuffers_FOUND)
-  target_link_libraries(tensorflow-lite INTERFACE flatbuffers::flatbuffers)
-endif(Flatbuffers_FOUND)
-
-# Prefer -pthread to -lpthread
-set(THREADS_PREFER_PTHREAD_FLAG TRUE)
-set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
-find_package(Threads QUIET)
-
-if(Threads_FOUND)
-  target_link_libraries(tensorflow-lite INTERFACE ${CMAKE_THREAD_LIBS_INIT})
-endif(Threads_FOUND)
-
-set(TensorFlowLite_FOUND TRUE)
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfigVersion.cmake b/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfigVersion.cmake
deleted file mode 100644 (file)
index ed79ecd..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-set(PACKAGE_VERSION "1.13.1")
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
-  set(PACKAGE_VERSION_EXACT TRUE)
-  set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
index d7e1d06..cbc10d2 100644 (file)
-# Reference: https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/tools/make/Makefile
+# Reference: https://github.com/tensorflow/tensorflow/blob/v2.8.0/tensorflow/lite/CMakeLists.txt
 #
-# Tensorflow Lite library 2.3.0
+# Tensorflow Lite library 2.8.0
 #
-set(TENSORFLOW_LITE_BASE ${TensorFlowSource_DIR}/tensorflow/lite)
-
-file(GLOB TFLITE_CORE_SRCS "${TENSORFLOW_LITE_BASE}/*.c"
-     "${TENSORFLOW_LITE_BASE}/*.cc"
-     "${TENSORFLOW_LITE_BASE}/core/*.cc")
-
-file(GLOB_RECURSE TFLITE_KERNEL_SRCS "${TENSORFLOW_LITE_BASE}/kernels/*.cc")
-
-file(GLOB TFLITE_LIB_SRCS "${TENSORFLOW_LITE_BASE}/c/*.c" "${TENSORFLOW_LITE_BASE}/c/*.cc")
-
-file(GLOB TFLITE_API_SRCS "${TENSORFLOW_LITE_BASE}/core/api/*.c"
-     "${TENSORFLOW_LITE_BASE}/core/api/*.cc")
-
-list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/memory_info.cc")
-list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/time.cc")
-list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/platform_profiler.cc")
-
-file(GLOB TFLITE_EXPERIMENTAL_SRCS "${TENSORFLOW_LITE_BASE}/experimental/resource/*.cc")
-
-file(GLOB TFLITE_SCHEMA_UTIL_SRCS "${TENSORFLOW_LITE_BASE}/schema/*.cc")
-
-# Moved to kerenls/internal/utils
-#file(GLOB TFLITE_SPARSITY_SRCS "${TENSORFLOW_LITE_BASE}/tools/optimize/sparsity/*.cc")
-
-list(APPEND TFLITE_SRCS ${TFLITE_CORE_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_KERNEL_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_LIB_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_API_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_PROFILING_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_EXPERIMENTAL_SRCS})
-#list(APPEND TFLITE_SRCS ${TFLITE_SPARSITY_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_SCHEMA_UTIL_SRCS})
-
-# externals
-list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg.c")
-list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg2d.c")
-
-# Build with mmap? true
-# caution: v2.3.0's Makefile has wrong code on this part. This is fixed on master branch.
-set(BUILD_WITH_MMAP TRUE)
-if(${BUILD_WITH_MMAP})
-  list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation_disabled.cc")
-else()
-  list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation.cc")
+set(TFLITE_SOURCE_DIR ${TensorFlowSource_DIR}/tensorflow/lite)
+
+# Generate TensorFlow Lite FlatBuffer code.
+# We used to have actual compilation logic with flatc but decided to use
+# schema_generated.h since flatc doesn't work with cross compilation.
+set(TFLITE_FLATBUFFERS_SCHEMA_DIR "${TFLITE_SOURCE_DIR}/schema")
+
+macro(populate_source_vars SOURCE_DIR SOURCES_VAR)
+  cmake_parse_arguments(ARGS "RECURSE" "" "FILTER" ${ARGN})
+  if(ARGS_RECURSE)
+    set(GLOB_OP GLOB_RECURSE)
+  else()
+    set(GLOB_OP GLOB)
+  endif()
+  set(DEFAULT_FILE_FILTER ".*(_test|test_util)\\.(c|cc|h)$")
+  file(${GLOB_OP} FOUND_SOURCES "${SOURCE_DIR}/*.*")
+  list(FILTER FOUND_SOURCES INCLUDE REGEX ".*\\.(c|cc|h)$")
+  list(FILTER FOUND_SOURCES EXCLUDE REGEX "${DEFAULT_FILE_FILTER}")
+  foreach(FILE_FILTER ${ARGS_FILTER})
+    list(FILTER FOUND_SOURCES EXCLUDE REGEX "${FILE_FILTER}")
+  endforeach()
+  list(APPEND ${SOURCES_VAR} ${FOUND_SOURCES})
+endmacro()
+# Simplifies inclusion of non-test sources and headers from a directory
+# relative to TFLITE_SOURCE_DIR. See populate_source_vars() for the
+# description of arguments including and following SOURCES_VAR.
+macro(populate_tflite_source_vars RELATIVE_DIR SOURCES_VAR)
+  populate_source_vars(
+    "${TFLITE_SOURCE_DIR}/${RELATIVE_DIR}" ${SOURCES_VAR} ${ARGN}
+  )
+endmacro()
+
+# Build a list of source files to compile into the TF Lite library.
+populate_tflite_source_vars("." TFLITE_SRCS)
+
+# This particular file is excluded because the more explicit approach to enable
+# the XNNPACK delegate is preferred to the weak-symbol one.
+list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*tflite_with_xnnpack\\.cc$")
+
+# Exclude Flex related files.
+list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*with_selected_ops\\.cc$")
+
+# Use MMAP
+list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*mmap_allocation_disabled\\.cc$")
+
+if(NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
+  list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*minimal_logging_android\\.cc$")
 endif()
-
-# Build with nnapi? true
-# caution: this nnapi delegate comes from tflite, not ours.
-set(BUILD_WITH_NNAPI TRUE)
-if(${BUILD_WITH_NNAPI})
-  list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate.cc")
-  list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/quant_lstm_sup.cc")
-  list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/utils.cc")
-  list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/serialization.cc")
-  list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation.cc")
-  list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_util.cc")
-else()
-  list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate_disabled.cc")
-  list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation_disabled.cc")
+if(NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "iOS")
+  list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*minimal_logging_ios\\.cc$")
 endif()
 
-# ios: we don't support ios
-list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_ios.cc")
-
-# android
-if(NOT ANDROID)
-  list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_android.cc")
+populate_tflite_source_vars("core" TFLITE_CORE_SRCS)
+populate_tflite_source_vars("core/api" TFLITE_CORE_API_SRCS)
+populate_tflite_source_vars("c" TFLITE_C_SRCS)
+populate_tflite_source_vars("delegates" TFLITE_DELEGATES_SRCS)
+
+# Enable NNAPI
+populate_tflite_source_vars("delegates/nnapi"
+TFLITE_DELEGATES_NNAPI_SRCS
+FILTER "(_test_list|_disabled)\\.(cc|h)$"
+)
+populate_tflite_source_vars(
+"nnapi" TFLITE_NNAPI_SRCS FILTER "(_disabled)\\.(cc|h)$"
+)
+
+# Disable XNNPack
+
+# Enable experimental support for resource (needed for build success)
+populate_tflite_source_vars("experimental/resource"
+TFLITE_EXPERIMENTAL_RESOURCE_SRCS
+)
+
+# Enable Ruy
+populate_tflite_source_vars("experimental/ruy"
+  TFLITE_EXPERIMENTAL_RUY_SRCS
+  FILTER
+  ".*(test(_fast|_slow|_special_specs))\\.(cc|h)$"
+  ".*(benchmark|tune_tool|example)\\.(cc|h)$"
+)
+populate_tflite_source_vars("experimental/ruy/profiler"
+  TFLITE_EXPERIMENTAL_RUY_PROFILER_SRCS
+  FILTER ".*(test|test_instrumented_library)\\.(cc|h)$"
+)
+list(APPEND TFLITE_TARGET_PUBLIC_OPTIONS "-DTFLITE_WITH_RUY")
+
+populate_tflite_source_vars("kernels"
+  TFLITE_KERNEL_SRCS
+  FILTER "(.*_test_util_internal|test_.*|.*_ops_wrapper)\\.(cc|h)"
+)
+populate_tflite_source_vars("kernels/internal" TFLITE_KERNEL_INTERNAL_SRCS)
+populate_tflite_source_vars("kernels/internal/optimized"
+  TFLITE_KERNEL_INTERNAL_OPT_SRCS
+)
+populate_tflite_source_vars("kernels/internal/optimized/integer_ops"
+  TFLITE_KERNEL_INTERNAL_OPT_INTEGER_OPS_SRCS
+)
+populate_tflite_source_vars("kernels/internal/optimized/sparse_ops"
+  TFLITE_KERNEL_INTERNAL_OPT_SPARSE_OPS_SRCS
+)
+populate_tflite_source_vars("kernels/internal/reference"
+  TFLITE_KERNEL_INTERNAL_REF_SRCS
+)
+populate_tflite_source_vars("kernels/internal/reference/integer_ops"
+  TFLITE_KERNEL_INTERNAL_REF_INTEGER_OPS_SRCS
+)
+populate_tflite_source_vars("kernels/internal/reference/sparse_ops"
+  TFLITE_KERNEL_INTERNAL_REF_SPARSE_OPS_SRCS
+)
+set(TFLITE_PROFILER_SRCS ${TFLITE_SOURCE_DIR}/profiling/platform_profiler.cc)
+if(CMAKE_SYSTEM_NAME MATCHES "Android")
+  list(APPEND TFLITE_PROFILER_SRCS
+    ${TFLITE_SOURCE_DIR}/profiling/atrace_profiler.cc
+  )
 endif()
 
-# exclude some source files
-file(GLOB_RECURSE TFLITE_EXCLS "${TENSORFLOW_LITE_BASE}/*test*.cc"
-     "${TENSORFLOW_LITE_BASE}/*benchmark*.cc"
-     "${TENSORFLOW_LITE_BASE}/*example*.cc"
-     "${TENSORFLOW_LITE_BASE}/*tool*.cc")
-list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_EXCLS})
-
-# exclude some kernels (requires python3-dev package)
-# TODO Enable these kernels by installing package on build system
-file(GLOB_RECURSE TFLITE_KERNEL_EXCLS "${TENSORFLOW_LITE_BASE}/kernels/variable_ops_wrapper.cc"
-     "${TENSORFLOW_LITE_BASE}/kernels/gradient/*.cc"
-     "${TENSORFLOW_LITE_BASE}/kernels/perception/*.cc")
-list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_KERNEL_EXCLS})
-
-# exclude kernel shim
-file(GLOB_RECURSE TFLITE_SHIM_EXCLS "${TENSORFLOW_LITE_BASE}/kernels/shim/*.cc")
-list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_SHIM_EXCLS})
+# Common include directories
+set(TFLITE_INCLUDE_DIRS
+  "${TENSORFLOW_SOURCE_DIR}"
+  "${TFLITE_FLATBUFFERS_SCHEMA_DIR}"
+)
 
 # include headers
-list(APPEND TFLITE_INCLUDES "${TensorFlowSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TensorFlowGEMMLowpSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${Fp16Source_DIR}/include")
-#list(APPEND TFLITE_INCLUDES "${Pybind11Source_DIR}/include")
+list(APPEND TFLITE_INCLUDE_DIRS "${TensorFlowSource_DIR}")
+list(APPEND TFLITE_INCLUDE_DIRS "${TensorFlowGEMMLowpSource_DIR}")
+list(APPEND TFLITE_INCLUDE_DIRS "${Fp16Source_DIR}/include")
+#list(APPEND TFLITE_INCLUDE_DIRS "${Pybind11Source_DIR}/include")
+list(APPEND TFLITE_INCLUDE_DIRS "${CpuInfoSource_DIR}")
 
 if(NEON2SSESource_FOUND)
-  list(APPEND TFLITE_INCLUDES "${NEON2SSESource_DIR}")
+  list(APPEND TFLITE_INCLUDE_DIRS "${NEON2SSESource_DIR}")
 endif(NEON2SSESource_FOUND)
 
-add_library(tensorflow-lite-2.8.0 STATIC ${TFLITE_SRCS})
-target_include_directories(tensorflow-lite-2.8.0 SYSTEM PUBLIC ${TFLITE_INCLUDES})
-target_include_directories(tensorflow-lite-2.8.0 PRIVATE ${CpuInfoSource_DIR})
-target_compile_definitions(tensorflow-lite-2.8.0 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -DTFLITE_WITH_RUY -DTFLITE_WITH_RUY_GEMV -DRUY_HAVE_CPUINFO")
+# TFLite library
+add_library(tensorflow-lite-2.8.0 STATIC
+  ${TFLITE_CORE_API_SRCS}
+  ${TFLITE_CORE_SRCS}
+  ${TFLITE_C_SRCS}
+  ${TFLITE_DELEGATES_NNAPI_SRCS}
+  ${TFLITE_DELEGATES_SRCS}
+  ${TFLITE_EXPERIMENTAL_RESOURCE_SRCS}
+  ${TFLITE_EXPERIMENTAL_RUY_PROFILER_SRCS}
+  ${TFLITE_EXPERIMENTAL_RUY_SRCS}
+  ${TFLITE_KERNEL_INTERNAL_OPT_INTEGER_OPS_SRCS}
+  ${TFLITE_KERNEL_INTERNAL_OPT_SPARSE_OPS_SRCS}
+  ${TFLITE_KERNEL_INTERNAL_OPT_SRCS}
+  ${TFLITE_KERNEL_INTERNAL_REF_INTEGER_OPS_SRCS}
+  ${TFLITE_KERNEL_INTERNAL_REF_SPARSE_OPS_SRCS}
+  ${TFLITE_KERNEL_INTERNAL_REF_SRCS}
+  ${TFLITE_KERNEL_INTERNAL_SRCS}
+  ${TFLITE_KERNEL_SRCS}
+  ${TFLITE_NNAPI_SRCS}
+  ${TFLITE_SRCS}
+  ${TFLITE_PROFILER_SRCS}
+  ${TFLITE_SOURCE_DIR}/kernels/internal/utils/sparsity_format_converter.cc
+  ${TFLITE_SOURCE_DIR}/schema/schema_utils.cc
+  ${OouraFFTSource_DIR}/fftsg.c
+  ${OouraFFTSource_DIR}/fftsg2d.c
+)
+target_include_directories(tensorflow-lite-2.8.0
+  SYSTEM PUBLIC
+    ${TFLITE_INCLUDE_DIRS}
+)
+
+target_compile_definitions(tensorflow-lite-2.8.0 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -DTFLITE_WITH_RUY -DTFLITE_WITH_RUY_GEMV -DRUY_HAVE_CPUINFO -DNNAPI_VERBOSE_VALIDATION")
 set_property(TARGET tensorflow-lite-2.8.0 PROPERTY POSITION_INDEPENDENT_CODE ON)
 target_link_libraries(tensorflow-lite-2.8.0 eigen flatbuffers::flatbuffers ruy abseil farmhash ${LIB_PTHREAD} dl)
-if(NOT ANDROID AND ${BUILD_WITH_NNAPI})
+if(NOT ANDROID)
   target_link_libraries(tensorflow-lite-2.8.0 rt)
 endif()
 
@@ -117,5 +181,5 @@ endif(NEON2SSESource_FOUND)
 
 if(ANDROID)
   target_link_libraries(tensorflow-lite-2.8.0 log)
-  target_include_directories(tensorflow-lite-2.8.0 PUBLIC "${NDK_DIR}/..")
+  #target_include_directories(tensorflow-lite-2.8.0 PUBLIC "${NDK_DIR}/..")
 endif()
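
For orientation, here is a minimal sketch of how a consumer target could use the static library defined above; the executable name and source file are hypothetical. The PUBLIC include directories and compile definitions declared on tensorflow-lite-2.8.0 are inherited through the link.

    # Hypothetical consumer; tflite_runner.cc is a placeholder source file.
    add_executable(tflite_runner tflite_runner.cc)
    # PUBLIC include dirs and definitions (TFLITE_WITH_RUY, RUY_HAVE_CPUINFO, ...)
    # propagate automatically through this link.
    target_link_libraries(tflite_runner PRIVATE tensorflow-lite-2.8.0)
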
index 1c80618..60f7f54 100644 (file)
@@ -1,8 +1,14 @@
-if(BUILD_TENSORFLOW_LITE_2_8_0)
+# NOTE This line prevents multiple definitions of tensorflow-lite target
+if(TARGET tensorflow-lite-2.8.0)
+  set(TensorFlowLite_FOUND TRUE)
+  return()
+endif(TARGET tensorflow-lite-2.8.0)
+
+if(BUILD_TENSORFLOW_LITE)
   macro(return_unless VAR)
   if(NOT ${VAR})
     message("TFLite 2.8: ${VAR} NOT TRUE")
-    set(TensorFlowLite_2_8_0_FOUND FALSE PARENT_SCOPE)
+    set(TensorFlowLite_FOUND FALSE)
     return()
   endif(NOT ${VAR})
   endmacro(return_unless)
@@ -13,9 +19,9 @@ if(BUILD_TENSORFLOW_LITE_2_8_0)
   nnas_find_package(TensorFlowSource EXACT 2.8.0 QUIET)
   return_unless(TensorFlowSource_FOUND)
 
-  # Below urls come from https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/tensorflow/workspace.bzl
-  nnas_find_package(AbseilSource QUIET)
-  return_unless(AbseilSource_FOUND)
+  # Below urls come from https://github.com/tensorflow/tensorflow/blob/v2.8.0/tensorflow/workspace2.bzl
+  nnas_find_package(Abseil QUIET)
+  return_unless(Abseil_FOUND)
   nnfw_find_package(Eigen QUIET)
   return_unless(Eigen_FOUND)
   nnas_find_package(Farmhash QUIET)
@@ -45,6 +51,46 @@ if(BUILD_TENSORFLOW_LITE_2_8_0)
   nnas_include(ExternalProjectTools)
   add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite" tflite-2.8.0)
 
-  set(TensorFlowLite_2_8_0_FOUND TRUE)
+  set(TensorFlowLite_FOUND TRUE)
   return()
 endif()
+
+# Use pre-built TensorFlow Lite
+find_path(TFLITE_INCLUDE_DIR NAMES  tensorflow/lite/c/c_api.h)
+find_library(TFLITE_LIB NAMES       tensorflow2-lite)
+
+if(NOT TFLITE_INCLUDE_DIR)
+  # Tizen installs TensorFlow Lite 2.8 headers in /usr/include/tensorflow2
+  find_path(TFLITE_INCLUDE_DIR NAMES tensorflow/lite/c/c_api.h PATHS "/usr/include/tensorflow2")
+  if(NOT TFLITE_INCLUDE_DIR)
+    set(TensorFlowLite_FOUND FALSE)
+    return()
+  endif(NOT TFLITE_INCLUDE_DIR)
+endif(NOT TFLITE_INCLUDE_DIR)
+
+if(NOT TFLITE_LIB)
+  set(TensorFlowLite_FOUND FALSE)
+  return()
+endif(NOT TFLITE_LIB)
+
+message(STATUS "Found TensorFlow Lite: TRUE (include: ${TFLITE_INCLUDE_DIR}, lib: ${TFLITE_LIB}")
+
+# TODO Use IMPORTED target
+add_library(tensorflow-lite-2.8.0 INTERFACE)
+target_include_directories(tensorflow-lite-2.8.0 SYSTEM INTERFACE ${TFLITE_INCLUDE_DIR})
+target_link_libraries(tensorflow-lite-2.8.0 INTERFACE ${TFLITE_LIB})
+find_package(Flatbuffers)
+if(Flatbuffers_FOUND)
+  target_link_libraries(tensorflow-lite-2.8.0 INTERFACE flatbuffers::flatbuffers)
+endif(Flatbuffers_FOUND)
+
+# Prefer -pthread to -lpthread
+set(THREADS_PREFER_PTHREAD_FLAG TRUE)
+set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
+find_package(Threads QUIET)
+
+if(Threads_FOUND)
+  target_link_libraries(tensorflow-lite-2.8.0 INTERFACE ${CMAKE_THREAD_LIBS_INIT})
+endif(Threads_FOUND)
+
+set(TensorFlowLite_FOUND TRUE)
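
As a side note, the if(TARGET ...) guard introduced at the top of this config file follows a common CMake idiom for package scripts that may be evaluated more than once in a single configure run; a minimal sketch of the pattern, with a placeholder library and package name:

    # Placeholder package config (MyLibConfig.cmake, hypothetical).
    if(TARGET my-lib)
      # Target already defined by an earlier find_package() call; just report success.
      set(MyLib_FOUND TRUE)
      return()
    endif()

    add_library(my-lib STATIC ${MY_LIB_SRCS})
    set(MyLib_FOUND TRUE)
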
diff --git a/infra/nnfw/cmake/packages/TensorFlowLiteGpu/CMakeLists.txt b/infra/nnfw/cmake/packages/TensorFlowLiteGpu/CMakeLists.txt
new file mode 100644 (file)
index 0000000..73264d1
--- /dev/null
@@ -0,0 +1,73 @@
+#
+# Tensorflow Lite GPU delegate library 2.8.0
+#
+
+set(LIB_TENSORFLOW_GPU_DELEGATE "TensorFlowGpu")
+
+#TENSORFLOWGPU_SOURCE_DIR
+set(TENSORFLOWSOURCE_DIR ${TensorFlowSource_DIR})
+set(TENSORFLOW_LITE_BASE ${TENSORFLOWSOURCE_DIR}/tensorflow/lite)
+set(REF_TENSORFLOW_LITE_GPU_DELEGATE_SRC_BASE "${TENSORFLOW_LITE_BASE}/delegates/gpu")
+
+set(SRC_BASE "${REF_TENSORFLOW_LITE_GPU_DELEGATE_SRC_BASE}")
+file(GLOB GPU_CL_SRC_LIST   "${SRC_BASE}/cl/*.cc"
+                                    "${SRC_BASE}/cl/kernels/*.cc"
+                                    "${SRC_BASE}/common/*.cc"
+                                    "${SRC_BASE}/common/selectors/*.cc"
+                                    "${SRC_BASE}/common/selectors/default/*.cc"
+                                    "${SRC_BASE}/common/task/*.cc"
+                                    "${SRC_BASE}/common/tasks/*.cc"
+                                    "${SRC_BASE}/common/tasks/special/*.cc"
+                                    "${SRC_BASE}/common/memory_management/*.cc"
+                                    "${SRC_BASE}/common/transformations/*.cc"
+                                     )
+
+file(GLOB REMOVE_TEST_SRCS          "${SRC_BASE}/cl/*_test*.cc"
+                                    "${SRC_BASE}/cl/testing/*.cc"
+                                    "${SRC_BASE}/cl/kernels/*_test*.cc"
+                                    "${SRC_BASE}/common/*_test*.cc"
+                                    "${SRC_BASE}/common/tasks/*_test*.cc"
+                                    "${SRC_BASE}/common/transformations/*_test*.cc"
+                                    )
+# Not available
+file(GLOB REMOVE_SRCS               "${SRC_BASE}/cl/*gl*.cc"
+                                    "${SRC_BASE}/cl/gpu_api_delegate.cc"
+                                    "${SRC_BASE}/cl/serialization.cc"
+                                    "${SRC_BASE}/common/lstm_parser.cc"
+                                    "${SRC_BASE}/common/model_builder.cc"
+                                    "${SRC_BASE}/common/model_builder_helper.cc"
+                                    "${SRC_BASE}/common/object_reader.cc"
+                                    "${SRC_BASE}/common/quantization_util.cc"
+                                    "${SRC_BASE}/common/memory_management/*_test.cc"
+                                    )
+
+list(APPEND GPU_CL_SRC_LIST "${TENSORFLOW_LITE_BASE}/experimental/acceleration/compatibility/android_info.cc")
+
+list(REMOVE_ITEM GPU_CL_SRC_LIST ${REMOVE_TEST_SRCS})
+list(REMOVE_ITEM GPU_CL_SRC_LIST ${REMOVE_SRCS})
+list(APPEND TFLITE_GPU_SRCS ${GPU_CL_SRC_LIST})
+
+add_library(${LIB_TENSORFLOW_GPU_DELEGATE} STATIC ${TFLITE_GPU_SRCS})
+
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${Opencl_Headers_DIR}")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${Fp16Source_DIR}/include")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${TensorFlowSource_DIR}")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${TensorFlowGEMMLowpSource_DIR}")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${TensorFlowEigenSource_DIR}")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${VulkanSource_DIR}/include")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${Opengl_HeadersSource_DIR}/api")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${Egl_HeadersSource_DIR}/api")
+
+target_link_libraries(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE abseil farmhash fp16 flatbuffers)
+
+# GL code is not used in gpu_cl
+target_compile_options(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "-DCL_DELEGATE_NO_GL")
+target_compile_options(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "-DTFLITE_GPU_BINARY_RELEASE" "-DEGL_NO_X11")
+
+# deprecated-copy warning on header (gcc 9.4.0)
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 9.4)
+  target_compile_options(${LIB_TENSORFLOW_GPU_DELEGATE} PUBLIC "-Wno-deprecated-copy")
+endif()
+
+# Apply PIC; currently this library is used by gpu_cl only
+set_target_properties(${LIB_TENSORFLOW_GPU_DELEGATE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
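
For illustration only, a downstream library could consume the delegate target roughly as follows; gpu_cl_backend and backend.cc are hypothetical names. Repeating CL_DELEGATE_NO_GL on the consumer mirrors the private definition above, since that option is not propagated by the target.

    # Hypothetical backend library built against the CL-only GPU delegate.
    add_library(gpu_cl_backend STATIC backend.cc)
    target_link_libraries(gpu_cl_backend PRIVATE TensorFlowGpu)
    # The define is PRIVATE on TensorFlowGpu, so a consumer that includes the
    # delegate headers repeats it to stay GL-free.
    target_compile_definitions(gpu_cl_backend PRIVATE CL_DELEGATE_NO_GL)
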
diff --git a/infra/nnfw/command/prepare-model b/infra/nnfw/command/prepare-model
new file mode 100644 (file)
index 0000000..35600e1
--- /dev/null
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+import "build.configuration"
+
+# This command is used to download test materials in the host environment
+# by running the test command on the host
+
+# Common variables
+DRIVER_PATH=$NNFW_PROJECT_PATH/tests/scripts
+CACHE_PATH=${CACHE_PATH:-$WORKSPACE_PATH/out/test/cache}
+
+COMMAND_FILE=$DRIVER_PATH/command/prepare-model
+if [[ ! -f $COMMAND_FILE ]]; then
+  echo "ERROR: '$COMMAND' is not supported"
+  exit 255
+fi
+
+source $COMMAND_FILE $@
index 2b5994a..23278bb 100644 (file)
@@ -3,11 +3,10 @@
 profile = profile.tizen
 
 [profile.tizen]
-repos = repo.tizen_base,repo.tizen_mobile
-buildroot = /home/GBS-ROOT/
+repos = repo.base, repo.unified
 
-[repo.tizen_mobile]
-url = http://download.tizen.org/snapshots/tizen/unified/latest/repos/standard/packages/
+[repo.unified]
+url = http://download.tizen.org/snapshots/TIZEN/Tizen-7.0/Tizen-7.0-Unified/latest/repos/standard/packages/
 
-[repo.tizen_base]
-url = http://download.tizen.org/snapshots/tizen/base/latest/repos/standard/packages/
+[repo.base]
+url = http://download.tizen.org/snapshots/TIZEN/Tizen-7.0/Tizen-7.0-Base/latest/repos/standard/packages/
diff --git a/infra/onert-micro/CMakeLists.txt b/infra/onert-micro/CMakeLists.txt
new file mode 100644 (file)
index 0000000..21533c1
--- /dev/null
@@ -0,0 +1,61 @@
+cmake_minimum_required(VERSION 3.15)
+
+project(onert-micro)
+
+enable_testing()
+
+set(CMAKE_CXX_STANDARD 14)
+
+set(CMAKE_SKIP_BUILD_RPATH FALSE)
+set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
+set(CMAKE_INSTALL_RPATH "$ORIGIN/../lib:$ORIGIN/")
+set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+if (NOT DEFINED TARGET_ARCH)
+    set(TARGET_ARCH "armv7em")
+endif()
+
+if (NOT DEFINED TARGET_CPU)
+    set(TARGET_CPU "cortex-m7")
+endif()
+
+if (NOT DEFINED TARGET_OS)
+    set(TARGET_OS "generic")
+endif()
+
+include(utils.cmake)
+
+nnas_find_package(GTest QUIET)
+
+option(ENABLE_TEST "Build Tests using Google Test" ${GTest_FOUND})
+
+if(${ENABLE_TEST} AND NOT ${GTest_FOUND})
+    message(FATAL_ERROR "Google Test is required to enable test")
+endif(${ENABLE_TEST} AND NOT ${GTest_FOUND})
+
+option(ENABLE_COVERAGE "Build for coverage test" OFF)
+if(${ENABLE_COVERAGE} AND NOT ${ENABLE_TEST})
+    message(FATAL_ERROR "Test should be enabled to measure test coverage")
+endif(${ENABLE_COVERAGE} AND NOT ${ENABLE_TEST})
+
+if(${ENABLE_TEST})
+    include(CTest)
+endif(${ENABLE_TEST})
+
+###
+### Target
+###
+add_library(onert_micro_common INTERFACE)
+if(ENABLE_STRICT_BUILD)
+    target_compile_options(onert_micro_common INTERFACE -Werror -Wall -Wextra -Wno-reorder)
+endif(ENABLE_STRICT_BUILD)
+
+add_library(onert_micro_coverage INTERFACE)
+if(ENABLE_COVERAGE)
+    target_compile_options(onert_micro_coverage INTERFACE -g -O0 -fprofile-arcs -ftest-coverage)
+    target_link_libraries(onert_micro_coverage INTERFACE gcov)
+endif(ENABLE_COVERAGE)
+
+add_subdirectory("${NNAS_PROJECT_SOURCE_DIR}/onert-micro" "${CMAKE_BINARY_DIR}/onert-micro")
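
A short sketch of how the two INTERFACE targets above are intended to be consumed inside the onert-micro tree; onert_micro_core and core.cpp are hypothetical names used only for illustration.

    # Hypothetical component defined somewhere under onert-micro/.
    add_library(onert_micro_core STATIC core.cpp)
    # Strict warning flags (ENABLE_STRICT_BUILD) and coverage flags plus gcov
    # (ENABLE_COVERAGE) propagate through these INTERFACE links.
    target_link_libraries(onert_micro_core
      PRIVATE onert_micro_common
      PRIVATE onert_micro_coverage)
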
diff --git a/infra/onert-micro/cmake/ApplyCompileFlags.cmake b/infra/onert-micro/cmake/ApplyCompileFlags.cmake
new file mode 100644 (file)
index 0000000..fb99fbd
--- /dev/null
@@ -0,0 +1,35 @@
+#
+# Platform independent compile flag setting
+#
+# flags for build type: debug, release
+set(CMAKE_C_FLAGS_DEBUG     "-O0 -g -DDEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG   "-O0 -g -DDEBUG")
+set(CMAKE_C_FLAGS_RELEASE   "-O3 -DNDEBUG")
+set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
+
+#
+# Platform specific compile flag setting
+#
+if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/buildtool/config/config_${TARGET_PLATFORM}.cmake")
+    include("${CMAKE_CURRENT_LIST_DIR}/buildtool/config/config_${TARGET_PLATFORM}.cmake")
+endif()
+
+#
+# Apply compile flags
+# note: this should be placed after cmake/buildtool/config/config_xxx.cmake files
+#
+# add common flags
+foreach(FLAG ${FLAGS_COMMON})
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
+endforeach()
+
+# add c flags
+foreach(FLAG ${FLAGS_CONLY})
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG}")
+endforeach()
+
+# add cxx flags
+foreach(FLAG ${FLAGS_CXXONLY})
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
+endforeach()
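
The FLAGS_COMMON, FLAGS_CONLY and FLAGS_CXXONLY lists consumed above are expected to come from the per-platform config files; a minimal sketch, with made-up flags, of what such a file might populate before this script runs:

    # Hypothetical cmake/buildtool/config/config_example.cmake
    list(APPEND FLAGS_COMMON  "-O2" "-ffunction-sections")  # applied to C and C++
    list(APPEND FLAGS_CONLY   "-std=gnu99")                 # C only
    list(APPEND FLAGS_CXXONLY "-Wno-psabi")                 # C++ only
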
diff --git a/infra/onert-micro/cmake/CfgOptionFlags.cmake b/infra/onert-micro/cmake/CfgOptionFlags.cmake
new file mode 100644 (file)
index 0000000..ffbc7b2
--- /dev/null
@@ -0,0 +1,18 @@
+# Platform specific configuration
+# note: this should be placed before the default settings so that option priority is kept
+#       (platform specific settings have higher priority)
+#
+include("${NNAS_PROJECT_SOURCE_DIR}/infra/onert-micro/cmake/options/options_${TARGET_PLATFORM}.cmake")
+
+###
+### Configuration
+###
+option(DOWNLOAD_RUY "Download ruy source" ON)
+option(DOWNLOAD_EIGEN "Download Eigen source" ON)
+option(DOWNLOAD_GEMMLOWP "Download GEMM low precision library source" ON)
+option(DOWNLOAD_FLATBUFFERS "Download FlatBuffers source" ON)
+option(BUILD_FLATBUFFERS "Locally build Flatbuffers from the downloaded source" ON)
+option(DOWNLOAD_TENSORFLOW "Download TensorFlow source" ON)
+
+option(DOWNLOAD_GTEST "Download Google Test source" ON)
+option(BUILD_GTEST "Build Google Test from the downloaded source" ON)
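
Because the platform-specific options file is included before the option() defaults above, and option() keeps an already-defined cache value, a platform can pre-seed any of these switches; a minimal sketch of a hypothetical options file that disables source downloads:

    # Hypothetical infra/onert-micro/cmake/options/options_myboard-generic.cmake
    # These values win over the ON defaults declared in CfgOptionFlags.cmake.
    option(DOWNLOAD_TENSORFLOW "Download TensorFlow source" OFF)
    option(DOWNLOAD_GTEST "Download Google Test source" OFF)
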
diff --git a/infra/onert-micro/cmake/buildtool/config/arm-none-eabi-gcc.cmake b/infra/onert-micro/cmake/buildtool/config/arm-none-eabi-gcc.cmake
new file mode 100644 (file)
index 0000000..544be03
--- /dev/null
@@ -0,0 +1,66 @@
+set(CMAKE_SYSTEM_NAME Generic)
+
+set(CMAKE_SYSTEM_PROCESSOR "${CPU_ARCH}")
+set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+set(CMAKE_C_COMPILER "${C_COMPILER}")
+set(CMAKE_CXX_COMPILER "${CXX_COMPILER}")
+set(CMAKE_ASM_COMPILER "${ASM_COMPILER}")
+set(CMAKE_OBJCOPY "${OBJCOPY}")
+
+set(TARGET_CPU "cortex-m4" CACHE STRING "Target CPU")
+
+# Convert TARGET_CPU=Cortex-M33+nofp+nodsp into
+#   - CMAKE_SYSTEM_PROCESSOR=cortex-m33
+#   - TARGET_CPU_FEATURES=no-fp;no-dsp
+string(REPLACE "+" ";" TARGET_CPU_FEATURES ${TARGET_CPU})
+list(POP_FRONT TARGET_CPU_FEATURES CMAKE_SYSTEM_PROCESSOR)
+string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} CMAKE_SYSTEM_PROCESSOR)
+
+set(CMAKE_EXECUTABLE_SUFFIX ".elf")
+set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
+# Select C/C++ version
+set(CMAKE_C_STANDARD 99)
+set(CMAKE_CXX_STANDARD 14)
+
+# Compile options
+add_compile_options(
+        -mcpu=${TARGET_CPU}
+        -mthumb
+        "$<$<CONFIG:DEBUG>:-gdwarf-3>"
+        "$<$<COMPILE_LANGUAGE:CXX>:-funwind-tables;-frtti;-fexceptions>")
+
+# Compile defines
+add_compile_definitions(
+        "$<$<NOT:$<CONFIG:DEBUG>>:NDEBUG>")
+
+# Link options
+add_link_options(
+        -mcpu=${TARGET_CPU}
+        -mthumb
+        --specs=nosys.specs)
+
+# Set floating point unit
+if("${TARGET_CPU}" MATCHES "\\+fp")
+    set(FLOAT hard)
+elseif("${TARGET_CPU}" MATCHES "\\+nofp")
+    set(FLOAT soft)
+elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m33" OR
+        "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m55")
+    set(FLOAT hard)
+else()
+    set(FLOAT soft)
+endif()
+
+if (FLOAT)
+    add_compile_options(-mfloat-abi=${FLOAT})
+    add_link_options(-mfloat-abi=${FLOAT})
+endif()
+
+# Compilation warnings
+add_compile_options(
+        -Wno-all
+)
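
To make the TARGET_CPU parsing above concrete, here is a standalone sketch (runnable with cmake -P) of what the string and list operations produce for one hypothetical value:

    # Hypothetical parse_cpu_demo.cmake; run with: cmake -P parse_cpu_demo.cmake
    set(TARGET_CPU "Cortex-M33+nofp+nodsp")
    string(REPLACE "+" ";" TARGET_CPU_FEATURES ${TARGET_CPU})   # -> Cortex-M33;nofp;nodsp
    list(POP_FRONT TARGET_CPU_FEATURES CMAKE_SYSTEM_PROCESSOR)  # -> Cortex-M33
    string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} CMAKE_SYSTEM_PROCESSOR)
    message(STATUS "processor: ${CMAKE_SYSTEM_PROCESSOR}")      # cortex-m33
    message(STATUS "features : ${TARGET_CPU_FEATURES}")         # nofp;nodsp
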
diff --git a/infra/onert-micro/cmake/buildtool/config/config_linux.cmake b/infra/onert-micro/cmake/buildtool/config/config_linux.cmake
new file mode 100644 (file)
index 0000000..d7b17cf
--- /dev/null
@@ -0,0 +1,11 @@
+#
+# linux common compile options
+#
+
+# Disable annoying ABI compatibility warning.
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
+  list(APPEND FLAGS_CXXONLY "-Wno-psabi")
+endif()
+
+# lib pthread as a variable (pthread must be disabled on android)
+set(LIB_PTHREAD pthread)
diff --git a/infra/onert-micro/cmake/buildtool/config/config_x86_64-linux.cmake b/infra/onert-micro/cmake/buildtool/config/config_x86_64-linux.cmake
new file mode 100644 (file)
index 0000000..528e483
--- /dev/null
@@ -0,0 +1,12 @@
+#
+# x86_64 linux compile options
+#
+message(STATUS "Building for x86-64 Linux")
+
+# include linux common
+include("cmake/buildtool/config/config_linux.cmake")
+
+# SIMD for x86
+set(FLAGS_COMMON ${FLAGS_COMMON}
+    "-msse4"
+    )
diff --git a/infra/onert-micro/cmake/options/options_armv7-r-generic.cmake b/infra/onert-micro/cmake/options/options_armv7-r-generic.cmake
new file mode 100644 (file)
index 0000000..d671b73
--- /dev/null
@@ -0,0 +1,3 @@
+#
+# armv7em generic cmake options
+#
diff --git a/infra/onert-micro/cmake/options/options_armv7em-generic.cmake b/infra/onert-micro/cmake/options/options_armv7em-generic.cmake
new file mode 100644 (file)
index 0000000..d671b73
--- /dev/null
@@ -0,0 +1,3 @@
+#
+# armv7em generic cmake options
+#
diff --git a/infra/onert-micro/cmake/options/options_armv8-m-generic.cmake b/infra/onert-micro/cmake/options/options_armv8-m-generic.cmake
new file mode 100644 (file)
index 0000000..cbd70de
--- /dev/null
@@ -0,0 +1,3 @@
+#
+# armv8-m generic cmake options
+#
diff --git a/infra/onert-micro/cmake/options/options_x86_64-linux.cmake b/infra/onert-micro/cmake/options/options_x86_64-linux.cmake
new file mode 100644 (file)
index 0000000..0fb72f1
--- /dev/null
@@ -0,0 +1,3 @@
+#
+# x86_64 linux cmake options
+#
diff --git a/infra/onert-micro/utils.cmake b/infra/onert-micro/utils.cmake
new file mode 100644 (file)
index 0000000..4c78e2c
--- /dev/null
@@ -0,0 +1,53 @@
+set(NNAS_PROJECT_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../.." CACHE
+        INTERNAL "Where to find nnas top-level source directory"
+        )
+
+set(NNAS_EXTERNALS_DIR
+        "${NNAS_PROJECT_SOURCE_DIR}/externals" CACHE
+        INTERNAL "Where to download external dependencies"
+        )
+set(ONERT_MICRO_OVERLAY_DIR "${CMAKE_BINARY_DIR}/overlay" CACHE
+        INTERNAL "Where locally built external dependencies are installed")
+
+# Share package build script with runtime
+set(EXT_OVERLAY_DIR ${ONERT_MICRO_OVERLAY_DIR})
+
+# This allows find_package to access configurations installed inside overlay
+list(APPEND CMAKE_PREFIX_PATH "${EXT_OVERLAY_DIR}")
+
+macro(nnas_include PREFIX)
+    include("${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/modules/${PREFIX}.cmake")
+endmacro(nnas_include)
+
+macro(nnas_find_package PREFIX)
+    find_package(${PREFIX}
+            CONFIG NO_DEFAULT_PATH
+            PATHS ${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/packages
+            ${ARGN})
+endmacro(nnas_find_package)
+
+macro(nnas_find_package_folder PREFIX FIND_FOLDER)
+    find_package(${PREFIX}
+            CONFIG NO_DEFAULT_PATH
+            PATHS ${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/packages ${FIND_FOLDER}
+            ${ARGN})
+endmacro(nnas_find_package_folder)
+
+###
+### CMake configuration
+###
+if(NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Type of build" FORCE)
+endif(NOT CMAKE_BUILD_TYPE)
+message(STATUS "Use '${CMAKE_BUILD_TYPE}' configuration")
+
+# identify platform: HOST_PLATFORM, TARGET_PLATFORM and related
+# note: this should be placed before flags and options setting
+nnas_include(IdentifyPlatform)
+
+# Configuration flags
+include("${NNAS_PROJECT_SOURCE_DIR}/infra/onert-micro/cmake/CfgOptionFlags.cmake")
+
+# apply compilation flags
+# NOTE this should come after all options are set
+include("${NNAS_PROJECT_SOURCE_DIR}/infra/onert-micro/cmake/ApplyCompileFlags.cmake")
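
The helper macros defined above are what callers use to pull in shared modules and package scripts; a brief sketch of typical calls, reusing names that already appear elsewhere in this patch:

    # Look up a package config shipped under infra/cmake/packages only.
    nnas_find_package(GTest QUIET)
    if(GTest_FOUND)
      message(STATUS "GTest resolved through the nnas package scripts")
    endif()
    # Include a shared module from infra/cmake/modules.
    nnas_include(IdentifyPlatform)
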
index 5d6bdd9..0c1370f 100644 (file)
@@ -8,7 +8,7 @@ if [[ -z "${NNAS_PROJECT_PATH}" ]]; then
 fi
 
 # The default preset
-PRESET="20220323"
+PRESET="20221125"
 
 # Test is enabled by default
 DISABLE_TEST=false
index 0eac106..69251d0 100644 (file)
@@ -35,6 +35,8 @@ function preset_configure()
   REQUIRED_UNITS+=("circle-partitioner" "circle-operator")
   REQUIRED_UNITS+=("one-cmds")
   REQUIRED_UNITS+=("bcq-tools")
+  REQUIRED_UNITS+=("dalgona")
+  REQUIRED_UNITS+=("visq")
 
   # Dependent modules needed for build
   REQUIRED_UNITS+=("circlechef")
index 14917b3..c5a3f0e 100644 (file)
@@ -30,6 +30,8 @@ function preset_configure()
   REQUIRED_UNITS+=("circle-partitioner" "circle-operator")
   REQUIRED_UNITS+=("one-cmds")
   REQUIRED_UNITS+=("bcq-tools")
+  REQUIRED_UNITS+=("dalgona")
+  REQUIRED_UNITS+=("visq")
 
   # Dependent modules needed for build
   REQUIRED_UNITS+=("circlechef")
diff --git a/infra/packaging/preset/20221125 b/infra/packaging/preset/20221125
new file mode 100644 (file)
index 0000000..d798087
--- /dev/null
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+# NOTE purpose of this file is static analysis only
+#      new official preset will be added when new programs are ready
+
+PRESET="20221125"
+
+function preset_configure()
+{
+  REQUIRED_UNITS=()
+  # Common Libraries
+  REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+  REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+  REQUIRED_UNITS+=("souschef")
+  REQUIRED_UNITS+=("safemain")
+  REQUIRED_UNITS+=("arser")
+  REQUIRED_UNITS+=("vconone")
+  # Hermes Logging Framework
+  REQUIRED_UNITS+=("hermes" "hermes-std")
+  # loco IR and related utilities
+  REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+  # Flatbuffer I/O
+  REQUIRED_UNITS+=("mio-tflite280" "mio-circle04")
+  # Data I/O
+  REQUIRED_UNITS+=("dio-hdf5")
+  # Compute
+  REQUIRED_UNITS+=("luci-compute")
+  # Circle compiler library (.circle -> .circle)
+  REQUIRED_UNITS+=("luci")
+  # Python interface for circle schema
+  REQUIRED_UNITS+=("pics")
+  # Tools
+  REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef")
+  REQUIRED_UNITS+=("circle-tensordump" "circledump")
+  REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter")
+  REQUIRED_UNITS+=("luci-eval-driver")
+  REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+  REQUIRED_UNITS+=("circle-eval-diff" "circle-interpreter")
+  REQUIRED_UNITS+=("circle-partitioner" "circle-operator")
+  REQUIRED_UNITS+=("one-cmds")
+  REQUIRED_UNITS+=("bcq-tools")
+  REQUIRED_UNITS+=("dalgona")
+  REQUIRED_UNITS+=("visq")
+  REQUIRED_UNITS+=("circle-opselector")
+
+  # Dependent modules needed for build
+  REQUIRED_UNITS+=("circlechef")
+  REQUIRED_UNITS+=("circle-verify")
+
+  NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
+
+  # TODO Use "nncc configure" and "nncc build"
+  cmake \
+    -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+    -DCMAKE_BUILD_TYPE=release \
+    -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+    -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+    ${EXTRA_OPTIONS[@]} \
+    "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+  # Install tf2nnpkg
+  install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+}
diff --git a/infra/packaging/preset/20221125_windows b/infra/packaging/preset/20221125_windows
new file mode 100644 (file)
index 0000000..75c6426
--- /dev/null
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+PRESET="20221125"
+
+function preset_configure()
+{
+  REQUIRED_UNITS=()
+  # Common Libraries
+  REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+  REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+  REQUIRED_UNITS+=("souschef")
+  REQUIRED_UNITS+=("safemain")
+  REQUIRED_UNITS+=("arser")
+  REQUIRED_UNITS+=("vconone")
+  # Hermes Logging Framework
+  REQUIRED_UNITS+=("hermes" "hermes-std")
+  # loco IR and related utilities
+  REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+  # Flatbuffer I/O
+  REQUIRED_UNITS+=("mio-tflite280" "mio-circle04")
+  # Data I/O
+  REQUIRED_UNITS+=("dio-hdf5")
+  # Compute
+  REQUIRED_UNITS+=("luci-compute")
+  # Circle compiler library (.circle -> .circle)
+  REQUIRED_UNITS+=("luci")
+  # Python interface for circle schema
+  REQUIRED_UNITS+=("pics")
+  # Tools
+  REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef")
+  REQUIRED_UNITS+=("circle-tensordump" "circledump")
+  REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter")
+  REQUIRED_UNITS+=("luci-eval-driver")
+  REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+  REQUIRED_UNITS+=("circle-eval-diff" "circle-interpreter")
+  REQUIRED_UNITS+=("circle-partitioner" "circle-operator")
+  REQUIRED_UNITS+=("one-cmds")
+  REQUIRED_UNITS+=("bcq-tools")
+  REQUIRED_UNITS+=("dalgona")
+  REQUIRED_UNITS+=("visq")
+
+  # Dependent modules needed for build
+  REQUIRED_UNITS+=("circlechef")
+  REQUIRED_UNITS+=("circle-verify")
+
+  NPROC=$(cat /proc/cpuinfo | grep -c processor)
+
+  # TODO Use "nncc configure" and "nncc build"
+  cmake \
+    -G "MSYS Makefiles" \
+    -DUSE_PROTOBUF_LEGACY_IMPORT=ON \
+    -DCMAKE_EXE_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+    -DCMAKE_SHARED_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+    -DENABLE_TEST=OFF \
+    -DDOWNLOAD_GTEST=OFF \
+    -DBUILD_GTEST=OFF \
+    -DCMAKE_C_COMPILER=gcc \
+    -DCMAKE_CXX_COMPILER=g++ \
+    -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+    -DCMAKE_BUILD_TYPE=release \
+    -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+    -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+    ${EXTRA_OPTIONS[@]} \
+    "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+  # Install libraries to bin/ for Windows release
+  mv ${NNCC_INSTALL_PREFIX}/lib/*.dll ${NNCC_INSTALL_PREFIX}/bin
+  rm -rf ${NNCC_INSTALL_PREFIX}/lib
+
+  # Install tf2nnpkg
+  install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+
+  # Though TensorFlow is required to run 'tf2tfliteV2', it cannot be installed
+  # under MinGW. Install TensorFlow from a native Windows CMD prompt
+  # (run as administrator) inside a Python virtual environment,
+  # and then copy that environment to "${NNAS_INSTALL_PREFIX}/bin/venv"
+}
diff --git a/infra/packaging/res/tf2nnpkg.20221125 b/infra/packaging/res/tf2nnpkg.20221125
new file mode 100644 (file)
index 0000000..a7446e6
--- /dev/null
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+set -e
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+command_exists() {
+  if [ "$#" -le 0 ]; then
+    return 1
+  fi
+  command -v "$@" > /dev/null 2>&1
+}
+
+usage()
+{
+  echo "Convert TensorFlow model to nnpackage."
+  echo "Usage: tf2nnpkg"
+  echo "    --info <path/to/info>"
+  echo "    --graphdef <path/to/pb>"
+  echo "    -o <path/to/nnpkg/directory>"
+  echo "    --v2 (optional) Use TF 2.x interface"
+  exit 255
+}
+
+TF_INTERFACE="--v1"
+
+# Parse command-line arguments
+#
+while [ "$#" -ne 0 ]; do
+  CUR="$1"
+
+  case $CUR in
+    '--help')
+      usage
+      ;;
+    '--info')
+      export INFO_FILE="$2"
+      shift 2
+      ;;
+    '--graphdef')
+      export GRAPHDEF_FILE="$2"
+      shift 2
+      ;;
+    '-o')
+      export OUTPUT_DIR="$2"
+      shift 2
+      ;;
+    '--v2')
+      TF_INTERFACE="--v2"
+      shift
+      ;;
+    *)
+      echo "${CUR}"
+      shift
+      ;;
+  esac
+done
+
+if [ -z ${GRAPHDEF_FILE} ] || [ ! -e ${GRAPHDEF_FILE} ]; then
+  echo "pb is not found. Please check --graphdef is correct."
+  exit 2
+fi
+
+if [ -z ${INFO_FILE} ] || [ ! -e ${INFO_FILE} ]; then
+  echo "info is not found. Please check --info is correct."
+  exit 2
+fi
+
+if [ -z ${OUTPUT_DIR} ]; then
+  echo "output directory is not specifed. Please check -o is correct.."
+  exit 2
+fi
+
+FILE_BASE=$(basename ${GRAPHDEF_FILE})
+MODEL_NAME="${FILE_BASE%.*}"
+TMPDIR=$(mktemp -d)
+trap "{ rm -rf $TMPDIR; }" EXIT
+
+# activate python virtual environment
+VIRTUALENV_LINUX="${ROOT}/bin/venv/bin/activate"
+VIRTUALENV_WINDOWS="${ROOT}/bin/venv/Scripts/activate"
+
+if [ -e ${VIRTUALENV_LINUX} ]; then
+  source ${VIRTUALENV_LINUX}
+elif [ -e ${VIRTUALENV_WINDOWS} ]; then
+  source ${VIRTUALENV_WINDOWS}
+fi
+
+# parse inputs, outputs from info file
+INPUT=$(awk -F, '/^input/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+OUTPUT=$(awk -F, '/^output/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+
+INPUT_SHAPES=$(grep ^input ${INFO_FILE} | cut -d "[" -f2 | cut -d "]" -f1 | tr -d ' ' | xargs | tr ' ' ':')
+
+ONE_IMPORT_BCQ_SCRIPT="${ROOT}/bin/one-import-bcq ${TF_INTERFACE} "
+ONE_IMPORT_BCQ_SCRIPT+="-i ${GRAPHDEF_FILE} "
+ONE_IMPORT_BCQ_SCRIPT+="-o ${TMPDIR}/${MODEL_NAME}.tmp.circle "
+ONE_IMPORT_BCQ_SCRIPT+="-I ${INPUT} "
+ONE_IMPORT_BCQ_SCRIPT+="-O ${OUTPUT} "
+if [ ! -z ${INPUT_SHAPES} ]; then
+  ONE_IMPORT_BCQ_SCRIPT+="-s ${INPUT_SHAPES} "
+fi
+
+${ONE_IMPORT_BCQ_SCRIPT}
+
+# optimize
+"${ROOT}/bin/circle2circle" --resolve_customop_add "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
+
+"${ROOT}/bin/model2nnpkg" -o "${OUTPUT_DIR}" -m "${TMPDIR}/${MODEL_NAME}.circle"
diff --git a/infra/scripts/build_android_runtime_release.sh b/infra/scripts/build_android_runtime_release.sh
deleted file mode 100755 (executable)
index 99858eb..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# prepare ndk
-if [ ! -n "$NDK_DIR" ]; then
-  export NDK_DIR=$ROOT_PATH/tools/cross/ndk/r20/ndk
-  echo "It will use default external path"
-fi
-
-export TARGET_OS=android
-export CROSS_BUILD=1
-export BUILD_TYPE=release
-make -f Makefile.template install
index 4a1385d..0beaf67 100755 (executable)
@@ -50,10 +50,10 @@ function TFLiteModelVerification()
 
   export BACKENDS=$1
   if [[ "$2" == "" ]]; then
-    $INSTALL_PATH/test/onert-test verify-tflite --api=loader \
+    $INSTALL_PATH/test/onert-test verify-tflite \
       --reportdir=$ROOT_PATH/$3
   else
-    $INSTALL_PATH/test/onert-test verify-tflite --api=loader \
+    $INSTALL_PATH/test/onert-test verify-tflite \
       --list=$2 \
       --reportdir=$ROOT_PATH/$3
   fi
@@ -74,7 +74,7 @@ function NNAPIGTest()
 
   # Backup original nnapi_gtest.skip
   # TODO Pass skiplist to test-driver.sh
-  SKIPLIST_FILE="${INSTALL_PATH}/unittest/nnapi_gtest.skip"
+  SKIPLIST_FILE="${INSTALL_PATH}/nnapi-gtest/nnapi_gtest.skip"
   BACKUP_FILE="${SKIPLIST_FILE}.backup"
   if [[ "$2" != "" ]]; then
     cp ${SKIPLIST_FILE} ${BACKUP_FILE}
@@ -84,7 +84,7 @@ function NNAPIGTest()
   export BACKENDS=$1
   $INSTALL_PATH/test/onert-test unittest \
     --reportdir=$ROOT_PATH/$3 \
-    --unittestdir=$INSTALL_PATH/unittest
+    --unittestdir=$INSTALL_PATH/nnapi-gtest
   unset BACKENDS
 
   # TODO Pass skiplist to test-driver.sh
@@ -129,27 +129,3 @@ function NNPackageTest()
 
   popd > /dev/null
 }
-
-# $1: (required) backend
-# $2: (required) test list file relative path from nnfw root directory
-#                pass empty string if there is no skiplist
-# $3: (required) relative path to report from nnfw root directory
-function NNAPIFrontendTest()
-{
-  [[ $# -ne 3 ]] && echo "NNAPIFrontendTest: Invalid function argument setting" && exit 1
-
-  pushd ${ROOT_PATH} > /dev/null
-
-  export BACKENDS=$1
-  if [[ "$2" == "" ]]; then
-    $INSTALL_PATH/test/onert-test verify-tflite --api=nnapi \
-      --reportdir=$ROOT_PATH/$3
-  else
-    $INSTALL_PATH/test/onert-test verify-tflite --api=nnapi \
-      --list=$2 \
-      --reportdir=$ROOT_PATH/$3
-  fi
-  unset BACKENDS
-
-  popd > /dev/null
-}
index 51cba92..55e48f4 100644 (file)
@@ -12,6 +12,7 @@ DEBUG_BUILD_ITEMS+=";hermes;hermes-std"
 DEBUG_BUILD_ITEMS+=";loco;locop;locomotiv;logo-core;logo"
 DEBUG_BUILD_ITEMS+=";foder;crew;souschef;arser;vconone"
 DEBUG_BUILD_ITEMS+=";safemain;mio-circle04;mio-tflite280;dio-hdf5"
+DEBUG_BUILD_ITEMS+=";luci-compute"
 DEBUG_BUILD_ITEMS+=";tflite2circle"
 DEBUG_BUILD_ITEMS+=";luci"
 DEBUG_BUILD_ITEMS+=";luci-interpreter"
@@ -20,14 +21,17 @@ DEBUG_BUILD_ITEMS+=";circle2circle;record-minmax;circle-quantizer"
 DEBUG_BUILD_ITEMS+=";circle-eval-diff"
 DEBUG_BUILD_ITEMS+=";circle-partitioner;circle-part-driver;circle-operator"
 DEBUG_BUILD_ITEMS+=";circle-verify"
-DEBUG_BUILD_ITEMS+=";circle-tensordump"
+DEBUG_BUILD_ITEMS+=";circle-tensordump;circle-opselector"
 DEBUG_BUILD_ITEMS+=";tflchef;circlechef"
 DEBUG_BUILD_ITEMS+=";common-artifacts"
 DEBUG_BUILD_ITEMS+=";circle2circle-dredd-recipe-test"
 DEBUG_BUILD_ITEMS+=";record-minmax-conversion-test"
 DEBUG_BUILD_ITEMS+=";tf2tfliteV2;tf2tfliteV2-conversion-test"
 DEBUG_BUILD_ITEMS+=";tflite2circle-conversion-test"
-DEBUG_BUILD_ITEMS+=";pota-quantization-value-test"
+DEBUG_BUILD_ITEMS+=";pota-quantization-value-test;pics"
 DEBUG_BUILD_ITEMS+=";circle-part-value-test"
 DEBUG_BUILD_ITEMS+=";circle-quantizer-dredd-recipe-test"
 DEBUG_BUILD_ITEMS+=";circle-operator-test"
+DEBUG_BUILD_ITEMS+=";circle-interpreter;circle-interpreter-test"
+DEBUG_BUILD_ITEMS+=";dalgona;dalgona-test"
+DEBUG_BUILD_ITEMS+=";visq"
diff --git a/infra/scripts/docker_build_cross_aarch64_runtime.sh b/infra/scripts/docker_build_cross_aarch64_runtime.sh
deleted file mode 100755 (executable)
index f73894f..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# prepare rootfs
-if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
-  echo "It will use default rootfs path"
-else
-  DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
-  DOCKER_ENV_VARS+=" -e ROOTFS_DIR=/opt/rootfs"
-fi
-
-# mount volume (or directory) for externals
-if [ -n "$EXTERNAL_VOLUME" ]; then
-  DOCKER_VOLUMES+=" -v $EXTERNAL_VOLUME:/externals"
-  DOCKER_ENV_VARS+=" -e EXTERNAL_VOLUME=/externals"
-else
-  echo "It will use default external path"
-fi
-
-# docker image name
-# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
-# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
-  echo "It will use default docker image name"
-fi
-
-# Mirror server setting
-if [[ -z $EXTERNAL_DOWNLOAD_SERVER ]]; then
-  echo "It will not use mirror server"
-fi
-
-DOCKER_ENV_VARS+=" -e TARGET_ARCH=aarch64"
-DOCKER_ENV_VARS+=" -e CROSS_BUILD=1"
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-# TODO use command instead of makefile
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-CMD="cp -nv Makefile.template Makefile && \
-     make all install build_test_suite"
-./nnfw docker-run bash -c "$CMD"
-
-popd > /dev/null
diff --git a/infra/scripts/docker_build_cross_arm_runtime.sh b/infra/scripts/docker_build_cross_arm_runtime.sh
deleted file mode 100755 (executable)
index 17d75de..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# prepare rootfs
-if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
-  echo "It will use default rootfs path"
-else
-  DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
-  DOCKER_ENV_VARS+=" -e ROOTFS_DIR=/opt/rootfs"
-fi
-
-# mount volume (or directory) for externals
-if [ -n "$EXTERNAL_VOLUME" ]; then
-  DOCKER_VOLUMES+=" -v $EXTERNAL_VOLUME:/externals"
-  DOCKER_ENV_VARS+=" -e EXTERNAL_VOLUME=/externals"
-else
-  echo "It will use default external path"
-fi
-
-# docker image name
-# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
-# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
-  echo "It will use default docker image name"
-fi
-
-# Mirror server setting
-if [[ -z $EXTERNAL_DOWNLOAD_SERVER ]]; then
-  echo "It will not use mirror server"
-fi
-
-DOCKER_ENV_VARS+=" -e TARGET_ARCH=armv7l"
-DOCKER_ENV_VARS+=" -e CROSS_BUILD=1"
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-# TODO use command instead of makefile
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-CMD="cp -nv Makefile.template Makefile && \
-     make all install build_test_suite"
-./nnfw docker-run bash -c "$CMD"
-
-popd > /dev/null
diff --git a/infra/scripts/docker_build_cross_arm_runtime_release.sh b/infra/scripts/docker_build_cross_arm_runtime_release.sh
deleted file mode 100755 (executable)
index 377bc3e..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# prepare rootfs
-if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
-  echo "It will use default rootfs path"
-else
-  DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
-  DOCKER_ENV_VARS+=" -e ROOTFS_DIR=/opt/rootfs"
-fi
-
-# mount volume (or directory) for externals
-if [ -n "$EXTERNAL_VOLUME" ]; then
-  DOCKER_VOLUMES+=" -v $EXTERNAL_VOLUME:/externals"
-  DOCKER_ENV_VARS+=" -e EXTERNAL_VOLUME=/externals"
-else
-  echo "It will use default external path"
-fi
-
-# docker image name
-# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
-# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
-  echo "It will use default docker image name"
-fi
-
-# Mirror server setting
-if [[ -z $EXTERNAL_DOWNLOAD_SERVER ]]; then
-  echo "It will not use mirror server"
-fi
-
-DOCKER_ENV_VARS+=" -e TARGET_ARCH=armv7l"
-DOCKER_ENV_VARS+=" -e CROSS_BUILD=1"
-DOCKER_ENV_VARS+=" -e BUILD_TYPE=release"
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-# TODO use command instead of makefile
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-CMD="cp -nv Makefile.template Makefile && \
-     make all install build_test_suite"
-./nnfw docker-run bash -c "$CMD"
-
-popd > /dev/null
diff --git a/infra/scripts/docker_build_cross_coverage.sh b/infra/scripts/docker_build_cross_coverage.sh
deleted file mode 100755 (executable)
index 454bf27..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# prepare rootfs
-if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
-  echo "It will use default rootfs path"
-else
-  DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
-  DOCKER_ENV_VARS+=" -e ROOTFS_DIR=/opt/rootfs"
-fi
-
-# mount volume (or directory) for externals
-if [ -n "$EXTERNAL_VOLUME" ]; then
-  DOCKER_VOLUMES+=" -v $EXTERNAL_VOLUME:/externals"
-  DOCKER_ENV_VARS+=" -e EXTERNAL_VOLUME=/externals"
-else
-  echo "It will use default external path"
-fi
-
-# docker image name
-# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
-# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
-  echo "It will use default docker image name"
-fi
-
-# Mirror server setting
-if [[ -z $EXTERNAL_DOWNLOAD_SERVER ]]; then
-  echo "It will not use mirror server"
-fi
-
-NNAS_WORKSPACE=${NNAS_WORKSPACE:-build}
-if [[ -z "${ARCHIVE_PATH}" ]]; then
-  ARCHIVE_PATH=${NNAS_WORKSPACE}/archive
-fi
-
-DOCKER_ENV_VARS+=" -e TARGET_ARCH=armv7l"
-DOCKER_ENV_VARS+=" -e CROSS_BUILD=1"
-DOCKER_ENV_VARS+=" -e COVERAGE_BUILD=1"
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-# TODO use command instead of makefile
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-CMD="cp -nv Makefile.template Makefile && \
-     make all install build_coverage_suite"
-./nnfw docker-run bash -c "$CMD"
-
-mkdir -p ${ARCHIVE_PATH}
-# TODO change workspace usage in makefile
-mv Product/out/coverage-suite.tar.gz ${ARCHIVE_PATH}/
-
-popd > /dev/null
diff --git a/infra/scripts/docker_build_test_x64.sh b/infra/scripts/docker_build_test_x64.sh
deleted file mode 100755 (executable)
index b3428e0..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# mount volume (or directory) for externals
-if [ -n "$EXTERNAL_VOLUME" ]; then
-  DOCKER_VOLUMES+=" -v $EXTERNAL_VOLUME:/externals"
-  DOCKER_ENV_VARS+=" -e EXTERNAL_VOLUME=/externals"
-else
-  echo "It will use default external path"
-fi
-
-# docker image name
-# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
-# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
-  echo "It will use default docker image name"
-fi
-
-# Mirror server setting
-if [[ -z $EXTERNAL_DOWNLOAD_SERVER ]]; then
-  echo "It will not use mirror server"
-fi
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-export BUILD_OPTIONS
-
-CMD="export OPTIONS='$BUILD_OPTIONS' && \
-     export BUILD_TYPE=Release && \
-     cp -nv Makefile.template Makefile && \
-     make all install build_test_suite"
-./nnfw docker-run bash -c "$CMD"
-
-# Model download server setting
-if [[ -z $MODELFILE_SERVER ]]; then
-  echo "Need model file server setting"
-  exit 1
-fi
-
-export DOCKER_ENV_VARS=" -e MODELFILE_SERVER=$MODELFILE_SERVER"
-./nnfw docker-run-user ./infra/scripts/test_ubuntu_runtime.sh --backend cpu
-./nnfw docker-run-user ./infra/scripts/test_ubuntu_runtime.sh --interp
-
-popd > /dev/null
diff --git a/infra/scripts/docker_build_tizen_cross.sh b/infra/scripts/docker_build_tizen_cross.sh
deleted file mode 100755 (executable)
index 42e79a7..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# prepare rootfs
-if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
-  echo "It will use default rootfs path"
-else
-  DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
-  DOCKER_ENV_VARS+=" -e ROOTFS_DIR=/opt/rootfs"
-fi
-
-# mount volume (or directory) for externals
-if [ -n "$EXTERNAL_VOLUME" ]; then
-  DOCKER_VOLUMES+=" -v $EXTERNAL_VOLUME:/externals"
-  DOCKER_ENV_VARS+=" -e EXTERNAL_VOLUME=/externals"
-else
-  echo "It will use default external path"
-fi
-
-# docker image name
-# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
-# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
-  echo "It will use default docker image name"
-fi
-
-DOCKER_ENV_VARS+=" -e TARGET_ARCH=armv7l"
-DOCKER_ENV_VARS+=" -e CROSS_BUILD=1"
-DOCKER_ENV_VARS+=" -e TARGET_OS=tizen"
-DOCKER_ENV_VARS+=" -e BUILD_TYPE=release"
-
-# Mirror server setting
-if [[ -z $EXTERNAL_DOWNLOAD_SERVER ]]; then
-  echo "It will not use mirror server"
-fi
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-CMD="export OPTIONS+=' -DGENERATE_RUNTIME_NNAPI_TESTS=ON' && \
-     cp -nv Makefile.template Makefile && \
-     make all install build_test_suite"
-./nnfw docker-run bash -c "$CMD"
-
-popd > /dev/null
diff --git a/infra/scripts/docker_build_tizen_gbs.sh b/infra/scripts/docker_build_tizen_gbs.sh
deleted file mode 100755 (executable)
index 2d508f4..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-GBS_RPM_DIR=$ROOT_PATH/Product/out/rpm
-mkdir -p $GBS_RPM_DIR
-DOCKER_VOLUMES=" -v $GBS_RPM_DIR:/opt/rpm"
-
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
-  echo "It will use default docker image name for tizen gbs build"
-  DOCKER_IMAGE_NAME="nnfw_docker_tizen"
-fi
-
-DOCKER_ENV_VARS=" --privileged"
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-CMD="gbs -c $ROOT_PATH/infra/nnfw/config/gbs.conf build \
-         -A armv7l --profile=profile.tizen --clean --include-all --define '$GBS_DEFINE' && \
-     cp -rf /home/GBS-ROOT/local/repos/tizen/armv7l/RPMS/*.rpm /opt/rpm/"
-
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-./nnfw docker-run bash -c "$CMD"
-
-popd > /dev/null
index afdd3b9..9fb7def 100755 (executable)
@@ -68,6 +68,8 @@ REQUIRED_UNITS+=("oops" "safemain" "foder" "crew" "arser" "vconone")
 REQUIRED_UNITS+=("hermes" "hermes-std")
 # loco IR and related utilities
 REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+# Compute
+REQUIRED_UNITS+=("luci-compute")
 # Circle compiler library (.circle -> .circle)
 REQUIRED_UNITS+=("luci")
 # Flatbuffer I/O
diff --git a/infra/scripts/docker_coverage_report.sh b/infra/scripts/docker_coverage_report.sh
deleted file mode 100755 (executable)
index 2c3ee30..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-
-# coverage test data: ${ARCHIVE_PATH}/coverage-data.tar.gz
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# docker image name
-# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
-# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
-  echo "It will use default docker image name"
-fi
-
-NNAS_WORKSPACE=${NNAS_WORKSPACE:-build}
-if [[ -z "${ARCHIVE_PATH}" ]]; then
-  ARCHIVE_PATH=${NNAS_WORKSPACE}/archive
-fi
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-tar -zxf ${ARCHIVE_PATH}/coverage-data.tar.gz
-
-CMD="GCOV_PATH=arm-linux-gnueabihf-gcov NNAS_WORKSPACE=Product ./nnas gen-coverage-report runtime compute &&
-     tar -zcf coverage/coverage_report.tar.gz coverage/html &&
-     python runtime/3rdparty/lcov-to-cobertura-xml/lcov_cobertura.py coverage/coverage.info -o coverage/nnfw_coverage.xml"
-
-./nnfw docker-run-user bash -c "$CMD"
-
-popd > /dev/null
index d00eb73..74fae6b 100755 (executable)
@@ -10,7 +10,4 @@ do
   NNPackageTest ${BACKEND} "Product/out/test/list/nnpkg_test_list.armv7l-linux.${BACKEND}"
 done
 
-# Interpreter test
-export DISABLE_COMPILE=1
-NNPackageTest "interp" "Product/out/test/list/nnpkg_test_list.noarch.interp"
 unset DISABLE_COMPILE
index 6cb4bb7..97043ce 100755 (executable)
@@ -35,9 +35,6 @@ export TRACE_FILEPATH=trace.json
 TFLiteModelVerification "acl_cl" "Product/out/test/list/tflite_comparator.armv7l.acl_cl.list" "report/acl_cl/trace"
 unset TRACE_FILEPATH
 
-# Interpreter
-./infra/scripts/test_ubuntu_runtime.sh --interp
-
 # nnpackage test suite
 if [[ -e ${ARCHIVE_PATH}/nnpkg-test-suite.tar.gz ]]; then
   tar -zxf ${ARCHIVE_PATH}/nnpkg-test-suite.tar.gz -C ./
diff --git a/infra/scripts/test_ubuntu_npud.sh b/infra/scripts/test_ubuntu_npud.sh
new file mode 100755 (executable)
index 0000000..3b33042
--- /dev/null
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+set -eo pipefail
+
+CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ROOT_PATH="$(cd ${CURRENT_PATH}/../../ && pwd)"
+
+# Install path on CI
+INSTALL_PATH="$ROOT_PATH/Product/out"
+MODEL_PATH="${INSTALL_PATH}/npud-gtest/models"
+
+# Install dbus configuration file
+DBUS_CONF="${INSTALL_PATH}/share/org.tizen.npud.conf"
+mkdir -p /usr/share/dbus-1/system.d/
+cp ${DBUS_CONF} /usr/share/dbus-1/system.d/
+
+service dbus restart
+
+function TestPrepared()
+{
+  if [[ -z "${MODELFILE}" ]]; then
+    echo "Model file is not set. Try to use default setting."
+    exit 1
+  fi
+
+  mkdir -p ${MODEL_PATH}
+  if [[ "${MODELFILE: -7}" == ".tar.gz" ]]; then
+    curl -o model.tar.gz -kLsSO ${MODELFILE}
+    tar -zxf model.tar.gz -C ${MODEL_PATH}
+  else
+    echo "The file format is not supported."
+    echo "Supported format: tar.gz"
+    exit 1
+  fi
+}
+
+function TestCleanUp()
+{
+  rm -rf ${MODEL_PATH}
+}
+
+function NpudTest()
+{
+  pushd ${ROOT_PATH} > /dev/null
+
+  $INSTALL_PATH/npud-gtest/npud_gtest
+  EXITCODE=$?
+  if [ ${EXITCODE} -ne 0 ]; then
+    exit ${EXITCODE}
+  fi
+
+  popd > /dev/null
+}
+
+TestPrepared
+
+DEVICE_MODULE_PATH=${INSTALL_PATH}/lib GTEST_MODEL_PATH=${MODEL_PATH} NpudTest
+
+TestCleanUp
index 17bdf6e..9a98e5b 100755 (executable)
@@ -7,10 +7,8 @@ source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
 BACKEND="cpu"
 TEST_OS="linux"
 TEST_PLATFORM="$TEST_ARCH-$TEST_OS"
-TFLITE_LOADER="1"
 LINEAR_ONLY="0"
 RUN_INTERP="0"
-NNAPI_FRONTEND="0"
 
 function Usage()
 {
@@ -18,7 +16,6 @@ function Usage()
   echo ""
   echo "Options:"
   echo "      --backend <BACKEND>     Runtime backend to test (default: ${BACKEND})"
-  echo "      --nnapi-frontend        NNAPI Frontend test"
   echo "      --linear-only           Use Linear executor only"
 }
 
@@ -38,24 +35,10 @@ do
       BACKEND=$(echo ${1#*=} | tr '[:upper:]' '[:lower:]')
       shift
       ;;
-    --tflite-loader)
-      TFLITE_LOADER="1"
-      NNAPI_FRONTEND="1" # For CI test
-      echo "[INFO] \"--tflite-loader\" argument is deprecated"
-      shift
-      ;;
-    --nnapi-frontend)
-      NNAPI_FRONTEND="1"
-      shift
-      ;;
     --linear-only)
       LINEAR_ONLY="1"
       shift
       ;;
-    --interp)
-      RUN_INTERP="1"
-      shift;
-      ;;
     *)
       # Ignore
       shift
@@ -65,16 +48,9 @@ done
 
 CheckTestPrepared
 
-if [ $RUN_INTERP = "1" ]; then
-  TEST_PLATFORM="noarch"
-  TEST_ARCH="noarch"
-  BACKEND="interp"
-  echo "[[ Interpreter test ]]"
-else
-  echo "[[ ${TEST_PLATFORM}: ${BACKEND} backend test ]]"
-fi
+echo "[[ ${TEST_PLATFORM}: ${BACKEND} backend test ]]"
 
-UNITTEST_SKIPLIST="Product/out/unittest/nnapi_gtest.skip.${TEST_PLATFORM}.${BACKEND}"
+UNITTEST_SKIPLIST="Product/out/nnapi-gtest/nnapi_gtest.skip.${TEST_PLATFORM}.${BACKEND}"
 TFLITE_TESTLIST="Product/out/test/list/tflite_comparator.${TEST_ARCH}.${BACKEND}.list"
 REPORT_BASE="report/${BACKEND}"
 EXECUTORS=("Linear" "Dataflow" "Parallel")
@@ -82,34 +58,16 @@ EXECUTORS=("Linear" "Dataflow" "Parallel")
 if [ $LINEAR_ONLY = "1" ]; then
   EXECUTORS=("Linear")
 fi
-if [ $RUN_INTERP = "1" ]; then
-  EXECUTORS=("Interpreter")
-fi
 
 for EXECUTOR in "${EXECUTORS[@]}";
 do
   echo "[EXECUTOR]: ${EXECUTOR}"
   REPORT_PATH="${REPORT_BASE}/${EXECUTOR}"
 
-  if [ $EXECUTOR = "Interpreter" ]; then
-    export DISABLE_COMPILE=1
-    BACKEND=""
-  else
-    export EXECUTOR="${EXECUTOR}"
-  fi
+  export EXECUTOR="${EXECUTOR}"
 
   NNAPIGTest "${BACKEND}" "${UNITTEST_SKIPLIST}" "${REPORT_PATH}"
   TFLiteModelVerification "${BACKEND}" "${TFLITE_TESTLIST}" "${REPORT_PATH}"
 
-  if [ $EXECUTOR = "Interpreter" ]; then
-    unset DISABLE_COMPILE
-  else
-    unset EXECUTOR
-  fi
+  unset EXECUTOR
 done
-
-# TODO Support more backends
-NNAPI_FRONTEND_TESTLIST="Product/out/test/list/nnapi_test.${TEST_ARCH}.list"
-if [[ $NNAPI_FRONTEND = "1" ]]; then
-  NNAPIFrontendTest "${BACKEND}" "${NNAPI_FRONTEND_TESTLIST}" "${REPORT_BASE}/nnapi/${EXECUTOR}"
-fi
index 2510d9c..a6fd2a4 100755 (executable)
@@ -17,7 +17,7 @@ pushd ${ROOT_PATH} > /dev/null
 echo ""
 echo "==== Run standalone unittest begin ===="
 echo ""
-Product/out/test/onert-test unittest --unittestdir=Product/out/unittest_standalone
+Product/out/test/onert-test unittest --unittestdir=Product/out/unittest
 echo ""
 echo "==== Run standalone unittest end ===="
 echo ""
@@ -33,7 +33,7 @@ BACKENDS=(acl_cl acl_neon cpu)
 
 # Get the intersect of framework test list files
 TESTLIST_PREFIX="Product/out/test/list/tflite_comparator.${TEST_ARCH}"
-SKIPLIST_PREFIX="Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}"
+SKIPLIST_PREFIX="Product/out/nnapi-gtest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}"
 sort $TESTLIST_PREFIX.${BACKENDS[0]}.list > $TESTLIST_PREFIX.intersect.list
 sort $SKIPLIST_PREFIX.${BACKENDS[0]} > $SKIPLIST_PREFIX.union
 for BACKEND in "${BACKENDS[@]:1}"; do
@@ -59,5 +59,5 @@ export OP_BACKEND_Pool2D="acl_cl"
 export OP_BACKEND_FullyConnected="acl_neon"
 export ACL_LAYOUT="NCHW"
 export RUY_THREADS=4
-NNAPIGTest "acl_cl;acl_neon;cpu" "Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}.union" "report/mixed"
+NNAPIGTest "acl_cl;acl_neon;cpu" "Product/out/nnapi-gtest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}.union" "report/mixed"
 TFLiteModelVerification "acl_cl;acl_neon;cpu" "${TESTLIST_PREFIX}.intersect.list" "report/mixed"
index 37576ac..5610756 100755 (executable)
@@ -25,15 +25,18 @@ function install_model()
 {
     # download tflite model files
     pushd $HOST_HOME
-    tests/scripts/models/run_test.sh --download=on --run=off
+    TEMP_PATH=$(mktemp -d)
+    CACHE_PATH=$TEMP_PATH/cache
+    mkdir -p $CACHE_PATH
+    ./nnfw prepare-model --cachedir=$CACHE_PATH
     # TODO Since this command removes the model archive (.zip),
     # we must always re-download it, unlike the .tflite model files,
     # because caching applies only to .tflite files.
-    find tests -name "*.zip" -exec rm {} \;
-    tar -zcf cache.tar.gz -C tests/scripts/models cache
-    $SDB_CMD push cache.tar.gz $TEST_ROOT/.
-    rm -rf cache.tar.gz
-    $SDB_CMD shell tar -zxf $TEST_ROOT/cache.tar.gz -C $TEST_ROOT/Product/out/test/models
+    find $CACHE_PATH -name "*.zip" -exec rm {} \;
+    tar -zcf $TEMP_PATH/cache.tar.gz -C $TEMP_PATH cache
+    $SDB_CMD push $TEMP_PATH/cache.tar.gz $TEST_ROOT/
+    rm -rf $TEMP_PATH
+    $SDB_CMD shell tar -zxf $TEST_ROOT/cache.tar.gz -C $TEST_ROOT/Product/out/test
     popd
 }
 
@@ -153,7 +156,6 @@ if [ -z "${GCOV_DIR}" ]; then
   ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_neon"
   ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend cpu"
   ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime_mixed.sh"
-  ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --interp"
 else
   mkdir -p ${GCOV_DIR}
   rm -rf ${GCOV_DIR}/*
@@ -169,7 +171,6 @@ else
   ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_neon"
   ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend cpu"
   ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime_mixed.sh"
-  ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --interp"
 
   # More test to check coverage
   ${SDB_CMD} shell "rm -rf ${GCOV_DATA_PATH} && mkdir -p ${GCOV_DATA_PATH}"
diff --git a/onert-micro/CMakeLists.txt b/onert-micro/CMakeLists.txt
new file mode 100644 (file)
index 0000000..d5ea95d
--- /dev/null
@@ -0,0 +1,217 @@
+set(ARM_C_COMPILER "arm-none-eabi-gcc")
+set(ARM_ASM_COMPILER "arm-none-eabi-gcc")
+set(ARM_CXX_COMPILER "arm-none-eabi-g++")
+set(ARM_OBJCOPY "arm-none-eabi-objcopy")
+
+find_program(ARM_C_COMPILER_PATH ${ARM_C_COMPILER})
+
+if (NOT ARM_C_COMPILER_PATH)
+    message(STATUS "Build luci-micro: FALSE(ARM compiler is NOT FOUND)")
+    return()
+endif ()
+
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
+
+if (NOT FlatBuffers_FOUND)
+    message(STATUS "Build luci-micro: FALSE(FlatBuffers 2.0 NOT FOUND)")
+    return()
+endif (NOT FlatBuffers_FOUND)
+
+message(STATUS "Build luci-micro: TRUE")
+
+set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.4/circle_schema.fbs")
+
+# NOTE Copy circle_schema.fbs as schema.fbs to generate "schema_generated.fbs" instead of "circle_schema_generated.fbs"
+add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"
+        COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs
+        WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+        DEPENDS "${SCHEMA_FILE}"
+        )
+
+FlatBuffers_Target(luci_micro_circle_schema
+        OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/circle-generated/circle"
+        INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
+        SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}"
+        SCHEMA_FILES "schema.fbs"
+        )
+
+set(LUCI_INTERPRETER_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/include")
+add_subdirectory(luci-interpreter/src/core/reader)
+
+# Choosing Kernel: reference mcu, optimized cmsisnn, optimized linux
+if (NOT KERNELS)
+    message(STATUS "KERNEL variable is not defined, default reference mcu kernels will be used")
+    set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/pal/mcu")
+elseif("${KERNELS}" STREQUAL "mcu")
+    message(STATUS "ONERT_MICRO will use reference mcu kernels")
+    set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/pal/mcu")
+elseif("${KERNELS}" STREQUAL "cmsisnn")
+    message(STATUS "ONERT_MICRO will use optimized cmsisnn kernels")
+    set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/pal/cmsisnn")
+elseif("${KERNELS}" STREQUAL "linux")
+    message(STATUS "ONERT_MICRO will use optimized linux kernels")
+    set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/pal/linux")
+else()
+    message(STATUS "Build onert-micro: FAILED (Non-existent kernel variable. Choose one of the following options: mcu, cmsisnn, linux)")
+    return()
+endif()
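+
+# Example invocation (sketch; assumes this directory is configured directly with CMake,
+# the actual nnas build entry point may differ):
+#   cmake <onert-micro-source-dir> -DKERNELS=cmsisnn -DTARGET_CPU=<cpu> -DTARGET_ARCH=<arch>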
+
+if (USE_STATIC_ALLOC)
+    # TODO: enable it
+    message(STATUS "FAILED ONERT-MICRO is not support Static Memory Manager now")
+    return()
+else()
+    message(STATUS "USE_STATIC_ALLOC variable is not defined, default dynamic memory manager will be used")
+endif()
+
+set(CMAKE_ARM_OPTIONS
+        -DLUCI_INTERPRETER_STATIC=ON
+        -DLUCI_STATIC=ON
+        -DBUILD_CMSIS_NN_FUNCTIONS=ON
+        -DTARGET_CPU=${TARGET_CPU}
+        -DTARGET_ARCH=${TARGET_ARCH}
+        "-DEXT_OVERLAY_DIR=${CMAKE_CURRENT_BINARY_DIR}/../../overlay"
+        "-DFlatbuffers_DIR=${CMAKE_CURRENT_BINARY_DIR}/../../overlay/lib/cmake/flatbuffers"
+        "-DCMAKE_TOOLCHAIN_FILE=${NNAS_PROJECT_SOURCE_DIR}/infra/onert-micro/cmake/buildtool/config/arm-none-eabi-gcc.cmake"
+        "-DLUCI_INTERPRETER_PAL_DIR=${LUCI_INTERPRETER_PAL_DIR}"
+        "-DNNAS_PROJECT_SOURCE_DIR=${NNAS_PROJECT_SOURCE_DIR}"
+        "-DNNAS_EXTERNALS_DIR=${NNAS_EXTERNALS_DIR}"
+        -DC_COMPILER=${ARM_C_COMPILER}
+        -DCXX_COMPILER=${ARM_CXX_COMPILER}
+        -DASM_COMPILER=${ARM_ASM_COMPILER}
+        -DOBJCOPY=${ARM_OBJCOPY}
+        -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+        -DENABLE_TEST=OFF
+        -DBUILD_GTEST=OFF
+        "-DNNAS_ROOT=${NNAS_PROJECT_SOURCE_DIR}"
+        -DENABLE_STRICT_BUILD=OFF
+        "-DGENERATED_INCLUDE_DIR=${CMAKE_CURRENT_BINARY_DIR}/gen"
+        )
+
+if (GENERATE_KERNELS_LIST_FROM)
+    set(GENERATED_KERNELS_LIST_PATH "${LUCI_INTERPRETER_PAL_DIR}/GeneratedKernelsToBuild.lst")
+    list(APPEND CMAKE_ARM_OPTIONS "-DLUCI_INTERPRETER_KERNELS_BUILD_LIST=${GENERATED_KERNELS_LIST_PATH}")
+endif ()
+
+if (DIS_QUANT)
+    message(STATUS "ONERT-MICRO will not use part for QUANTIZED models")
+    add_definitions(-DDIS_QUANT)
+    list(APPEND CMAKE_ARM_OPTIONS "-DDIS_QUANT=ON")
+endif()
+
+if (DIS_FLOAT)
+    message(STATUS "ONERT-MICRO will not use part for FLOAT models")
+    add_definitions(-DDIS_FLOAT)
+    list(APPEND CMAKE_ARM_OPTIONS "-DDIS_FLOAT=ON")
+endif()
+
+set(MICRO_ARM_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/standalone_arm")
+file(MAKE_DIRECTORY "${MICRO_ARM_BUILD_DIR}")
+
+set(MICRO_ARM_BUILD_DEPENDENCY "${MICRO_ARM_BUILD_DIR}/CMakeCache.txt")
+
+add_custom_command(
+        OUTPUT "${MICRO_ARM_BUILD_DEPENDENCY}"
+        COMMAND "${CMAKE_COMMAND}" "${CMAKE_CURRENT_SOURCE_DIR}/standalone" ${CMAKE_ARM_OPTIONS}
+        WORKING_DIRECTORY "${MICRO_ARM_BUILD_DIR}"
+        DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/standalone/CMakeLists.txt"
+        VERBATIM
+)
+
+add_custom_target(luci_interpreter_micro_arm_cmake DEPENDS "${MICRO_ARM_BUILD_DEPENDENCY}")
+
+# Generate KernelsToBuild list from circle model
+if (GENERATE_KERNELS_LIST_FROM)
+    add_executable(generator_kernels_list_exec helpers/GenerateKernelsListHelper.cpp)
+
+    target_link_libraries(generator_kernels_list_exec luci_micro_circle_reader)
+    target_link_libraries(generator_kernels_list_exec luci_micro_circle_schema)
+
+    target_include_directories(generator_kernels_list_exec PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/include")
+
+    add_custom_target(
+            generate_kernels_list ALL
+            COMMAND generator_kernels_list_exec ${GENERATE_KERNELS_LIST_FROM} ${GENERATED_KERNELS_LIST_PATH}
+            COMMENT "Generating KernelsToBuild list"
+    )
+    add_dependencies(generate_kernels_list luci_micro_circle_reader)
+    add_dependencies(luci_interpreter_micro_arm_cmake generate_kernels_list)
+
+endif ()
+
+# To remove GENERATE_KERNELS_LIST_FROM and KERNELS variable from cmake cache
+unset(GENERATE_KERNELS_LIST_FROM CACHE)
+unset(KERNELS CACHE)
+unset(USE_STATIC_KERNEL CACHE)
+unset(DIS_QUANT CACHE)
+unset(DIS_FLOAT CACHE)
+
+set(MICRO_ARM_BINARY "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/libluci_interpreter_micro.a")
+
+add_custom_command(
+        OUTPUT "${MICRO_ARM_BINARY}"
+        COMMAND "${CMAKE_MAKE_PROGRAM}" luci_interpreter_micro -j ${CPU_COUNT}
+        WORKING_DIRECTORY "${MICRO_ARM_BUILD_DIR}"
+        DEPENDS luci_interpreter_micro_arm_cmake luci_micro_circle_schema
+        VERBATIM
+)
+
+add_custom_target(luci_interpreter_micro_arm DEPENDS "${MICRO_ARM_BINARY}")
+
+add_subdirectory(eval-driver)
+
+if (NOT DEFINED BUILD_TEST)
+    return()
+endif ()
+
+# MBED OS QEMU build
+nnas_find_package(MbedOSSource EXACT 6.15 QUIET)
+
+if (NOT MbedOSSource_FOUND)
+    message(STATUS "Skipping luci-micro: MbedOSSource not found")
+    return()
+endif ()
+
+set(MBED_OS_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/tests/mbed-os")
+file(MAKE_DIRECTORY "${MBED_OS_BUILD_DIR}")
+
+set(MBED_OS_BUILD_DEPENDENCY "${MBED_OS_BUILD_DIR}/CMakeCache.txt")
+
+set(ONERTMICRO_SRC_DIR "${NNAS_PROJECT_SOURCE_DIR}/onert-micro")
+
+add_custom_command(
+        OUTPUT "${MBED_OS_BUILD_DEPENDENCY}"
+        COMMAND "${CMAKE_COMMAND}" "${CMAKE_CURRENT_SOURCE_DIR}/tests/mbed-os"
+        -DMICRO_ARM_BUILD_DIR=${MICRO_ARM_BUILD_DIR}
+        -DMbedOSSource_DIR=${MbedOSSource_DIR}
+        -DFlatBuffersSource_DIR=${FlatBuffersSource_DIR}
+        -DONERTMICRO_SRC_DIR=${ONERTMICRO_SRC_DIR}
+        WORKING_DIRECTORY "${MBED_OS_BUILD_DIR}"
+        DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/tests/mbed-os/CMakeLists.txt"
+        VERBATIM
+)
+
+add_custom_target(mbed_os_cmake DEPENDS "${MBED_OS_BUILD_DEPENDENCY}")
+
+set(MBED_OS_BINARY "${MBED_OS_BUILD_DIR}/libmbed_os.a")
+
+add_custom_command(
+        OUTPUT "${MBED_OS_BINARY}"
+        COMMAND "${CMAKE_MAKE_PROGRAM}" mbed_os -j ${CPU_COUNT}
+        WORKING_DIRECTORY "${MBED_OS_BUILD_DIR}"
+        DEPENDS mbed_os_cmake
+        VERBATIM
+)
+
+add_custom_target(mbed_os_arm DEPENDS "${MBED_OS_BINARY}")
+
+set(BUILD_TEST_BINARY "${MBED_OS_BUILD_DIR}/build_test.bin")
+
+add_custom_command(
+        OUTPUT "${BUILD_TEST_BINARY}"
+        COMMAND "${CMAKE_MAKE_PROGRAM}" build_test -j ${CPU_COUNT}
+        WORKING_DIRECTORY "${MBED_OS_BUILD_DIR}"
+        DEPENDS mbed_os_arm "${CMAKE_CURRENT_SOURCE_DIR}/tests/mbed-os/main.cpp" ${MICRO_ARM_BINARY}
+        VERBATIM
+)
+add_custom_target(onert_micro_build_test_arm DEPENDS "${BUILD_TEST_BINARY}")
diff --git a/onert-micro/eval-driver/CMakeLists.txt b/onert-micro/eval-driver/CMakeLists.txt
new file mode 100644 (file)
index 0000000..2a1b73a
--- /dev/null
@@ -0,0 +1,13 @@
+set(SRCS_EVAL_TESTER Driver.cpp)
+
+add_executable(onert_micro_eval_driver ${SRCS_EVAL_TESTER})
+
+# This variable is needed to separate the standalone interpreter libraries from the libraries used in the driver
+set(READER_SUFFIX "_driver")
+
+add_subdirectory(${NNAS_PROJECT_SOURCE_DIR}/onert-micro/luci-interpreter ${CMAKE_CURRENT_BINARY_DIR}/luci-interpreter)
+
+target_include_directories(onert_micro_eval_driver PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/include")
+target_link_libraries(onert_micro_eval_driver PUBLIC luci_interpreter_micro)
+
+install(TARGETS onert_micro_eval_driver DESTINATION bin)
diff --git a/onert-micro/eval-driver/Driver.cpp b/onert-micro/eval-driver/Driver.cpp
new file mode 100644 (file)
index 0000000..7ec4219
--- /dev/null
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci_interpreter/Interpreter.h>
+
+#include <stdexcept>
+#include <cstdlib>
+#include <fstream>
+#include <vector>
+#include <string>
+#include <iostream>
+
+namespace
+{
+
+using DataBuffer = std::vector<char>;
+
+void readDataFromFile(const std::string &filename, char *data, size_t data_size)
+{
+  std::ifstream fs(filename, std::ifstream::binary);
+  if (fs.fail())
+    throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+  if (fs.read(data, data_size).fail())
+    throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n");
+}
+
+void writeDataToFile(const std::string &filename, const char *data, size_t data_size)
+{
+  std::ofstream fs(filename, std::ofstream::binary);
+  if (fs.fail())
+    throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+  if (fs.write(data, data_size).fail())
+  {
+    throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
+  }
+}
+
+} // namespace
+
+/*
+ * @brief EvalDriver main
+ *
+ *        Driver for testing luci-interpreter
+ *
+ */
+int entry(int argc, char **argv)
+{
+  if (argc != 5)
+  {
+    std::cerr
+      << "Usage: " << argv[0]
+      << " <path/to/circle/model> <num_inputs> <path/to/input/prefix> <path/to/output/file>\n";
+    return EXIT_FAILURE;
+  }
+
+  const char *filename = argv[1];
+  const int32_t num_inputs = atoi(argv[2]);
+  const char *input_prefix = argv[3];
+  const char *output_file = argv[4];
+
+  std::ifstream file(filename, std::ios::binary | std::ios::in);
+  if (!file.good())
+  {
+    std::string errmsg = "Failed to open file";
+    throw std::runtime_error(errmsg.c_str());
+  }
+
+  file.seekg(0, std::ios::end);
+  auto fileSize = file.tellg();
+  file.seekg(0, std::ios::beg);
+
+  // allocate a buffer of the file size
+  DataBuffer model_data(fileSize);
+
+  // read the data
+  file.read(model_data.data(), fileSize);
+  if (file.fail())
+  {
+    std::string errmsg = "Failed to read file";
+    throw std::runtime_error(errmsg.c_str());
+  }
+
+  // Create interpreter.
+  luci_interpreter::Interpreter interpreter(model_data.data());
+
+  // Set input.
+  // Data for n'th input is read from ${input_prefix}n
+  // (ex: Add.circle.input0, Add.circle.input1 ..)
+  int num_inference = 1;
+  for (int j = 0; j < num_inference; ++j)
+  {
+    for (int32_t i = 0; i < num_inputs; i++)
+    {
+      auto input_data = reinterpret_cast<char *>(interpreter.allocateInputTensor(i));
+      readDataFromFile(std::string(input_prefix) + std::to_string(i), input_data,
+                       interpreter.getInputDataSizeByIndex(i));
+    }
+
+    // Do inference.
+    interpreter.interpret();
+  }
+
+  // Get output.
+  int num_outputs = 1;
+  for (int i = 0; i < num_outputs; i++)
+  {
+    auto data = interpreter.readOutputTensor(i);
+
+    // Output data is written in ${output_file}
+    // (ex: Add.circle.output0)
+    writeDataToFile(std::string(output_file) + std::to_string(i), reinterpret_cast<char *>(data),
+                    interpreter.getOutputDataSizeByIndex(i));
+  }
+  return EXIT_SUCCESS;
+}
+
+int entry(int argc, char **argv);
+
+#ifdef NDEBUG
+int main(int argc, char **argv)
+{
+  try
+  {
+    return entry(argc, argv);
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "ERROR: " << e.what() << std::endl;
+  }
+
+  return 255;
+}
+#else  // NDEBUG
+int main(int argc, char **argv)
+{
+  // NOTE main does not catch internal exceptions for debug build to make it easy to
+  //      check the stacktrace with a debugger
+  return entry(argc, argv);
+}
+#endif // !NDEBUG
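+
+// Usage sketch (file names here are illustrative assumptions):
+//   onert_micro_eval_driver Add.circle 1 Add.circle.input Add.circle.output
+// reads the i-th input from <input_prefix>i (Add.circle.input0, ...) and writes the
+// i-th output to <output_file>i (Add.circle.output0, ...).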
diff --git a/onert-micro/helpers/GenerateKernelsListHelper.cpp b/onert-micro/helpers/GenerateKernelsListHelper.cpp
new file mode 100644 (file)
index 0000000..1420020
--- /dev/null
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+
+#include <circle-generated/circle/schema_generated.h>
+
+#include <iostream>
+#include <fstream>
+#include <set>
+
+std::string get_register_kernel_str(const circle::BuiltinOperator builtin_operator)
+{
+  switch (builtin_operator)
+  {
+    case circle::BuiltinOperator_ADD:
+      return "REGISTER_KERNEL(ADD, Add)";
+    case circle::BuiltinOperator_ARG_MAX:
+      return "REGISTER_KERNEL(ARG_MAX, ArgMax)";
+    case circle::BuiltinOperator_AVERAGE_POOL_2D:
+      return "REGISTER_KERNEL(AVERAGE_POOL_2D, AveragePool2D)";
+    case circle::BuiltinOperator_BATCH_TO_SPACE_ND:
+      return "REGISTER_KERNEL(BATCH_TO_SPACE_ND, BatchToSpaceND)";
+    case circle::BuiltinOperator_CAST:
+      return "REGISTER_KERNEL(CAST, Cast)";
+    case circle::BuiltinOperator_CONCATENATION:
+      return "REGISTER_KERNEL(CONCATENATION, Concatenation)";
+    case circle::BuiltinOperator_CONV_2D:
+      return "REGISTER_KERNEL(CONV_2D, Conv2D)";
+    case circle::BuiltinOperator_DEPTH_TO_SPACE:
+      return "REGISTER_KERNEL(DEPTH_TO_SPACE, DepthToSpace)";
+    case circle::BuiltinOperator_DEPTHWISE_CONV_2D:
+      return "REGISTER_KERNEL(DEPTHWISE_CONV_2D, DepthwiseConv2D)";
+    case circle::BuiltinOperator_DEQUANTIZE:
+      return "REGISTER_KERNEL(DEQUANTIZE, Dequantize)";
+    case circle::BuiltinOperator_DIV:
+      return "REGISTER_KERNEL(DIV, Div)";
+    case circle::BuiltinOperator_ELU:
+      return "REGISTER_KERNEL(ELU, Elu)";
+    case circle::BuiltinOperator_EXP:
+      return "REGISTER_KERNEL(EXP, Exp)";
+    case circle::BuiltinOperator_EXPAND_DIMS:
+      return "REGISTER_KERNEL(EXPAND_DIMS, ExpandDims)";
+    case circle::BuiltinOperator_FILL:
+      return "REGISTER_KERNEL(FILL, Fill)";
+    case circle::BuiltinOperator_FLOOR:
+      return "REGISTER_KERNEL(FLOOR, Floor)";
+    case circle::BuiltinOperator_FLOOR_DIV:
+      return "REGISTER_KERNEL(FLOOR_DIV, FloorDiv)";
+    case circle::BuiltinOperator_EQUAL:
+      return "REGISTER_KERNEL(EQUAL, Equal)";
+    case circle::BuiltinOperator_FULLY_CONNECTED:
+      return "REGISTER_KERNEL(FULLY_CONNECTED, FullyConnected)";
+    case circle::BuiltinOperator_GREATER:
+      return "REGISTER_KERNEL(GREATER, Greater)";
+    case circle::BuiltinOperator_GREATER_EQUAL:
+      return "REGISTER_KERNEL(GREATER_EQUAL, GreaterEqual)";
+    case circle::BuiltinOperator_INSTANCE_NORM:
+      return "REGISTER_KERNEL(INSTANCE_NORM, InstanceNorm)";
+    case circle::BuiltinOperator_L2_NORMALIZATION:
+      return "REGISTER_KERNEL(L2_NORMALIZATION, L2Normalize)";
+    case circle::BuiltinOperator_L2_POOL_2D:
+      return "REGISTER_KERNEL(L2_POOL_2D, L2Pool2D)";
+    case circle::BuiltinOperator_LEAKY_RELU:
+      return "REGISTER_KERNEL(LEAKY_RELU, LeakyRelu)";
+    case circle::BuiltinOperator_LESS:
+      return "REGISTER_KERNEL(LESS, Less)";
+    case circle::BuiltinOperator_LESS_EQUAL:
+      return "REGISTER_KERNEL(LESS_EQUAL, LessEqual)";
+    case circle::BuiltinOperator_LOGICAL_AND:
+      return "REGISTER_KERNEL(LOGICAL_AND, LogicalAnd)";
+    case circle::BuiltinOperator_LOGICAL_NOT:
+      return "REGISTER_KERNEL(LOGICAL_NOT, LogicalNot)";
+    case circle::BuiltinOperator_LOGICAL_OR:
+      return "REGISTER_KERNEL(LOGICAL_OR, LogicalOr)";
+    case circle::BuiltinOperator_LOGISTIC:
+      return "REGISTER_KERNEL(LOGISTIC, Logistic)";
+    case circle::BuiltinOperator_MAXIMUM:
+      return "REGISTER_KERNEL(MAXIMUM, Maximum)";
+    case circle::BuiltinOperator_MAX_POOL_2D:
+      return "REGISTER_KERNEL(MAX_POOL_2D, MaxPool2D)";
+    case circle::BuiltinOperator_MINIMUM:
+      return "REGISTER_KERNEL(MINIMUM, Minimum)";
+    case circle::BuiltinOperator_MIRROR_PAD:
+      return "REGISTER_KERNEL(MIRROR_PAD, MirrorPad)";
+    case circle::BuiltinOperator_MUL:
+      return "REGISTER_KERNEL(MUL, Mul)";
+    case circle::BuiltinOperator_NEG:
+      return "REGISTER_KERNEL(NEG, Neg)";
+    case circle::BuiltinOperator_NOT_EQUAL:
+      return "REGISTER_KERNEL(NOT_EQUAL, NotEqual)";
+    case circle::BuiltinOperator_PAD:
+      return "REGISTER_KERNEL(PAD, Pad)";
+    case circle::BuiltinOperator_PADV2:
+      return "REGISTER_KERNEL(PADV2, PadV2)";
+    case circle::BuiltinOperator_PRELU:
+      return "REGISTER_KERNEL(PRELU, PRelu)";
+    case circle::BuiltinOperator_QUANTIZE:
+      return "REGISTER_KERNEL(QUANTIZE, Quantize)";
+    case circle::BuiltinOperator_RESHAPE:
+      return "REGISTER_KERNEL(RESHAPE, Reshape)";
+    case circle::BuiltinOperator_RESIZE_BILINEAR:
+      return "REGISTER_KERNEL(RESIZE_BILINEAR, ResizeBilinear)";
+    case circle::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR:
+      return "REGISTER_KERNEL(RESIZE_NEAREST_NEIGHBOR, ResizeNearestNeighbor)";
+    case circle::BuiltinOperator_RSQRT:
+      return "REGISTER_KERNEL(RSQRT, Rsqrt)";
+    case circle::BuiltinOperator_SHAPE:
+      return "REGISTER_KERNEL(SHAPE, Shape)";
+    case circle::BuiltinOperator_SOFTMAX:
+      return "REGISTER_KERNEL(SOFTMAX, Softmax)";
+    case circle::BuiltinOperator_SPACE_TO_BATCH_ND:
+      return "REGISTER_KERNEL(SPACE_TO_BATCH_ND, SpaceToBatchND)";
+    case circle::BuiltinOperator_SPACE_TO_DEPTH:
+      return "REGISTER_KERNEL(SPACE_TO_DEPTH, SpaceToDepth)";
+    case circle::BuiltinOperator_STRIDED_SLICE:
+      return "REGISTER_KERNEL(STRIDED_SLICE, StridedSlice)";
+    case circle::BuiltinOperator_SQRT:
+      return "REGISTER_KERNEL(SQRT, Sqrt)";
+    case circle::BuiltinOperator_SQUARE:
+      return "REGISTER_KERNEL(SQUARE, Square)";
+    case circle::BuiltinOperator_SQUARED_DIFFERENCE:
+      return "REGISTER_KERNEL(SQUARED_DIFFERENCE, SquaredDifference)";
+    case circle::BuiltinOperator_SQUEEZE:
+      return "REGISTER_KERNEL(SQUEEZE, Squeeze)";
+    case circle::BuiltinOperator_SUB:
+      return "REGISTER_KERNEL(SUB, Sub)";
+    case circle::BuiltinOperator_SVDF:
+      return "REGISTER_KERNEL(SVDF, SVDF)";
+    case circle::BuiltinOperator_TANH:
+      return "REGISTER_KERNEL(TANH, Tanh)";
+    case circle::BuiltinOperator_TRANSPOSE:
+      return "REGISTER_KERNEL(TRANSPOSE, Transpose)";
+    case circle::BuiltinOperator_TRANSPOSE_CONV:
+      return "REGISTER_KERNEL(TRANSPOSE_CONV, TransposeConv)";
+    case circle::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM:
+      return "REGISTER_KERNEL(UNIDIRECTIONAL_SEQUENCE_LSTM, UnidirectionalSequenceLSTM)";
+    default:
+      assert(false && "Not supported kernel");
+  }
+}
+
+std::vector<char> loadFile(const std::string &path)
+{
+  std::ifstream file(path, std::ios::binary | std::ios::in);
+  if (!file.good())
+  {
+    assert(false && "Failed to open file");
+  }
+
+  file.unsetf(std::ios::skipws);
+
+  file.seekg(0, std::ios::end);
+  auto fileSize = file.tellg();
+  file.seekg(0, std::ios::beg);
+
+  // allocate a buffer of the file size
+  std::vector<char> data(fileSize);
+
+  // read the data
+  file.read(data.data(), fileSize);
+  if (file.fail())
+  {
+    assert(false && "Failed to read file");
+  }
+
+  return data;
+}
+
+// Parse the model and write a REGISTER_KERNEL entry for each of its operators to std::ofstream &os
+void run(std::ofstream &os, const circle::Model *model)
+{
+  luci_interpreter::CircleReader reader;
+  reader.parse(model);
+  const uint32_t subgraph_size = reader.num_subgraph();
+
+  // Set to avoid duplication in generated list
+  std::set<circle::BuiltinOperator> operations_set;
+
+  for (uint32_t g = 0; g < subgraph_size; g++)
+  {
+    reader.select_subgraph(g);
+    auto ops = reader.operators();
+    for (uint32_t i = 0; i < ops.size(); ++i)
+    {
+      const auto op = ops.at(i);
+      auto op_builtin_operator = reader.builtin_code(op);
+
+      auto result = operations_set.insert(op_builtin_operator);
+      if (result.second)
+      {
+        os << get_register_kernel_str(op_builtin_operator) << std::endl;
+      }
+    }
+  }
+}
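+
+// Example of the generated list (sketch): one REGISTER_KERNEL line per unique operator
+// found in the model, e.g.
+//   REGISTER_KERNEL(CONV_2D, Conv2D)
+//   REGISTER_KERNEL(FULLY_CONNECTED, FullyConnected)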
+
+int main(int argc, char **argv)
+{
+  if (argc != 3)
+  {
+    assert(false && "Should be 2 arguments: circle model path, and path for generated model\n");
+  }
+
+  std::string model_file(argv[1]);
+  std::string generated_file_path(argv[2]);
+
+  std::vector<char> model_data = loadFile(model_file);
+  const circle::Model *circle_model = circle::GetModel(model_data.data());
+
+  if (circle_model == nullptr)
+  {
+    std::cerr << "ERROR: Failed to load circle '" << model_file << "'" << std::endl;
+    return 255;
+  }
+
+  // Open or create file
+  std::ofstream out;
+  out.open(generated_file_path);
+
+  if (out.is_open())
+    run(out, circle_model);
+  else
+    std::cout << "SMTH GOES WRONG WHILE OPEN FILE" << std::endl;
+  return 0;
+}
diff --git a/onert-micro/luci-interpreter/CMakeLists.txt b/onert-micro/luci-interpreter/CMakeLists.txt
new file mode 100644 (file)
index 0000000..b07ded2
--- /dev/null
@@ -0,0 +1,29 @@
+set(LUCI_INTERPRETER_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")
+set(LUCI_INTERPRETER_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src")
+if (NOT LUCI_INTERPRETER_PAL_DIR)
+    set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pal/mcu")
+endif()
+
+if (NOT LUCI_INTERPRETER_KERNELS_BUILD_LIST)
+    set(KERNEL_REGISTER_FILE "${LUCI_INTERPRETER_PAL_DIR}/KernelsToBuild.lst")
+else()
+    set(KERNEL_REGISTER_FILE ${LUCI_INTERPRETER_KERNELS_BUILD_LIST})
+endif()
+
+if (NOT DEFINED CUSTOM_LUCI_INTERPRETER_SUFFIX)
+    set(LUCI_INTERPRETER_SUFFIX "")
+else()
+    set(LUCI_INTERPRETER_SUFFIX ${CUSTOM_LUCI_INTERPRETER_SUFFIX})
+endif()
+
+if (DIS_QUANT)
+    add_definitions(-DDIS_QUANT)
+endif()
+
+if (DIS_FLOAT)
+    add_definitions(-DDIS_FLOAT)
+endif()
+
+add_compile_options(-fno-exceptions)
+add_compile_options(-Os)
+add_subdirectory(src)
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/Interpreter.h b/onert-micro/luci-interpreter/include/luci_interpreter/Interpreter.h
new file mode 100644 (file)
index 0000000..483c789
--- /dev/null
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_INTERPRETER_H
+#define LUCI_INTERPRETER_INTERPRETER_H
+
+#include "luci_interpreter/core/Tensor.h"
+
+#ifdef USE_STATIC_ALLOC
+#include "luci_interpreter/InterpreterConfigure.h"
+#include "memory_managers/StaticMemoryManager.h"
+#else
+#include "memory_managers/SimpleMemoryManager.h"
+#endif // USE_STATIC_ALLOC
+
+#include "loader/ModuleLoader.h"
+#include <memory>
+
+namespace luci_interpreter
+{
+
+class Interpreter
+{
+public:
+  // Construct default interpreter with dynamic allocations and with input allocations
+  explicit Interpreter(const char *model_data_raw);
+
+#ifdef USE_STATIC_ALLOC
+  // Construct interpreter with configurations
+  explicit Interpreter(const char *model_data_raw, const InterpreterConfigure &configuration);
+#endif // USE_STATIC_ALLOC
+
+  ~Interpreter();
+
+  void allocateAndWriteInputTensor(int32_t input_tensor_index, const void *data, size_t data_size);
+  uint8_t *allocateInputTensor(int32_t input_tensor_index);
+
+  uint8_t *readOutputTensor(int32_t output_tensor_index);
+
+  int32_t getInputDataSizeByIndex(int32_t input_tensor_index);
+  int32_t getOutputDataSizeByIndex(int32_t output_tensor_index);
+
+  void interpret();
+
+private:
+  // _memory_manager must be declared before _runtime_module because
+  // members are destroyed in reverse declaration order
+  MemoryManager _memory_manager{};
+  RuntimeModule _runtime_module{};
+};
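+
+// Minimal usage sketch (mirrors eval-driver/Driver.cpp in this change; `model_data`
+// is assumed to hold a loaded circle flatbuffer):
+//   luci_interpreter::Interpreter interpreter(model_data.data());
+//   char *in = reinterpret_cast<char *>(interpreter.allocateInputTensor(0));
+//   // ... fill `in` with interpreter.getInputDataSizeByIndex(0) bytes ...
+//   interpreter.interpret();
+//   uint8_t *out = interpreter.readOutputTensor(0);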
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_INTERPRETER_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/InterpreterConfigure.h b/onert-micro/luci-interpreter/include/luci_interpreter/InterpreterConfigure.h
new file mode 100644 (file)
index 0000000..5619c93
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ONERT_MICRO_INTERPRETER_CONFIGURE_H
+#define ONERT_MICRO_INTERPRETER_CONFIGURE_H
+
+namespace luci_interpreter
+{
+#ifdef USE_STATIC_ALLOC
+
+enum MemoryManagerType
+{
+  STATIC,
+  DYNAMIC
+};
+
+class InterpreterConfigure
+{
+public:
+  void setAllocateInputValue(bool allocate_input) { _allocate_input = allocate_input; }
+  bool getAllocateInputValue() const { return _allocate_input; }
+
+  InterpreterConfigure &setMemoryManager(MemoryManagerType mm_type)
+  {
+    switch (mm_type)
+    {
+      case MemoryManagerType::STATIC:
+        _use_static_manager = true;
+        break;
+      case MemoryManagerType::DYNAMIC:
+        _use_static_manager = false;
+        break;
+      default:
+        assert(false);
+    }
+    return *this;
+  }
+
+  // TODO: remove this method
+  InterpreterConfigure &configStaticMemoryManager(uint32_t input_buf_size, uint32_t temp_buf_size,
+                                                  uint32_t output_buf_size)
+  {
+    assert(_use_static_manager);
+    _input_buf_size = input_buf_size;
+    _temp_buf_size = temp_buf_size;
+    _output_buf_size = output_buf_size;
+    return *this;
+  }
+
+  bool isStaticManager() const { return _use_static_manager; }
+
+private:
+  bool _use_static_manager = false;
+  bool _allocate_input = true;
+
+public:
+  // TODO: remove it and read these values from circle file
+  uint32_t _input_buf_size = 0;
+  uint32_t _temp_buf_size = 0;
+  uint32_t _output_buf_size = 0;
+};
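+
+// Configuration sketch (only compiled with USE_STATIC_ALLOC; the buffer sizes below
+// are illustrative assumptions):
+//   InterpreterConfigure cfg;
+//   cfg.setMemoryManager(MemoryManagerType::STATIC)
+//     .configStaticMemoryManager(/*input_buf_size=*/1024, /*temp_buf_size=*/2048,
+//                                /*output_buf_size=*/512);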
+
+#endif
+
+} // namespace luci_interpreter
+
+#endif // ONERT_MICRO_INTERPRETER_CONFIGURE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/core/DataType.h b/onert-micro/luci-interpreter/include/luci_interpreter/core/DataType.h
new file mode 100644 (file)
index 0000000..e34d224
--- /dev/null
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_DATATYPE_H
+#define LUCI_INTERPRETER_CORE_DATATYPE_H
+
+#include <cstdint>
+#include <cstddef>
+#include <string>
+#include <cassert>
+
+namespace luci_interpreter
+{
+// TODO Check whether this can be removed
+/**
+ * @brief "scalar" value type
+ */
+enum class DataType
+{
+  Unknown, // Unknown type (serves as a default value)
+
+  U8,  // 8-bit unsigned integer
+  U16, // 16-bit unsigned integer
+  U32, // 32-bit unsigned integer
+  U64, // 64-bit unsigned integer
+
+  S8,  // 8-bit signed integer
+  S16, // 16-bit signed integer
+  S32, // 32-bit signed integer
+  S64, // 64-bit signed integer
+
+  FLOAT16, // IEEE 16-bit floating-point
+  FLOAT32, // IEEE 32-bit floating-point
+  FLOAT64, // IEEE 64-bit floating-point
+
+  // WARNING the size of Bool may vary for NN frameworks
+  // TODO we need to find a way to resolve this issue
+  BOOL, // Boolean
+};
+
+/**
+ * @brief C++ scalar type corresponding to each DataType
+ */
+template <DataType DT> struct DataTypeImpl
+{
+  // using Type = ...
+};
+
+// TODO Support other enum values
+template <> struct DataTypeImpl<DataType::S8>
+{
+  // Use C++ int8_t type for 8bit integer
+  using Type = int8_t;
+};
+
+template <> struct DataTypeImpl<DataType::U8>
+{
+  // Use C++ uint8_t type for unsigned 8bit integer
+  using Type = uint8_t;
+};
+
+template <> struct DataTypeImpl<DataType::S16>
+{
+  // Use C++ int16_t type for 16bit integer
+  using Type = int16_t;
+};
+
+template <> struct DataTypeImpl<DataType::U16>
+{
+  // Use C++ uint16_t type for unsigned 16bit integer
+  using Type = uint16_t;
+};
+
+template <> struct DataTypeImpl<DataType::S32>
+{
+  // Use C++ int32_t type for 32bit integer
+  using Type = int32_t;
+};
+
+template <> struct DataTypeImpl<DataType::U32>
+{
+  // Use C++ uint32_t type for unsigned 32bit integer
+  using Type = uint32_t;
+};
+
+template <> struct DataTypeImpl<DataType::S64>
+{
+  // Use C++ int64_t type for 64bit integer
+  using Type = int64_t;
+};
+
+template <> struct DataTypeImpl<DataType::U64>
+{
+  // Use C++ uint64_t type for unsigned 64bit integer
+  using Type = uint64_t;
+};
+
+template <> struct DataTypeImpl<DataType::FLOAT16>
+{
+  // float16 type with 16bit value, encoded with help of FP16 library
+  // https://github.com/Maratyszcza/FP16/
+  using Type = uint16_t;
+};
+
+template <> struct DataTypeImpl<DataType::FLOAT32>
+{
+  // Use C++ float type for IEEE 32-bit floating-point numbers
+  using Type = float;
+};
+
+template <> struct DataTypeImpl<DataType::FLOAT64>
+{
+  // Use C++ double type for IEEE 64-bit floating-point numbers
+  using Type = double;
+};
+
+// NOTE DataTypeImpl for BOOL is subject to change
+template <> struct DataTypeImpl<DataType::BOOL>
+{
+  // Use C++ uint8_t type for bool
+  using Type = uint8_t;
+};
+
+/**
+ * @brief Returns the size of the data type.
+ * @note If you need the size at compile time, use `sizeof(typename DataTypeImpl<DT>::Type)`.
+ */
+inline uint32_t size(DataType data_type)
+{
+  switch (data_type)
+  {
+    case DataType::S8:
+      return sizeof(DataTypeImpl<DataType::S8>::Type);
+    case DataType::U8:
+      return sizeof(DataTypeImpl<DataType::U8>::Type);
+    case DataType::S16:
+      return sizeof(DataTypeImpl<DataType::S16>::Type);
+    case DataType::U16:
+      return sizeof(DataTypeImpl<DataType::U16>::Type);
+    case DataType::S32:
+      return sizeof(DataTypeImpl<DataType::S32>::Type);
+    case DataType::U32:
+      return sizeof(DataTypeImpl<DataType::U32>::Type);
+    case DataType::S64:
+      return sizeof(DataTypeImpl<DataType::S64>::Type);
+    case DataType::U64:
+      return sizeof(DataTypeImpl<DataType::U64>::Type);
+    case DataType::FLOAT16:
+      return sizeof(DataTypeImpl<DataType::FLOAT16>::Type);
+    case DataType::FLOAT32:
+      return sizeof(DataTypeImpl<DataType::FLOAT32>::Type);
+    case DataType::FLOAT64:
+      return sizeof(DataTypeImpl<DataType::FLOAT64>::Type);
+    case DataType::BOOL:
+      return sizeof(DataTypeImpl<DataType::BOOL>::Type);
+    default:
+      // TODO Support remaining data types.
+      assert(false);
+      return UINT32_MAX; // Avoid compiler warning.
+  }
+}
+
+inline size_t getDataTypeSize(DataType data_type) { return size(data_type); }
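+
+// Example (sketch): byte size of a buffer holding `n` elements of runtime type `dt`:
+//   size_t bytes = n * luci_interpreter::size(dt); // e.g. 4 * n for DataType::FLOAT32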
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_DATATYPE_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/core/ParamsType.h b/onert-micro/luci-interpreter/include/luci_interpreter/core/ParamsType.h
new file mode 100644 (file)
index 0000000..af0687f
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_MICRO_INTERPRETER_PARAMS_TYPE_H__
+#define __LUCI_MICRO_INTERPRETER_PARAMS_TYPE_H__
+
+#include <vector>
+#include <cstdint>
+#include <utility>
+
+namespace luci_interpreter
+{
+// TODO Check whether this can be removed
+enum class FusedActFunc
+{
+  UNDEFINED, // This is not defined by TFLite or Circle.
+             // It was added to prevent programming errors.
+  NONE,
+  RELU,
+  RELU_N1_TO_1,
+  RELU6,
+  TANH,
+  SIGN_BIT
+};
+
+enum class Padding
+{
+  UNDEFINED, // This is not defined by TFLite. It was added to prevent programming errors.
+
+  SAME,
+  VALID,
+};
+
+enum class MirrorPadMode
+{
+  UNDEFINED, // This is not defined by Circle. It was added to prevent programming errors.
+
+  REFLECT,
+  SYMMETRIC,
+};
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_MICRO_INTERPRETER_PARAMS_TYPE_H__
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h b/onert-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h
new file mode 100644 (file)
index 0000000..318ec0e
--- /dev/null
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_TENSOR_H
+#define LUCI_INTERPRETER_CORE_TENSOR_H
+
+#include "luci_interpreter/core/DataType.h"
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+class Tensor
+{
+public:
+#ifndef DIS_QUANT
+  static float scale(const circle::Tensor *circle_tensor)
+  {
+    const auto *quant_params = circle_tensor->quantization();
+    if (quant_params == nullptr)
+    {
+      assert(false && "There is no quantization params");
+      return 0;
+    }
+
+    return *quant_params->scale()->cbegin();
+  }
+
+  static int32_t zero_point(const circle::Tensor *circle_tensor)
+  {
+    const auto *quant_params = circle_tensor->quantization();
+    if (quant_params == nullptr)
+    {
+      assert(false && "There is no quantization params");
+      return 0;
+    }
+
+    return *quant_params->zero_point()->cbegin();
+  }
+
+  static const std::vector<float> scales(const circle::Tensor *circle_tensor)
+  {
+    const auto *quant_params = circle_tensor->quantization();
+    if (quant_params == nullptr)
+    {
+      assert(false && "There is no quantization params");
+      return {};
+    }
+    assert(quant_params->scale() != nullptr);
+    std::vector<float> scales(quant_params->scale()->cbegin(), quant_params->scale()->cend());
+
+    return scales;
+  }
+
+  static const std::vector<int32_t> zero_points(const circle::Tensor *circle_tensor)
+  {
+    const auto *quant_params = circle_tensor->quantization();
+    if (quant_params == nullptr)
+    {
+      assert(false && "There is no quantization params");
+      return {};
+    }
+    assert(quant_params->zero_point() != nullptr);
+    std::vector<int32_t> zero_points(quant_params->zero_point()->cbegin(),
+                                     quant_params->zero_point()->cend());
+
+    return zero_points;
+  }
+
+  static int32_t quantized_dimension(const circle::Tensor *circle_tensor)
+  {
+    const auto *quant_params = circle_tensor->quantization();
+    if (quant_params == nullptr)
+    {
+      assert(false && "There is no quantization params");
+      return 0;
+    }
+    return quant_params->quantized_dimension();
+  }
+#endif
+
+  static DataType element_type(const circle::Tensor *circle_tensor)
+  {
+    return luci_datatype(circle_tensor->type());
+  }
+
+  static int num_dims(const circle::Tensor *circle_tensor)
+  {
+    // TODO Check whether the wrap() call can be removed
+    auto const &const_dims = wrap(circle_tensor->shape());
+    return const_dims.size();
+  }
+
+  static int32_t dim(const circle::Tensor *circle_tensor, int i)
+  {
+    // TODO Check whether the wrap() call can be removed
+    assert(i >= 0);
+    auto const &const_dims = wrap(circle_tensor->shape());
+    assert(i < const_dims.size());
+
+    return const_dims[i];
+  }
+
+  static int32_t num_elements(const circle::Tensor *circle_tensor)
+  {
+    int32_t result = 1;
+    auto const &const_dims = wrap(circle_tensor->shape());
+    for (const int32_t dim : const_dims)
+    {
+      result *= dim;
+    }
+    return result;
+  }
+};
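+
+// Example (sketch): total byte size of a circle::Tensor `t` via the static helpers above:
+//   int32_t bytes = Tensor::num_elements(t) * size(Tensor::element_type(t));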
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_TENSOR_H
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/core/reader/CircleMicroReader.h b/onert-micro/luci-interpreter/include/luci_interpreter/core/reader/CircleMicroReader.h
new file mode 100644 (file)
index 0000000..3967e40
--- /dev/null
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_MICRO_INTERPRETER_MICRO_READER_H__
+#define __LUCI_MICRO_INTERPRETER_MICRO_READER_H__
+
+#include "luci_interpreter/core/ParamsType.h"
+#include "luci_interpreter/core/DataType.h"
+
+#include <circle-generated/circle/schema_generated.h>
+
+#include <map>
+#include <memory>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+#ifdef USE_STATIC_ALLOC
+namespace
+{
+
+using ExecutionPlanTable = std::map<uint32_t, std::vector<uint32_t>>;
+
+template <typename VECTORTYPE> uint32_t read_u32(const VECTORTYPE &buffer, uint32_t idx)
+{
+  static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!");
+
+  uint32_t val = 0;
+  val += (buffer.at(idx + 0) << 0 * 8);
+  val += (buffer.at(idx + 1) << 1 * 8);
+  val += (buffer.at(idx + 2) << 2 * 8);
+  val += (buffer.at(idx + 3) << 3 * 8);
+  return val;
+}
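+
+// Example: read_u32 decodes four little-endian bytes starting at `idx`,
+// e.g. {0x2A, 0x00, 0x00, 0x00} -> 42.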
+
+} // namespace
+
+namespace read_metadata
+{
+
+template <typename VECTORTYPE>
+ExecutionPlanTable decode_execution_plan(const VECTORTYPE &execution_plan_data)
+{
+  static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!");
+
+  ExecutionPlanTable execution_plan_table;
+  uint32_t idx = 0;
+
+  if (execution_plan_data.size() < 4)
+    assert(false && "Op table decode error : invalid entry number");
+
+  uint32_t entry_number = read_u32(execution_plan_data, idx);
+  idx += sizeof(uint32_t);
+
+  while (idx < execution_plan_data.size())
+  {
+    if (idx + 2 * sizeof(uint32_t) > execution_plan_data.size())
+      assert(false && "Op table decode error : invalid entry item");
+
+    uint32_t id = read_u32(execution_plan_data, idx);
+    idx += sizeof(uint32_t);
+
+    uint32_t size = read_u32(execution_plan_data, idx);
+
+    if (size == 0)
+      assert(false && "Op table decode error : empty execution plan entry");
+
+    idx += sizeof(uint32_t);
+
+    if (idx + sizeof(uint32_t) * size > execution_plan_data.size())
+      assert(false && "Source table decode error : invalid entry data");
+
+    std::vector<uint32_t> execution_plan_vector;
+    uint32_t position = read_u32(execution_plan_data, idx);
+    idx += sizeof(uint32_t);
+
+    for (uint32_t j = 1; j < size; ++j)
+    {
+      uint32_t execution_plan_inform = read_u32(execution_plan_data, idx);
+      idx += sizeof(uint32_t);
+
+      execution_plan_vector.push_back(execution_plan_inform);
+    }
+
+    if (!execution_plan_table.insert({position, execution_plan_vector}).second)
+      assert(false && "Op table decode error : duplicated origin ID");
+  }
+
+  if (idx != execution_plan_data.size())
+    assert(false && "Op table decode error : data size invalid");
+
+  if (execution_plan_table.size() != entry_number)
+    assert(false && "Op table decode error : entry number invalid");
+
+  return execution_plan_table;
+}
+
+} // namespace read_metadata
+#endif
+
+DataType luci_datatype(circle::TensorType type);
+FusedActFunc luci_actfunc(circle::ActivationFunctionType type);
+Padding luci_padding(circle::Padding padding);
+MirrorPadMode luci_mirrorpad_mode(circle::MirrorPadMode mode);
+
+/**
+ * @brief Wrapper to use flatbuffers::Vector pointer as std::vector entity
+ */
+template <typename T> class VectorWrapper
+{
+public:
+  explicit VectorWrapper(const flatbuffers::Vector<T> *ptr);
+
+  const T *data() const;
+  uint32_t size() const;
+
+  using iterator = typename flatbuffers::Vector<T>::const_iterator;
+  iterator begin() const;
+  iterator end() const;
+
+  using value_type = typename flatbuffers::Vector<T>::return_type;
+  value_type at(uint32_t i) const;
+  value_type operator[](uint32_t i) const;
+
+  bool null() const;
+  bool empty() const;
+
+private:
+  const flatbuffers::Vector<T> *_vector;
+};
+
+template <typename T> VectorWrapper<T> wrap(const flatbuffers::Vector<T> *vec)
+{
+  return VectorWrapper<T>(vec);
+}
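+
+// Usage sketch (`subgraph` is an assumed const circle::SubGraph *):
+//   auto inputs = wrap(subgraph->inputs()); // VectorWrapper<int32_t>
+//   for (uint32_t i = 0; i < inputs.size(); ++i) { int32_t tensor_idx = inputs.at(i); /* ... */ }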
+
+/**
+ * @brief Loads Circle file and provides helpers to access attributes
+ */
+class CircleReader
+{
+public:
+  using CircleBuffers = VectorWrapper<flatbuffers::Offset<circle::Buffer>>;
+  using CircleTensors = VectorWrapper<flatbuffers::Offset<circle::Tensor>>;
+  using CircleOperators = VectorWrapper<flatbuffers::Offset<circle::Operator>>;
+  using CircleOperatorCodes = VectorWrapper<flatbuffers::Offset<circle::OperatorCode>>;
+  using CircleMetadataSet = VectorWrapper<flatbuffers::Offset<circle::Metadata>>;
+
+public:
+  CircleReader() = default;
+
+public: // direct API
+  CircleOperatorCodes opcodes() const { return wrap(_model->operator_codes()); }
+  CircleBuffers buffers() const { return wrap(_model->buffers()); }
+  CircleTensors tensors() const { return wrap(_current_subgraph->tensors()); }
+  CircleOperators operators() const { return wrap(_current_subgraph->operators()); }
+  VectorWrapper<int32_t> inputs() const { return wrap(_current_subgraph->inputs()); }
+  VectorWrapper<int32_t> outputs() const { return wrap(_current_subgraph->outputs()); }
+  circle::DataFormat data_format() const { return _current_subgraph->data_format(); }
+  CircleMetadataSet metadata() const { return wrap(_model->metadata()); }
+
+  uint32_t num_subgraph() const { return wrap(_model->subgraphs()).size(); }
+  circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
+
+public:
+  bool parse(const circle::Model *model);
+  bool select_subgraph(uint32_t subgraph);
+
+private:
+  const circle::Model *_model{nullptr};
+  const circle::SubGraph *_current_subgraph{nullptr};
+};
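+
+// Typical usage sketch (see helpers/GenerateKernelsListHelper.cpp added in this change):
+//   CircleReader reader;
+//   reader.parse(circle_model);
+//   for (uint32_t g = 0; g < reader.num_subgraph(); ++g)
+//   {
+//     reader.select_subgraph(g);
+//     auto ops = reader.operators();
+//     // ... iterate ops and query reader.builtin_code(op) ...
+//   }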
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_MICRO_INTERPRETER_MICRO_READER_H__
diff --git a/onert-micro/luci-interpreter/include/luci_interpreter/core/reader/CircleMicroReaderHelper.h b/onert-micro/luci-interpreter/include/luci_interpreter/core/reader/CircleMicroReaderHelper.h
new file mode 100644 (file)
index 0000000..c1122cf
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_MICRO_CIRCLE_MICRO_READER_HELPER_H__
+#define __LUCI_MICRO_CIRCLE_MICRO_READER_HELPER_H__
+
+#include <circle-generated/circle/schema_generated.h>
+
+#include <vector>
+
+namespace circle
+{
+
+::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode);
+bool is_valid(const ::circle::OperatorCode *opcode);
+bool is_custom(const ::circle::OperatorCode *opcode);
+const char *tensor_type(const ::circle::Tensor *tensor);
+
+template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
+{
+  if (flat_array == nullptr)
+  {
+    assert(false && "flat array is nullptr");
+  }
+
+  std::vector<T> ret(flat_array->Length());
+  for (uint32_t i = 0; i < flat_array->Length(); i++)
+  {
+    ret[i] = flat_array->Get(i);
+  }
+  return ret;
+}
+
+} // namespace circle
+
+#endif // __LUCI_MICRO_CIRCLE_MICRO_READER_HELPER_H__
diff --git a/onert-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst b/onert-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
new file mode 100644 (file)
index 0000000..353e117
--- /dev/null
@@ -0,0 +1,61 @@
+REGISTER_KERNEL(ADD, Add)
+REGISTER_KERNEL(ARG_MAX, ArgMax)
+REGISTER_KERNEL(AVERAGE_POOL_2D, AveragePool2D)
+REGISTER_KERNEL(BATCH_TO_SPACE_ND, BatchToSpaceND)
+REGISTER_KERNEL(CAST, Cast)
+REGISTER_KERNEL(CONCATENATION, Concatenation)
+REGISTER_KERNEL(CONV_2D, Conv2D)
+REGISTER_KERNEL(DEPTH_TO_SPACE, DepthToSpace)
+REGISTER_KERNEL(DEPTHWISE_CONV_2D, DepthwiseConv2D)
+REGISTER_KERNEL(DEQUANTIZE, Dequantize)
+REGISTER_KERNEL(DIV, Div)
+REGISTER_KERNEL(ELU, Elu)
+REGISTER_KERNEL(EXP, Exp)
+REGISTER_KERNEL(EXPAND_DIMS, ExpandDims)
+REGISTER_KERNEL(FILL, Fill)
+REGISTER_KERNEL(FLOOR, Floor)
+REGISTER_KERNEL(FLOOR_DIV, FloorDiv)
+REGISTER_KERNEL(EQUAL, Equal)
+REGISTER_KERNEL(FULLY_CONNECTED, FullyConnected)
+REGISTER_KERNEL(GREATER, Greater)
+REGISTER_KERNEL(GREATER_EQUAL, GreaterEqual)
+REGISTER_KERNEL(INSTANCE_NORM, InstanceNorm)
+REGISTER_KERNEL(L2_NORMALIZATION, L2Normalize)
+REGISTER_KERNEL(L2_POOL_2D, L2Pool2D)
+REGISTER_KERNEL(LEAKY_RELU, LeakyRelu)
+REGISTER_KERNEL(LESS, Less)
+REGISTER_KERNEL(LESS_EQUAL, LessEqual)
+REGISTER_KERNEL(LOGICAL_AND, LogicalAnd)
+REGISTER_KERNEL(LOGICAL_NOT, LogicalNot)
+REGISTER_KERNEL(LOGICAL_OR, LogicalOr)
+REGISTER_KERNEL(LOGISTIC, Logistic)
+REGISTER_KERNEL(MAXIMUM, Maximum)
+REGISTER_KERNEL(MAX_POOL_2D, MaxPool2D)
+REGISTER_KERNEL(MINIMUM, Minimum)
+REGISTER_KERNEL(MIRROR_PAD, MirrorPad)
+REGISTER_KERNEL(MUL, Mul)
+REGISTER_KERNEL(NEG, Neg)
+REGISTER_KERNEL(NOT_EQUAL, NotEqual)
+REGISTER_KERNEL(PAD, Pad)
+REGISTER_KERNEL(PADV2, PadV2)
+REGISTER_KERNEL(PRELU, PRelu)
+REGISTER_KERNEL(QUANTIZE, Quantize)
+REGISTER_KERNEL(RESHAPE, Reshape)
+REGISTER_KERNEL(RESIZE_BILINEAR, ResizeBilinear)
+REGISTER_KERNEL(RESIZE_NEAREST_NEIGHBOR, ResizeNearestNeighbor)
+REGISTER_KERNEL(RSQRT, Rsqrt)
+REGISTER_KERNEL(SHAPE, Shape)
+REGISTER_KERNEL(SOFTMAX, Softmax)
+REGISTER_KERNEL(SPACE_TO_BATCH_ND, SpaceToBatchND)
+REGISTER_KERNEL(SPACE_TO_DEPTH, SpaceToDepth)
+REGISTER_KERNEL(STRIDED_SLICE, StridedSlice)
+REGISTER_KERNEL(SQRT, Sqrt)
+REGISTER_KERNEL(SQUARE, Square)
+REGISTER_KERNEL(SQUARED_DIFFERENCE, SquaredDifference)
+REGISTER_KERNEL(SQUEEZE, Squeeze)
+REGISTER_KERNEL(SUB, Sub)
+REGISTER_KERNEL(SVDF, SVDF)
+REGISTER_KERNEL(TANH, Tanh)
+REGISTER_KERNEL(TRANSPOSE, Transpose)
+REGISTER_KERNEL(TRANSPOSE_CONV, TransposeConv)
+REGISTER_KERNEL(UNIDIRECTIONAL_SEQUENCE_LSTM, UnidirectionalSequenceLSTM)
diff --git a/onert-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h b/onert-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h
new file mode 100644 (file)
index 0000000..f8a4a80
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+               const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+               const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
+               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  tflite::reference_ops::BatchToSpaceND(
+    unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+    unextended_input3_shape, crops_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/onert-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h b/onert-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h
new file mode 100644 (file)
index 0000000..bd47a88
--- /dev/null
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/conv.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+                        const float *input_data, const tflite::RuntimeShape &filter_shape,
+                        const float *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const float *bias_data, const tflite::RuntimeShape &output_shape,
+                        float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        float *scratchpad_data)
+{
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+  tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+                              bias_shape, bias_data, output_shape, output_data,
+                              tflite::RuntimeShape(), nullptr);
+}
+
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+                        const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+                        const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+                        uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        uint8 *scratchpad_data)
+{
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+  tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+                              bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+                              scratchpad_data, nullptr);
+}
+
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+                                  const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+                                  const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+                                  const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+                                  const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+                                  int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                                  int8 *scratchpad_data)
+{
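+  // A scratchpad buffer is only allocated when the CMSIS-NN int8 path is usable
+  // (see SetupScratchpadTensor below); when it is null, fall back to the reference kernel.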
+  if (scratchpad_data)
+  {
+    cmsis_nn_conv_params conv_params;
+    conv_params.dilation.h = params.dilation_height_factor;
+    conv_params.dilation.w = params.dilation_width_factor;
+
+    assert(conv_params.dilation.h == 1);
+    assert(conv_params.dilation.w == 1);
+
+    conv_params.input_offset = params.input_offset;
+    conv_params.output_offset = params.output_offset;
+    conv_params.stride.h = params.stride_height;
+    conv_params.stride.w = params.stride_width;
+    conv_params.padding.h = params.padding_values.height;
+    conv_params.padding.w = params.padding_values.width;
+    conv_params.activation.min = params.quantized_activation_min;
+    conv_params.activation.max = params.quantized_activation_max;
+
+    cmsis_nn_per_channel_quant_params quant_params;
+    quant_params.multiplier = const_cast<int32_t *>(mult);
+    quant_params.shift = const_cast<int32_t *>(shifts);
+
+    assert(conv_params.activation.min <= conv_params.activation.max);
+    assert(input_shape.DimensionsCount() == 4);
+    assert(filter_shape.DimensionsCount() == 4);
+    assert(output_shape.DimensionsCount() == 4);
+    const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+    const int input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+    const int output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
+    if (bias_data)
+    {
+      assert(bias_shape.FlatSize() == output_depth);
+    }
+
+    cmsis_nn_dims input_dims;
+    input_dims.n = batch_size;
+    input_dims.h = input_shape.Dims(1);
+    input_dims.w = input_shape.Dims(2);
+    input_dims.c = input_depth;
+
+    cmsis_nn_dims filter_dims;
+    filter_dims.n = output_depth;
+    filter_dims.h = filter_shape.Dims(1);
+    filter_dims.w = filter_shape.Dims(2);
+    filter_dims.c = input_depth;
+
+    cmsis_nn_dims bias_dims;
+    bias_dims.n = 1;
+    bias_dims.h = 1;
+    bias_dims.w = 1;
+    bias_dims.c = output_depth;
+
+    cmsis_nn_dims output_dims;
+    output_dims.n = batch_size;
+    output_dims.h = output_shape.Dims(1);
+    output_dims.w = output_shape.Dims(2);
+    output_dims.c = output_depth;
+
+    cmsis_nn_context ctx;
+    ctx.buf = scratchpad_data;
+    ctx.size = scratchpad_shape.Dims(0);
+
+    auto res = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data,
+                                       &filter_dims, filter_data, &bias_dims, bias_data,
+                                       &output_dims, output_data);
+    assert(res == ARM_MATH_SUCCESS);
+  }
+  else
+  {
+    tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+                                                  filter_shape, filter_data, bias_shape, bias_data,
+                                                  output_shape, output_data);
+  }
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::ConvParams &params,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &filter_shape,
+                                         const tflite::RuntimeShape &output_shape)
+{
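+  // The CMSIS-NN s8 convolution wrapper is only used for int8 inputs without dilation;
+  // in that case the scratchpad is sized via arm_convolve_wrapper_s8_get_buffer_size,
+  // otherwise the scratchpad tensor is marked as not allocatable.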
+  cmsis_nn_conv_params conv_params;
+  conv_params.dilation.h = params.dilation_height_factor;
+  conv_params.dilation.w = params.dilation_width_factor;
+
+  if (input_data_type == luci_interpreter::DataType::S8 && conv_params.dilation.h == 1 &&
+      conv_params.dilation.w == 1)
+  {
+    const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+    const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+    const int32_t output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
+    const int32_t filter_height = filter_shape.Dims(1);
+    const int32_t filter_width = filter_shape.Dims(2);
+    const int32_t output_height = output_shape.Dims(1);
+    const int32_t output_width = output_shape.Dims(2);
+
+    conv_params.input_offset = params.input_offset;
+    conv_params.output_offset = params.output_offset;
+    conv_params.stride.h = params.stride_height;
+    conv_params.stride.w = params.stride_width;
+    conv_params.padding.h = params.padding_values.height;
+    conv_params.padding.w = params.padding_values.width;
+
+    cmsis_nn_dims input_dims;
+    input_dims.n = batches;
+    input_dims.h = input_shape.Dims(1);
+    input_dims.w = input_shape.Dims(2);
+    input_dims.c = input_depth;
+
+    cmsis_nn_dims filter_dims;
+    filter_dims.n = output_depth;
+    filter_dims.h = filter_height;
+    filter_dims.w = filter_width;
+    filter_dims.c = input_depth;
+
+    cmsis_nn_dims output_dims;
+    output_dims.n = batches;
+    output_dims.h = output_height;
+    output_dims.w = output_width;
+    output_dims.c = output_depth;
+
+    const int32_t buf_size = arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims,
+                                                                     &filter_dims, &output_dims);
+
+    luci_interpreter::Shape scratchpad_shape{buf_size};
+    scratchpad->resize(scratchpad_shape);
+  }
+  else
+  {
+    scratchpad->set_allocatable(false);
+  }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/onert-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h b/onert-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h
new file mode 100644 (file)
index 0000000..46878f4
--- /dev/null
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+                        const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+                        const T *input_data, const tflite::RuntimeShape &filter_shape,
+                        const T *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+                        T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        T *scratchpad_data)
+{
+  {
+    // MARK: this operation is not supported at the moment
+    assert(false && "DepthwiseConvPerChannel NYI");
+    (void)params;
+    (void)output_multiplier;
+    (void)output_shift;
+    (void)input_shape;
+    (void)input_data;
+    (void)filter_shape;
+    (void)filter_data;
+    (void)bias_shape;
+    (void)bias_data;
+    (void)output_shape;
+    (void)output_data;
+    (void)scratchpad_shape;
+    (void)scratchpad_data;
+  }
+}
+
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+  const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+  const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+  const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+  const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+  const tflite::RuntimeShape &output_shape, int8_t *output_data,
+  const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
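+  // As with Conv2D, a non-null scratchpad buffer signals that the CMSIS-NN int8
+  // depthwise kernel can be used; otherwise the reference implementation runs.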
+  if (scratchpad_data)
+  {
+    cmsis_nn_dw_conv_params dw_conv_params;
+    dw_conv_params.dilation.h = params.dilation_height_factor;
+    dw_conv_params.dilation.w = params.dilation_width_factor;
+    assert(dw_conv_params.dilation.h == 1);
+    assert(dw_conv_params.dilation.w == 1);
+
+    dw_conv_params.input_offset = params.input_offset;
+    dw_conv_params.output_offset = params.output_offset;
+    dw_conv_params.stride.h = params.stride_height;
+    dw_conv_params.stride.w = params.stride_width;
+    dw_conv_params.padding.h = params.padding_values.height;
+    dw_conv_params.padding.w = params.padding_values.width;
+
+    dw_conv_params.activation.min = params.quantized_activation_min;
+    dw_conv_params.activation.max = params.quantized_activation_max;
+    dw_conv_params.ch_mult = params.depth_multiplier;
+
+    cmsis_nn_per_channel_quant_params quant_params;
+    int32_t output_multiplier = params.output_multiplier;
+    int32_t output_shift = params.output_shift;
+
+    quant_params.multiplier = &output_multiplier;
+    quant_params.shift = &output_shift;
+
+    assert(dw_conv_params.activation.min <= dw_conv_params.activation.max);
+    const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+    const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
+    if (bias_data)
+    {
+      assert(bias_shape.FlatSize() == output_depth);
+    }
+
+    cmsis_nn_dims input_dims;
+    input_dims.n = batch_size;
+    input_dims.h = input_shape.Dims(1);
+    input_dims.w = input_shape.Dims(2);
+    input_dims.c = input_shape.Dims(3);
+
+    cmsis_nn_dims filter_dims;
+    filter_dims.n = filter_shape.Dims(0);
+    filter_dims.h = filter_shape.Dims(1);
+    filter_dims.w = filter_shape.Dims(2);
+    filter_dims.c = output_depth;
+
+    cmsis_nn_dims bias_dims;
+    bias_dims.n = 1;
+    bias_dims.h = 1;
+    bias_dims.w = 1;
+    bias_dims.c = output_depth;
+
+    cmsis_nn_dims output_dims;
+    output_dims.n = batch_size;
+    output_dims.h = output_shape.Dims(1);
+    output_dims.w = output_shape.Dims(2);
+    output_dims.c = output_depth;
+
+    cmsis_nn_context ctx;
+    ctx.buf = scratchpad_data;
+    ctx.size = scratchpad_shape.Dims(0);
+
+    auto res = arm_depthwise_conv_wrapper_s8(&ctx, &dw_conv_params, &quant_params, &input_dims,
+                                             input_data, &filter_dims, filter_data, &bias_dims,
+                                             bias_data, &output_dims, output_data);
+    assert(res == ARM_MATH_SUCCESS);
+  }
+  else
+  {
+    tflite::reference_integer_ops::DepthwiseConvPerChannel(
+      params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+      bias_shape, bias_data, output_shape, output_data);
+  }
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const tflite::DepthwiseParams &params,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &filter_shape,
+                                         const tflite::RuntimeShape &output_shape)
+{
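+  // Allocate a scratchpad only when the CMSIS-NN int8 depthwise path applies (no dilation);
+  // the buffer size comes from arm_depthwise_conv_wrapper_s8_get_buffer_size below.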
+  cmsis_nn_dw_conv_params dw_conv_params;
+  dw_conv_params.dilation.h = params.dilation_height_factor;
+  dw_conv_params.dilation.w = params.dilation_width_factor;
+
+  if (input_data_type == luci_interpreter::DataType::S8 && dw_conv_params.dilation.h == 1 &&
+      dw_conv_params.dilation.w == 1)
+  {
+    const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+    const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
+
+    cmsis_nn_dims input_dims;
+    input_dims.n = batch_size;
+    input_dims.h = input_shape.Dims(1);
+    input_dims.w = input_shape.Dims(2);
+    input_dims.c = input_shape.Dims(3);
+
+    cmsis_nn_dims filter_dims;
+    filter_dims.n = filter_shape.Dims(0);
+    filter_dims.h = filter_shape.Dims(1);
+    filter_dims.w = filter_shape.Dims(2);
+    filter_dims.c = output_depth;
+
+    cmsis_nn_dims output_dims;
+    output_dims.n = batch_size;
+    output_dims.h = output_shape.Dims(1);
+    output_dims.w = output_shape.Dims(2);
+    output_dims.c = output_depth;
+
+    const int32_t buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size(
+      &dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+    auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+
+    luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size};
+    scratchpad->resize(scratchpad_shape);
+  }
+  else
+  {
+    scratchpad->set_allocatable(false);
+  }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/onert-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h b/onert-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h
new file mode 100644 (file)
index 0000000..efa6b16
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
+#include "PALreference_ops.h"
+
+namespace luci_interpreter_pal
+{
+
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+                              const tflite::RuntimeShape &input_shape, const T *input_data,
+                              const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape,
+                                               output_data);
+}
+
+static inline void Dequantize(tflite::DequantizationParams &params,
+                              const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
+                              const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/onert-micro/luci-interpreter/pal/cmsisnn/PALFill.h b/onert-micro/luci-interpreter/pal/cmsisnn/PALFill.h
new file mode 100644 (file)
index 0000000..1448b0c
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FILL_H
+#define LUCI_INTERPRETER_PAL_FILL_H
+
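+// Fill has no CMSIS-NN specialization; this header simply re-exports the portable reference ops.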
+#include "PALreference_ops.h"
+
+#endif // LUCI_INTERPRETER_PAL_FILL_H
diff --git a/onert-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h b/onert-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h
new file mode 100644 (file)
index 0000000..effb85d
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include "PALreference_ops.h"
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+                            const tflite::RuntimeShape &input_shape, const float *input_data,
+                            const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+                              int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+                              int32_t input_zero_point, int32_t output_zero_point,
+                              Output *output_data)
+{
+  tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier,
+                                    effective_scale_shift, input_zero_point, output_zero_point,
+                                    output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
diff --git a/onert-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h b/onert-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h
new file mode 100644 (file)
index 0000000..c1f096c
--- /dev/null
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+            const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+            const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+            const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+            const int32_t *bias_data, int16_t *activation_state_data,
+            const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+            int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+            int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
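+  // Pack the tensor shapes and per-tensor quantization parameters into CMSIS-NN
+  // structures and delegate to arm_svdf_s8.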
+  const int32_t rank = params.rank;
+  const int32_t batch_size = input_shape.Dims(0);
+  const int32_t num_filters = weight_feature_shape.Dims(0);
+  const int32_t memory_size = weight_time_shape.Dims(1);
+
+  cmsis_nn_dims input_dims;
+  input_dims.n = input_shape.Dims(0);
+  input_dims.h = input_shape.Dims(1);
+
+  cmsis_nn_dims weights_feature_dims;
+  weights_feature_dims.n = weight_feature_shape.Dims(0);
+  weights_feature_dims.h = weight_feature_shape.Dims(1);
+
+  cmsis_nn_dims weights_time_dims;
+  weights_time_dims.n = weight_time_shape.Dims(0);
+  weights_time_dims.h = weight_time_shape.Dims(1);
+
+  cmsis_nn_dims bias_dims;
+  bias_dims.n = bias_shape.Dims(0);
+
+  cmsis_nn_dims state_dims;
+  state_dims.n = batch_size;
+  state_dims.h = memory_size * num_filters;
+
+  cmsis_nn_dims output_dims;
+  output_dims.n = output_shape.Dims(0);
+  output_dims.h = output_shape.Dims(1);
+
+  cmsis_nn_svdf_params svdf_params;
+  svdf_params.rank = params.rank;
+  svdf_params.input_offset = input_zp;
+  svdf_params.output_offset = output_zp;
+
+  svdf_params.input_activation.min = INT16_MIN;
+  svdf_params.input_activation.max = INT16_MAX;
+
+  svdf_params.output_activation.min = INT8_MIN;
+  svdf_params.output_activation.max = INT8_MAX;
+
+  cmsis_nn_per_tensor_quant_params in_quant_params;
+  in_quant_params.multiplier = scale_1_a;
+  in_quant_params.shift = scale_1_b;
+
+  cmsis_nn_per_tensor_quant_params out_quant_params;
+  out_quant_params.multiplier = scale_2_a;
+  out_quant_params.shift = scale_2_b;
+
+  cmsis_nn_context scratch_ctx;
+  scratch_ctx.buf = scratchpad_data;
+
+  cmsis_nn_context scratch_output_ctx;
+  scratch_output_ctx.buf = output_temp_data;
+
+  arm_svdf_s8(&scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params, &out_quant_params,
+              &input_dims, input_data, &state_dims, activation_state_data, &weights_feature_dims,
+              weight_feature_data, &weights_time_dims, weight_time_data, &bias_dims, bias_data,
+              &output_dims, output_data);
+}
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+          const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+          const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+          const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+          const float *bias_data, float *scratchpad_data, float *activation_state_data,
+          const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  const int32_t rank = params.rank;
+  const int32_t batch_size = input_shape.Dims(0);
+  const int32_t input_size = input_shape.Dims(1);
+  const int32_t num_filters = weight_feature_shape.Dims(0);
+  const int32_t num_units = num_filters / rank;
+  const int32_t memory_size = weight_time_shape.Dims(1);
+
+  // Left shift the activation_state.
+  {
+    float *new_state_start = activation_state_data;
+    const float *old_state_start = activation_state_data + 1;
+    const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size;
+    while (old_state_start != old_state_end)
+    {
+      *new_state_start++ = *old_state_start++;
+    }
+  }
+
+  // Note: there is no need to clear the latest activation, since the matmul below
+  // overwrites it rather than accumulating into it.
+
+  // Compute conv1d(inputs, weights_feature).
+  // The activation_state's rightmost column is used to save current cycle
+  // activation. This is achieved by starting at state_ptr[memory_size - 1] and
+  // having the stride equal to memory_size.
+
+  // Perform batched matrix vector multiply operation:
+  {
+    const float *matrix = weight_feature_data;
+    const float *vector = input_data;
+    float *result = &activation_state_data[memory_size - 1];
+    float *result_in_batch = result;
+    for (int i = 0; i < batch_size; ++i)
+    {
+      const float *matrix_ptr = matrix;
+      for (int j = 0; j < num_filters; ++j)
+      {
+        float dot_prod = 0.0f;
+        const float *vector_in_batch = vector + i * input_size;
+        for (int k = 0; k < input_size; ++k)
+        {
+          dot_prod += *matrix_ptr++ * *vector_in_batch++;
+        }
+        *result_in_batch = dot_prod;
+        result_in_batch += memory_size;
+      }
+    }
+  }
+
+  tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
+    batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
+    params.activation, activation_state_data, scratchpad_data, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+  const luci_interpreter::DataType &input_data_type,
+  const luci_interpreter::DataType &weight_feature_data_type,
+  luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+  luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+  luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+  const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+  const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+  if (input_data_type == luci_interpreter::DataType::FLOAT32 &&
+      (weight_feature_data_type == luci_interpreter::DataType::S8 ||
+       weight_feature_data_type == luci_interpreter::DataType::U8))
+  {
+    (void)input_shape;
+    (void)weight_time_shape;
+    (void)scratchpad_3;
+    (void)scratchpad_4;
+    (void)scratchpad_5;
+    (void)scratchpad_6;
+
+    assert(false && "Hybrid type is not supported for cmsisnn");
+  }
+
+  // Resize scratchpad_1 tensor
+  scratchpad_1->resize({batch_size, num_filters});
+
+  if (input_data_type == luci_interpreter::DataType::S8)
+  {
+    // Resize scratchpad_2 for full_integer op
+    scratchpad_2->resize({batch_size, num_units});
+  }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/onert-micro/luci-interpreter/pal/cmsisnn/PALUnidirectionalSequenceLSTM.h b/onert-micro/luci-interpreter/pal/cmsisnn/PALUnidirectionalSequenceLSTM.h
new file mode 100644 (file)
index 0000000..1a86e74
--- /dev/null
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_H
+#define LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_H
+
+#include "arm_nnfunctions.h"
+#include "core/KernelParams.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h"
+#include "fixedpoint/fixedpoint.h"
+
+#include <limits> // std::numeric_limits, used in convert_lstm_params
+
+namespace luci_interpreter_pal
+{
+namespace lstm
+{
+
+inline cmsis_nn_lstm_params
+convert_lstm_params(const luci_interpreter::IntegerLSTMParams &params_in, bool time_major,
+                    int32_t output_zeropoint, const int32_t *input_gate_bias,
+                    const int32_t *forget_gate_bias, const int32_t *cell_gate_bias,
+                    const int32_t *output_gate_bias, int16_t *input_layer_norm_coefficients,
+                    int16_t *forget_layer_norm_coefficients, int16_t *cell_layer_norm_coefficients,
+                    int16_t *output_layer_norm_coefficients)
+{
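+  // Repack the interpreter's integer LSTM parameters (scales, effective biases,
+  // layer-norm coefficients, clipping) into the cmsis_nn_lstm_params layout.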
+  cmsis_nn_lstm_params params_out;
+
+  params_out.time_major = time_major;
+
+  // Multipliers and shifts for weights
+  params_out.input_to_input_scaling.multiplier = params_in.effective_input_to_input_scale_a;
+  params_out.input_to_input_scaling.shift = params_in.effective_input_to_input_scale_b;
+  params_out.recurrent_to_input_scaling.multiplier = params_in.effective_recurrent_to_input_scale_a;
+  params_out.recurrent_to_input_scaling.shift = params_in.effective_recurrent_to_input_scale_b;
+  params_out.cell_to_input_scaling.multiplier = params_in.effective_cell_to_input_scale_a;
+  params_out.cell_to_input_scaling.shift = params_in.effective_cell_to_input_scale_b;
+  params_out.input_to_forget_scaling.multiplier = params_in.effective_input_to_forget_scale_a;
+  params_out.input_to_forget_scaling.shift = params_in.effective_input_to_forget_scale_b;
+  params_out.recurrent_to_forget_scaling.multiplier =
+    params_in.effective_recurrent_to_forget_scale_a;
+  params_out.recurrent_to_forget_scaling.shift = params_in.effective_recurrent_to_forget_scale_b;
+  params_out.cell_to_forget_scaling.multiplier = params_in.effective_cell_to_forget_scale_a;
+  params_out.cell_to_forget_scaling.shift = params_in.effective_cell_to_forget_scale_b;
+  params_out.input_to_cell_scaling.multiplier = params_in.effective_input_to_cell_scale_a;
+  params_out.input_to_cell_scaling.shift = params_in.effective_input_to_cell_scale_b;
+  params_out.recurrent_to_cell_scaling.multiplier = params_in.effective_recurrent_to_cell_scale_a;
+  params_out.recurrent_to_cell_scaling.shift = params_in.effective_recurrent_to_cell_scale_b;
+  params_out.input_to_output_scaling.multiplier = params_in.effective_input_to_output_scale_a;
+  params_out.input_to_output_scaling.shift = params_in.effective_input_to_output_scale_b;
+
+  params_out.recurrent_to_output_scaling.multiplier =
+    params_in.effective_recurrent_to_output_scale_a;
+  params_out.recurrent_to_output_scaling.shift = params_in.effective_recurrent_to_output_scale_b;
+  params_out.cell_to_output_scaling.multiplier = params_in.effective_cell_to_output_scale_a;
+  params_out.cell_to_output_scaling.shift = params_in.effective_cell_to_output_scale_b;
+  params_out.projection_scaling.multiplier = params_in.effective_proj_scale_a;
+  params_out.projection_scaling.shift = params_in.effective_proj_scale_b;
+
+  params_out.layer_norm_input_scaling.multiplier = params_in.layer_norm_input_scale_a;
+  params_out.layer_norm_input_scaling.shift = params_in.layer_norm_input_scale_b;
+  params_out.layer_norm_forget_scaling.multiplier = params_in.layer_norm_forget_scale_a;
+  params_out.layer_norm_forget_scaling.shift = params_in.layer_norm_forget_scale_b;
+  params_out.layer_norm_cell_scaling.multiplier = params_in.layer_norm_cell_scale_a;
+  params_out.layer_norm_cell_scaling.shift = params_in.layer_norm_cell_scale_b;
+  params_out.layer_norm_output_scaling.multiplier = params_in.layer_norm_output_scale_a;
+  params_out.layer_norm_output_scaling.shift = params_in.layer_norm_output_scale_b;
+
+  params_out.clip.cell = params_in.quantized_cell_clip;
+  params_out.clip.projection = params_in.quantized_proj_clip;
+
+  params_out.cell_state_shift = params_in.cell_scale;
+
+  params_out.hidden_offset = params_in.hidden_zp;
+  params_out.output_state_offset = output_zeropoint;
+
+  params_out.guard.input_variance = params_in.input_variance_guard;
+  params_out.guard.forget_variance = params_in.forget_variance_guard;
+  params_out.guard.cell_variance = params_in.cell_variance_guard;
+  params_out.guard.output_variance = params_in.output_variance_guard;
+
+  params_out.i2f_effective_bias = params_in.input_to_forget_effective_bias.data();
+  params_out.r2f_effective_bias = params_in.recurrent_to_forget_effective_bias.data();
+  params_out.i2c_effective_bias = params_in.input_to_cell_effective_bias.data();
+  params_out.r2c_effective_bias = params_in.recurrent_to_cell_effective_bias.data();
+  params_out.i2o_effective_bias = params_in.input_to_output_effective_bias.data();
+  params_out.r2o_effective_bias = params_in.recurrent_to_output_effective_bias.data();
+  params_out.i2i_effective_bias = params_in.input_to_input_effective_bias.data();
+  params_out.r2i_effective_bias = params_in.recurrent_to_input_effective_bias.data();
+  params_out.projection_effective_bias = params_in.projection_effective_bias.data();
+
+  params_out.hidden_scaling.multiplier = params_in.effective_hidden_scale_a;
+  params_out.hidden_scaling.shift = params_in.effective_hidden_scale_b;
+
+  params_out.input_gate_bias = input_gate_bias;
+  params_out.forget_gate_bias = forget_gate_bias;
+  params_out.cell_gate_bias = cell_gate_bias;
+  params_out.output_gate_bias = output_gate_bias;
+
+  params_out.layer_norm.input_weight = input_layer_norm_coefficients;
+  params_out.layer_norm.forget_weight = forget_layer_norm_coefficients;
+  params_out.layer_norm.cell_weight = cell_layer_norm_coefficients;
+  params_out.layer_norm.output_weight = output_layer_norm_coefficients;
+
+  params_out.activation.min = std::numeric_limits<int16_t>::min();
+  params_out.activation.max = std::numeric_limits<int16_t>::max();
+
+  return params_out;
+}
+
+} // namespace lstm
+
+void eval_integer_8x8_16_lstm(
+  const luci_interpreter::Tensor *input, const luci_interpreter::Tensor *input_to_input_weights,
+  const luci_interpreter::Tensor *input_to_forget_weights,
+  const luci_interpreter::Tensor *input_to_cell_weights,
+  const luci_interpreter::Tensor *input_to_output_weights,
+  const luci_interpreter::Tensor *recurrent_to_input_weights,
+  const luci_interpreter::Tensor *recurrent_to_forget_weights,
+  const luci_interpreter::Tensor *recurrent_to_cell_weights,
+  const luci_interpreter::Tensor *recurrent_to_output_weights,
+  const luci_interpreter::Tensor *cell_to_input_weights,
+  const luci_interpreter::Tensor *cell_to_forget_weights,
+  const luci_interpreter::Tensor *cell_to_output_weights,
+  const luci_interpreter::Tensor *input_layer_norm_coefficients,
+  const luci_interpreter::Tensor *forget_layer_norm_coefficients,
+  const luci_interpreter::Tensor *cell_layer_norm_coefficients,
+  const luci_interpreter::Tensor *output_layer_norm_coefficients,
+  const luci_interpreter::Tensor *input_gate_bias, const luci_interpreter::Tensor *forget_gate_bias,
+  const luci_interpreter::Tensor *cell_gate_bias, const luci_interpreter::Tensor *output_gate_bias,
+  const luci_interpreter::Tensor *projection_weights,
+  const luci_interpreter::Tensor *projection_bias,
+  const luci_interpreter::UnidirectionalSequenceLSTMParams &params, bool forward_sequence,
+  bool time_major, const luci_interpreter::IntegerLSTMParams &integer_lstm_param,
+  int32_t output_state_zp, luci_interpreter::Tensor *output_state,
+  luci_interpreter::Tensor *cell_state, luci_interpreter::Tensor *output, int16_t *scratch0,
+  int16_t *scratch1, int16_t *scratch2, int16_t *scratch3, int8_t *scratch4, int32_t *scratch5)
+{
+  // CMSIS-NN does not currently support these configurations;
+  // please use the MCU kernels instead.
+  const bool use_layer_norm = (forget_layer_norm_coefficients != nullptr);
+  const bool use_peephole = (cell_to_output_weights != nullptr);
+  const bool use_projection = (projection_weights != nullptr);
+  const bool use_cifg = (input_to_input_weights == nullptr);
+  const bool unsupported_config = use_layer_norm || use_peephole || use_projection || use_cifg;
+
+  if (unsupported_config)
+  {
+    assert(false && "CMSIS-NN does not support these configurations currently");
+    return;
+  }
+
+  const auto input_shape = input->shape();
+  LUCI_INTERPRETER_CHECK(input_shape.num_dims() >= 2 && input_shape.num_dims() <= 3);
+
+  cmsis_nn_lstm_context scratch_buffers;
+  scratch_buffers.input_gate = scratch0;
+  scratch_buffers.forget_gate = scratch1;
+  scratch_buffers.cell_gate = scratch2;
+  scratch_buffers.output_gate = scratch3;
+  scratch_buffers.scratch = scratch4;
+
+  cmsis_nn_lstm_params cmsis_lstm_params = lstm::convert_lstm_params(
+    integer_lstm_param, time_major, output_state_zp,
+    luci_interpreter::kernels::getTensorData<int32_t>(input_gate_bias),
+    luci_interpreter::kernels::getTensorData<int32_t>(forget_gate_bias),
+    luci_interpreter::kernels::getTensorData<int32_t>(cell_gate_bias),
+    luci_interpreter::kernels::getTensorData<int32_t>(output_gate_bias),
+    const_cast<int16_t *>(
+      luci_interpreter::kernels::getTensorData<int16_t>(input_layer_norm_coefficients)),
+    const_cast<int16_t *>(
+      luci_interpreter::kernels::getTensorData<int16_t>(forget_layer_norm_coefficients)),
+    const_cast<int16_t *>(
+      luci_interpreter::kernels::getTensorData<int16_t>(cell_layer_norm_coefficients)),
+    const_cast<int16_t *>(
+      luci_interpreter::kernels::getTensorData<int16_t>(output_layer_norm_coefficients)));
+
+  const int n_input = input_shape.dim(input_shape.num_dims() - 1);
+  int max_time, n_batch;
+  if (input_shape.num_dims() == 2)
+  {
+    max_time = 1;
+    n_batch = input_shape.dim(0);
+  }
+  else
+  {
+    max_time = (time_major) ? input_shape.dim(0) : input_shape.dim(1);
+    n_batch = (time_major) ? input_shape.dim(1) : input_shape.dim(0);
+  }
+
+  // n_cell and n_output will be the same size when there is no projection.
+  const int n_cell = input_to_output_weights->shape().dim(0);
+  const int n_output = recurrent_to_output_weights->shape().dim(1);
+
+  cmsis_nn_lstm_dims lstm_dims;
+  lstm_dims.num_inputs = n_input;
+  lstm_dims.num_outputs = n_output;
+  lstm_dims.num_batches = n_batch;
+  lstm_dims.max_time = max_time;
+
+  arm_lstm_unidirectional_s16_s8(
+    &scratch_buffers, const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(input)),
+    &lstm_dims,
+    const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(input_to_input_weights)),
+    const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(input_to_forget_weights)),
+    const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(input_to_cell_weights)),
+    const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(input_to_output_weights)),
+    const_cast<int8_t *>(
+      luci_interpreter::kernels::getTensorData<int8_t>(recurrent_to_input_weights)),
+    const_cast<int8_t *>(
+      luci_interpreter::kernels::getTensorData<int8_t>(recurrent_to_forget_weights)),
+    const_cast<int8_t *>(
+      luci_interpreter::kernels::getTensorData<int8_t>(recurrent_to_cell_weights)),
+    const_cast<int8_t *>(
+      luci_interpreter::kernels::getTensorData<int8_t>(recurrent_to_output_weights)),
+    const_cast<int16_t *>(luci_interpreter::kernels::getTensorData<int16_t>(cell_to_input_weights)),
+    const_cast<int16_t *>(
+      luci_interpreter::kernels::getTensorData<int16_t>(cell_to_forget_weights)),
+    const_cast<int16_t *>(
+      luci_interpreter::kernels::getTensorData<int16_t>(cell_to_output_weights)),
+    const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(projection_weights)),
+    &cmsis_lstm_params,
+    const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(output_state)),
+    const_cast<int16_t *>(luci_interpreter::kernels::getTensorData<int16_t>(cell_state)),
+    const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(output)));
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_H
diff --git a/onert-micro/luci-interpreter/pal/cmsisnn/PALreference_ops.h b/onert-micro/luci-interpreter/pal/cmsisnn/PALreference_ops.h
new file mode 100644 (file)
index 0000000..62c7209
--- /dev/null
@@ -0,0 +1,1556 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+#define LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <type_traits>
+
+#include "third_party/eigen3/Eigen/Core"
+#include "fixedpoint/fixedpoint.h"
+#include "ruy/profiler/instrumentation.h" // from @ruy
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/add.h"
+#include "tensorflow/lite/kernels/internal/reference/add_n.h"
+#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h"
+#include "tensorflow/lite/kernels/internal/reference/batch_matmul.h"
+#include "tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h"
+#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
+#include "tensorflow/lite/kernels/internal/reference/cast.h"
+#include "tensorflow/lite/kernels/internal/reference/ceil.h"
+#include "tensorflow/lite/kernels/internal/reference/comparisons.h"
+#include "tensorflow/lite/kernels/internal/reference/concatenation.h"
+#include "tensorflow/lite/kernels/internal/reference/conv.h"
+#include "tensorflow/lite/kernels/internal/reference/depth_to_space.h"
+#include "tensorflow/lite/kernels/internal/reference/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/div.h"
+#include "tensorflow/lite/kernels/internal/reference/elu.h"
+#include "tensorflow/lite/kernels/internal/reference/exp.h"
+#include "tensorflow/lite/kernels/internal/reference/fill.h"
+#include "tensorflow/lite/kernels/internal/reference/floor.h"
+#include "tensorflow/lite/kernels/internal/reference/floor_div.h"
+#include "tensorflow/lite/kernels/internal/reference/floor_mod.h"
+#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
+#include "tensorflow/lite/kernels/internal/reference/gather.h"
+#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
+#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
+#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h"
+#include "tensorflow/lite/kernels/internal/reference/log_softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h"
+#include "tensorflow/lite/kernels/internal/reference/mul.h"
+#include "tensorflow/lite/kernels/internal/reference/neg.h"
+#include "tensorflow/lite/kernels/internal/reference/pad.h"
+#include "tensorflow/lite/kernels/internal/reference/pooling.h"
+#include "tensorflow/lite/kernels/internal/reference/prelu.h"
+#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "tensorflow/lite/kernels/internal/reference/quantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reduce.h"
+#include "tensorflow/lite/kernels/internal/reference/requantize.h"
+#include "tensorflow/lite/kernels/internal/reference/resize_bilinear.h"
+#include "tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h"
+#include "tensorflow/lite/kernels/internal/reference/round.h"
+#include "tensorflow/lite/kernels/internal/reference/softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h"
+#include "tensorflow/lite/kernels/internal/reference/space_to_depth.h"
+#include "tensorflow/lite/kernels/internal/reference/strided_slice.h"
+#include "tensorflow/lite/kernels/internal/reference/string_comparisons.h"
+#include "tensorflow/lite/kernels/internal/reference/sub.h"
+#include "tensorflow/lite/kernels/internal/reference/tanh.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose_conv.h"
+#include "tensorflow/lite/kernels/internal/strided_slice_logic.h"
+#include "tensorflow/lite/kernels/internal/tensor.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+namespace tflite
+{
+
+namespace reference_ops
+{
+
+template <typename T>
+inline void Relu(const RuntimeShape &input_shape, const T *input_data,
+                 const RuntimeShape &output_shape, T *output_data)
+{
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i)
+  {
+    const T val = input_data[i];
+    const T lower = 0;
+    const T clamped = val < lower ? lower : val;
+    output_data[i] = clamped;
+  }
+}
+
+template <typename T>
+inline void Relu1(const RuntimeShape &input_shape, const T *input_data,
+                  const RuntimeShape &output_shape, T *output_data)
+{
+  ruy::profiler::ScopeLabel label("Relu1 (not fused)");
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i)
+  {
+    const T val = input_data[i];
+    const T upper = 1;
+    const T lower = -1;
+    const T clamped = val > upper ? upper : val < lower ? lower : val;
+    output_data[i] = clamped;
+  }
+}
+
+inline void Relu6(const RuntimeShape &input_shape, const float *input_data,
+                  const RuntimeShape &output_shape, float *output_data)
+{
+  ruy::profiler::ScopeLabel label("Relu6 (not fused)");
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i)
+  {
+    const float val = input_data[i];
+    const float upper = 6;
+    const float lower = 0;
+    const float clamped = val > upper ? upper : val < lower ? lower : val;
+    output_data[i] = clamped;
+  }
+}
+
+template <typename T>
+inline void ReluX(const tflite::ReluParams &params, const RuntimeShape &input_shape,
+                  const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+  ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i)
+  {
+    const int32 val = static_cast<int32_t>(input_data[i]);
+    int32 clamped = params.output_offset + MultiplyByQuantizedMultiplier(val - params.input_offset,
+                                                                         params.output_multiplier,
+                                                                         params.output_shift);
+    clamped = std::max(params.quantized_activation_min, clamped);
+    clamped = std::min(params.quantized_activation_max, clamped);
+    output_data[i] = static_cast<T>(clamped);
+  }
+}
+
+template <typename T>
+inline void ReluX(const tflite::ActivationParams &params, const RuntimeShape &input_shape,
+                  const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+  ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  const T max_value = params.quantized_activation_max;
+  const T min_value = params.quantized_activation_min;
+  for (int i = 0; i < flat_size; ++i)
+  {
+    const T val = input_data[i];
+    const T clamped = val > max_value ? max_value : val < min_value ? min_value : val;
+    output_data[i] = clamped;
+  }
+}
+
+// TODO(jiawen): We can implement BroadcastMul on buffers of arbitrary
+// dimensionality if the runtime code does a single loop over one dimension
+// that handles broadcasting as the base case. The code generator would then
+// generate max(D1, D2) nested for loops.
+inline void BroadcastMulFivefold(const ArithmeticParams &unswitched_params,
+                                 const RuntimeShape &unswitched_input1_shape,
+                                 const uint8 *unswitched_input1_data,
+                                 const RuntimeShape &unswitched_input2_shape,
+                                 const uint8 *unswitched_input2_data,
+                                 const RuntimeShape &output_shape, uint8 *output_data)
+{
+  ArithmeticParams switched_params = unswitched_params;
+  switched_params.input1_offset = unswitched_params.input2_offset;
+  switched_params.input2_offset = unswitched_params.input1_offset;
+
+  const bool use_unswitched = unswitched_params.broadcast_category ==
+                              tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;
+
+  const ArithmeticParams &params = use_unswitched ? unswitched_params : switched_params;
+  const uint8 *input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data;
+  const uint8 *input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data;
+
+  // Fivefold nested loops. The second input resets its position for each
+  // iteration of the second loop. The first input resets its position at the
+  // beginning of the fourth loop. The innermost loop is an elementwise Mul of
+  // sections of the arrays.
+  uint8 *output_data_ptr = output_data;
+  const uint8 *input1_data_ptr = input1_data;
+  const uint8 *input2_data_reset = input2_data;
+  int y0 = params.broadcast_shape[0];
+  int y1 = params.broadcast_shape[1];
+  int y2 = params.broadcast_shape[2];
+  int y3 = params.broadcast_shape[3];
+  int y4 = params.broadcast_shape[4];
+  for (int i0 = 0; i0 < y0; ++i0)
+  {
+    const uint8 *input2_data_ptr;
+    for (int i1 = 0; i1 < y1; ++i1)
+    {
+      input2_data_ptr = input2_data_reset;
+      for (int i2 = 0; i2 < y2; ++i2)
+      {
+        for (int i3 = 0; i3 < y3; ++i3)
+        {
+          MulElementwise(y4, params, input1_data_ptr, input2_data_ptr, output_data_ptr);
+          input2_data_ptr += y4;
+          output_data_ptr += y4;
+        }
+        input1_data_ptr += y4;
+      }
+    }
+    input2_data_reset = input2_data_ptr;
+  }
+}
+
+inline void Mul(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+                const int16 *input1_data, const RuntimeShape &input2_shape,
+                const int16 *input2_data, const RuntimeShape &output_shape, int16 *output_data)
+{
+  ruy::profiler::ScopeLabel label("Mul/Int16");
+
+  const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+  for (int i = 0; i < flat_size; i++)
+  {
+    // F0 uses 0 integer bits, range [-1, 1].
+    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+    F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+    output_data[i] = unclamped_result.raw();
+  }
+}
+
+inline void Mul(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+                const int16 *input1_data, const RuntimeShape &input2_shape,
+                const int16 *input2_data, const RuntimeShape &output_shape, uint8 *output_data)
+{
+  ruy::profiler::ScopeLabel label("Mul/Int16Uint8");
+  int32 output_offset = params.output_offset;
+  int32 output_activation_min = params.quantized_activation_min;
+  int32 output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+
+  const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+  for (int i = 0; i < flat_size; i++)
+  {
+    // F0 uses 0 integer bits, range [-1, 1].
+    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+    F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+    int16 rescaled_result = gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
+    int16 clamped_result = std::min<int16>(output_activation_max - output_offset, rescaled_result);
+    clamped_result = std::max<int16>(output_activation_min - output_offset, clamped_result);
+    output_data[i] = output_offset + clamped_result;
+  }
+}
+
+inline void Sub16(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+                  const int16_t *input1_data, const RuntimeShape &input2_shape,
+                  const int16_t *input2_data, const RuntimeShape &output_shape,
+                  int16_t *output_data)
+{
+  ruy::profiler::ScopeLabel label("Sub/Int16");
+  const int input1_shift = params.input1_shift;
+  const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+  const int16 output_activation_min = params.quantized_activation_min;
+  const int16 output_activation_max = params.quantized_activation_max;
+
+  TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
+  TFLITE_DCHECK_LE(input1_shift, 0);
+  TFLITE_DCHECK_LE(params.input2_shift, 0);
+  const int16 *not_shift_input = input1_shift == 0 ? input1_data : input2_data;
+  const int16 *shift_input = input1_shift == 0 ? input2_data : input1_data;
+  const int input_right_shift = input1_shift == 0 ? -params.input2_shift : -input1_shift;
+
+  if (input1_shift == 0)
+  {
+    // F0 uses 0 integer bits, range [-1, 1].
+    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+    for (int i = 0; i < flat_size; ++i)
+    {
+      F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
+      F0 scaled_input =
+        F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
+      F0 result = SaturatingSub(input_ready_scaled, scaled_input);
+      const int16 raw_output = result.raw();
+      const int16 clamped_output =
+        std::min(output_activation_max, std::max(output_activation_min, raw_output));
+      output_data[i] = clamped_output;
+    }
+  }
+  else
+  {
+    // F0 uses 0 integer bits, range [-1, 1].
+    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+    for (int i = 0; i < flat_size; ++i)
+    {
+      F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
+      F0 scaled_input =
+        F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
+      F0 result = SaturatingSub(scaled_input, input_ready_scaled);
+      const int16 raw_output = result.raw();
+      const int16 clamped_output =
+        std::min(output_activation_max, std::max(output_activation_min, raw_output));
+      output_data[i] = clamped_output;
+    }
+  }
+}
+
+template <typename Scalar>
+void Pack(const PackParams &params, const RuntimeShape *const *input_shapes,
+          const Scalar *const *input_data, const RuntimeShape &output_shape, Scalar *output_data)
+{
+  ruy::profiler::ScopeLabel label("Pack");
+  const int dimensions = output_shape.DimensionsCount();
+  int axis = params.axis;
+  int inputs_count = params.inputs_count;
+
+  int outer_size = 1;
+  for (int i = 0; i < axis; i++)
+  {
+    outer_size *= output_shape.Dims(i);
+  }
+  int copy_size = 1;
+  for (int i = params.axis + 1; i < dimensions; i++)
+  {
+    copy_size *= output_shape.Dims(i);
+  }
+  TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+  for (int i = 0; i < inputs_count; ++i)
+  {
+    for (int k = 0; k < outer_size; k++)
+    {
+      const Scalar *input_ptr = input_data[i] + copy_size * k;
+      int loc = k * inputs_count * copy_size + i * copy_size;
+      memcpy(output_data + loc, input_ptr, copy_size * sizeof(Scalar));
+    }
+  }
+}
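+// Illustrative layout for the Pack routine above (assumed shapes): packing two
+// [2, 3] inputs along axis 1 produces an output of shape [2, 2, 3] with
+// outer_size = 2 and copy_size = 3, so rows interleave as
+//   output[0..2] = input0 row 0,  output[3..5]  = input1 row 0,
+//   output[6..8] = input0 row 1,  output[9..11] = input1 row 1.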
+
+template <typename Scalar>
+void Unpack(const UnpackParams &params, const RuntimeShape &input_shape, const Scalar *input_data,
+            const RuntimeShape &output_shape, Scalar *const *output_datas)
+{
+  ruy::profiler::ScopeLabel label("Unpack");
+  const int dimensions = input_shape.DimensionsCount();
+  const int outputs_count = params.num_split;
+
+  int outer_size = 1;
+  int axis = params.axis;
+  if (axis < 0)
+  {
+    axis += dimensions;
+  }
+  TFLITE_DCHECK_GE(axis, 0);
+  TFLITE_DCHECK_LT(axis, dimensions);
+  for (int i = 0; i < axis; ++i)
+  {
+    outer_size *= input_shape.Dims(i);
+  }
+  int copy_size = 1;
+  for (int i = axis + 1; i < dimensions; ++i)
+  {
+    copy_size *= input_shape.Dims(i);
+  }
+  TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size);
+
+  for (int i = 0; i < outputs_count; ++i)
+  {
+    for (int k = 0; k < outer_size; k++)
+    {
+      Scalar *output_ptr = output_datas[i] + copy_size * k;
+      int loc = k * outputs_count * copy_size + i * copy_size;
+      memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
+    }
+  }
+}
+
+template <typename Scalar>
+void PackWithScaling(const PackParams &params, const RuntimeShape *const *input_shapes,
+                     const uint8 *const *input_data, const RuntimeShape &output_shape,
+                     uint8 *output_data)
+{
+  ruy::profiler::ScopeLabel label("PackWithScaling");
+  const int dimensions = output_shape.DimensionsCount();
+  int axis = params.axis;
+  const int32 *input_zeropoint = params.input_zeropoint;
+  const float *input_scale = params.input_scale;
+  int inputs_count = params.inputs_count;
+  const int32 output_zeropoint = params.output_zeropoint;
+  const float output_scale = params.output_scale;
+
+  int outer_size = 1;
+  for (int i = 0; i < axis; i++)
+  {
+    outer_size *= output_shape.Dims(i);
+  }
+  int copy_size = 1;
+  for (int i = axis + 1; i < dimensions; i++)
+  {
+    copy_size *= output_shape.Dims(i);
+  }
+  TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+  Scalar *output_ptr = output_data;
+  const float inverse_output_scale = 1.f / output_scale;
+  for (int k = 0; k < outer_size; k++)
+  {
+    for (int i = 0; i < inputs_count; ++i)
+    {
+      if (input_zeropoint[i] == output_zeropoint && input_scale[i] == output_scale)
+      {
+        memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar));
+      }
+      else
+      {
+        assert(false);
+        const float scale = input_scale[i] * inverse_output_scale;
+        const float bias = -input_zeropoint[i] * scale;
+        auto input_ptr = input_data[i];
+        for (int j = 0; j < copy_size; ++j)
+        {
+          const int value =
+            static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
+          output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
+        }
+      }
+      output_ptr += copy_size;
+    }
+  }
+}
+
+template <typename Scalar>
+void DepthConcatenation(const ConcatenationParams &params, const RuntimeShape *const *input_shapes,
+                        const Scalar *const *input_data, const RuntimeShape &output_shape,
+                        Scalar *output_data)
+{
+  ruy::profiler::ScopeLabel label("DepthConcatenation");
+  auto params_copy = params;
+  params_copy.axis = 3;
+  Concatenation(params_copy, input_shapes, input_data, output_shape, output_data);
+}
+
+inline void LstmCell(const LstmCellParams &params, const RuntimeShape &unextended_input_shape,
+                     const float *input_data, const RuntimeShape &unextended_prev_activ_shape,
+                     const float *prev_activ_data, const RuntimeShape &weights_shape,
+                     const float *weights_data, const RuntimeShape &unextended_bias_shape,
+                     const float *bias_data, const RuntimeShape &unextended_prev_state_shape,
+                     const float *prev_state_data,
+                     const RuntimeShape &unextended_output_state_shape, float *output_state_data,
+                     const RuntimeShape &unextended_output_activ_shape, float *output_activ_data,
+                     const RuntimeShape &unextended_concat_temp_shape, float *concat_temp_data,
+                     const RuntimeShape &unextended_activ_temp_shape, float *activ_temp_data)
+{
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+  const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+  const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+  const RuntimeShape output_state_shape =
+    RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+  const RuntimeShape output_activ_shape =
+    RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+  const RuntimeShape concat_temp_shape =
+    RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+  const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  const int batches = MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0,
+                                  output_state_shape, 0, output_activ_shape, 0);
+  const int height = MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1,
+                                 output_state_shape, 1, output_activ_shape, 1);
+  const int width = MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2,
+                                output_state_shape, 2, output_activ_shape, 2);
+  const int input_depth = input_shape.Dims(3);
+  const int prev_activ_depth = prev_activ_shape.Dims(3);
+  const int total_input_depth = prev_activ_depth + input_depth;
+  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+  const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+  TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
+  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
+  const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+                                       3, output_activ_shape, 3);
+  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+
+  // Concatenate prev_activ and input data together
+  std::vector<float const *> concat_input_arrays_data;
+  std::vector<RuntimeShape const *> concat_input_arrays_shapes;
+  concat_input_arrays_data.push_back(input_data);
+  concat_input_arrays_data.push_back(prev_activ_data);
+  concat_input_arrays_shapes.push_back(&input_shape);
+  concat_input_arrays_shapes.push_back(&prev_activ_shape);
+  tflite::ConcatenationParams concat_params;
+  concat_params.axis = 3;
+  concat_params.inputs_count = concat_input_arrays_data.size();
+  Concatenation(concat_params, &(concat_input_arrays_shapes[0]), &(concat_input_arrays_data[0]),
+                concat_temp_shape, concat_temp_data);
+
+  // Fully connected
+  tflite::FullyConnectedParams fc_params;
+  fc_params.float_activation_min = std::numeric_limits<float>::lowest();
+  fc_params.float_activation_max = std::numeric_limits<float>::max();
+  FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape, weights_data,
+                 bias_shape, bias_data, activ_temp_shape, activ_temp_data);
+
+  // Memory state update (the LSTM "guts")
+  for (int b = 0; b < batches; ++b)
+  {
+    for (int w = 0; w < width; ++w)
+    {
+      for (int h = 0; h < height; ++h)
+      {
+        for (int c = 0; c < output_depth; ++c)
+        {
+          const float input_gate =
+            1.f /
+            (1.f +
+             std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 0 * output_depth + c)]));
+          const float new_input =
+            std::tanh(activ_temp_data[Offset(activ_temp_shape, b, h, w, 1 * output_depth + c)]);
+          const float forget_gate =
+            1.f /
+            (1.f +
+             std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 2 * output_depth + c)]));
+          const float output_gate =
+            1.f /
+            (1.f +
+             std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 3 * output_depth + c)]));
+          const float new_state =
+            input_gate * new_input +
+            forget_gate * prev_state_data[Offset(prev_state_shape, b, h, w, c)];
+          output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state;
+          output_activ_data[Offset(output_activ_shape, b, h, w, c)] =
+            output_gate * std::tanh(new_state);
+        }
+      }
+    }
+  }
+}
+
+// Quantized LSTM cell implementation.
+// The quantization of the input, output arrays is as follows:
+//  - The input activations are quantized as uint8 on the interval
+//    [-1, 127/128].
+//    The rationale for that is that this is the natural interval for output
+//    activations (see next point) and these need to be concatenated together.
+//    We could accommodate different ranges by re-scaling, but we empirically
+//    found that setting the input activations range to be [-1, 127/128] in the
+//    first place, removing the need for re-scaling, greatly improves accuracy.
+//  - The output activations are quantized as uint8 on the interval
+//    [-1, 127/128].
+//    The rationale for that is that the definition of an LSTM cell makes them
+//    intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128]
+//    makes for simpler, more accurate fixed-point arithmetic.
+//  - The output-at-previous-timestep state array is quantized in the same
+//    way as the output activations.
+//  - The internal LSTM memory (not the output-at-previous-timestep, the other
+//    internal state array) is int16-quantized and may use any power-of-two,
+//    symmetric range i.e. [-2^N, 2^N * 32767/32768] for any N, which we call
+//    StateIntegerBits below, see the below discussion of that template
+//    parameter ("The StateIntegerBits template parameter").
+//  - The output of the internal fully-connected node is int16-quantized
+//    on the interval [-8, 8 * 32767/32768], the rationale for which is
+//    explained just below ("Why [-8, 8] for fully-connected output?").
+//
+//
+// === The StateIntegerBits template parameter ===
+//
+// The StateIntegerBits template parameter controls the fixed-point format used
+// to represent the internal memory of the LSTM cell (not the
+// output-at-previous-timestep, the other internal state array). It's currently
+// a template parameter so that the model can control that. The most typical
+// value for StateIntegerBits is 4. Other plausible values are anywhere between
+// 3 and 5. We might eventually standardize on a single supported value, e.g. 4,
+// and drop that template parameter. The reason why it can't be a runtime
+// parameter is that this controls the fixed-point format used, i.e. we need to
+// generate actually different code based on it. In particular, we generate code
+// for a fixed-point tanh() implementation for that format, which internally
+// uses a fixed-point exp() implementation, which internally uses a
+// barrel-shifter with a number of steps that depends on StateIntegerBits.
+// Another consequence of that is that a higher value of StateIntegerBits
+// results in a more expensive implementation (more barrel shifter steps
+// needed).
+//
+//
+// === Why [-8, 8] for fully-connected output? ===
+//
+// This array is only fed to Logistic and Tanh functions, for which
+// the quantized implementation will want to use fixed-point arithmetic,
+// requiring a power-of-two representation interval. Thus, we should right
+// away quantize this array to a power-of-two interval; otherwise,
+// implementation will need to rescale that, losing any benefit that a tighter
+// representation interval might otherwise yield, while introducing some
+// numerical error and computational overhead.
+//
+// Now, Logistic and Tanh are nearly constant (nearly equal to their
+// horizontal asymptotes) outside of a small bounded interval around 0:
+//
+//   Logistic(4) = 1 - 1.8e-2     Tanh(4) = 1 - 6.7e-4
+//   Logistic(8) = 1 - 3.4e-4     Tanh(8) = 1 - 2.3e-7
+//   Logistic(16) = 1 - 1.1e-7    Tanh(16) = 1 - 2.5e-14
+//
+// From this, we see that clamping to [-4, 4] would be too inaccurate
+// (the error of 1.8e-2 on Logistic would be felt even in 8bit precision)
+// while clamping to [-16, 16] would make no difference even in float32.
+// However, for a fixed-point implementation in 16-bit integers, using 5
+// integer bits to represent the [-16, 16] range would leave only 11
+// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive
+// representable values. Notice that this is higher than the
+// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic.
+// Using [-8, 8] thus seems like the better compromise overall, enjoying
+// an increment of 2.4e-4 between representable values and a worst-case
+// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with
+// [-16, 16].
+//
+// Moreover, all other things being equal, it is nice to choose the narrower
+// representation range, as that makes the implementation of fixed-point
+// math functions a little cheaper (each integer bit requires an additional
+// barrel-shifter step in the implementation of exp(-x)). That is further
+// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make
+// sense for 32-bit float or 32-bit fixed-point quantization, but we are
+// aiming for 16-bit fixed-point quantization of these internal nodes here.
+//
+template <int StateIntegerBits>
+inline void
+LstmCell(const LstmCellParams &params, const RuntimeShape &unextended_input_shape,
+         const uint8 *input_data_uint8, const RuntimeShape &unextended_prev_activ_shape,
+         const uint8 *prev_activ_data_uint8, const RuntimeShape &weights_shape,
+         const uint8 *weights_data_uint8, const RuntimeShape &unextended_bias_shape,
+         const int32 *bias_data_int32, const RuntimeShape &unextended_prev_state_shape,
+         const int16 *prev_state_data_int16, const RuntimeShape &unextended_output_state_shape,
+         int16 *output_state_data_int16, const RuntimeShape &unextended_output_activ_shape,
+         uint8 *output_activ_data_uint8, const RuntimeShape &unextended_concat_temp_shape,
+         uint8 *concat_temp_data_uint8, const RuntimeShape &unextended_activ_temp_shape,
+         int16 *activ_temp_data_int16, void *gemmlowp_context)
+{
+  (void)gemmlowp_context; // only used in optimized code.
+  int32 weights_zero_point = params.weights_zero_point;
+  int32 accum_multiplier = params.accum_multiplier;
+  int accum_shift = params.accum_shift;
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+  const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+  const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+  const RuntimeShape output_state_shape =
+    RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+  const RuntimeShape output_activ_shape =
+    RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+  const RuntimeShape concat_temp_shape =
+    RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+  const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+  // Gather dimensions information, and perform consistency checks.
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  const int outer_size = MatchingFlatSizeSkipDim(input_shape, 3, prev_activ_shape, prev_state_shape,
+                                                 output_state_shape, output_activ_shape);
+  const int input_depth = input_shape.Dims(3);
+  const int prev_activ_depth = prev_activ_shape.Dims(3);
+  const int total_input_depth = prev_activ_depth + input_depth;
+  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
+  const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+  TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
+  const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+                                       3, output_activ_shape, 3);
+  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+  const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3);
+  const int fc_output_depth =
+    MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3);
+  const int fc_accum_depth = total_input_depth;
+  TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth);
+
+  // Depth-concatenate prev_activ and input data together.
+  uint8 const *concat_input_arrays_data[2] = {input_data_uint8, prev_activ_data_uint8};
+  const RuntimeShape *concat_input_arrays_shapes[2] = {&input_shape, &prev_activ_shape};
+  tflite::ConcatenationParams concat_params;
+  concat_params.axis = 3;
+  concat_params.inputs_count = 2;
+  Concatenation(concat_params, concat_input_arrays_shapes, concat_input_arrays_data,
+                concat_temp_shape, concat_temp_data_uint8);
+
+  // Implementation of the fully connected node inside the LSTM cell.
+  // The operands are 8-bit integers, the accumulators are internally 32bit
+  // integers, and the output is 16-bit fixed-point with 3 integer bits so
+  // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that
+  // is explained in the function comment above.
+  for (int b = 0; b < fc_batches; ++b)
+  {
+    for (int out_c = 0; out_c < fc_output_depth; ++out_c)
+    {
+      // Internal accumulation.
+      // Initialize accumulator with the bias-value.
+      int32 accum = bias_data_int32[out_c];
+      // Accumulation loop.
+      for (int d = 0; d < fc_accum_depth; ++d)
+      {
+        int16 input_val = concat_temp_data_uint8[b * fc_accum_depth + d] - 128;
+        int16 weights_val = weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point;
+        accum += input_val * weights_val;
+      }
+      // Down-scale the final int32 accumulator to the scale used by our
+      // (16-bit, using 3 integer bits) fixed-point format. The quantized
+      // multiplier and shift here have been pre-computed offline
+      // (e.g. by toco).
+      accum = MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift);
+      // Saturate, cast to int16, and store to the temporary activations array.
+      accum = std::max(-32768, std::min(32767, static_cast<int>(accum)));
+      activ_temp_data_int16[out_c + fc_output_depth * b] = accum;
+    }
+  }
+
+  // Rest of the LSTM cell: tanh and logistic math functions, and some adds
+  // and muls, all done in 16-bit fixed-point.
+  for (int b = 0; b < outer_size; ++b)
+  {
+    for (int c = 0; c < output_depth; ++c)
+    {
+      // Define the fixed-point data types that we will use here. All use
+      // int16 as the underlying integer type i.e. all are 16-bit fixed-point.
+      // They only differ by the number of integral vs. fractional bits,
+      // determining the range of values that they can represent.
+      //
+      // F0 uses 0 integer bits, range [-1, 1].
+      // This is the return type of math functions such as tanh, logistic,
+      // whose range is in [-1, 1].
+      using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+      // F3 uses 3 integer bits, range [-8, 8].
+      // This is the range of the previous fully-connected node's output,
+      // which is our input here.
+      using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
+      // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits,
+      // 2^StateIntegerBits]. It's used to represent the internal state, whose
+      // number of integer bits is currently dictated by the model. See comment
+      // on the StateIntegerBits template parameter above.
+      using FS = gemmlowp::FixedPoint<std::int16_t, StateIntegerBits>;
+      // Implementation of input gate, using fixed-point logistic function.
+      F3 input_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]);
+      F0 input_gate_output = gemmlowp::logistic(input_gate_input);
+      // Implementation of input modulation gate, using fixed-point tanh
+      // function.
+      F3 input_modulation_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]);
+      F0 input_modulation_gate_output = gemmlowp::tanh(input_modulation_gate_input);
+      // Implementation of forget gate, using fixed-point logistic function.
+      F3 forget_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]);
+      F0 forget_gate_output = gemmlowp::logistic(forget_gate_input);
+      // Implementation of output gate, using fixed-point logistic function.
+      F3 output_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]);
+      F0 output_gate_output = gemmlowp::logistic(output_gate_input);
+      // Implementation of internal multiplication nodes, still in fixed-point.
+      F0 input_times_input_modulation = input_gate_output * input_modulation_gate_output;
+      FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]);
+      FS prev_state_times_forget_state = forget_gate_output * prev_state;
+      // Implementation of internal addition node, saturating.
+      FS new_state =
+        gemmlowp::SaturatingAdd(gemmlowp::Rescale<StateIntegerBits>(input_times_input_modulation),
+                                prev_state_times_forget_state);
+      // Implementation of last internal Tanh node, still in fixed-point.
+      // Since a Tanh fixed-point implementation is specialized for a given
+      // number of integer bits, and each specialization can have a substantial
+      // code size, and we already used above a Tanh on an input with 3 integer
+      // bits, and per the table in the above function comment there is no
+      // significant accuracy to be lost by clamping to [-8, +8] for a
+      // 3-integer-bits representation, let us just do that. This helps people
+      // porting this to targets where code footprint must be minimized.
+      F3 new_state_f3 = gemmlowp::Rescale<3>(new_state);
+      F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3);
+      // Store the new internal state back to memory, as 16-bit integers.
+      // Note: here we store the original value with StateIntegerBits, not
+      // the rescaled 3-integer-bits value fed to tanh.
+      output_state_data_int16[b * output_depth + c] = new_state.raw();
+      // Down-scale the output activations to 8-bit integers, saturating,
+      // and store back to memory.
+      int16 rescaled_output_activ = gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8);
+      int16 clamped_output_activ =
+        std::max<int16>(-128, std::min<int16>(127, rescaled_output_activ));
+      output_activ_data_uint8[b * output_depth + c] = 128 + clamped_output_activ;
+    }
+  }
+}
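+// Instantiation sketch for the quantized LstmCell above (illustrative, not part
+// of the upstream kernel): with the typical StateIntegerBits = 4, the internal
+// state type FS resolves to gemmlowp::FixedPoint<std::int16_t, 4>, i.e. range
+// [-16, 16 * 32767 / 32768] with 11 fractional bits, and a caller would
+// instantiate the kernel as LstmCell<4>(params, ...) with the quantized
+// tensors declared in the signature above.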
+
+template <typename Scalar>
+void Split(const SplitParams &params, const RuntimeShape &input_shape, const Scalar *input_data,
+           const RuntimeShape *const *output_shapes, Scalar *const *output_data)
+{
+  ruy::profiler::ScopeLabel label("Split");
+  const int split_dimensions = input_shape.DimensionsCount();
+  int axis = params.axis < 0 ? params.axis + split_dimensions : params.axis;
+  int outputs_count = params.num_split;
+  TFLITE_DCHECK_LT(axis, split_dimensions);
+
+  int64_t split_size = 0;
+  for (int i = 0; i < outputs_count; i++)
+  {
+    TFLITE_DCHECK_EQ(output_shapes[i]->DimensionsCount(), split_dimensions);
+    for (int j = 0; j < split_dimensions; j++)
+    {
+      if (j != axis)
+      {
+        MatchingDim(*output_shapes[i], j, input_shape, j);
+      }
+    }
+    split_size += output_shapes[i]->Dims(axis);
+  }
+  TFLITE_DCHECK_EQ(split_size, input_shape.Dims(axis));
+  int64_t outer_size = 1;
+  for (int i = 0; i < axis; ++i)
+  {
+    outer_size *= input_shape.Dims(i);
+  }
+  // For all output arrays,
+  // FlatSize() = outer_size * Dims(axis) * base_inner_size;
+  int64_t base_inner_size = 1;
+  for (int i = axis + 1; i < split_dimensions; ++i)
+  {
+    base_inner_size *= input_shape.Dims(i);
+  }
+
+  const Scalar *input_ptr = input_data;
+  for (int k = 0; k < outer_size; k++)
+  {
+    for (int i = 0; i < outputs_count; ++i)
+    {
+      const int copy_size = output_shapes[i]->Dims(axis) * base_inner_size;
+      memcpy(output_data[i] + k * copy_size, input_ptr, copy_size * sizeof(Scalar));
+      input_ptr += copy_size;
+    }
+  }
+}
+
+inline int NodeOffset(int b, int h, int w, int height, int width)
+{
+  return (b * height + h) * width + w;
+}
+
+inline void LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params,
+                                       const RuntimeShape &input_shape, const float *input_data,
+                                       const RuntimeShape &output_shape, float *output_data)
+{
+  const int trailing_dim = input_shape.DimensionsCount() - 1;
+  const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+  const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+
+  for (int i = 0; i < outer_size; ++i)
+  {
+    for (int c = 0; c < depth; ++c)
+    {
+      const int begin_input_c = std::max(0, static_cast<int>(c - op_params.range));
+      const int end_input_c = std::min(depth, static_cast<int>(c + op_params.range));
+      float accum = 0.f;
+      for (int input_c = begin_input_c; input_c < end_input_c; ++input_c)
+      {
+        const float input_val = input_data[i * depth + input_c];
+        accum += input_val * input_val;
+      }
+      const float multiplier = std::pow(op_params.bias + op_params.alpha * accum, -op_params.beta);
+      output_data[i * depth + c] = input_data[i * depth + c] * multiplier;
+    }
+  }
+}
+
+inline void Dequantize(const RuntimeShape &input_shape, const Eigen::half *input_data,
+                       const RuntimeShape &output_shape, float *output_data)
+{
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  for (int i = 0; i < flat_size; i++)
+  {
+    output_data[i] = static_cast<float>(input_data[i]);
+  }
+}
+
+inline void FakeQuant(const tflite::FakeQuantParams &op_params, const RuntimeShape &input_shape,
+                      const float *input_data, const RuntimeShape &output_shape, float *output_data)
+{
+  ruy::profiler::ScopeLabel label("FakeQuant");
+  float rmin = op_params.minmax.min;
+  float rmax = op_params.minmax.max;
+  int num_bits = op_params.num_bits;
+  // 0 should always be a representable value. Let's assume that the initial
+  // min,max range contains 0.
+  TFLITE_DCHECK_LE(rmin, 0.0f);
+  TFLITE_DCHECK_GE(rmax, 0.0f);
+  TFLITE_DCHECK_LT(rmin, rmax);
+
+  // Code matches tensorflow's FakeQuantWithMinMaxArgsFunctor.
+  int quant_min = 0;
+  int quant_max = (1 << num_bits) - 1;
+  float nudged_min, nudged_max, nudged_scale;
+  NudgeQuantizationRange(rmin, rmax, quant_min, quant_max, &nudged_min, &nudged_max, &nudged_scale);
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  FakeQuantizeArray(nudged_scale, nudged_min, nudged_max, input_data, output_data, flat_size);
+}
+
+// Common subroutine for both `GatherNd` and `GatherNdString`.
+struct GatherNdHelperResult
+{
+  int n_slices;
+  int slice_size;
+  int indices_nd;
+  std::vector<int> dims_to_count;
+};
+
+// Returns common values being used on both `GatherNd` and `GatherNdString`.
+inline GatherNdHelperResult GatherNdHelper(const RuntimeShape &params_shape,
+                                           const RuntimeShape &indices_shape)
+{
+  GatherNdHelperResult ret;
+  ret.n_slices = 1;
+  ret.slice_size = 1;
+  const int indices_dims = indices_shape.DimensionsCount();
+  ret.indices_nd = indices_shape.Dims(indices_dims - 1);
+  const int params_dims = params_shape.DimensionsCount();
+  for (int i = 0; i < indices_dims - 1; ++i)
+  {
+    ret.n_slices *= indices_shape.Dims(i);
+  }
+  for (int i = ret.indices_nd; i < params_dims; ++i)
+  {
+    ret.slice_size *= params_shape.Dims(i);
+  }
+
+  int remain_flat_size = params_shape.FlatSize();
+  ret.dims_to_count = std::vector<int>(ret.indices_nd, 0);
+  for (int i = 0; i < ret.indices_nd; ++i)
+  {
+    ret.dims_to_count[i] = remain_flat_size / params_shape.Dims(i);
+    remain_flat_size = ret.dims_to_count[i];
+  }
+
+  return ret;
+}
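+// Illustrative example for GatherNdHelper above (assumed shapes): with
+// params_shape = [4, 5, 6] and indices_shape = [2, 3, 2], we get
+// indices_nd = 2, n_slices = 2 * 3 = 6, slice_size = 6, and
+// dims_to_count = {120 / 4, 30 / 5} = {30, 6}; each of the 6 gathered slices
+// then copies 6 contiguous elements starting at index0 * 30 + index1 * 6.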
+
+template <typename ParamsT, typename IndicesT = int32>
+inline void GatherNd(const RuntimeShape &params_shape, const ParamsT *params_data,
+                     const RuntimeShape &indices_shape, const IndicesT *indices_data,
+                     const RuntimeShape &output_shape, ParamsT *output_data)
+{
+  ruy::profiler::ScopeLabel label("GatherNd");
+
+  const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
+  for (int i = 0; i < res.n_slices; ++i)
+  {
+    int from_pos = 0;
+    for (int j = 0; j < res.indices_nd; ++j)
+    {
+      from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j];
+    }
+    std::memcpy(output_data + i * res.slice_size, params_data + from_pos,
+                sizeof(ParamsT) * res.slice_size);
+  }
+}
+
+#ifndef TF_LITE_STATIC_MEMORY
+template <typename IndicesT = int32>
+inline void GatherNdString(const RuntimeShape &params_shape, const TfLiteTensor *params_data,
+                           const RuntimeShape &indices_shape, const IndicesT *indices_data,
+                           const RuntimeShape &output_shape, TfLiteTensor *output_data)
+{
+  ruy::profiler::ScopeLabel label("GatherNdString");
+
+  const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
+  DynamicBuffer buffer;
+  for (int i = 0; i < res.n_slices; ++i)
+  {
+    int from_pos = 0;
+    for (int j = 0; j < res.indices_nd; ++j)
+    {
+      from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j];
+    }
+    for (int j = 0; j < res.slice_size; ++j)
+    {
+      buffer.AddString(GetString(params_data, from_pos + j));
+    }
+  }
+  buffer.WriteToTensor(output_data, /*new_shape=*/nullptr);
+}
+#endif
+
+template <typename IndicesT, typename UpdatesT>
+inline void ScatterNd(const RuntimeShape &indices_shape, const IndicesT *indices_data,
+                      const RuntimeShape &updates_shape, const UpdatesT *updates_data,
+                      const RuntimeShape &output_shape, UpdatesT *output_data)
+{
+  ruy::profiler::ScopeLabel label("ScatterNd");
+
+  int n_slices = 1;
+  int slice_size = 1;
+  const int outer_dims = indices_shape.DimensionsCount() - 1;
+  const int indices_nd = indices_shape.Dims(outer_dims);
+  const int updates_dims = updates_shape.DimensionsCount();
+  for (int i = 0; i < outer_dims; ++i)
+  {
+    n_slices *= indices_shape.Dims(i);
+  }
+  for (int i = outer_dims; i < updates_dims; ++i)
+  {
+    slice_size *= updates_shape.Dims(i);
+  }
+
+  int output_flat_size = output_shape.FlatSize();
+  int remain_flat_size = output_flat_size;
+  std::vector<int> dims_to_count(indices_nd, 0);
+  for (int i = 0; i < indices_nd; ++i)
+  {
+    dims_to_count[i] = remain_flat_size / output_shape.Dims(i);
+    remain_flat_size = dims_to_count[i];
+  }
+
+  memset(output_data, 0, sizeof(UpdatesT) * output_flat_size);
+  for (int i = 0; i < n_slices; ++i)
+  {
+    int to_pos = 0;
+    for (int j = 0; j < indices_nd; ++j)
+    {
+      IndicesT idx = indices_data[i * indices_nd + j];
+      TFLITE_DCHECK(0 <= idx && idx < output_shape.Dims(j));
+      to_pos += idx * dims_to_count[j];
+    }
+    for (int j = 0; j < slice_size; j++)
+    {
+      output_data[to_pos + j] += updates_data[i * slice_size + j];
+    }
+  }
+}
+
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+                  const RuntimeShape &output_shape, SequentialTensorWriter<T> *writer)
+{
+  const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape);
+  TFLITE_DCHECK_LE(op_params.begin_count, 5);
+  TFLITE_DCHECK_LE(op_params.size_count, 5);
+  const int begin_count = op_params.begin_count;
+  const int size_count = op_params.size_count;
+  // We front-pad the begin and size vectors.
+  std::array<int, 5> start;
+  std::array<int, 5> stop;
+  for (int i = 0; i < 5; ++i)
+  {
+    int padded_i = 5 - i;
+    start[i] = begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i];
+    stop[i] = (size_count < padded_i || op_params.size[size_count - padded_i] == -1)
+                ? ext_shape.Dims(i)
+                : start[i] + op_params.size[size_count - padded_i];
+  }
+
+  for (int i0 = start[0]; i0 < stop[0]; ++i0)
+  {
+    for (int i1 = start[1]; i1 < stop[1]; ++i1)
+    {
+      for (int i2 = start[2]; i2 < stop[2]; ++i2)
+      {
+        for (int i3 = start[3]; i3 < stop[3]; ++i3)
+        {
+          for (int i4 = start[4]; i4 < stop[4]; ++i4)
+          {
+            writer->Write(Offset(ext_shape, i0, i1, i2, i3, i4));
+          }
+        }
+      }
+    }
+  }
+}
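+// Worked example for the Slice routine above (assumed parameters): for
+// input_shape = [3, 4] with begin = {1, 0} and size = {2, -1}, the shape is
+// front-padded to [1, 1, 1, 3, 4], giving start = {0, 0, 0, 1, 0} and
+// stop = {1, 1, 1, 3, 4}; rows 1..2 and all 4 columns are written.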
+
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+                  const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+  SequentialTensorWriter<T> writer(input_data, output_data);
+  return Slice(op_params, input_shape, output_shape, &writer);
+}
+
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+                  const TfLiteTensor *input, const RuntimeShape &output_shape, TfLiteTensor *output)
+{
+  SequentialTensorWriter<T> writer(input, output);
+  return Slice(op_params, input_shape, output_shape, &writer);
+}
+
+template <typename T>
+void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data,
+             const RuntimeShape &output_shape, T *output_data)
+{
+  const int flat_size = MatchingFlatSize(input1_shape, output_shape);
+
+  auto min_value = input2_data[0];
+  for (int i = 0; i < flat_size; i++)
+  {
+    output_data[i] = input1_data[i] > min_value ? min_value : input1_data[i];
+  }
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T>
+inline void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &,
+                    const T *input2_data, const RuntimeShape &output_shape, T *output_data)
+{
+  // Drop shape of second input: not needed.
+  Minimum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+template <typename T>
+void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data,
+             const RuntimeShape &output_shape, T *output_data)
+{
+  const int flat_size = MatchingFlatSize(input1_shape, output_shape);
+
+  auto max_value = input2_data[0];
+  for (int i = 0; i < flat_size; i++)
+  {
+    output_data[i] = input1_data[i] < max_value ? max_value : input1_data[i];
+  }
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T>
+inline void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &,
+                    const T *input2_data, const RuntimeShape &output_shape, T *output_data)
+{
+  // Drop shape of second input: not needed.
+  Maximum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+template <typename T1, typename T2, typename T3>
+void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data,
+            const RuntimeShape &output_shape, T2 *output_data)
+{
+  ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data, std::greater<T1>());
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T1, typename T2, typename T3>
+inline void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data,
+                   const RuntimeShape &input2_shape, const T3 *input2_data,
+                   const RuntimeShape &output_shape, T2 *output_data)
+{
+  // Drop shape of second input: not needed.
+  ArgMax(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+template <typename D, typename T>
+void Select(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+            const RuntimeShape &input_x_shape, const T *input_x_data,
+            const RuntimeShape &input_y_shape, const T *input_y_data,
+            const RuntimeShape &output_shape, T *output_data)
+{
+  int64_t flatsize;
+  // Allow select operator executions on mixed scalar tensors and one element
+  // tensors.
+  if (input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 &&
+      input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1)
+  {
+    flatsize = 1;
+  }
+  else
+  {
+    flatsize = MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape);
+  }
+  for (int64_t i = 0; i < flatsize; ++i)
+  {
+    output_data[i] = input_condition_data[i] ? input_x_data[i] : input_y_data[i];
+  }
+}
+
+template <typename D, typename T>
+void RankOneSelect(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+                   const RuntimeShape &input_x_shape, const T *input_x_data,
+                   const RuntimeShape &input_y_shape, const T *input_y_data,
+                   const RuntimeShape &output_shape, T *output_data)
+{
+  const int64_t outer_size = input_condition_shape.FlatSize();
+  int64_t inner_size;
+  if (input_condition_shape.DimensionsCount() == 0)
+  {
+    inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape);
+  }
+  else
+  {
+    TFLITE_DCHECK_EQ(MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0), outer_size);
+    inner_size = MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape);
+  }
+
+  int64_t offset = 0;
+  for (int64_t i = 0; i < outer_size; i++)
+  {
+    const T *input_data = input_condition_data[i] ? input_x_data : input_y_data;
+    memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T));
+    offset += inner_size;
+  }
+}
+
+template <typename D, typename T>
+void BroadcastSelect4DSlow(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+                           const RuntimeShape &input_x_shape, const T *input_x_data,
+                           const RuntimeShape &input_y_shape, const T *input_y_data,
+                           const RuntimeShape &output_shape, T *output_data)
+{
+  TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4);
+
+  const RuntimeShape extended_output_shape = RuntimeShape::ExtendedShape(4, output_shape);
+
+  NdArrayDesc<4> desc_condition;
+  NdArrayDesc<4> desc_x;
+  NdArrayDesc<4> desc_y;
+  NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape, input_y_shape,
+                                      &desc_condition, &desc_x, &desc_y);
+
+  // In Tensorflow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), with the
+  // trailing dimension changing most rapidly (channels has the smallest
+  // stride, typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed. The
+  // first dimension has smallest stride.
+  //
+  // We name our variables by their Tensorflow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride for
+  // the best cache behavior.
+  for (int b = 0; b < extended_output_shape.Dims(0); ++b)
+  {
+    for (int y = 0; y < extended_output_shape.Dims(1); ++y)
+    {
+      for (int x = 0; x < extended_output_shape.Dims(2); ++x)
+      {
+        for (int c = 0; c < extended_output_shape.Dims(3); ++c)
+        {
+          const int condition_index = SubscriptToIndex(desc_condition, b, y, x, c);
+          const int x_index = SubscriptToIndex(desc_x, b, y, x, c);
+          const int y_index = SubscriptToIndex(desc_y, b, y, x, c);
+          output_data[Offset(extended_output_shape, b, y, x, c)] =
+            input_condition_data[condition_index] ? input_x_data[x_index] : input_y_data[y_index];
+        }
+      }
+    }
+  }
+}
+
+template <typename D, typename T>
+void SelectTrueCoords(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+                      T *output_data)
+{
+  const size_t size = input_condition_shape.FlatSize();
+  if (size == 0)
+  {
+    // The flattened size is zero, so there is nothing to output.
+    return;
+  }
+  const size_t cond_rank = input_condition_shape.DimensionsCount();
+
+  std::vector<int> dims_to_count(cond_rank, 0);
+  int cur_flat_size = size;
+  for (int i = 0; i < cond_rank; ++i)
+  {
+    dims_to_count[i] = cur_flat_size / input_condition_shape.Dims(i);
+    cur_flat_size = dims_to_count[i];
+  }
+
+  int output_index = 0;
+  for (int i = 0; i < size; ++i)
+  {
+    if (input_condition_data[i])
+    {
+      // Insert the coordinate of the current item (row major) into output.
+      int flat_index = i;
+      for (int j = 0; j < cond_rank; ++j)
+      {
+        int coord_j = flat_index / dims_to_count[j];
+        output_data[output_index * cond_rank + j] = coord_j;
+        flat_index %= dims_to_count[j];
+      }
+      output_index++;
+    }
+  }
+}
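+// Illustrative decomposition for SelectTrueCoords above (assumed shape): for a
+// [2, 3] condition, dims_to_count = {3, 1}, so a true element at flat index 4
+// is emitted as the coordinate (4 / 3, 4 % 3) = (1, 1).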
+
+// For ease of implementation, the indices are always a vector of size-4 vectors.
+template <typename T, typename TI>
+inline void SparseToDense(const std::vector<std::vector<TI>> &indices, const T *values,
+                          T default_value, bool value_is_scalar,
+                          const RuntimeShape &unextended_output_shape, T *output_data)
+{
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
+  const int value_count = indices.size();
+
+  // First fill the output_data with default value.
+  const int num_elements = output_shape.FlatSize();
+  for (int i = 0; i < num_elements; ++i)
+  {
+    output_data[i] = default_value;
+  }
+
+  // Handle the scalar-value case specially to avoid checking the boolean
+  // condition inside the loop on every iteration.
+  if (value_is_scalar)
+  {
+    for (int i = 0; i < value_count; ++i)
+    {
+      const std::vector<TI> &index = indices[i];
+      TFLITE_DCHECK_EQ(index.size(), 4);
+      const T value = *values; // just use the first value.
+      output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value;
+    }
+    return;
+  }
+
+  // Go through the values and indices to fill the sparse values.
+  for (int i = 0; i < value_count; ++i)
+  {
+    const std::vector<TI> &index = indices[i];
+    TFLITE_DCHECK_EQ(index.size(), 4);
+    const T value = values[i];
+    output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value;
+  }
+}
+
+template <typename T>
+inline void Pow(const RuntimeShape &input1_shape, const T *input1_data,
+                const RuntimeShape &input2_shape, const T *input2_data,
+                const RuntimeShape &output_shape, T *output_data)
+{
+  const int flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i)
+  {
+    output_data[i] = std::pow(input1_data[i], input2_data[i]);
+  }
+}
+
+template <typename T>
+inline void BroadcastPow4DSlow(const RuntimeShape &unextended_input1_shape, const T *input1_data,
+                               const RuntimeShape &unextended_input2_shape, const T *input2_data,
+                               const RuntimeShape &unextended_output_shape, T *output_data)
+{
+  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+  NdArrayDesc<4> desc1;
+  NdArrayDesc<4> desc2;
+  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1,
+                                      &desc2);
+
+  for (int b = 0; b < output_shape.Dims(0); ++b)
+  {
+    for (int y = 0; y < output_shape.Dims(1); ++y)
+    {
+      for (int x = 0; x < output_shape.Dims(2); ++x)
+      {
+        for (int c = 0; c < output_shape.Dims(3); ++c)
+        {
+          auto out_idx = Offset(output_shape, b, y, x, c);
+          auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
+          auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
+          auto in1_val = input1_data[in1_idx];
+          auto in2_val = input2_data[in2_idx];
+          output_data[out_idx] = std::pow(in1_val, in2_val);
+        }
+      }
+    }
+  }
+}
+
+template <typename Scalar>
+void Reverse(int axis, const RuntimeShape &input_shape, const Scalar *input_data,
+             const RuntimeShape &output_shape, Scalar *output_data)
+{
+  ruy::profiler::ScopeLabel label("Reverse");
+
+  int outer_size = 1;
+  for (int i = 0; i < axis; ++i)
+  {
+    outer_size *= input_shape.Dims(i);
+  }
+
+  int copy_size = 1;
+  for (int i = axis + 1; i < input_shape.DimensionsCount(); ++i)
+  {
+    copy_size *= input_shape.Dims(i);
+  }
+
+  const int dims_at_axis = input_shape.Dims(axis);
+  for (int i = 0; i < outer_size; ++i)
+  {
+    for (int j = 0; j < dims_at_axis; ++j)
+    {
+      const int start_pos = (i * dims_at_axis + j) * copy_size;
+      Scalar *output_ptr = output_data + start_pos;
+      int loc = (i * dims_at_axis + dims_at_axis - j - 1) * copy_size;
+      memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
+    }
+  }
+}
+
+template <typename Scalar, typename TS>
+void ReverseSequence(const TS *seq_lengths, const int seq_dim, const int batch_dim,
+                     const RuntimeShape &input_shape, const Scalar *input_data,
+                     const RuntimeShape &output_shape, Scalar *output_data)
+{
+  ruy::profiler::ScopeLabel label("ReverseSequence");
+
+  int outer_size = 1;
+  int outer_dim = std::min(batch_dim, seq_dim);
+  int medium_dim = std::max(batch_dim, seq_dim);
+  for (int i = 0; i < outer_dim; ++i)
+  {
+    outer_size *= input_shape.Dims(i);
+  }
+
+  int medium_size = 1;
+  for (int i = outer_dim + 1; i < medium_dim; ++i)
+  {
+    medium_size *= input_shape.Dims(i);
+  }
+
+  int copy_size = 1;
+  for (int i = medium_dim + 1; i < input_shape.DimensionsCount(); ++i)
+  {
+    copy_size *= input_shape.Dims(i);
+  }
+
+  const int dims_at_outer_dim = input_shape.Dims(outer_dim);
+  const int dims_at_medium_dim = input_shape.Dims(medium_dim);
+
+  Scalar *output_ptr;
+  if (batch_dim > seq_dim)
+  {
+    for (int i = 0; i < outer_size; ++i)
+    {
+      for (int j = 0; j < dims_at_outer_dim; ++j)
+      {
+        const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+        for (int p = 0; p < medium_size; ++p)
+        {
+          for (int q = 0; q < dims_at_medium_dim; ++q)
+          {
+            const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+            const Scalar *in_ptr = input_data + in_pos;
+            int sl = seq_lengths[q] - 1;
+            if (j > sl)
+            {
+              output_ptr = output_data + in_pos;
+            }
+            else
+            {
+              const int out_pos_base = (i * dims_at_outer_dim + sl - j) * medium_size;
+              const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+              output_ptr = output_data + out_pos;
+            }
+            memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
+          }
+        }
+      }
+    }
+  }
+  else if (batch_dim < seq_dim)
+  {
+    for (int i = 0; i < outer_size; ++i)
+    {
+      for (int j = 0; j < dims_at_outer_dim; ++j)
+      {
+        const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+        int sl = seq_lengths[j] - 1;
+        const int out_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+        for (int p = 0; p < medium_size; ++p)
+        {
+          for (int q = 0; q < dims_at_medium_dim; ++q)
+          {
+            const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+            const Scalar *in_ptr = input_data + in_pos;
+            if (q > sl)
+            {
+              output_ptr = output_data + in_pos;
+            }
+            else
+            {
+              const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + sl - q) * copy_size;
+              output_ptr = output_data + out_pos;
+            }
+            memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
+          }
+        }
+      }
+    }
+  }
+}
+
+template <typename T>
+inline void SegmentSum(const RuntimeShape &input_shape, const T *input_data,
+                       const RuntimeShape &segment_ids_shape, const int32_t *segment_ids_data,
+                       const RuntimeShape &output_shape, T *output_data)
+{
+  const int segment_flat_size = MatchingFlatSizeSkipDim(input_shape, 0, output_shape);
+
+  memset(output_data, 0, sizeof(T) * output_shape.FlatSize());
+
+  for (int i = 0; i < input_shape.Dims(0); i++)
+  {
+    int output_index = segment_ids_data[i];
+    for (int j = 0; j < segment_flat_size; ++j)
+    {
+      output_data[output_index * segment_flat_size + j] += input_data[i * segment_flat_size + j];
+    }
+  }
+}
+
+} // namespace reference_ops
+} // namespace tflite
+
+#endif // LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
diff --git a/onert-micro/luci-interpreter/pal/cmsisnn/pal.cmake b/onert-micro/luci-interpreter/pal/cmsisnn/pal.cmake
new file mode 100644 (file)
index 0000000..5110474
--- /dev/null
@@ -0,0 +1,83 @@
+macro(initialize_pal)
+    nnas_find_package(TensorFlowSource EXACT 2.6.0 REQUIRED)
+    nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 REQUIRED)
+    nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 REQUIRED)
+    nnas_find_package(TensorFlowRuySource EXACT 2.6.0 REQUIRED)
+    nnas_find_package(CMSIS-NN EXACT 4.0.0 REQUIRED)
+
+    if (NOT TensorFlowSource_FOUND)
+        message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+        return()
+    endif ()
+
+    if (NOT TensorFlowGEMMLowpSource_FOUND)
+        message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+        return()
+    endif ()
+
+    if (NOT TensorFlowEigenSource_FOUND)
+        message(STATUS "Skipping luci-interpreter: Eigen not found")
+        return()
+    endif ()
+
+    if (NOT TensorFlowRuySource_FOUND)
+        message(STATUS "Skipping luci-interpreter: Ruy not found")
+        return()
+    endif ()
+
+    if (NOT CMSISSource_FOUND)
+        message(STATUS "Skipping luci-interpreter: CMSISSource not found")
+        return()
+    endif ()
+
+    if (NOT CMSIS_NNSource_FOUND)
+        message(STATUS "Skipping luci-interpreter: CMSIS-NN not found")
+        return()
+    endif ()
+
+    set(PAL_INITIALIZED TRUE)
+endmacro()
+
+macro(add_pal_to_target TGT)
+    target_include_directories(${TGT} PRIVATE "${PAL}")
+    target_include_directories(${TGT} PRIVATE
+            "${TensorFlowRuySource_DIR}"
+            "${TensorFlowGEMMLowpSource_DIR}"
+            "${TensorFlowEigenSource_DIR}"
+            "${TensorFlowSource_DIR}")
+    target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+
+    file(GLOB_RECURSE PAL_SOURCES "${CMSIS_NNSource_DIR}/Source/ActivationFunctions/*.c"
+            "${CMSIS_NNSource_DIR}/Source/BasicMathFunctions/*.c"
+            "${CMSIS_NNSource_DIR}/Source/ConcatenationFunctions/*.c"
+            "${CMSIS_NNSource_DIR}/Source/ConvolutionFunctions/*.c"
+            "${CMSIS_NNSource_DIR}/Source/FullyConnectedFunctions/*.c"
+            "${CMSIS_NNSource_DIR}/Source/LSTMFunctions/*.c"
+            "${CMSIS_NNSource_DIR}/Source/NNSupportFunctions/*.c"
+            "${CMSIS_NNSource_DIR}/Source/PoolingFunctions/*.c"
+            "${CMSIS_NNSource_DIR}/Source/ReshapeFunctions/*.c"
+            "${CMSIS_NNSource_DIR}/Source/SoftmaxFunctions/*.c")
+
+    list(APPEND PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
+            ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+            ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
+    add_library(luci_interpreter_cmsisnn_pal STATIC ${PAL_SOURCES})
+    set_property(TARGET luci_interpreter_cmsisnn_pal PROPERTY POSITION_INDEPENDENT_CODE ON)
+    target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE
+            "${TensorFlowRuySource_DIR}"
+            "${TensorFlowGEMMLowpSource_DIR}"
+            "${TensorFlowEigenSource_DIR}"
+            "${TensorFlowSource_DIR}"
+            "${CMSIS_NNSource_DIR}"
+    )
+
+    set(CMSIS_PATH ${CMSISSource_DIR} CACHE INTERNAL "CMSIS_PATH")
+    add_subdirectory(${CMSIS_NNSource_DIR} ${CMAKE_CURRENT_BINARY_DIR}/CMSISNN)
+
+    target_include_directories(luci_interpreter_cmsisnn_pal PUBLIC
+            "${CMSISSource_DIR}/CMSIS/DSP/Include"
+            "${CMSISSource_DIR}/CMSIS/Core/Include"
+            "${CMSIS_NNSource_DIR}/Include")
+
+    target_link_libraries(${TGT} PRIVATE luci_interpreter_cmsisnn_pal)
+endmacro()
diff --git a/onert-micro/luci-interpreter/pal/linux/KernelsToBuild.lst b/onert-micro/luci-interpreter/pal/linux/KernelsToBuild.lst
new file mode 100644 (file)
index 0000000..1f4ccb3
--- /dev/null
@@ -0,0 +1,77 @@
+REGISTER_KERNEL(ADD, Add)
+REGISTER_KERNEL(ARG_MAX, ArgMax)
+REGISTER_KERNEL(AVERAGE_POOL_2D, AveragePool2D)
+REGISTER_KERNEL(BATCH_MATMUL, BatchMatMul)
+REGISTER_KERNEL(BATCH_TO_SPACE_ND, BatchToSpaceND)
+REGISTER_KERNEL(CAST, Cast)
+REGISTER_KERNEL(CONCATENATION, Concatenation)
+REGISTER_KERNEL(CONV_2D, Conv2D)
+REGISTER_KERNEL(DEPTH_TO_SPACE, DepthToSpace)
+REGISTER_KERNEL(DEPTHWISE_CONV_2D, DepthwiseConv2D)
+REGISTER_KERNEL(DEQUANTIZE, Dequantize)
+REGISTER_KERNEL(DIV, Div)
+REGISTER_KERNEL(ELU, Elu)
+REGISTER_KERNEL(EXP, Exp)
+REGISTER_KERNEL(EXPAND_DIMS, ExpandDims)
+REGISTER_KERNEL(FILL, Fill)
+REGISTER_KERNEL(FLOOR, Floor)
+REGISTER_KERNEL(FLOOR_DIV, FloorDiv)
+REGISTER_KERNEL(EQUAL, Equal)
+REGISTER_KERNEL(FULLY_CONNECTED, FullyConnected)
+REGISTER_KERNEL(GATHER, Gather)
+REGISTER_KERNEL(GREATER, Greater)
+REGISTER_KERNEL(GREATER_EQUAL, GreaterEqual)
+REGISTER_KERNEL(IF, If)
+REGISTER_KERNEL(INSTANCE_NORM, InstanceNorm)
+REGISTER_KERNEL(L2_NORMALIZATION, L2Normalize)
+REGISTER_KERNEL(L2_POOL_2D, L2Pool2D)
+REGISTER_KERNEL(LEAKY_RELU, LeakyRelu)
+REGISTER_KERNEL(LESS, Less)
+REGISTER_KERNEL(LESS_EQUAL, LessEqual)
+REGISTER_KERNEL(LOCAL_RESPONSE_NORMALIZATION, LocalResponseNormalization)
+REGISTER_KERNEL(LOGICAL_AND, LogicalAnd)
+REGISTER_KERNEL(LOGICAL_NOT, LogicalNot)
+REGISTER_KERNEL(LOGICAL_OR, LogicalOr)
+REGISTER_KERNEL(LOGISTIC, Logistic)
+REGISTER_KERNEL(LOG_SOFTMAX, LogSoftmax)
+REGISTER_KERNEL(MAXIMUM, Maximum)
+REGISTER_KERNEL(MAX_POOL_2D, MaxPool2D)
+REGISTER_KERNEL(MEAN, Mean)
+REGISTER_KERNEL(MINIMUM, Minimum)
+REGISTER_KERNEL(MIRROR_PAD, MirrorPad)
+REGISTER_KERNEL(MUL, Mul)
+REGISTER_KERNEL(NEG, Neg)
+REGISTER_KERNEL(NOT_EQUAL, NotEqual)
+REGISTER_KERNEL(ONE_HOT, OneHot)
+REGISTER_KERNEL(PACK, Pack)
+REGISTER_KERNEL(PAD, Pad)
+REGISTER_KERNEL(PADV2, PadV2)
+REGISTER_KERNEL(POW, Pow)
+REGISTER_KERNEL(PRELU, PRelu)
+REGISTER_KERNEL(QUANTIZE, Quantize)
+REGISTER_KERNEL(RELU, Relu)
+REGISTER_KERNEL(RELU6, Relu6)
+REGISTER_KERNEL(RESHAPE, Reshape)
+REGISTER_KERNEL(RESIZE_BILINEAR, ResizeBilinear)
+REGISTER_KERNEL(RESIZE_NEAREST_NEIGHBOR, ResizeNearestNeighbor)
+REGISTER_KERNEL(REVERSE_V2, ReverseV2)
+REGISTER_KERNEL(RSQRT, Rsqrt)
+REGISTER_KERNEL(SHAPE, Shape)
+REGISTER_KERNEL(SLICE, Slice)
+REGISTER_KERNEL(SOFTMAX, Softmax)
+REGISTER_KERNEL(SPACE_TO_BATCH_ND, SpaceToBatchND)
+REGISTER_KERNEL(SPACE_TO_DEPTH, SpaceToDepth)
+REGISTER_KERNEL(SPLIT, Split)
+REGISTER_KERNEL(SPLIT_V, SplitV)
+REGISTER_KERNEL(STRIDED_SLICE, StridedSlice)
+REGISTER_KERNEL(SQRT, Sqrt)
+REGISTER_KERNEL(SQUARE, Square)
+REGISTER_KERNEL(SQUARED_DIFFERENCE, SquaredDifference)
+REGISTER_KERNEL(SQUEEZE, Squeeze)
+REGISTER_KERNEL(SUB, Sub)
+REGISTER_KERNEL(SVDF, SVDF)
+REGISTER_KERNEL(TANH, Tanh)
+REGISTER_KERNEL(TRANSPOSE, Transpose)
+REGISTER_KERNEL(TRANSPOSE_CONV, TransposeConv)
+REGISTER_KERNEL(UNPACK, Unpack)
+REGISTER_KERNEL(WHILE, While)
diff --git a/onert-micro/luci-interpreter/pal/linux/PALFill.h b/onert-micro/luci-interpreter/pal/linux/PALFill.h
new file mode 100644 (file)
index 0000000..4ef1af2
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FILL_H
+#define LUCI_INTERPRETER_PAL_FILL_H
+
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+#endif // LUCI_INTERPRETER_PAL_FILL_H
diff --git a/onert-micro/luci-interpreter/pal/linux/PALSVDF.h b/onert-micro/luci-interpreter/pal/linux/PALSVDF.h
new file mode 100644 (file)
index 0000000..101d045
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <tensorflow/lite/kernels/internal/reference/svdf.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+            const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+            const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+            const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+            const int32_t *bias_data, int16_t *activation_state_data,
+            const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+            int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+            int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+  tflite::reference_ops::EvalIntegerSVDF(&params, input_shape, input_data, weight_feature_shape,
+                                         weight_feature_data, weight_time_shape, weight_time_data,
+                                         bias_shape, bias_data, activation_state_data, output_shape,
+                                         output_data, scratchpad_data, output_temp_data, scale_1_a,
+                                         scale_1_b, scale_2_a, scale_2_b, input_zp, output_zp);
+}
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+          const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+          const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+          const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+          const float *bias_data, float *scratchpad_data, float *activation_state_data,
+          const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::reference_ops::EvalFloatSVDF(&params, input_shape, input_data, weight_feature_shape,
+                                       weight_feature_data, weight_time_shape, weight_time_data,
+                                       bias_shape, bias_data, scratchpad_data,
+                                       activation_state_data, output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+  const luci_interpreter::DataType &input_data_type,
+  const luci_interpreter::DataType &weight_feature_data_type,
+  luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+  luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+  luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+  const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+  const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+
+  if (input_data_type == luci_interpreter::DataType::FLOAT32 &&
+      (weight_feature_data_type == luci_interpreter::DataType::S8 ||
+       weight_feature_data_type == luci_interpreter::DataType::U8))
+  {
+    (void)input_shape;
+    (void)weight_time_shape;
+    (void)scratchpad_3;
+    (void)scratchpad_4;
+    (void)scratchpad_5;
+    (void)scratchpad_6;
+
+    assert(false && "Hybrid type is not currently supported for linux platform");
+  }
+
+  // Resize scratchpad_1 tensor
+  scratchpad_1->resize({batch_size, num_filters});
+
+  if (input_data_type == luci_interpreter::DataType::S8)
+  {
+    // Resize scratchpad_2 for full_integer op
+    scratchpad_2->resize({batch_size, num_units});
+  }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/onert-micro/luci-interpreter/pal/linux/pal.cmake b/onert-micro/luci-interpreter/pal/linux/pal.cmake
new file mode 100644 (file)
index 0000000..c373b96
--- /dev/null
@@ -0,0 +1,82 @@
+macro(initialize_pal)
+    nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+    nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+    nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
+    nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
+
+    if (NOT TensorFlowSource_FOUND)
+        message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+        return()
+    endif ()
+
+    if (NOT TensorFlowGEMMLowpSource_FOUND)
+        message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+        return()
+    endif ()
+
+    if (NOT TensorFlowEigenSource_FOUND)
+        message(STATUS "Skipping luci-interpreter: Eigen not found")
+        return()
+    endif ()
+
+    if (NOT TensorFlowRuySource_FOUND)
+        message(STATUS "Skipping luci-interpreter: Ruy not found")
+        return()
+    endif ()
+
+    find_package(Threads REQUIRED)
+
+    set(PAL_INITIALIZED TRUE)
+endmacro()
+
+macro(add_pal_to_target TGT)
+    target_include_directories(${TGT} PRIVATE "${PAL}")
+    target_include_directories(${TGT} SYSTEM PRIVATE
+            "${TensorFlowRuySource_DIR}"
+            "${TensorFlowGEMMLowpSource_DIR}"
+            "${TensorFlowEigenSource_DIR}"
+            "${TensorFlowSource_DIR}")
+    target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+
+    # TODO Revisit this: instead of listing the TensorFlow sources below,
+    # add the sources with visitors in this library.
+    set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+            ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc
+            ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+
+    if(BUILD_ARM32_NEON)
+        # NOTE may need to revise this list for version upgrade
+        set(PAL_SOURCES ${PAL_SOURCES}
+                ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc
+                ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/cpu_check.cc
+                ${TensorFlowRuySource_DIR}/ruy/allocator.cc
+                ${TensorFlowRuySource_DIR}/ruy/block_map.cc
+                ${TensorFlowRuySource_DIR}/ruy/blocking_counter.cc
+                ${TensorFlowRuySource_DIR}/ruy/context_get_ctx.cc
+                ${TensorFlowRuySource_DIR}/ruy/cpuinfo.cc
+                ${TensorFlowRuySource_DIR}/ruy/ctx.cc
+                ${TensorFlowRuySource_DIR}/ruy/denormal.cc
+                ${TensorFlowRuySource_DIR}/ruy/frontend.cc
+                ${TensorFlowRuySource_DIR}/ruy/pack_arm.cc
+                ${TensorFlowRuySource_DIR}/ruy/prepacked_cache.cc
+                ${TensorFlowRuySource_DIR}/ruy/prepare_packed_matrices.cc
+                ${TensorFlowRuySource_DIR}/ruy/system_aligned_alloc.cc
+                ${TensorFlowRuySource_DIR}/ruy/thread_pool.cc
+                ${TensorFlowRuySource_DIR}/ruy/trmul.cc
+                ${TensorFlowRuySource_DIR}/ruy/tune.cc
+                ${TensorFlowRuySource_DIR}/ruy/wait.cc
+                ${TensorFlowRuySource_DIR}/ruy/kernel_arm32.cc
+                )
+    endif(BUILD_ARM32_NEON)
+
+    add_library(luci_interpreter_linux_pal_micro STATIC ${PAL_SOURCES})
+    set_target_properties(luci_interpreter_linux_pal_micro PROPERTIES POSITION_INDEPENDENT_CODE ON)
+    target_include_directories(luci_interpreter_linux_pal_micro SYSTEM PRIVATE
+            "${TensorFlowRuySource_DIR}"
+            "${TensorFlowGEMMLowpSource_DIR}"
+            "${TensorFlowEigenSource_DIR}"
+            "${TensorFlowSource_DIR}"
+    )
+
+    target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_linux_pal_micro)
+endmacro()
diff --git a/onert-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst b/onert-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst
new file mode 100644 (file)
index 0000000..9203973
--- /dev/null
@@ -0,0 +1,9 @@
+REGISTER_KERNEL(FULLY_CONNECTED, FullyConnected)
+REGISTER_KERNEL(CONV_2D, Conv2D)
+REGISTER_KERNEL(LOGISTIC, Logistic)
+REGISTER_KERNEL(EXPAND_DIMS, ExpandDims)
+REGISTER_KERNEL(RESHAPE, Reshape)
+REGISTER_KERNEL(MAX_POOL_2D, MaxPool2D)
+REGISTER_KERNEL(CONCATENATION, Concatenation)
+REGISTER_KERNEL(SOFTMAX, Softmax)
+REGISTER_KERNEL(UNIDIRECTIONAL_SEQUENCE_LSTM, UnidirectionalSequenceLSTM)
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALApplyActivationToVector.h b/onert-micro/luci-interpreter/pal/mcu/PALApplyActivationToVector.h
new file mode 100644 (file)
index 0000000..55076c2
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_APPLY_ACTIVATION_TO_VECTOR_H
+#define LUCI_INTERPRETER_PAL_APPLY_ACTIVATION_TO_VECTOR_H
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstdlib>
+
+#include "tensorflow/lite/c/builtin_op_data.h"
+
+namespace luci_interpreter_pal
+{
+
+// Dynamic (non-fused) activation functor. Perhaps it is worth having
+// template instantiation?
+// TODO(aselle): Make this more efficient by pulling the switch to conv_eval
+// using template inlining.
+class ActivationFunctor
+{
+public:
+  explicit ActivationFunctor(TfLiteFusedActivation act) : act_(act) {}
+
+  float operator()(float a) const
+  {
+    switch (act_)
+    {
+      case kTfLiteActNone:
+        return a;
+      case kTfLiteActRelu:
+        return a < 0.f ? 0.f : a;
+      case kTfLiteActRelu6:
+        return std::max(0.f, std::min(a, 6.f));
+      case kTfLiteActTanh:
+        return std::tanh(a);
+      case kTfLiteActSigmoid:
+        return 1.0f / (1.0f + std::exp(-a));
+      default:
+        assert(false && "Activation functor is not supported");
+        return 0.f; // avoid falling off a non-void function when NDEBUG disables the assert
+    }
+  }
+
+private:
+  TfLiteFusedActivation act_;
+};
+
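+// Applies the selected fused activation element-wise to v_size values read
+// from `vector`, writing the results to `result`.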
+inline void ApplyActivationToVector(const float *vector, int v_size,
+                                    TfLiteFusedActivation activation, float *result)
+{
+  auto activation_func = ActivationFunctor(activation);
+  for (int v = 0; v < v_size; v++)
+  {
+    *result++ = (activation_func)(*vector++);
+  }
+}
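+// Usage sketch (illustrative only):
+//   float in[3] = {-1.0f, 0.5f, 7.0f};
+//   float out[3];
+//   ApplyActivationToVector(in, 3, kTfLiteActRelu, out); // out = {0.0f, 0.5f, 7.0f}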
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_APPLY_ACTIVATION_TO_VECTOR_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h b/onert-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h
new file mode 100644 (file)
index 0000000..acfd8b7
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+               const tflite::RuntimeShape &unextended_input2_shape, const int32_t *block_shape_data,
+               const tflite::RuntimeShape &unextended_input3_shape, const int32_t *crops_data,
+               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  tflite::reference_ops::BatchToSpaceND(
+    unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+    unextended_input3_shape, crops_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALConv2d.h b/onert-micro/luci-interpreter/pal/mcu/PALConv2d.h
new file mode 100644 (file)
index 0000000..31fd7f2
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/conv.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+                        const float *input_data, const tflite::RuntimeShape &filter_shape,
+                        const float *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const float *bias_data, const tflite::RuntimeShape &output_shape,
+                        float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        float *scratchpad_data)
+{
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
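+  // The float reference Conv accepts optional im2col buffers; an empty shape
+  // and nullptr are passed since no im2col scratchpad is used on this platform.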
+  tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+                              bias_shape, bias_data, output_shape, output_data,
+                              tflite::RuntimeShape(), nullptr);
+}
+
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+                        const uint8_t *input_data, const tflite::RuntimeShape &filter_shape,
+                        const uint8_t *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+                        uint8_t *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        uint8_t *scratchpad_data)
+{
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
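+  // For the quantized path the scratchpad is forwarded as the reference
+  // kernel's im2col buffer; the trailing nullptr is the unused backend context.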
+  tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+                              bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+                              scratchpad_data, nullptr);
+}
+
+static inline void
+ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult, const int32_t *shifts,
+               const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+               const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+               const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+               const tflite::RuntimeShape &output_shape, int8_t *output_data,
+               const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+  tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+                                                filter_shape, filter_data, bias_shape, bias_data,
+                                                output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::ConvParams &params,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &filter_shape,
+                                         const tflite::RuntimeShape &output_shape)
+{
+  (void)input_data_type;
+  (void)params;
+  (void)input_shape;
+  (void)filter_shape;
+  (void)output_shape;
+  (void)scratchpad;
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALDequantize.h b/onert-micro/luci-interpreter/pal/mcu/PALDequantize.h
new file mode 100644 (file)
index 0000000..efa6b16
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
+#include "PALreference_ops.h"
+
+namespace luci_interpreter_pal
+{
+
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+                              const tflite::RuntimeShape &input_shape, const T *input_data,
+                              const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape,
+                                               output_data);
+}
+
+static inline void Dequantize(tflite::DequantizationParams &params,
+                              const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
+                              const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALFill.h b/onert-micro/luci-interpreter/pal/mcu/PALFill.h
new file mode 100644 (file)
index 0000000..1448b0c
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FILL_H
+#define LUCI_INTERPRETER_PAL_FILL_H
+
+#include "PALreference_ops.h"
+
+#endif // LUCI_INTERPRETER_PAL_FILL_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALQuantize.h b/onert-micro/luci-interpreter/pal/mcu/PALQuantize.h
new file mode 100644 (file)
index 0000000..effb85d
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include "PALreference_ops.h"
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+                            const tflite::RuntimeShape &input_shape, const float *input_data,
+                            const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+                              int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+                              int32_t input_zero_point, int32_t output_zero_point,
+                              Output *output_data)
+{
+  tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier,
+                                    effective_scale_shift, input_zero_point, output_zero_point,
+                                    output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h b/onert-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h
new file mode 100644 (file)
index 0000000..4db7fa8
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_bilinear.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
+               const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+               const tflite::RuntimeShape &output_size_shape, const int32_t *output_size_data,
+               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
+                                        output_size_shape, output_size_data,
+                                        unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h b/onert-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h
new file mode 100644 (file)
index 0000000..06c597d
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+                      const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+                      const tflite::RuntimeShape &output_size_shape,
+                      const int32_t *output_size_data,
+                      const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
+                                               output_size_shape, output_size_data,
+                                               unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALSVDF.h b/onert-micro/luci-interpreter/pal/mcu/PALSVDF.h
new file mode 100644 (file)
index 0000000..d39a2f1
--- /dev/null
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <tensorflow/lite/kernels/internal/reference/svdf.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+            const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+            const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+            const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+            const int32_t *bias_data, int16_t *activation_state_data,
+            const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+            int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+            int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+  const int n_rank = params.rank;
+  const int n_batch = input_shape.Dims(0);
+  const int n_input = input_shape.Dims(1);
+  const int n_filter = weight_feature_shape.Dims(0);
+  const int n_unit = n_filter / n_rank;
+  const int n_memory = weight_time_shape.Dims(1);
+
+  // Left shift the activation_state.
+  {
+    int16_t *new_state_start = activation_state_data;
+    const int16_t *old_state_start = activation_state_data + 1;
+    const int16_t *old_state_end = activation_state_data + n_batch * n_filter * n_memory;
+    while (old_state_start != old_state_end)
+    {
+      *new_state_start++ = *old_state_start++;
+    }
+  }
+
+  // Note: no need to clear the latest activation, matmul is not accumulative.
+
+  // Feature matmul.
+  {
+    const int32_t output_max = std::numeric_limits<int16_t>::max();
+    const int32_t output_min = std::numeric_limits<int16_t>::min();
+    int16_t *result_in_batch = activation_state_data + (n_memory - 1);
+    for (int b = 0; b < n_batch; b++)
+    {
+      const int8_t *matrix_ptr = weight_feature_data;
+      for (int r = 0; r < n_filter; r++)
+      {
+        int32_t dot_prod = 0;
+        const int8_t *vector_in_batch = input_data + b * n_input;
+        for (int c = 0; c < n_input; c++)
+        {
+          dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp);
+        }
+        dot_prod = tflite::MultiplyByQuantizedMultiplier(dot_prod, scale_1_a, scale_1_b);
+        dot_prod = std::min(std::max(output_min, dot_prod), output_max);
+        // This assumes state is symmetrically quantized. Otherwise last bit of
+        // state should be initialized to its zero point and accumulate the
+        // dot_prod.
+        // Equivalent as the following:
+        //     result_in_batch = zero point, which happens to be zero.
+        //     result_in_batch += dot_prod_56.
+        *result_in_batch = dot_prod;
+        result_in_batch += n_memory;
+      }
+    }
+  }
+
+  // Time.
+  {
+    for (int b = 0; b < n_batch; ++b)
+    {
+      int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
+
+      // Perform batched vector dot product:
+      const int16_t *vector1_ptr = weight_time_data;
+      const int16_t *vector2_ptr = activation_state_data + b * n_memory * n_filter;
+
+      for (int i = 0; i < n_filter; i++)
+      {
+        *scratch_ptr_batch = 0;
+        for (int j = 0; j < n_memory; j++)
+        {
+          *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
+        }
+        scratch_ptr_batch++;
+      }
+    }
+  }
+
+  // Reduce, add bias, rescale, activation.
+  {
+    // Add bias.
+    if (bias_data)
+    {
+      // Vector batch assign:
+      for (int i = 0; i < n_batch; ++i)
+      {
+        int32_t *output_ptr = output_temp_data + i * n_unit;
+        const int32_t *bias_ptr = bias_data;
+        for (int j = 0; j < n_unit; ++j)
+        {
+          *output_ptr++ = *bias_ptr++;
+        }
+      }
+    }
+    else
+    {
+      int32_t *output_ptr = output_temp_data;
+      for (int i = 0; i < n_batch * n_unit; ++i)
+      {
+        *output_ptr++ = 0;
+      }
+    }
+
+    // Reduce.
+    for (int b = 0; b < n_batch; ++b)
+    {
+      int32_t *output_temp_ptr = output_temp_data + b * n_unit;
+      int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
+
+      // Reduction sum vector
+      for (int i = 0; i < n_unit; ++i)
+      {
+        for (int j = 0; j < n_rank; ++j)
+        {
+          output_temp_ptr[i] += *scratch_ptr_batch++;
+        }
+      }
+    }
+
+    // Rescale.
+    const int32_t output_max = std::numeric_limits<int8_t>::max();
+    const int32_t output_min = std::numeric_limits<int8_t>::min();
+    for (int i = 0; i < n_batch * n_unit; ++i)
+    {
+      int32_t x1 = output_temp_data[i];
+      int32_t x2 = tflite::MultiplyByQuantizedMultiplier(x1, scale_2_a, scale_2_b);
+      int32_t x3 = x2 + output_zp;
+      int32_t x4 = std::min(std::max(output_min, x3), output_max);
+      output_data[i] = static_cast<int8_t>(x4);
+    }
+  }
+}
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+          const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+          const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+          const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+          const float *bias_data, float *scratchpad_data, float *activation_state_data,
+          const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  const int32_t rank = params.rank;
+  const int32_t batch_size = input_shape.Dims(0);
+  const int32_t input_size = input_shape.Dims(1);
+  const int32_t num_filters = weight_feature_shape.Dims(0);
+  const int32_t num_units = num_filters / rank;
+  const int32_t memory_size = weight_time_shape.Dims(1);
+
+  // Left shift the activation_state.
+  {
+    float *new_state_start = activation_state_data;
+    const float *old_state_start = activation_state_data + 1;
+    const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size;
+    while (old_state_start != old_state_end)
+    {
+      *new_state_start++ = *old_state_start++;
+    }
+  }
+
+  // Note: no need to clear the latest activation, matmul is not accumulative.
+
+  // Compute conv1d(inputs, weights_feature).
+  // The activation_state's rightmost column is used to save current cycle
+  // activation. This is achieved by starting at state_ptr[memory_size - 1] and
+  // having the stride equal to memory_size.
+
+  // Perform batched matrix vector multiply operation:
+  {
+    const float *matrix = weight_feature_data;
+    const float *vector = input_data;
+    float *result = &activation_state_data[memory_size - 1];
+    float *result_in_batch = result;
+    for (int i = 0; i < batch_size; ++i)
+    {
+      const float *matrix_ptr = matrix;
+      for (int j = 0; j < num_filters; ++j)
+      {
+        float dot_prod = 0.0f;
+        const float *vector_in_batch = vector + i * input_size;
+        for (int k = 0; k < input_size; ++k)
+        {
+          dot_prod += *matrix_ptr++ * *vector_in_batch++;
+        }
+        *result_in_batch = dot_prod;
+        result_in_batch += memory_size;
+      }
+    }
+  }
+
+  tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
+    batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
+    params.activation, activation_state_data, scratchpad_data, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+  const luci_interpreter::DataType &input_data_type,
+  const luci_interpreter::DataType &weight_feature_data_type,
+  luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+  luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+  luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+  const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+  const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+
+  if (input_data_type == luci_interpreter::DataType::FLOAT32 &&
+      (weight_feature_data_type == luci_interpreter::DataType::S8 ||
+       weight_feature_data_type == luci_interpreter::DataType::U8))
+  {
+    (void)input_shape;
+    (void)weight_time_shape;
+    (void)scratchpad_3;
+    (void)scratchpad_4;
+    (void)scratchpad_5;
+    (void)scratchpad_6;
+
+    assert(false && "Hybrid type is not currently supported for mcu platform");
+  }
+
+  // Resize scratchpad_1 tensor
+  scratchpad_1->resize({batch_size, num_filters});
+
+  if (input_data_type == luci_interpreter::DataType::S8)
+  {
+    // Resize scratchpad_2 for full_integer op
+    scratchpad_2->resize({batch_size, num_units});
+  }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALSoftmax.h b/onert-micro/luci-interpreter/pal/mcu/PALSoftmax.h
new file mode 100644 (file)
index 0000000..1c4c64d
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+                                              float beta)
+{
+  // Do nothing for mcu
+  (void)data;
+  (void)input_scale;
+  (void)beta;
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+  // TODO Impl it
+  assert(false && "Softmax NYI");
+  (void)params;
+  (void)input_scale;
+  (void)beta;
+}
+
+template <typename T>
+static inline void Softmax(const tflite::SoftmaxParams &params,
+                           const tflite::RuntimeShape &input_shape, const T *input_data,
+                           const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  // TODO Impl it
+  // MARK: At this moment this operation is not supported on mcu
+  assert(false && "Softmax NYI");
+  (void)params;
+  (void)input_shape;
+  (void)input_data;
+  (void)output_shape;
+  (void)output_data;
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h b/onert-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h
new file mode 100644 (file)
index 0000000..9f7c54e
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+SpaceToBatchND(const tflite::SpaceToBatchParams &params,
+               const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+               const tflite::RuntimeShape &unextended_input2_shape, const int32_t *block_shape_data,
+               const tflite::RuntimeShape &unextended_input3_shape, const int32_t *paddings_data,
+               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  tflite::reference_ops::SpaceToBatchND(
+    params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+    unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALUnidirectionalSequenceLSTM.h b/onert-micro/luci-interpreter/pal/mcu/PALUnidirectionalSequenceLSTM.h
new file mode 100644 (file)
index 0000000..287f303
--- /dev/null
@@ -0,0 +1,678 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_H
+#define LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_H
+
+#include "kernels/UnidirectionalSequenceLSTM.h"
+#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h"
+#include "tensorflow/lite/kernels/internal/reference/tanh.h"
+
+namespace luci_interpreter_pal
+{
+namespace lstm_internal
+{
+namespace
+{
+// Possible fused activation functions.
+typedef enum
+{
+  kTfLiteActNone = 0,
+  kTfLiteActRelu,
+  kTfLiteActReluN1To1, // min(max(-1, x), 1)
+  kTfLiteActRelu6,     // min(max(0, x), 6)
+  kTfLiteActTanh,
+  kTfLiteActSignBit,
+  kTfLiteActSigmoid,
+} TfLiteFusedActivation;
+
+} // namespace
+
+template <typename T>
+inline T activationFunctionWithMinMax(T x, T output_activation_min, T output_activation_max)
+{
+  using std::max;
+  using std::min;
+  return min(max(x, output_activation_min), output_activation_max);
+}
+
+template <typename T>
+inline void mul(const luci_interpreter::lstm::ArithmeticParams *params,
+                const tflite::RuntimeShape &input1_shape, const T *input1_data,
+                const tflite::RuntimeShape &input2_shape, const T *input2_data,
+                const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  T output_activation_min = params->quantized_activation_min;
+  T output_activation_max = params->quantized_activation_max;
+
+  const int flat_size = input1_shape.FlatSize();
+  for (int i = 0; i < flat_size; ++i)
+  {
+    output_data[i] = activationFunctionWithMinMax(input1_data[i] * input2_data[i],
+                                                  output_activation_min, output_activation_max);
+  }
+}
+
+#ifndef DIS_QUANT
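+// Rescales x by a Q31 fixed-point multiplier combined with a rounding
+// power-of-two shift, mirroring TFLite's MultiplyByQuantizedMultiplier.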
+inline int32_t multiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift)
+{
+  using gemmlowp::RoundingDivideByPOT;
+  using gemmlowp::SaturatingRoundingDoublingHighMul;
+  int left_shift = shift > 0 ? shift : 0;
+  int right_shift = shift > 0 ? 0 : -shift;
+  return RoundingDivideByPOT(
+    SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier), right_shift);
+}
+
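+// Integer fully-connected used for the LSTM gate projections: accumulates in
+// BiasType and rescales the result with the per-tensor output multiplier/shift.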
+template <typename InputType, typename WeightType, typename OutputType, typename BiasType>
+void fullyConnectedInteger(const tflite::FullyConnectedParams &params,
+                           const tflite::RuntimeShape &input_shape, const InputType *input_data,
+                           const tflite::RuntimeShape &filter_shape, const WeightType *filter_data,
+                           const tflite::RuntimeShape &bias_shape, const BiasType *bias_data,
+                           const tflite::RuntimeShape &output_shape, OutputType *output_data)
+{
+  const int32_t input_offset = params.input_offset;
+  const int32_t filter_offset = params.weights_offset;
+  const int32_t output_offset = params.output_offset;
+  const int32_t output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
+
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth = output_shape.Dims(output_dim_count - 1);
+  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
+  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+  for (int b = 0; b < batches; ++b)
+  {
+    for (int out_c = 0; out_c < output_depth; ++out_c)
+    {
+      BiasType acc = 0;
+      for (int d = 0; d < accum_depth; ++d)
+      {
+        int32_t input_val = input_data[b * accum_depth + d];
+        int32_t filter_val = filter_data[out_c * accum_depth + d];
+        acc += (filter_val + filter_offset) * (input_val + input_offset);
+      }
+      if (bias_data)
+      {
+        acc += bias_data[out_c];
+      }
+      int32_t acc_scaled = multiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+      acc_scaled += output_offset;
+      acc_scaled = std::max(acc_scaled, output_activation_min);
+      acc_scaled = std::min(acc_scaled, output_activation_max);
+      output_data[out_c + output_depth * b] = static_cast<OutputType>(acc_scaled);
+    }
+  }
+}
+
+void fullyConnected(const tflite::FullyConnectedParams &params,
+                    const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+                    const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+                    const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+                    const tflite::RuntimeShape &output_shape, int16_t *output_data)
+{
+  return fullyConnectedInteger(params, input_shape, input_data, filter_shape, filter_data,
+                               bias_shape, bias_data, output_shape, output_data);
+}
+
+void fullyConnected(const tflite::FullyConnectedParams &params,
+                    const tflite::RuntimeShape &input_shape, const int16_t *input_data,
+                    const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+                    const tflite::RuntimeShape &bias_shape, const int64_t *bias_data,
+                    const tflite::RuntimeShape &output_shape, int16_t *output_data)
+{
+  return fullyConnectedInteger(params, input_shape, input_data, filter_shape, filter_data,
+                               bias_shape, bias_data, output_shape, output_data);
+}
+
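+// Quantized element-wise multiply: applies the input offsets, rescales the
+// product with the output multiplier/shift, adds the output offset, and clamps.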
+template <typename InputType, typename OutputType>
+void mulElementwise(int size, const luci_interpreter::lstm::ArithmeticParams *params,
+                    const InputType *input1_data, const InputType *input2_data,
+                    OutputType *output_data)
+{
+  for (int i = 0; i < size; ++i)
+  {
+    const int32_t input1_val = params->input1_offset + input1_data[i];
+    const int32_t input2_val = params->input2_offset + input2_data[i];
+    const int32_t unclamped_result =
+      params->output_offset + multiplyByQuantizedMultiplier(input1_val * input2_val,
+                                                            params->output_multiplier,
+                                                            params->output_shift);
+    const int32_t clamped_output =
+      std::min(params->quantized_activation_max,
+               std::max(params->quantized_activation_min, unclamped_result));
+    output_data[i] = static_cast<OutputType>(clamped_output);
+  }
+}
+
+// Input and output have the same shape in LSTM
+void mul(const tflite::RuntimeShape &shape, const luci_interpreter::lstm::ArithmeticParams *params,
+         const int16_t *input1_data, const int16_t *input2_data, int8_t *output_data)
+{
+  return mulElementwise<int16_t, int8_t>(shape.FlatSize(), params, input1_data, input2_data,
+                                         output_data);
+}
+
+// Input and output have the same shape in LSTM
+void mul(const tflite::RuntimeShape &shape, const luci_interpreter::lstm::ArithmeticParams *params,
+         const int16_t *input1_data, const int16_t *input2_data, int16_t *output_data)
+{
+  return mulElementwise(shape.FlatSize(), params, input1_data, input2_data, output_data);
+}
+
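+// Element-wise addition of two int16 buffers with saturation to the int16 range.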
+void addElementWise(const int16_t *input_1, const int16_t *input_2, int n_batch, int n_input,
+                    int16_t *output)
+{
+  for (int batch = 0; batch < n_batch; ++batch)
+  {
+    for (int i = 0; i < n_input; ++i)
+    {
+      const int index = batch * n_input + i;
+      int32_t sum = input_1[index] + input_2[index];
+      const int32_t sum_clamped =
+        std::min(static_cast<int32_t>(std::numeric_limits<int16_t>::max()),
+                 std::max(static_cast<int32_t>(std::numeric_limits<int16_t>::min()), sum));
+      output[index] = static_cast<int16_t>(sum_clamped);
+    }
+  }
+}
+
+void tanh(int32_t cell_state_scale_power, const tflite::RuntimeShape &input_data_shape,
+          int16_t *input_data, const tflite::RuntimeShape &output_data_shape, int16_t *output_data)
+{
+  int32_t tanh_input_left_shift = (15 + cell_state_scale_power) - 3;
+  int32_t input_multiplier = 0;
+  if (tanh_input_left_shift < 0) /* handling negative shift value */
+  {
+    tanh_input_left_shift = -tanh_input_left_shift;
+    input_multiplier = 3;
+  }
+  tflite::reference_integer_ops::Tanh(input_multiplier, tanh_input_left_shift, input_data_shape,
+                                      input_data, output_data_shape, output_data);
+}
+
+void sigmoid(const tflite::RuntimeShape &data_shape, int16_t *data)
+{
+  tflite::reference_integer_ops::Logistic(0 /*data->input_multiplier*/,
+                                          0 /*data->input_left_shift */,
+                                          data_shape.FlatSize() /*NumElements(input->dims)*/,
+                                          data /* tflite::micro::GetTensorData<int16_t>(input) */,
+                                          data /*tflite::micro::GetTensorData<int16_t>(output) */);
+}
+
+void clipping(const int v_size, const luci_interpreter::lstm::CellStateInfo *cell_state_info,
+              int16_t *vector)
+{
+  for (int i = 0; i < v_size; i++)
+  {
+    vector[i] = std::max(std::min(cell_state_info->quantized_cell_clip, vector[i]),
+                         static_cast<int16_t>(-cell_state_info->quantized_cell_clip));
+  }
+}
+#endif // DIS_QUANT
+
+#ifndef DIS_FLOAT
+void fullyConnected(const tflite::FullyConnectedParams &params,
+                    const tflite::RuntimeShape &input_shape, const float *input_data,
+                    const tflite::RuntimeShape &filter_shape, const float *filter_data,
+                    const tflite::RuntimeShape &bias_shape, const float *bias_data,
+                    const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  return tflite::reference_ops::FullyConnected(params, input_shape, input_data, filter_shape,
+                                               filter_data, bias_shape, bias_data, output_shape,
+                                               output_data);
+}
+
+// Input and output have the same shape in LSTM
+void mul(const tflite::RuntimeShape &shape, const luci_interpreter::lstm::ArithmeticParams *params,
+         const float *input1_data, const float *input2_data, float *output_data)
+{
+  return mul(params, shape, input1_data, shape, input2_data, shape, output_data);
+}
+
+void addElementWise(const float *input_1, const float *input_2, int n_batch, int n_input,
+                    float *output)
+{
+  for (int batch = 0; batch < n_batch; ++batch)
+  {
+    for (int i = 0; i < n_input; ++i)
+    {
+      const int index = batch * n_input + i;
+      output[index] = input_1[index] + input_2[index];
+    }
+  }
+}
+
+void tanh(int32_t cell_state_scale_power, const tflite::RuntimeShape &input_data_shape,
+          float *input_data, const tflite::RuntimeShape &output_data_shape, float *output_data)
+{
+  tflite::reference_ops::Tanh(input_data_shape, input_data, output_data_shape, output_data);
+}
+
+void sigmoid(const tflite::RuntimeShape &data_shape, float *data)
+{
+  tflite::reference_ops::Logistic(data_shape, data, data_shape, data);
+}
+
+void clipping(const int v_size, const luci_interpreter::lstm::CellStateInfo *cell_state_info,
+              float *vector)
+{
+  for (int i = 0; i < v_size; i++)
+  {
+    vector[i] =
+      std::max(std::min(cell_state_info->cell_clip, vector[i]), -cell_state_info->cell_clip);
+  }
+}
+#endif // DIS_FLOAT
+
+// Size information about the LSTM kernel, which is deduced from tensors stored
+// in the flat buffer file.
+struct LstmSizeInfo
+{
+  bool time_major;
+  int32_t batch_size;
+  int32_t time_steps;
+  int32_t input_dimension;
+  int32_t state_dimension;
+};
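+// Note: these fields are filled from the kernel tensors in evalLSTM() below:
+// a time-major input is laid out as [time_steps, batch_size, input_dimension],
+// a batch-major input as [batch_size, time_steps, input_dimension], and
+// state_dimension is taken from the output_state tensor.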
+
+class LstmStepManager
+{
+public:
+  LstmStepManager() = delete;
+  // Does not take ownership; the referenced LstmSizeInfo must remain valid
+  // for the lifetime of the constructed object.
+  explicit LstmStepManager(const LstmSizeInfo &size_info) : size_info_(size_info) {}
+
+  void updateTime()
+  {
+    current_time_ += 1;
+    // default: one batch per inference
+    int input_step = size_info_.input_dimension;
+    int output_step = size_info_.state_dimension;
+    // time major: batch inference
+    if (size_info_.time_major)
+    {
+      input_step = input_step * size_info_.batch_size;
+      output_step = output_step * size_info_.batch_size;
+    }
+
+    input_offset_ += input_step;
+    output_offset_ += output_step;
+  }
+
+  void updateBatch()
+  {
+    current_batch_ += 1;
+    TFLITE_DCHECK_LE(current_batch_, size_info_.batch_size);
+    // batch inference for time major: no action needed
+    if (size_info_.time_major)
+    {
+      return;
+    }
+    // otherwise: single batch inference, go to the next batch
+    hidden_state_offset_ += size_info_.state_dimension;
+    cell_state_offset_ += size_info_.state_dimension;
+  }
+
+  void resetTime() { current_time_ = 0; }
+
+  tflite::RuntimeShape inputShape() const
+  {
+    int batch_size = 1;
+    if (size_info_.time_major)
+    {
+      batch_size = size_info_.batch_size;
+    }
+    const int dims[2] = {batch_size, size_info_.input_dimension};
+    const int32_t *dims_data = reinterpret_cast<const int32_t *>(dims);
+    return tflite::RuntimeShape(2, dims_data);
+  }
+
+  tflite::RuntimeShape stateShape() const
+  {
+    int batch_size = 1;
+    if (size_info_.time_major)
+    {
+      batch_size = size_info_.batch_size;
+    }
+    const int dims[2] = {batch_size, size_info_.state_dimension};
+    const int32_t *dims_data = reinterpret_cast<const int32_t *>(dims);
+    return tflite::RuntimeShape(2, dims_data);
+  }
+
+  int inputOffset() const { return input_offset_; }
+
+  int outputOffset() const { return output_offset_; }
+
+  int hiddenStateOffset() const { return hidden_state_offset_; }
+
+  int cellStateOffset() const { return cell_state_offset_; }
+
+private:
+  int32_t current_time_ = 0;
+  int32_t current_batch_ = 0;
+  int32_t input_offset_ = 0;
+  int32_t output_offset_ = 0;
+  int32_t hidden_state_offset_ = 0;
+  int32_t cell_state_offset_ = 0;
+
+  const LstmSizeInfo &size_info_;
+};
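+// Sketch of how the offsets above evolve (derived from the update methods, not
+// extra behaviour): with time_major == false, updateTime() advances
+// input_offset_ by input_dimension and output_offset_ by state_dimension, while
+// updateBatch() advances hidden_state_offset_ and cell_state_offset_ by
+// state_dimension; resetTime() only rewinds current_time_, the data offsets
+// keep growing monotonically across the whole input.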
+
+// Calculates a single LSTM gate.
+// Implements the following formula:
+//   gate = activate(FC(input) + FC(recurrent))
+// The activation is sigmoid, except for the "cell" gate, which is configurable (usually tanh)
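+// For instance, as wired up in lstmStep() below, the forget gate is
+//   forget = sigmoid(FC(x_t, input_to_forget_weights, forget_gate_bias)
+//                    + FC(h_{t-1}, recurrent_to_forget_weights /* no bias */))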
+template <typename ActivationType, typename WeightType, typename CellType, typename BiasType>
+void calculateLstmGate(const LstmStepManager *step_info,
+                       const luci_interpreter::lstm::GateParameters *gate_params,
+                       // Input FC
+                       ActivationType *input_data, const circle::Tensor *input_weight,
+                       const circle::Tensor *input_bias,
+                       // Recurrent FC
+                       ActivationType *recurrent_data, const circle::Tensor *recurrent_weight,
+                       const circle::Tensor *recurrent_bias,
+                       // Output
+                       CellType *gate_output,
+                       // Scratch arrays
+                       CellType *fc_output_buffer, const TfLiteFusedActivation activation,
+                       luci_interpreter::BaseRuntimeGraph *runtime_graph)
+{
+  // Input FC
+  const auto gate_output_shape = step_info->stateShape();
+  {
+    tflite::FullyConnectedParams op_params{};
+    op_params.input_offset = gate_params->input_fc_params.input_offset;
+    op_params.weights_offset = gate_params->input_fc_params.weights_offset;
+    op_params.output_offset = gate_params->input_fc_params.output_offset;
+    op_params.output_multiplier = gate_params->input_fc_params.output_multiplier;
+    op_params.output_shift = gate_params->input_fc_params.output_shift;
+    op_params.quantized_activation_min = gate_params->input_fc_params.quantized_activation_min;
+    op_params.quantized_activation_max = gate_params->input_fc_params.quantized_activation_max;
+    op_params.float_activation_max = gate_params->input_fc_params.float_activation_max;
+    op_params.float_activation_min = gate_params->input_fc_params.float_activation_min;
+
+    fullyConnected(op_params, step_info->inputShape(), input_data + step_info->inputOffset(),
+                   luci_interpreter::kernels::getTensorShape(input_weight),
+                   luci_interpreter::kernels::getTensorData<WeightType>(
+                     runtime_graph->getConstDataByTensor(input_weight)),
+                   luci_interpreter::kernels::getTensorShape(input_bias),
+                   luci_interpreter::kernels::getTensorData<BiasType>(
+                     runtime_graph->getConstDataByTensor(input_bias)),
+                   gate_output_shape, gate_output);
+  }
+
+  // Recurrent FC
+  {
+    tflite::FullyConnectedParams op_params{};
+    op_params.input_offset = gate_params->recurrent_fc_params.input_offset;
+    op_params.weights_offset = gate_params->recurrent_fc_params.weights_offset;
+    op_params.output_offset = gate_params->recurrent_fc_params.output_offset;
+    op_params.output_multiplier = gate_params->recurrent_fc_params.output_multiplier;
+    op_params.output_shift = gate_params->recurrent_fc_params.output_shift;
+    op_params.quantized_activation_min = gate_params->recurrent_fc_params.quantized_activation_min;
+    op_params.quantized_activation_max = gate_params->recurrent_fc_params.quantized_activation_max;
+    op_params.float_activation_max = gate_params->recurrent_fc_params.float_activation_max;
+    op_params.float_activation_min = gate_params->recurrent_fc_params.float_activation_min;
+
+    fullyConnected(op_params, step_info->stateShape(),
+                   recurrent_data + step_info->hiddenStateOffset(),
+                   luci_interpreter::kernels::getTensorShape(recurrent_weight),
+                   luci_interpreter::kernels::getTensorData<WeightType>(
+                     runtime_graph->getConstDataByTensor(recurrent_weight)),
+                   luci_interpreter::kernels::getTensorShape(recurrent_bias),
+                   luci_interpreter::kernels::getTensorData<BiasType>(
+                     runtime_graph->getConstDataByTensor(recurrent_bias)),
+                   gate_output_shape, fc_output_buffer);
+
+    addElementWise(gate_output, fc_output_buffer, /*n_batch=*/gate_output_shape.DimsData()[0],
+                   /*n_state=*/gate_output_shape.DimsData()[1], gate_output);
+
+    switch (activation)
+    {
+      case TfLiteFusedActivation::kTfLiteActSigmoid:
+        sigmoid(gate_output_shape, gate_output);
+        break;
+      case TfLiteFusedActivation::kTfLiteActTanh:
+      {
+        // Set the scale power to -12 to avoid shift
+        tanh(/*cell_state_scale_power=*/-12, gate_output_shape, gate_output, gate_output_shape,
+             gate_output);
+      }
+      break;
+      default:
+        // Only Sigmoid or Tanh is used.
+        assert(false && "Only Sigmoid or Tanh is used");
+    }
+  }
+}
+
+// Update the hidden state of the LSTM kernel using the following formula:
+// updated_hidden_state = Tanh(updated_cell_state) * output_gate_output, where *
+// denotes element-wise multiplication
+template <typename CellType, typename ActivationType>
+void updateLstmHidden(const LstmStepManager *step_info, CellType *cell_state_data_base,
+                      ActivationType *hidden_state_data, const CellType *output_gate_output,
+                      const luci_interpreter::lstm::ArithmeticParams *mul_params,
+                      int32_t cell_state_scale_power, CellType *buffer)
+{
+  auto cell_state_shape = step_info->stateShape();
+  CellType *cell_state_data = cell_state_data_base + step_info->cellStateOffset();
+  // Tanh(cell_state)
+  tanh(cell_state_scale_power, cell_state_shape, cell_state_data, cell_state_shape, buffer);
+  // Update the hidden state
+  mul(cell_state_shape, mul_params, buffer, output_gate_output,
+      hidden_state_data + step_info->hiddenStateOffset());
+}
+
+// Update the cell state using the output from the forget gate, input gate, and
+// cell gate. Formula: updated_cell_state = forget_gate_output * cell_state +
+// input_gate_output * cell_gate_output, where * denotes element-wise
+// multiplication
+template <typename CellType>
+void updateLstmCell(const LstmStepManager *step_info, CellType *cell_state_data,
+                    // Gate outputs
+                    CellType *forget_gate_output, const CellType *input_gate_output,
+                    const CellType *cell_gate_output,
+                    // Mul parameters
+                    const luci_interpreter::lstm::ArithmeticParams &forget_cell_mul_params,
+                    const luci_interpreter::lstm::ArithmeticParams &input_mul_params,
+                    const luci_interpreter::lstm::CellStateInfo *cell_state_info, CellType *buffer)
+{
+  auto cell_state_shape = step_info->stateShape();
+  // Forget Gate x Cell State
+  mul(cell_state_shape, &forget_cell_mul_params, forget_gate_output,
+      cell_state_data + step_info->cellStateOffset(),
+      cell_state_data + step_info->cellStateOffset());
+  // Input Gate x Cell Gate
+  mul(cell_state_shape, &input_mul_params, input_gate_output, cell_gate_output, buffer);
+
+  // Update the cell state
+  addElementWise(cell_state_data + step_info->cellStateOffset(), buffer,
+                 /*n_batch=*/cell_state_shape.DimsData()[0],
+                 /*n_state=*/cell_state_shape.DimsData()[1],
+                 cell_state_data + step_info->cellStateOffset());
+
+  if (cell_state_info->cell_clip > 0)
+  {
+    clipping(cell_state_shape.FlatSize(), cell_state_info,
+             cell_state_data + step_info->cellStateOffset());
+  }
+}
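+// Note: in the quantized (int16) path the two mul() calls above already rescale
+// their products using the output_multiplier / output_shift stored in the
+// ArithmeticParams (see mulElementwise()), so the sum passed to clipping() is
+// already expressed in the cell-state scale.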
+
+template <typename ActivationType, typename WeightType, typename CellType, typename BiasType>
+void lstmStep(luci_interpreter::lstm::LSTMStruct *lstm_struct,
+              luci_interpreter::lstm::LSTMParameters *lstm_params, LstmStepManager *step_info,
+              luci_interpreter::lstm::CellStateInfo *cell_state_info,
+              ActivationType *output_state_data, CellType *cell_state_data, CellType *scratch0,
+              CellType *scratch1, CellType *scratch2, CellType *scratch3,
+              luci_interpreter::BaseRuntimeGraph *runtime_graph)
+{
+  /*Step1: Calculate gate outputs to prepare cell state update*/
+  CellType *gate_internal_buffer = scratch3;
+  CellType *forget_gate_output = scratch0;
+
+  auto input_data = luci_interpreter::kernels::getTensorData<ActivationType>(
+    runtime_graph->getDataByTensor(lstm_struct->input()));
+
+  calculateLstmGate<ActivationType, WeightType, CellType, BiasType>(
+    step_info, &lstm_params->forget_gate_parameters,
+    // Input FC
+    input_data, lstm_struct->input_to_forget_weights(), lstm_struct->forget_gate_bias(),
+    // Recurrent FC
+    output_state_data, lstm_struct->recurrent_to_forget_weights(), nullptr,
+    // Output
+    forget_gate_output, gate_internal_buffer, TfLiteFusedActivation::kTfLiteActSigmoid,
+    runtime_graph);
+
+  // Input Gate calculation
+  CellType *input_gate_output = scratch1;
+  calculateLstmGate<ActivationType, WeightType, CellType, BiasType>(
+    step_info, &lstm_params->input_gate_parameters,
+    // Input FC
+    input_data, lstm_struct->input_to_input_weights(), lstm_struct->input_gate_bias(),
+    // Recurrent FC
+    output_state_data, lstm_struct->recurrent_to_input_weights(),
+    /*recurrent_bias*/ nullptr,
+    // Output
+    input_gate_output,
+    // Scratch arrays
+    gate_internal_buffer, TfLiteFusedActivation::kTfLiteActSigmoid, runtime_graph);
+
+  // Cell Gate calculation
+  CellType *cell_gate_output = scratch2;
+  calculateLstmGate<ActivationType, WeightType, CellType, BiasType>(
+    step_info, &lstm_params->cell_gate_parameters,
+    // Input FC
+    input_data, lstm_struct->input_to_cell_weights(), lstm_struct->cell_gate_bias(),
+    // Recurrent FC
+    output_state_data, lstm_struct->recurrent_to_cell_weights(),
+    /*recurrent_bias*/ nullptr,
+    // Output
+    cell_gate_output,
+    // Scratch arrays
+    gate_internal_buffer, TfLiteFusedActivation::kTfLiteActTanh, runtime_graph);
+
+  /*Step2: update the cell state */
+  {
+    // const InterGateParameters& inter_gate_params = op_data.inter_gate_parameters;
+    CellType *updated_input_buffer = scratch1; // reuse buffer
+
+    updateLstmCell<CellType>(
+      step_info, cell_state_data, forget_gate_output, input_gate_output, cell_gate_output,
+      lstm_params->inter_gate_parameters.forget_cell_mul_params,
+      lstm_params->inter_gate_parameters.input_mul_params, cell_state_info, updated_input_buffer);
+  }
+
+  {
+    /*Step3: update the hidden state */
+    CellType *output_gate_output = scratch1; // reuse buffer
+    calculateLstmGate<ActivationType, WeightType, CellType, BiasType>(
+      step_info, &lstm_params->output_gate_parameters,
+      // Input FC
+      input_data, lstm_struct->input_to_output_weights(), lstm_struct->output_gate_bias(),
+      // Recurrent FC
+      output_state_data, lstm_struct->recurrent_to_output_weights(), nullptr,
+      // Output
+      output_gate_output,
+      // Scratch arrays
+      gate_internal_buffer, TfLiteFusedActivation::kTfLiteActSigmoid, runtime_graph);
+    CellType *tanh_activated_cell_buffer = scratch0; // reuse buffer
+    updateLstmHidden<CellType, ActivationType>(
+      step_info, cell_state_data, output_state_data, output_gate_output,
+      &lstm_params->inter_gate_parameters.output_mul_params,
+      cell_state_info->cell_state_scale_power, tanh_activated_cell_buffer);
+
+    ActivationType *output_ptr = luci_interpreter::kernels::getTensorData<ActivationType>(
+      runtime_graph->getDataByTensor(lstm_struct->output()));
+    std::memcpy(output_ptr + step_info->outputOffset(),
+                output_state_data + step_info->hiddenStateOffset(),
+                step_info->stateShape().FlatSize() * sizeof(ActivationType));
+  }
+}
+
+} // namespace lstm_internal
+
+// Evaluate the LSTM kernel over (potentially) multiple time steps and batches
+template <typename ActivationType, typename WeightType, typename CellType, typename BiasType>
+void evalLSTM(luci_interpreter::lstm::LSTMStruct *lstm_struct,
+              luci_interpreter::lstm::LSTMParameters *lstm_params,
+              luci_interpreter::lstm::CellStateInfo *cell_state_info,
+              ActivationType *output_state_data, CellType *cell_state_data, CellType *scratch0,
+              CellType *scratch1, CellType *scratch2, CellType *scratch3,
+              luci_interpreter::BaseRuntimeGraph *runtime_graph)
+{
+  lstm_internal::LstmSizeInfo size_info;
+
+  size_info.time_major = lstm_struct->options->time_major();
+  size_info.batch_size = size_info.time_major
+                           ? luci_interpreter::Tensor::dim(lstm_struct->input(), 1)
+                           : luci_interpreter::Tensor::dim(lstm_struct->input(), 0);
+  size_info.time_steps = size_info.time_major
+                           ? luci_interpreter::Tensor::dim(lstm_struct->input(), 0)
+                           : luci_interpreter::Tensor::dim(lstm_struct->input(), 1);
+  size_info.input_dimension = luci_interpreter::Tensor::dim(lstm_struct->input(), 2);
+  size_info.state_dimension = luci_interpreter::Tensor::dim(lstm_struct->output_state(), 1);
+
+  lstm_internal::LstmStepManager step_info(size_info);
+
+  // time is the first dimension, so each step can process the whole batch
+  if (size_info.time_major)
+  {
+    for (int t = 0; t < size_info.time_steps; t++)
+    {
+      lstm_internal::lstmStep<ActivationType, WeightType, CellType, BiasType>(
+        lstm_struct, lstm_params, &step_info, cell_state_info, output_state_data, cell_state_data,
+        scratch0, scratch1, scratch2, scratch3, runtime_graph);
+      // prepare for the next time step
+      step_info.updateTime();
+    }
+  }
+  else
+  {
+    // batch first: batched computation is not possible, run single-batch inference
+    for (int b = 0; b < size_info.batch_size; b++)
+    {
+      for (int t = 0; t < size_info.time_steps; t++)
+      {
+        lstm_internal::lstmStep<ActivationType, WeightType, CellType, BiasType>(
+          lstm_struct, lstm_params, &step_info, cell_state_info, output_state_data, cell_state_data,
+          scratch0, scratch1, scratch2, scratch3, runtime_graph);
+        // prepare for the next time step
+        step_info.updateTime();
+      }
+      // prepare for the next batch
+      step_info.updateBatch();
+      step_info.resetTime();
+    }
+  }
+}
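+// A hypothetical int8 instantiation of the template above (the concrete wiring
+// lives in the UnidirectionalSequenceLSTM kernel, not in this header) might be:
+//   evalLSTM<int8_t, int8_t, int16_t, int32_t>(
+//     &lstm_struct, &lstm_params, &cell_state_info,
+//     output_state_data, cell_state_data,
+//     scratch0, scratch1, scratch2, scratch3, runtime_graph);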
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/PALreference_ops.h b/onert-micro/luci-interpreter/pal/mcu/PALreference_ops.h
new file mode 100644 (file)
index 0000000..62c7209
--- /dev/null
@@ -0,0 +1,1556 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+#define LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <type_traits>
+
+#include "third_party/eigen3/Eigen/Core"
+#include "fixedpoint/fixedpoint.h"
+#include "ruy/profiler/instrumentation.h" // from @ruy
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/add.h"
+#include "tensorflow/lite/kernels/internal/reference/add_n.h"
+#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h"
+#include "tensorflow/lite/kernels/internal/reference/batch_matmul.h"
+#include "tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h"
+#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
+#include "tensorflow/lite/kernels/internal/reference/cast.h"
+#include "tensorflow/lite/kernels/internal/reference/ceil.h"
+#include "tensorflow/lite/kernels/internal/reference/comparisons.h"
+#include "tensorflow/lite/kernels/internal/reference/concatenation.h"
+#include "tensorflow/lite/kernels/internal/reference/conv.h"
+#include "tensorflow/lite/kernels/internal/reference/depth_to_space.h"
+#include "tensorflow/lite/kernels/internal/reference/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/div.h"
+#include "tensorflow/lite/kernels/internal/reference/elu.h"
+#include "tensorflow/lite/kernels/internal/reference/exp.h"
+#include "tensorflow/lite/kernels/internal/reference/fill.h"
+#include "tensorflow/lite/kernels/internal/reference/floor.h"
+#include "tensorflow/lite/kernels/internal/reference/floor_div.h"
+#include "tensorflow/lite/kernels/internal/reference/floor_mod.h"
+#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
+#include "tensorflow/lite/kernels/internal/reference/gather.h"
+#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
+#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
+#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h"
+#include "tensorflow/lite/kernels/internal/reference/log_softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h"
+#include "tensorflow/lite/kernels/internal/reference/mul.h"
+#include "tensorflow/lite/kernels/internal/reference/neg.h"
+#include "tensorflow/lite/kernels/internal/reference/pad.h"
+#include "tensorflow/lite/kernels/internal/reference/pooling.h"
+#include "tensorflow/lite/kernels/internal/reference/prelu.h"
+#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "tensorflow/lite/kernels/internal/reference/quantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reduce.h"
+#include "tensorflow/lite/kernels/internal/reference/requantize.h"
+#include "tensorflow/lite/kernels/internal/reference/resize_bilinear.h"
+#include "tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h"
+#include "tensorflow/lite/kernels/internal/reference/round.h"
+#include "tensorflow/lite/kernels/internal/reference/softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h"
+#include "tensorflow/lite/kernels/internal/reference/space_to_depth.h"
+#include "tensorflow/lite/kernels/internal/reference/strided_slice.h"
+#include "tensorflow/lite/kernels/internal/reference/string_comparisons.h"
+#include "tensorflow/lite/kernels/internal/reference/sub.h"
+#include "tensorflow/lite/kernels/internal/reference/tanh.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose_conv.h"
+#include "tensorflow/lite/kernels/internal/strided_slice_logic.h"
+#include "tensorflow/lite/kernels/internal/tensor.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+namespace tflite
+{
+
+namespace reference_ops
+{
+
+template <typename T>
+inline void Relu(const RuntimeShape &input_shape, const T *input_data,
+                 const RuntimeShape &output_shape, T *output_data)
+{
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i)
+  {
+    const T val = input_data[i];
+    const T lower = 0;
+    const T clamped = val < lower ? lower : val;
+    output_data[i] = clamped;
+  }
+}
+
+template <typename T>
+inline void Relu1(const RuntimeShape &input_shape, const T *input_data,
+                  const RuntimeShape &output_shape, T *output_data)
+{
+  ruy::profiler::ScopeLabel label("Relu1 (not fused)");
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i)
+  {
+    const T val = input_data[i];
+    const T upper = 1;
+    const T lower = -1;
+    const T clamped = val > upper ? upper : val < lower ? lower : val;
+    output_data[i] = clamped;
+  }
+}
+
+inline void Relu6(const RuntimeShape &input_shape, const float *input_data,
+                  const RuntimeShape &output_shape, float *output_data)
+{
+  ruy::profiler::ScopeLabel label("Relu6 (not fused)");
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i)
+  {
+    const float val = input_data[i];
+    const float upper = 6;
+    const float lower = 0;
+    const float clamped = val > upper ? upper : val < lower ? lower : val;
+    output_data[i] = clamped;
+  }
+}
+
+template <typename T>
+inline void ReluX(const tflite::ReluParams &params, const RuntimeShape &input_shape,
+                  const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+  ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i)
+  {
+    const int32 val = static_cast<int32_t>(input_data[i]);
+    int32 clamped = params.output_offset + MultiplyByQuantizedMultiplier(val - params.input_offset,
+                                                                         params.output_multiplier,
+                                                                         params.output_shift);
+    clamped = std::max(params.quantized_activation_min, clamped);
+    clamped = std::min(params.quantized_activation_max, clamped);
+    output_data[i] = static_cast<T>(clamped);
+  }
+}
+
+template <typename T>
+inline void ReluX(const tflite::ActivationParams &params, const RuntimeShape &input_shape,
+                  const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+  ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  const T max_value = params.quantized_activation_max;
+  const T min_value = params.quantized_activation_min;
+  for (int i = 0; i < flat_size; ++i)
+  {
+    const T val = input_data[i];
+    const T clamped = val > max_value ? max_value : val < min_value ? min_value : val;
+    output_data[i] = clamped;
+  }
+}
+
+// TODO(jiawen): We can implement BroadcastMul on buffers of arbitrary
+// dimensionality if the runtime code does a single loop over one dimension
+// that handles broadcasting as the base case. The code generator would then
+// generate max(D1, D2) nested for loops.
+inline void BroadcastMulFivefold(const ArithmeticParams &unswitched_params,
+                                 const RuntimeShape &unswitched_input1_shape,
+                                 const uint8 *unswitched_input1_data,
+                                 const RuntimeShape &unswitched_input2_shape,
+                                 const uint8 *unswitched_input2_data,
+                                 const RuntimeShape &output_shape, uint8 *output_data)
+{
+  ArithmeticParams switched_params = unswitched_params;
+  switched_params.input1_offset = unswitched_params.input2_offset;
+  switched_params.input2_offset = unswitched_params.input1_offset;
+
+  const bool use_unswitched = unswitched_params.broadcast_category ==
+                              tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;
+
+  const ArithmeticParams &params = use_unswitched ? unswitched_params : switched_params;
+  const uint8 *input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data;
+  const uint8 *input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data;
+
+  // Fivefold nested loops. The second input resets its position for each
+  // iteration of the second loop. The first input resets its position at the
+  // beginning of the fourth loop. The innermost loop is an elementwise Mul of
+  // sections of the arrays.
+  uint8 *output_data_ptr = output_data;
+  const uint8 *input1_data_ptr = input1_data;
+  const uint8 *input2_data_reset = input2_data;
+  int y0 = params.broadcast_shape[0];
+  int y1 = params.broadcast_shape[1];
+  int y2 = params.broadcast_shape[2];
+  int y3 = params.broadcast_shape[3];
+  int y4 = params.broadcast_shape[4];
+  for (int i0 = 0; i0 < y0; ++i0)
+  {
+    const uint8 *input2_data_ptr;
+    for (int i1 = 0; i1 < y1; ++i1)
+    {
+      input2_data_ptr = input2_data_reset;
+      for (int i2 = 0; i2 < y2; ++i2)
+      {
+        for (int i3 = 0; i3 < y3; ++i3)
+        {
+          MulElementwise(y4, params, input1_data_ptr, input2_data_ptr, output_data_ptr);
+          input2_data_ptr += y4;
+          output_data_ptr += y4;
+        }
+        input1_data_ptr += y4;
+      }
+    }
+    input2_data_reset = input2_data_ptr;
+  }
+}
+
+inline void Mul(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+                const int16 *input1_data, const RuntimeShape &input2_shape,
+                const int16 *input2_data, const RuntimeShape &output_shape, int16 *output_data)
+{
+  ruy::profiler::ScopeLabel label("Mul/Int16");
+
+  const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+  for (int i = 0; i < flat_size; i++)
+  {
+    // F0 uses 0 integer bits, range [-1, 1].
+    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+    F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+    output_data[i] = unclamped_result.raw();
+  }
+}
+
+inline void Mul(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+                const int16 *input1_data, const RuntimeShape &input2_shape,
+                const int16 *input2_data, const RuntimeShape &output_shape, uint8 *output_data)
+{
+  ruy::profiler::ScopeLabel label("Mul/Int16Uint8");
+  int32 output_offset = params.output_offset;
+  int32 output_activation_min = params.quantized_activation_min;
+  int32 output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+
+  const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+  for (int i = 0; i < flat_size; i++)
+  {
+    // F0 uses 0 integer bits, range [-1, 1].
+    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+    F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+    int16 rescaled_result = gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
+    int16 clamped_result = std::min<int16>(output_activation_max - output_offset, rescaled_result);
+    clamped_result = std::max<int16>(output_activation_min - output_offset, clamped_result);
+    output_data[i] = output_offset + clamped_result;
+  }
+}
+
+inline void Sub16(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+                  const int16_t *input1_data, const RuntimeShape &input2_shape,
+                  const int16_t *input2_data, const RuntimeShape &output_shape,
+                  int16_t *output_data)
+{
+  ruy::profiler::ScopeLabel label("Sub/Int16");
+  const int input1_shift = params.input1_shift;
+  const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+  const int16 output_activation_min = params.quantized_activation_min;
+  const int16 output_activation_max = params.quantized_activation_max;
+
+  TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
+  TFLITE_DCHECK_LE(input1_shift, 0);
+  TFLITE_DCHECK_LE(params.input2_shift, 0);
+  const int16 *not_shift_input = input1_shift == 0 ? input1_data : input2_data;
+  const int16 *shift_input = input1_shift == 0 ? input2_data : input1_data;
+  const int input_right_shift = input1_shift == 0 ? -params.input2_shift : -input1_shift;
+
+  if (input1_shift == 0)
+  {
+    // F0 uses 0 integer bits, range [-1, 1].
+    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+    for (int i = 0; i < flat_size; ++i)
+    {
+      F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
+      F0 scaled_input =
+        F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
+      F0 result = SaturatingSub(input_ready_scaled, scaled_input);
+      const int16 raw_output = result.raw();
+      const int16 clamped_output =
+        std::min(output_activation_max, std::max(output_activation_min, raw_output));
+      output_data[i] = clamped_output;
+    }
+  }
+  else
+  {
+    // F0 uses 0 integer bits, range [-1, 1].
+    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+    for (int i = 0; i < flat_size; ++i)
+    {
+      F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
+      F0 scaled_input =
+        F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
+      F0 result = SaturatingSub(scaled_input, input_ready_scaled);
+      const int16 raw_output = result.raw();
+      const int16 clamped_output =
+        std::min(output_activation_max, std::max(output_activation_min, raw_output));
+      output_data[i] = clamped_output;
+    }
+  }
+}
+
+template <typename Scalar>
+void Pack(const PackParams &params, const RuntimeShape *const *input_shapes,
+          const Scalar *const *input_data, const RuntimeShape &output_shape, Scalar *output_data)
+{
+  ruy::profiler::ScopeLabel label("Pack");
+  const int dimensions = output_shape.DimensionsCount();
+  int axis = params.axis;
+  int inputs_count = params.inputs_count;
+
+  int outer_size = 1;
+  for (int i = 0; i < axis; i++)
+  {
+    outer_size *= output_shape.Dims(i);
+  }
+  int copy_size = 1;
+  for (int i = params.axis + 1; i < dimensions; i++)
+  {
+    copy_size *= output_shape.Dims(i);
+  }
+  TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+  for (int i = 0; i < inputs_count; ++i)
+  {
+    for (int k = 0; k < outer_size; k++)
+    {
+      const Scalar *input_ptr = input_data[i] + copy_size * k;
+      int loc = k * inputs_count * copy_size + i * copy_size;
+      memcpy(output_data + loc, input_ptr, copy_size * sizeof(Scalar));
+    }
+  }
+}
+
+template <typename Scalar>
+void Unpack(const UnpackParams &params, const RuntimeShape &input_shape, const Scalar *input_data,
+            const RuntimeShape &output_shape, Scalar *const *output_datas)
+{
+  ruy::profiler::ScopeLabel label("Unpack");
+  const int dimensions = input_shape.DimensionsCount();
+  const int outputs_count = params.num_split;
+
+  int outer_size = 1;
+  int axis = params.axis;
+  if (axis < 0)
+  {
+    axis += dimensions;
+  }
+  TFLITE_DCHECK_GE(axis, 0);
+  TFLITE_DCHECK_LT(axis, dimensions);
+  for (int i = 0; i < axis; ++i)
+  {
+    outer_size *= input_shape.Dims(i);
+  }
+  int copy_size = 1;
+  for (int i = axis + 1; i < dimensions; ++i)
+  {
+    copy_size *= input_shape.Dims(i);
+  }
+  TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size);
+
+  for (int i = 0; i < outputs_count; ++i)
+  {
+    for (int k = 0; k < outer_size; k++)
+    {
+      Scalar *output_ptr = output_datas[i] + copy_size * k;
+      int loc = k * outputs_count * copy_size + i * copy_size;
+      memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
+    }
+  }
+}
+
+template <typename Scalar>
+void PackWithScaling(const PackParams &params, const RuntimeShape *const *input_shapes,
+                     const uint8 *const *input_data, const RuntimeShape &output_shape,
+                     uint8 *output_data)
+{
+  ruy::profiler::ScopeLabel label("PackWithScaling");
+  const int dimensions = output_shape.DimensionsCount();
+  int axis = params.axis;
+  const int32 *input_zeropoint = params.input_zeropoint;
+  const float *input_scale = params.input_scale;
+  int inputs_count = params.inputs_count;
+  const int32 output_zeropoint = params.output_zeropoint;
+  const float output_scale = params.output_scale;
+
+  int outer_size = 1;
+  for (int i = 0; i < axis; i++)
+  {
+    outer_size *= output_shape.Dims(i);
+  }
+  int copy_size = 1;
+  for (int i = axis + 1; i < dimensions; i++)
+  {
+    copy_size *= output_shape.Dims(i);
+  }
+  TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+  Scalar *output_ptr = output_data;
+  const float inverse_output_scale = 1.f / output_scale;
+  for (int k = 0; k < outer_size; k++)
+  {
+    for (int i = 0; i < inputs_count; ++i)
+    {
+      if (input_zeropoint[i] == output_zeropoint && input_scale[i] == output_scale)
+      {
+        memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar));
+      }
+      else
+      {
+        assert(false);
+        const float scale = input_scale[i] * inverse_output_scale;
+        const float bias = -input_zeropoint[i] * scale;
+        auto input_ptr = input_data[i];
+        for (int j = 0; j < copy_size; ++j)
+        {
+          const int value =
+            static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
+          output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
+        }
+      }
+      output_ptr += copy_size;
+    }
+  }
+}
+
+template <typename Scalar>
+void DepthConcatenation(const ConcatenationParams &params, const RuntimeShape *const *input_shapes,
+                        const Scalar *const *input_data, const RuntimeShape &output_shape,
+                        Scalar *output_data)
+{
+  ruy::profiler::ScopeLabel label("DepthConcatenation");
+  auto params_copy = params;
+  params_copy.axis = 3;
+  Concatenation(params_copy, input_shapes, input_data, output_shape, output_data);
+}
+
+inline void LstmCell(const LstmCellParams &params, const RuntimeShape &unextended_input_shape,
+                     const float *input_data, const RuntimeShape &unextended_prev_activ_shape,
+                     const float *prev_activ_data, const RuntimeShape &weights_shape,
+                     const float *weights_data, const RuntimeShape &unextended_bias_shape,
+                     const float *bias_data, const RuntimeShape &unextended_prev_state_shape,
+                     const float *prev_state_data,
+                     const RuntimeShape &unextended_output_state_shape, float *output_state_data,
+                     const RuntimeShape &unextended_output_activ_shape, float *output_activ_data,
+                     const RuntimeShape &unextended_concat_temp_shape, float *concat_temp_data,
+                     const RuntimeShape &unextended_activ_temp_shape, float *activ_temp_data)
+{
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+  const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+  const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+  const RuntimeShape output_state_shape =
+    RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+  const RuntimeShape output_activ_shape =
+    RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+  const RuntimeShape concat_temp_shape =
+    RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+  const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  const int batches = MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0,
+                                  output_state_shape, 0, output_activ_shape, 0);
+  const int height = MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1,
+                                 output_state_shape, 1, output_activ_shape, 1);
+  const int width = MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2,
+                                output_state_shape, 2, output_activ_shape, 2);
+  const int input_depth = input_shape.Dims(3);
+  const int prev_activ_depth = prev_activ_shape.Dims(3);
+  const int total_input_depth = prev_activ_depth + input_depth;
+  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+  const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+  TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
+  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
+  const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+                                       3, output_activ_shape, 3);
+  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+
+  // Concatenate prev_activ and input data together
+  std::vector<float const *> concat_input_arrays_data;
+  std::vector<RuntimeShape const *> concat_input_arrays_shapes;
+  concat_input_arrays_data.push_back(input_data);
+  concat_input_arrays_data.push_back(prev_activ_data);
+  concat_input_arrays_shapes.push_back(&input_shape);
+  concat_input_arrays_shapes.push_back(&prev_activ_shape);
+  tflite::ConcatenationParams concat_params;
+  concat_params.axis = 3;
+  concat_params.inputs_count = concat_input_arrays_data.size();
+  Concatenation(concat_params, &(concat_input_arrays_shapes[0]), &(concat_input_arrays_data[0]),
+                concat_temp_shape, concat_temp_data);
+
+  // Fully connected
+  tflite::FullyConnectedParams fc_params;
+  fc_params.float_activation_min = std::numeric_limits<float>::lowest();
+  fc_params.float_activation_max = std::numeric_limits<float>::max();
+  FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape, weights_data,
+                 bias_shape, bias_data, activ_temp_shape, activ_temp_data);
+
+  // Memory state update (the LSTM "guts")
+  for (int b = 0; b < batches; ++b)
+  {
+    for (int w = 0; w < width; ++w)
+    {
+      for (int h = 0; h < height; ++h)
+      {
+        for (int c = 0; c < output_depth; ++c)
+        {
+          const float input_gate =
+            1.f /
+            (1.f +
+             std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 0 * output_depth + c)]));
+          const float new_input =
+            std::tanh(activ_temp_data[Offset(activ_temp_shape, b, h, w, 1 * output_depth + c)]);
+          const float forget_gate =
+            1.f /
+            (1.f +
+             std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 2 * output_depth + c)]));
+          const float output_gate =
+            1.f /
+            (1.f +
+             std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 3 * output_depth + c)]));
+          const float new_state =
+            input_gate * new_input +
+            forget_gate * prev_state_data[Offset(prev_state_shape, b, h, w, c)];
+          output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state;
+          output_activ_data[Offset(output_activ_shape, b, h, w, c)] =
+            output_gate * std::tanh(new_state);
+        }
+      }
+    }
+  }
+}
+
+// Quantized LSTM cell implementation.
+// The quantization of the input, output arrays is as follows:
+//  - The input activations are quantized as uint8 on the interval
+//    [-1, 127/128].
+//    The rationale for that is that this is the natural interval for output
+//    activations (see next point), and these need to be concatenated together.
+//    We could accommodate different ranges by re-scaling, but we empirically
+//    found that setting the input activations range to be [-1, 127/128] in the
+//    first place, removing the need for re-scaling, greatly improves accuracy.
+//  - The output activations are quantized as uint8 on the interval
+//    [-1, 127/128].
+//    The rationale for that is that the definition of an LSTM cell makes them
+//    intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128]
+//    makes for simpler, more accurate fixed-point arithmetic.
+//  - The output-at-previous-timestep state array is obviously quantized as
+//    the output activations.
+//  - The internal LSTM memory (not the output-at-previous-timestep, the other
+//    internal state array) is int16-quantized and may use any power-of-two,
+//    symmetric range i.e. [-2^N, 2^N * 32767/32768] for any N, which we call
+//    StateIntegerBits below, see the below discussion of that template
+//    parameter ("The StateIntegerBits template parameter").
+//  - The output of the internal fully-connected node is int16-quantized
+//    on the interval [-8, 8 * 32767/32768], the rationale for which is
+//    explained just below ("Why [-8, 8] for fully-connected output?").
+//
+//
+// === The StateIntegerBits template parameter ===
+//
+// The StateIntegerBits template parameter controls the fixed-point format used
+// to represent the internal memory of the LSTM cell (not the
+// output-at-previous-timestep, the other internal state array). It's currently
+// a template parameter so that the model can control that. The most typical
+// value for StateIntegerBits is 4. Other plausible values are anywhere between
+// 3 and 5. We might eventually standardize on a single supported value, e.g. 4,
+// and drop that template parameter. The reason why it can't be a runtime
+// parameter is that this controls the fixed-point format used, i.e. we need to
+// generate actually different code based on it. In particular, we generate code
+// for a fixed-point tanh() implementation for that format, which internally
+// uses a fixed-point exp() implementation, which internally uses a
+// barrel-shifter with a number of steps that depends on StateIntegerBits.
+// Another consequence of that is that a higher value of StateIntegerBits
+// results in a more expensive implementation (more barrel shifter steps
+// needed).
+//
+//
+// === Why [-8, 8] for fully-connected output? ===
+//
+// This array is only fed to Logistic and Tanh functions, for which
+// the quantized implementation will want to use fixed-point arithmetic,
+// requiring a power-of-two representation interval. Thus, we should right
+// away quantize this array to a power-of-two interval; otherwise,
+// implementation will need to rescale that, losing any benefit that a tighter
+// representation interval might otherwise yield, while introducing some
+// numerical error and computational overhead.
+//
+// Now, Logistic and Tanh
+// are nearly constant (nearly equal to their horizontal asymptotes)
+// outside of a small bounded interval around 0:
+//
+//   Logistic(4) = 1 - 1.8e-2     Tanh(4) = 1 - 6.7e-4
+//   Logistic(8) = 1 - 3.4e-4     Tanh(8) = 1 - 2.3e-7
+//   Logistic(16) = 1 - 1.1e-7    Tanh(16) = 1 - 2.5e-14
+//
+// From this, we see that clamping to [-4, 4] would be too inaccurate
+// (the error of 1.8e-2 on Logistic would be felt even in 8bit precision)
+// while clamping to [-16, 16] would make no difference even in float32.
+// However, for a fixed-point implementation in 16-bit integers, using 5
+// integer bits to represent the [-16, 16] range would leave only 11
+// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive
+// representable values. Notice that this is higher than the
+// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic.
+// Using [-8, 8] thus seems like the better compromise overall, enjoying
+// an increment of 2.4e-4 between representable values and a worst-case
+// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with
+// [-16, 16].
+//
+// Moreover, all other things being equal, it is nice to choose the narrower
+// representation range, as that makes the implementation of fixed-point
+// math functions a little cheaper (each integer bit requires an additional
+// barrel-shifter step in the implementation of exp(-x)). That is further
+// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make
+// sense for 32-bit float or 32-bit fixed-point quantization, but we are
+// aiming for 16-bit fixed-point quantization of these internal nodes here.
+//
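+// As a numeric illustration of the F3 format (3 integer bits, 12 fractional
+// bits) discussed above: the raw int16 value 4096 (== 2^12) represents 1.0 and
+// the smallest increment is 2^-12 ~= 2.44e-4, which is the "increment of 2.4e-4
+// between representable values" quoted in the comparison against [-16, 16].
+//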
+template <int StateIntegerBits>
+inline void
+LstmCell(const LstmCellParams &params, const RuntimeShape &unextended_input_shape,
+         const uint8 *input_data_uint8, const RuntimeShape &unextended_prev_activ_shape,
+         const uint8 *prev_activ_data_uint8, const RuntimeShape &weights_shape,
+         const uint8 *weights_data_uint8, const RuntimeShape &unextended_bias_shape,
+         const int32 *bias_data_int32, const RuntimeShape &unextended_prev_state_shape,
+         const int16 *prev_state_data_int16, const RuntimeShape &unextended_output_state_shape,
+         int16 *output_state_data_int16, const RuntimeShape &unextended_output_activ_shape,
+         uint8 *output_activ_data_uint8, const RuntimeShape &unextended_concat_temp_shape,
+         uint8 *concat_temp_data_uint8, const RuntimeShape &unextended_activ_temp_shape,
+         int16 *activ_temp_data_int16, void *gemmlowp_context)
+{
+  (void)gemmlowp_context; // only used in optimized code.
+  int32 weights_zero_point = params.weights_zero_point;
+  int32 accum_multiplier = params.accum_multiplier;
+  int accum_shift = params.accum_shift;
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+  const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+  const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+  const RuntimeShape output_state_shape =
+    RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+  const RuntimeShape output_activ_shape =
+    RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+  const RuntimeShape concat_temp_shape =
+    RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+  const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+  // Gather dimensions information, and perform consistency checks.
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  const int outer_size = MatchingFlatSizeSkipDim(input_shape, 3, prev_activ_shape, prev_state_shape,
+                                                 output_state_shape, output_activ_shape);
+  const int input_depth = input_shape.Dims(3);
+  const int prev_activ_depth = prev_activ_shape.Dims(3);
+  const int total_input_depth = prev_activ_depth + input_depth;
+  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
+  const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+  TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
+  const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+                                       3, output_activ_shape, 3);
+  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+  const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3);
+  const int fc_output_depth =
+    MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3);
+  const int fc_accum_depth = total_input_depth;
+  TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth);
+
+  // Depth-concatenate prev_activ and input data together.
+  uint8 const *concat_input_arrays_data[2] = {input_data_uint8, prev_activ_data_uint8};
+  const RuntimeShape *concat_input_arrays_shapes[2] = {&input_shape, &prev_activ_shape};
+  tflite::ConcatenationParams concat_params;
+  concat_params.axis = 3;
+  concat_params.inputs_count = 2;
+  Concatenation(concat_params, concat_input_arrays_shapes, concat_input_arrays_data,
+                concat_temp_shape, concat_temp_data_uint8);
+
+  // Implementation of the fully connected node inside the LSTM cell.
+  // The operands are 8-bit integers, the accumulators are internally 32bit
+  // integers, and the output is 16-bit fixed-point with 3 integer bits so
+  // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that
+  // is explained in the function comment above.
+  for (int b = 0; b < fc_batches; ++b)
+  {
+    for (int out_c = 0; out_c < fc_output_depth; ++out_c)
+    {
+      // Internal accumulation.
+      // Initialize accumulator with the bias-value.
+      int32 accum = bias_data_int32[out_c];
+      // Accumulation loop.
+      for (int d = 0; d < fc_accum_depth; ++d)
+      {
+        int16 input_val = concat_temp_data_uint8[b * fc_accum_depth + d] - 128;
+        int16 weights_val = weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point;
+        accum += input_val * weights_val;
+      }
+      // Down-scale the final int32 accumulator to the scale used by our
+      // (16-bit, using 3 integer bits) fixed-point format. The quantized
+      // multiplier and shift here have been pre-computed offline
+      // (e.g. by toco).
+      accum = MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift);
+      // Saturate, cast to int16, and store to the temporary activations array.
+      accum = std::max(-32768, std::min(32767, static_cast<int>(accum)));
+      activ_temp_data_int16[out_c + fc_output_depth * b] = accum;
+    }
+  }
+
+  // Rest of the LSTM cell: tanh and logistic math functions, and some adds
+  // and muls, all done in 16-bit fixed-point.
+  for (int b = 0; b < outer_size; ++b)
+  {
+    for (int c = 0; c < output_depth; ++c)
+    {
+      // Define the fixed-point data types that we will use here. All use
+      // int16 as the underlying integer type i.e. all are 16-bit fixed-point.
+      // They only differ by the number of integral vs. fractional bits,
+      // determining the range of values that they can represent.
+      //
+      // F0 uses 0 integer bits, range [-1, 1].
+      // This is the return type of math functions such as tanh, logistic,
+      // whose range is in [-1, 1].
+      using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+      // F3 uses 3 integer bits, range [-8, 8].
+      // This is the range of the previous fully-connected node's output,
+      // which is our input here.
+      using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
+      // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits,
+      // 2^StateIntegerBits]. It's used to represent the internal state, whose
+      // number of integer bits is currently dictated by the model. See comment
+      // on the StateIntegerBits template parameter above.
+      using FS = gemmlowp::FixedPoint<std::int16_t, StateIntegerBits>;
+      // Implementation of input gate, using fixed-point logistic function.
+      F3 input_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]);
+      F0 input_gate_output = gemmlowp::logistic(input_gate_input);
+      // Implementation of input modulation gate, using fixed-point tanh
+      // function.
+      F3 input_modulation_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]);
+      F0 input_modulation_gate_output = gemmlowp::tanh(input_modulation_gate_input);
+      // Implementation of forget gate, using fixed-point logistic function.
+      F3 forget_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]);
+      F0 forget_gate_output = gemmlowp::logistic(forget_gate_input);
+      // Implementation of output gate, using fixed-point logistic function.
+      F3 output_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]);
+      F0 output_gate_output = gemmlowp::logistic(output_gate_input);
+      // Implementation of internal multiplication nodes, still in fixed-point.
+      F0 input_times_input_modulation = input_gate_output * input_modulation_gate_output;
+      FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]);
+      FS prev_state_times_forget_state = forget_gate_output * prev_state;
+      // Implementation of internal addition node, saturating.
+      FS new_state =
+        gemmlowp::SaturatingAdd(gemmlowp::Rescale<StateIntegerBits>(input_times_input_modulation),
+                                prev_state_times_forget_state);
+      // Implementation of last internal Tanh node, still in fixed-point.
+      // Since a Tanh fixed-point implementation is specialized for a given
+      // number of integer bits, and each specialization can have a substantial
+      // code size, and we already used above a Tanh on an input with 3 integer
+      // bits, and per the table in the above function comment there is no
+      // significant accuracy to be lost by clamping to [-8, +8] for a
+      // 3-integer-bits representation, let us just do that. This helps people
+      // porting this to targets where code footprint must be minimized.
+      F3 new_state_f3 = gemmlowp::Rescale<3>(new_state);
+      F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3);
+      // Store the new internal state back to memory, as 16-bit integers.
+      // Note: here we store the original value with StateIntegerBits, not
+      // the rescaled 3-integer-bits value fed to tanh.
+      output_state_data_int16[b * output_depth + c] = new_state.raw();
+      // Down-scale the output activations to 8-bit integers, saturating,
+      // and store back to memory.
+      int16 rescaled_output_activ = gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8);
+      int16 clamped_output_activ =
+        std::max<int16>(-128, std::min<int16>(127, rescaled_output_activ));
+      output_activ_data_uint8[b * output_depth + c] = 128 + clamped_output_activ;
+    }
+  }
+}
+
+template <typename Scalar>
+void Split(const SplitParams &params, const RuntimeShape &input_shape, const Scalar *input_data,
+           const RuntimeShape *const *output_shapes, Scalar *const *output_data)
+{
+  ruy::profiler::ScopeLabel label("Split");
+  const int split_dimensions = input_shape.DimensionsCount();
+  int axis = params.axis < 0 ? params.axis + split_dimensions : params.axis;
+  int outputs_count = params.num_split;
+  TFLITE_DCHECK_LT(axis, split_dimensions);
+
+  int64_t split_size = 0;
+  for (int i = 0; i < outputs_count; i++)
+  {
+    TFLITE_DCHECK_EQ(output_shapes[i]->DimensionsCount(), split_dimensions);
+    for (int j = 0; j < split_dimensions; j++)
+    {
+      if (j != axis)
+      {
+        MatchingDim(*output_shapes[i], j, input_shape, j);
+      }
+    }
+    split_size += output_shapes[i]->Dims(axis);
+  }
+  TFLITE_DCHECK_EQ(split_size, input_shape.Dims(axis));
+  int64_t outer_size = 1;
+  for (int i = 0; i < axis; ++i)
+  {
+    outer_size *= input_shape.Dims(i);
+  }
+  // For all output arrays,
+  // FlatSize() = outer_size * Dims(axis) * base_inner_size;
+  int64_t base_inner_size = 1;
+  for (int i = axis + 1; i < split_dimensions; ++i)
+  {
+    base_inner_size *= input_shape.Dims(i);
+  }
+
+  const Scalar *input_ptr = input_data;
+  for (int k = 0; k < outer_size; k++)
+  {
+    for (int i = 0; i < outputs_count; ++i)
+    {
+      const int copy_size = output_shapes[i]->Dims(axis) * base_inner_size;
+      memcpy(output_data[i] + k * copy_size, input_ptr, copy_size * sizeof(Scalar));
+      input_ptr += copy_size;
+    }
+  }
+}
+
+inline int NodeOffset(int b, int h, int w, int height, int width)
+{
+  return (b * height + h) * width + w;
+}
+
+inline void LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params,
+                                       const RuntimeShape &input_shape, const float *input_data,
+                                       const RuntimeShape &output_shape, float *output_data)
+{
+  const int trailing_dim = input_shape.DimensionsCount() - 1;
+  const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+  const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+
+  for (int i = 0; i < outer_size; ++i)
+  {
+    for (int c = 0; c < depth; ++c)
+    {
+      const int begin_input_c = std::max(0, static_cast<int>(c - op_params.range));
+      const int end_input_c = std::min(depth, static_cast<int>(c + op_params.range));
+      float accum = 0.f;
+      for (int input_c = begin_input_c; input_c < end_input_c; ++input_c)
+      {
+        const float input_val = input_data[i * depth + input_c];
+        accum += input_val * input_val;
+      }
+      const float multiplier = std::pow(op_params.bias + op_params.alpha * accum, -op_params.beta);
+      output_data[i * depth + c] = input_data[i * depth + c] * multiplier;
+    }
+  }
+}
+
+inline void Dequantize(const RuntimeShape &input_shape, const Eigen::half *input_data,
+                       const RuntimeShape &output_shape, float *output_data)
+{
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  for (int i = 0; i < flat_size; i++)
+  {
+    output_data[i] = static_cast<float>(input_data[i]);
+  }
+}
+
+inline void FakeQuant(const tflite::FakeQuantParams &op_params, const RuntimeShape &input_shape,
+                      const float *input_data, const RuntimeShape &output_shape, float *output_data)
+{
+  ruy::profiler::ScopeLabel label("FakeQuant");
+  float rmin = op_params.minmax.min;
+  float rmax = op_params.minmax.max;
+  int num_bits = op_params.num_bits;
+  // 0 should always be a representable value. Let's assume that the initial
+  // min,max range contains 0.
+  TFLITE_DCHECK_LE(rmin, 0.0f);
+  TFLITE_DCHECK_GE(rmax, 0.0f);
+  TFLITE_DCHECK_LT(rmin, rmax);
+
+  // Code matches tensorflow's FakeQuantWithMinMaxArgsFunctor.
+  int quant_min = 0;
+  int quant_max = (1 << num_bits) - 1;
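+  // Worked example (illustrative numbers): with num_bits = 8, rmin = -0.1 and
+  // rmax = 0.9, the scale is 1.0 / 255 ~= 0.00392; the raw zero point 25.5 is
+  // nudged to 26, giving nudged_min ~= -0.102 and nudged_max ~= 0.898, so that
+  // 0 stays exactly representable.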
+  float nudged_min, nudged_max, nudged_scale;
+  NudgeQuantizationRange(rmin, rmax, quant_min, quant_max, &nudged_min, &nudged_max, &nudged_scale);
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  FakeQuantizeArray(nudged_scale, nudged_min, nudged_max, input_data, output_data, flat_size);
+}
+
+// Common subroutine for both `GatherNd` and `GatherNdString`.
+struct GatherNdHelperResult
+{
+  int n_slices;
+  int slice_size;
+  int indices_nd;
+  std::vector<int> dims_to_count;
+};
+
+// Returns common values being used on both `GatherNd` and `GatherNdString`.
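+// For example (illustrative shapes): with params_shape = [4, 3, 2] and
+// indices_shape = [5, 2], this yields indices_nd = 2, n_slices = 5,
+// slice_size = 2 and dims_to_count = {6, 2}.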
+inline GatherNdHelperResult GatherNdHelper(const RuntimeShape &params_shape,
+                                           const RuntimeShape &indices_shape)
+{
+  GatherNdHelperResult ret;
+  ret.n_slices = 1;
+  ret.slice_size = 1;
+  const int indices_dims = indices_shape.DimensionsCount();
+  ret.indices_nd = indices_shape.Dims(indices_dims - 1);
+  const int params_dims = params_shape.DimensionsCount();
+  for (int i = 0; i < indices_dims - 1; ++i)
+  {
+    ret.n_slices *= indices_shape.Dims(i);
+  }
+  for (int i = ret.indices_nd; i < params_dims; ++i)
+  {
+    ret.slice_size *= params_shape.Dims(i);
+  }
+
+  int remain_flat_size = params_shape.FlatSize();
+  ret.dims_to_count = std::vector<int>(ret.indices_nd, 0);
+  for (int i = 0; i < ret.indices_nd; ++i)
+  {
+    ret.dims_to_count[i] = remain_flat_size / params_shape.Dims(i);
+    remain_flat_size = ret.dims_to_count[i];
+  }
+
+  return ret;
+}
+
+template <typename ParamsT, typename IndicesT = int32>
+inline void GatherNd(const RuntimeShape &params_shape, const ParamsT *params_data,
+                     const RuntimeShape &indices_shape, const IndicesT *indices_data,
+                     const RuntimeShape &output_shape, ParamsT *output_data)
+{
+  ruy::profiler::ScopeLabel label("GatherNd");
+
+  const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
+  for (int i = 0; i < res.n_slices; ++i)
+  {
+    int from_pos = 0;
+    for (int j = 0; j < res.indices_nd; ++j)
+    {
+      from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j];
+    }
+    std::memcpy(output_data + i * res.slice_size, params_data + from_pos,
+                sizeof(ParamsT) * res.slice_size);
+  }
+}
+
+#ifndef TF_LITE_STATIC_MEMORY
+template <typename IndicesT = int32>
+inline void GatherNdString(const RuntimeShape &params_shape, const TfLiteTensor *params_data,
+                           const RuntimeShape &indices_shape, const IndicesT *indices_data,
+                           const RuntimeShape &output_shape, TfLiteTensor *output_data)
+{
+  ruy::profiler::ScopeLabel label("GatherNdString");
+
+  const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
+  DynamicBuffer buffer;
+  for (int i = 0; i < res.n_slices; ++i)
+  {
+    int from_pos = 0;
+    for (int j = 0; j < res.indices_nd; ++j)
+    {
+      from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j];
+    }
+    for (int j = 0; j < res.slice_size; ++j)
+    {
+      buffer.AddString(GetString(params_data, from_pos + j));
+    }
+  }
+  buffer.WriteToTensor(output_data, /*new_shape=*/nullptr);
+}
+#endif
+
+template <typename IndicesT, typename UpdatesT>
+inline void ScatterNd(const RuntimeShape &indices_shape, const IndicesT *indices_data,
+                      const RuntimeShape &updates_shape, const UpdatesT *updates_data,
+                      const RuntimeShape &output_shape, UpdatesT *output_data)
+{
+  ruy::profiler::ScopeLabel label("ScatterNd");
+
+  int n_slices = 1;
+  int slice_size = 1;
+  const int outer_dims = indices_shape.DimensionsCount() - 1;
+  const int indices_nd = indices_shape.Dims(outer_dims);
+  const int updates_dims = updates_shape.DimensionsCount();
+  for (int i = 0; i < outer_dims; ++i)
+  {
+    n_slices *= indices_shape.Dims(i);
+  }
+  for (int i = outer_dims; i < updates_dims; ++i)
+  {
+    slice_size *= updates_shape.Dims(i);
+  }
+
+  int output_flat_size = output_shape.FlatSize();
+  int remain_flat_size = output_flat_size;
+  std::vector<int> dims_to_count(indices_nd, 0);
+  for (int i = 0; i < indices_nd; ++i)
+  {
+    dims_to_count[i] = remain_flat_size / output_shape.Dims(i);
+    remain_flat_size = dims_to_count[i];
+  }
+
+  memset(output_data, 0, sizeof(UpdatesT) * output_flat_size);
+  for (int i = 0; i < n_slices; ++i)
+  {
+    int to_pos = 0;
+    for (int j = 0; j < indices_nd; ++j)
+    {
+      IndicesT idx = indices_data[i * indices_nd + j];
+      TFLITE_DCHECK(0 <= idx && idx < output_shape.Dims(j));
+      to_pos += idx * dims_to_count[j];
+    }
+    for (int j = 0; j < slice_size; j++)
+    {
+      output_data[to_pos + j] += updates_data[i * slice_size + j];
+    }
+  }
+}
+
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+                  const RuntimeShape &output_shape, SequentialTensorWriter<T> *writer)
+{
+  const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape);
+  TFLITE_DCHECK_LE(op_params.begin_count, 5);
+  TFLITE_DCHECK_LE(op_params.size_count, 5);
+  const int begin_count = op_params.begin_count;
+  const int size_count = op_params.size_count;
+  // We front-pad the begin and size vectors.
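+  // For example (illustrative values): a rank-3 input extended to rank 5 with
+  // begin = {1, 0, 2} and size = {2, -1, 3} gives start = {0, 0, 1, 0, 2} and
+  // stop = {1, 1, 3, ext_shape.Dims(3), 5}; a size of -1 keeps the full extent
+  // of that dimension.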
+  std::array<int, 5> start;
+  std::array<int, 5> stop;
+  for (int i = 0; i < 5; ++i)
+  {
+    int padded_i = 5 - i;
+    start[i] = begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i];
+    stop[i] = (size_count < padded_i || op_params.size[size_count - padded_i] == -1)
+                ? ext_shape.Dims(i)
+                : start[i] + op_params.size[size_count - padded_i];
+  }
+
+  for (int i0 = start[0]; i0 < stop[0]; ++i0)
+  {
+    for (int i1 = start[1]; i1 < stop[1]; ++i1)
+    {
+      for (int i2 = start[2]; i2 < stop[2]; ++i2)
+      {
+        for (int i3 = start[3]; i3 < stop[3]; ++i3)
+        {
+          for (int i4 = start[4]; i4 < stop[4]; ++i4)
+          {
+            writer->Write(Offset(ext_shape, i0, i1, i2, i3, i4));
+          }
+        }
+      }
+    }
+  }
+}
+
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+                  const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+  SequentialTensorWriter<T> writer(input_data, output_data);
+  return Slice(op_params, input_shape, output_shape, &writer);
+}
+
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+                  const TfLiteTensor *input, const RuntimeShape &output_shape, TfLiteTensor *output)
+{
+  SequentialTensorWriter<T> writer(input, output);
+  return Slice(op_params, input_shape, output_shape, &writer);
+}
+
+template <typename T>
+void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data,
+             const RuntimeShape &output_shape, T *output_data)
+{
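+  // Note: only the first element of input2_data is read; it acts as a scalar
+  // threshold applied element-wise against input1.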
+  const int flat_size = MatchingFlatSize(input1_shape, output_shape);
+
+  auto min_value = input2_data[0];
+  for (int i = 0; i < flat_size; i++)
+  {
+    output_data[i] = input1_data[i] > min_value ? min_value : input1_data[i];
+  }
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T>
+inline void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &,
+                    const T *input2_data, const RuntimeShape &output_shape, T *output_data)
+{
+  // Drop shape of second input: not needed.
+  Minimum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+template <typename T>
+void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data,
+             const RuntimeShape &output_shape, T *output_data)
+{
+  const int flat_size = MatchingFlatSize(input1_shape, output_shape);
+
+  auto max_value = input2_data[0];
+  for (int i = 0; i < flat_size; i++)
+  {
+    output_data[i] = input1_data[i] < max_value ? max_value : input1_data[i];
+  }
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T>
+inline void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &,
+                    const T *input2_data, const RuntimeShape &output_shape, T *output_data)
+{
+  // Drop shape of second input: not needed.
+  Maximum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+template <typename T1, typename T2, typename T3>
+void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data,
+            const RuntimeShape &output_shape, T2 *output_data)
+{
+  ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data, std::greater<T1>());
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T1, typename T2, typename T3>
+inline void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data,
+                   const RuntimeShape &input2_shape, const T3 *input2_data,
+                   const RuntimeShape &output_shape, T2 *output_data)
+{
+  // Drop shape of second input: not needed.
+  ArgMax(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+template <typename D, typename T>
+void Select(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+            const RuntimeShape &input_x_shape, const T *input_x_data,
+            const RuntimeShape &input_y_shape, const T *input_y_data,
+            const RuntimeShape &output_shape, T *output_data)
+{
+  int64_t flatsize;
+  // Allow the select operator to execute on a mix of scalar tensors and
+  // one-element tensors.
+  if (input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 &&
+      input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1)
+  {
+    flatsize = 1;
+  }
+  else
+  {
+    flatsize = MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape);
+  }
+  for (int64_t i = 0; i < flatsize; ++i)
+  {
+    output_data[i] = input_condition_data[i] ? input_x_data[i] : input_y_data[i];
+  }
+}
+
+template <typename D, typename T>
+void RankOneSelect(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+                   const RuntimeShape &input_x_shape, const T *input_x_data,
+                   const RuntimeShape &input_y_shape, const T *input_y_data,
+                   const RuntimeShape &output_shape, T *output_data)
+{
+  const int64_t outer_size = input_condition_shape.FlatSize();
+  int64_t inner_size;
+  if (input_condition_shape.DimensionsCount() == 0)
+  {
+    inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape);
+  }
+  else
+  {
+    TFLITE_DCHECK_EQ(MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0), outer_size);
+    inner_size = MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape);
+  }
+
+  int64_t offset = 0;
+  for (int64_t i = 0; i < outer_size; i++)
+  {
+    const T *input_data = input_condition_data[i] ? input_x_data : input_y_data;
+    memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T));
+    offset += inner_size;
+  }
+}
+
+template <typename D, typename T>
+void BroadcastSelect4DSlow(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+                           const RuntimeShape &input_x_shape, const T *input_x_data,
+                           const RuntimeShape &input_y_shape, const T *input_y_data,
+                           const RuntimeShape &output_shape, T *output_data)
+{
+  TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4);
+
+  const RuntimeShape extended_output_shape = RuntimeShape::ExtendedShape(4, output_shape);
+
+  NdArrayDesc<4> desc_condition;
+  NdArrayDesc<4> desc_x;
+  NdArrayDesc<4> desc_y;
+  NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape, input_y_shape,
+                                      &desc_condition, &desc_x, &desc_y);
+
+  // In Tensorflow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), with the
+  // trailing dimension changing most rapidly (channels has the smallest
+  // stride, typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed. The
+  // first dimension has smallest stride.
+  //
+  // We name our variables by their Tensorflow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride for
+  // the best cache behavior.
+  for (int b = 0; b < extended_output_shape.Dims(0); ++b)
+  {
+    for (int y = 0; y < extended_output_shape.Dims(1); ++y)
+    {
+      for (int x = 0; x < extended_output_shape.Dims(2); ++x)
+      {
+        for (int c = 0; c < extended_output_shape.Dims(3); ++c)
+        {
+          const int condition_index = SubscriptToIndex(desc_condition, b, y, x, c);
+          const int x_index = SubscriptToIndex(desc_x, b, y, x, c);
+          const int y_index = SubscriptToIndex(desc_y, b, y, x, c);
+          output_data[Offset(extended_output_shape, b, y, x, c)] =
+            input_condition_data[condition_index] ? input_x_data[x_index] : input_y_data[y_index];
+        }
+      }
+    }
+  }
+}
+
+template <typename D, typename T>
+void SelectTrueCoords(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+                      T *output_data)
+{
+  const size_t size = input_condition_shape.FlatSize();
+  if (size == 0)
+  {
+    // The condition tensor is empty, so there is nothing to output.
+    return;
+  }
+  const size_t cond_rank = input_condition_shape.DimensionsCount();
+
+  std::vector<int> dims_to_count(cond_rank, 0);
+  int cur_flat_size = size;
+  for (int i = 0; i < cond_rank; ++i)
+  {
+    dims_to_count[i] = cur_flat_size / input_condition_shape.Dims(i);
+    cur_flat_size = dims_to_count[i];
+  }
+
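+  // For example (illustration only): for a condition of shape [2, 3, 4],
+  // dims_to_count is {12, 4, 1}, so flat index 17 decodes to the coordinate
+  // (1, 1, 1) in the loop below.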
+  int output_index = 0;
+  for (int i = 0; i < size; ++i)
+  {
+    if (input_condition_data[i])
+    {
+      // Insert the coordinate of the current item (row major) into output.
+      int flat_index = i;
+      for (int j = 0; j < cond_rank; ++j)
+      {
+        int coord_j = flat_index / dims_to_count[j];
+        output_data[output_index * cond_rank + j] = coord_j;
+        flat_index %= dims_to_count[j];
+      }
+      output_index++;
+    }
+  }
+}
+
+// For ease of implementation, the indices are always given as a vector of size-4 vectors.
+template <typename T, typename TI>
+inline void SparseToDense(const std::vector<std::vector<TI>> &indices, const T *values,
+                          T default_value, bool value_is_scalar,
+                          const RuntimeShape &unextended_output_shape, T *output_data)
+{
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
+  const int value_count = indices.size();
+
+  // First fill the output_data with default value.
+  const int num_elements = output_shape.FlatSize();
+  for (int i = 0; i < num_elements; ++i)
+  {
+    output_data[i] = default_value;
+  }
+
+  // Special handling for the scalar-value case, to avoid checking the boolean
+  // condition inside the loop on every iteration.
+  if (value_is_scalar)
+  {
+    for (int i = 0; i < value_count; ++i)
+    {
+      const std::vector<TI> &index = indices[i];
+      TFLITE_DCHECK_EQ(index.size(), 4);
+      const T value = *values; // just use the first value.
+      output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value;
+    }
+    return;
+  }
+
+  // Go through the values and indices to fill the sparse values.
+  for (int i = 0; i < value_count; ++i)
+  {
+    const std::vector<TI> &index = indices[i];
+    TFLITE_DCHECK_EQ(index.size(), 4);
+    const T value = values[i];
+    output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value;
+  }
+}
+
+template <typename T>
+inline void Pow(const RuntimeShape &input1_shape, const T *input1_data,
+                const RuntimeShape &input2_shape, const T *input2_data,
+                const RuntimeShape &output_shape, T *output_data)
+{
+  const int flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i)
+  {
+    output_data[i] = std::pow(input1_data[i], input2_data[i]);
+  }
+}
+
+template <typename T>
+inline void BroadcastPow4DSlow(const RuntimeShape &unextended_input1_shape, const T *input1_data,
+                               const RuntimeShape &unextended_input2_shape, const T *input2_data,
+                               const RuntimeShape &unextended_output_shape, T *output_data)
+{
+  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+  NdArrayDesc<4> desc1;
+  NdArrayDesc<4> desc2;
+  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1,
+                                      &desc2);
+
+  for (int b = 0; b < output_shape.Dims(0); ++b)
+  {
+    for (int y = 0; y < output_shape.Dims(1); ++y)
+    {
+      for (int x = 0; x < output_shape.Dims(2); ++x)
+      {
+        for (int c = 0; c < output_shape.Dims(3); ++c)
+        {
+          auto out_idx = Offset(output_shape, b, y, x, c);
+          auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
+          auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
+          auto in1_val = input1_data[in1_idx];
+          auto in2_val = input2_data[in2_idx];
+          output_data[out_idx] = std::pow(in1_val, in2_val);
+        }
+      }
+    }
+  }
+}
+
+template <typename Scalar>
+void Reverse(int axis, const RuntimeShape &input_shape, const Scalar *input_data,
+             const RuntimeShape &output_shape, Scalar *output_data)
+{
+  ruy::profiler::ScopeLabel label("Reverse");
+
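+  // For example (illustrative shape): reversing axis 1 of a [2, 3, 4] tensor gives
+  // outer_size = 2, dims_at_axis = 3 and copy_size = 4, so each run of 4 contiguous
+  // elements is copied to its mirrored position along axis 1.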
+  int outer_size = 1;
+  for (int i = 0; i < axis; ++i)
+  {
+    outer_size *= input_shape.Dims(i);
+  }
+
+  int copy_size = 1;
+  for (int i = axis + 1; i < input_shape.DimensionsCount(); ++i)
+  {
+    copy_size *= input_shape.Dims(i);
+  }
+
+  const int dims_at_axis = input_shape.Dims(axis);
+  for (int i = 0; i < outer_size; ++i)
+  {
+    for (int j = 0; j < dims_at_axis; ++j)
+    {
+      const int start_pos = (i * dims_at_axis + j) * copy_size;
+      Scalar *output_ptr = output_data + start_pos;
+      int loc = (i * dims_at_axis + dims_at_axis - j - 1) * copy_size;
+      memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
+    }
+  }
+}
+
+template <typename Scalar, typename TS>
+void ReverseSequence(const TS *seq_lengths, const int seq_dim, const int batch_dim,
+                     const RuntimeShape &input_shape, const Scalar *input_data,
+                     const RuntimeShape &output_shape, Scalar *output_data)
+{
+  ruy::profiler::ScopeLabel label("ReverseSequence");
+
+  int outer_size = 1;
+  int outer_dim = std::min(batch_dim, seq_dim);
+  int medium_dim = std::max(batch_dim, seq_dim);
+  for (int i = 0; i < outer_dim; ++i)
+  {
+    outer_size *= input_shape.Dims(i);
+  }
+
+  int medium_size = 1;
+  for (int i = outer_dim + 1; i < medium_dim; ++i)
+  {
+    medium_size *= input_shape.Dims(i);
+  }
+
+  int copy_size = 1;
+  for (int i = medium_dim + 1; i < input_shape.DimensionsCount(); ++i)
+  {
+    copy_size *= input_shape.Dims(i);
+  }
+
+  const int dims_at_outer_dim = input_shape.Dims(outer_dim);
+  const int dims_at_medium_dim = input_shape.Dims(medium_dim);
+
+  Scalar *output_ptr;
+  if (batch_dim > seq_dim)
+  {
+    for (int i = 0; i < outer_size; ++i)
+    {
+      for (int j = 0; j < dims_at_outer_dim; ++j)
+      {
+        const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+        for (int p = 0; p < medium_size; ++p)
+        {
+          for (int q = 0; q < dims_at_medium_dim; ++q)
+          {
+            const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+            const Scalar *in_ptr = input_data + in_pos;
+            int sl = seq_lengths[q] - 1;
+            if (j > sl)
+            {
+              output_ptr = output_data + in_pos;
+            }
+            else
+            {
+              const int out_pos_base = (i * dims_at_outer_dim + sl - j) * medium_size;
+              const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+              output_ptr = output_data + out_pos;
+            }
+            memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
+          }
+        }
+      }
+    }
+  }
+  else if (batch_dim < seq_dim)
+  {
+    for (int i = 0; i < outer_size; ++i)
+    {
+      for (int j = 0; j < dims_at_outer_dim; ++j)
+      {
+        const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+        int sl = seq_lengths[j] - 1;
+        const int out_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+        for (int p = 0; p < medium_size; ++p)
+        {
+          for (int q = 0; q < dims_at_medium_dim; ++q)
+          {
+            const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+            const Scalar *in_ptr = input_data + in_pos;
+            if (q > sl)
+            {
+              output_ptr = output_data + in_pos;
+            }
+            else
+            {
+              const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + sl - q) * copy_size;
+              output_ptr = output_data + out_pos;
+            }
+            memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
+          }
+        }
+      }
+    }
+  }
+}
+
+template <typename T>
+inline void SegmentSum(const RuntimeShape &input_shape, const T *input_data,
+                       const RuntimeShape &segment_ids_shape, const int32_t *segment_ids_data,
+                       const RuntimeShape &output_shape, T *output_data)
+{
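+  // For example (illustrative shapes): for an input of shape [4, 2] with
+  // segment_ids = {0, 0, 1, 1}, the output has shape [2, 2] and each output row
+  // is the sum of the two input rows assigned to that segment id.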
+  const int segment_flat_size = MatchingFlatSizeSkipDim(input_shape, 0, output_shape);
+
+  memset(output_data, 0, sizeof(T) * output_shape.FlatSize());
+
+  for (int i = 0; i < input_shape.Dims(0); i++)
+  {
+    int output_index = segment_ids_data[i];
+    for (int j = 0; j < segment_flat_size; ++j)
+    {
+      output_data[output_index * segment_flat_size + j] += input_data[i * segment_flat_size + j];
+    }
+  }
+}
+
+} // namespace reference_ops
+} // namespace tflite
+
+#endif // LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
diff --git a/onert-micro/luci-interpreter/pal/mcu/pal.cmake b/onert-micro/luci-interpreter/pal/mcu/pal.cmake
new file mode 100644 (file)
index 0000000..24e469f
--- /dev/null
@@ -0,0 +1,56 @@
+macro(initialize_pal)
+    nnas_find_package(TensorFlowSource EXACT 2.8.0 REQUIRED)
+    nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.8.0 REQUIRED)
+    nnas_find_package(TensorFlowEigenSource EXACT 2.8.0 REQUIRED)
+    nnas_find_package(TensorFlowRuySource EXACT 2.8.0 REQUIRED)
+
+    if (NOT TensorFlowSource_FOUND)
+        message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+        return()
+    endif ()
+
+    if (NOT TensorFlowGEMMLowpSource_FOUND)
+        message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+        return()
+    endif ()
+
+    if (NOT TensorFlowEigenSource_FOUND)
+        message(STATUS "Skipping luci-interpreter: Eigen not found")
+        return()
+    endif ()
+
+    if (NOT TensorFlowRuySource_FOUND)
+        message(STATUS "Skipping luci-interpreter: Ruy not found")
+        return()
+    endif ()
+    #find_package(Threads REQUIRED)
+
+    set(PAL_INITIALIZED TRUE)
+endmacro()
+
+macro(add_pal_to_target TGT)
+    target_include_directories(${TGT} PRIVATE "${PAL}")
+    target_include_directories(${TGT} PRIVATE
+            "${TensorFlowRuySource_DIR}"
+            "${TensorFlowGEMMLowpSource_DIR}"
+            "${TensorFlowEigenSource_DIR}"
+            "${TensorFlowSource_DIR}")
+    target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+
+    # TODO put it back, I changed my mind.
+    # instead add sources with visitors in this library
+    set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
+            ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+            ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
+    add_library(luci_interpreter_mcu_pal STATIC ${PAL_SOURCES})
+    set_target_properties(luci_interpreter_mcu_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
+    target_include_directories(luci_interpreter_mcu_pal PRIVATE
+            "${TensorFlowRuySource_DIR}"
+            "${TensorFlowGEMMLowpSource_DIR}"
+            "${TensorFlowEigenSource_DIR}"
+            "${TensorFlowSource_DIR}"
+    )
+
+    target_link_libraries(${TGT} PRIVATE luci_interpreter_mcu_pal)
+    #target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_mcu_pal)
+endmacro()
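+
+# Usage note (sketch): the interpreter's src/CMakeLists.txt is expected to include this
+# file, call initialize_pal() and return early unless PAL_INITIALIZED is set, and then
+# call add_pal_to_target(<target>) for each target that needs the TensorFlow, gemmlowp,
+# Eigen and Ruy headers.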
diff --git a/onert-micro/luci-interpreter/src/CMakeLists.txt b/onert-micro/luci-interpreter/src/CMakeLists.txt
new file mode 100644 (file)
index 0000000..75d6836
--- /dev/null
@@ -0,0 +1,45 @@
+include("${LUCI_INTERPRETER_PAL_DIR}/pal.cmake")
+
+initialize_pal()
+
+if (NOT PAL_INITIALIZED)
+  message("PAL Failed to initialize, skip luci-interpreter")
+  return()
+endif()
+
+message(STATUS "LUCI INTERPRETER BEGIN")
+
+set(LUCI_INTERPRETER_BINARY "luci_interpreter_micro${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_MEMORY_MANAGER "luci_interpreter_micro_memory_manager${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_CORE "luci_interpreter_core_micro${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_KERNELS "luci_interpreter_kernels_micro${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_LOADER "luci_interpreter_loader_micro${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_IMPORT "luci_interpreter_import_micro${LUCI_INTERPRETER_SUFFIX}")
+
+add_subdirectory(memory_managers)
+message(STATUS "LUCI INTERPRETER MEMORY MANAGER")
+add_subdirectory(core)
+message(STATUS "LUCI INTERPRETER CORE")
+add_subdirectory(kernels)
+message(STATUS "LUCI INTERPRETER KERNELS")
+add_subdirectory(loader)
+message(STATUS "LUCI INTERPRETER LOADER")
+
+target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC ${LUCI_INTERPRETER_KERNELS})
+target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC ${LUCI_INTERPRETER_KERNELS})
+
+message(STATUS "LUCI INTERPTER INITALIZED")
+
+set(SOURCES
+        "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/Interpreter.h" Interpreter.cpp)
+
+add_library(${LUCI_INTERPRETER_BINARY} STATIC ${SOURCES})
+
+target_include_directories(${LUCI_INTERPRETER_BINARY} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_include_directories(${LUCI_INTERPRETER_BINARY} PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_BINARY}
+        PUBLIC ${LUCI_INTERPRETER_MEMORY_MANAGER} ${LUCI_INTERPRETER_LOADER} ${LUCI_INTERPRETER_CORE})
+
+install(TARGETS ${LUCI_INTERPRETER_BINARY} DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+        FILES_MATCHING PATTERN "*.h")
diff --git a/onert-micro/luci-interpreter/src/Interpreter.cpp b/onert-micro/luci-interpreter/src/Interpreter.cpp
new file mode 100644 (file)
index 0000000..dccef82
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/Interpreter.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+
+#ifdef USE_STATIC_ALLOC
+// Construct static interpreter with configurations
+Interpreter::Interpreter(const char *model_data_raw, const InterpreterConfigure &configuration)
+{
+  _runtime_module = std::make_unique<RuntimeModule>();
+
+  // Note:
+  // configuration._input_buf_size, configuration._temp_buf_size, configuration._output_buf_size
+  // will be removed and will be read from circle file
+  if (configuration.isStaticManager())
+  {
+    _memory_manager = std::make_unique<StaticMemoryManager>(
+      configuration._input_buf_size, configuration._temp_buf_size, configuration._output_buf_size);
+  }
+  else
+  {
+    _memory_manager = std::make_unique<SimpleMemoryManager>();
+  }
+
+  _memory_manager->is_allocate_input(configuration.getAllocateInputValue());
+
+  ModuleLoader::load(_runtime_module.get(), _memory_manager.get(),
+                     /* is_static_allocations */ configuration.isStaticManager(), model_data_raw);
+}
+#else
+
+// Construct default interpreter with dynamic allocations and with input allocations
+Interpreter::Interpreter(const char *model_data_raw)
+{
+  ModuleLoader::load(&_runtime_module, &_memory_manager, model_data_raw);
+}
+
+#endif // USE_STATIC_ALLOC
+
+Interpreter::~Interpreter() = default;
+
+void Interpreter::interpret() { _runtime_module.execute(); }
+
+int32_t Interpreter::getInputDataSizeByIndex(int32_t input_tensor_index)
+{
+  auto *runtime_graph = _runtime_module.getMainGraph();
+
+  return runtime_graph->getInputDataSizeByIndex(input_tensor_index);
+}
+
+int32_t Interpreter::getOutputDataSizeByIndex(int32_t output_tensor_index)
+{
+  auto *runtime_graph = _runtime_module.getMainGraph();
+
+  return runtime_graph->getOutputDataSizeByIndex(output_tensor_index);
+}
+
+void Interpreter::allocateAndWriteInputTensor(int32_t input_tensor_index, const void *data,
+                                              size_t data_size)
+{
+  assert(data_size > 0);
+  assert(data != nullptr);
+  assert(input_tensor_index >= 0);
+  auto *runtime_graph = _runtime_module.getMainGraph();
+  auto tensor_data = runtime_graph->configureGraphInput(input_tensor_index);
+
+  std::memcpy(tensor_data, data, data_size);
+}
+
+uint8_t *Interpreter::allocateInputTensor(int32_t input_tensor_index)
+{
+  assert(input_tensor_index >= 0);
+
+  auto *runtime_graph = _runtime_module.getMainGraph();
+
+  return runtime_graph->configureGraphInput(input_tensor_index);
+}
+
+uint8_t *Interpreter::readOutputTensor(int32_t output_tensor_index)
+{
+  auto *runtime_graph = _runtime_module.getMainGraph();
+
+  return runtime_graph->getOutputDataByIndex(output_tensor_index);
+}
+
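+// Typical call sequence (illustrative sketch; `model_data`, `input_buffer` and
+// `input_size` are placeholders, not symbols defined in this file):
+//
+//   luci_interpreter::Interpreter interpreter(model_data);
+//   interpreter.allocateAndWriteInputTensor(0, input_buffer, input_size);
+//   interpreter.interpret();
+//   uint8_t *output = interpreter.readOutputTensor(0);
+//   int32_t output_size = interpreter.getOutputDataSizeByIndex(0);
+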
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/core/CMakeLists.txt b/onert-micro/luci-interpreter/src/core/CMakeLists.txt
new file mode 100644 (file)
index 0000000..48d92aa
--- /dev/null
@@ -0,0 +1,20 @@
+set(SOURCES
+    "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/DataType.h"
+    "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/Tensor.h"
+    RuntimeGraph.h
+    RuntimeGraph.cpp
+    RuntimeModule.h)
+
+add_library(${LUCI_INTERPRETER_CORE} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+    set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+
+add_subdirectory(reader)
+
+target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC "luci_micro_circle_reader${READER_SUFFIX}")
+target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC luci_micro_circle_schema)
+target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC ${LUCI_INTERPRETER_MEMORY_MANAGER})
+
+target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
diff --git a/onert-micro/luci-interpreter/src/core/RuntimeGraph.cpp b/onert-micro/luci-interpreter/src/core/RuntimeGraph.cpp
new file mode 100644 (file)
index 0000000..41447f9
--- /dev/null
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/RuntimeGraph.h"
+#include "kernels/KernelBuilder.h"
+
+#include <algorithm>
+#include <map>
+
+namespace luci_interpreter
+{
+
+// IBaseRuntimeGraph
+RuntimeGraph::RuntimeGraph(SimpleMemoryManager *memory_manager, CircleReader *circle_reader)
+  : _memory_manager(memory_manager),
+    _tensor_to_data(std::unordered_map<const circle::Tensor *, uint8_t *>{}),
+    _reader(circle_reader), _inplace_op_indexes(std::unordered_set<uint32_t>{})
+{
+}
+
+RuntimeGraph::~RuntimeGraph()
+{
+  for (auto &idx_to_tensor : _tensor_to_data)
+  {
+    auto *data = idx_to_tensor.second;
+
+    _memory_manager->release_memory(data);
+  }
+}
+
+// TODO: modify this
+void RuntimeGraph::buildAllocDeallocPlan()
+{
+  invalidate();
+  using Lifetime = std::pair<int32_t, int32_t>;
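+  // Each tensor gets a [first_use, last_use] pair over kernel indices. A value of -1
+  // in either slot means the corresponding allocation or release step is skipped,
+  // e.g. for graph inputs and for tensors handed over by in-place operations.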
+  std::map<const circle::Tensor *, Lifetime> lifetimes;
+  const size_t num_kernels = _reader->operators().size();
+
+  for (const auto input_ind : _reader->inputs())
+  {
+    const auto raw_tensor = _reader->tensors()[input_ind];
+
+    assert(lifetimes.count(raw_tensor) == 0);
+    lifetimes[raw_tensor] = Lifetime(-1, 0);
+  }
+
+  for (int32_t index = 0; index < num_kernels; ++index)
+  {
+    const auto kernel = _reader->operators().at(index);
+    assert(kernel != nullptr);
+
+    for (int32_t j = 0; j < kernel->inputs()->size(); ++j)
+    {
+      const auto input_index = kernel->inputs()->operator[](j);
+
+      if (input_index == -1)
+        continue;
+
+      const auto raw_tensor = _reader->tensors()[input_index];
+
+      // Skip constant tensors: their data lives in the model buffer,
+      // so there is nothing to plan for them.
+      auto const &buffer = wrap(_reader->buffers()[raw_tensor->buffer()]->data());
+      if (not buffer.empty())
+      {
+        // constant tensor
+        continue;
+      }
+
+      if (lifetimes.count(raw_tensor) > 0)
+      {
+        if (_inplace_op_indexes.find(index) != _inplace_op_indexes.end())
+          lifetimes.at(raw_tensor).second = -1;
+        else
+          lifetimes.at(raw_tensor).second = index;
+      }
+    }
+
+    for (int32_t j = 0; j < kernel->outputs()->size(); ++j)
+    {
+      const auto output_index = kernel->outputs()->operator[](j);
+      const auto raw_tensor = _reader->tensors()[output_index];
+
+      assert(lifetimes.count(raw_tensor) == 0);
+      if (_inplace_op_indexes.find(index) != _inplace_op_indexes.end())
+        lifetimes[raw_tensor] = Lifetime(-1, index);
+      else
+        lifetimes[raw_tensor] = Lifetime(index, index);
+    }
+  }
+
+  for (const auto output_ind : _reader->outputs())
+  {
+    const auto raw_tensor = _reader->tensors()[output_ind];
+
+    if (lifetimes.count(raw_tensor) > 0)
+      lifetimes.at(raw_tensor).second = num_kernels;
+  }
+
+  _alloc_plan.assign(num_kernels, std::vector<const circle::Tensor *>());
+  _dealloc_plan.assign(num_kernels + 1, std::vector<const circle::Tensor *>());
+  for (const auto &item : lifetimes)
+  {
+    if (item.second.first != -1)
+      _alloc_plan[item.second.first].push_back(item.first);
+    if (item.second.second != -1)
+      _dealloc_plan[item.second.second].push_back(item.first);
+  }
+  _is_valid = true;
+}
+
+void RuntimeGraph::allocate(size_t kernel_index)
+{
+  assert(_is_valid && kernel_index < _alloc_plan.size());
+  for (const circle::Tensor *tensor : _alloc_plan[kernel_index])
+  {
+    if (_tensor_to_data.find(tensor) != _tensor_to_data.end())
+    {
+      auto *data = _tensor_to_data.at(tensor);
+      _memory_manager->release_memory(data);
+    }
+    auto *data = _memory_manager->allocate_memory(tensor);
+    _tensor_to_data[tensor] = data;
+  }
+}
+
+void RuntimeGraph::deallocate(size_t kernel_index)
+{
+  assert(_is_valid && kernel_index < _dealloc_plan.size());
+  for (const circle::Tensor *tensor : _dealloc_plan[kernel_index])
+  {
+    const auto it = _tensor_to_data.find(tensor);
+    assert(it != _tensor_to_data.end());
+
+    auto *data = _tensor_to_data.at(tensor);
+    _memory_manager->release_memory(data);
+
+    _tensor_to_data.erase(it);
+  }
+}
+
+void RuntimeGraph::resetOutputTensorsData()
+{
+  for (int i = 0; i < _reader->outputs().size(); ++i)
+  {
+    const auto tensor_index = _reader->outputs()[i];
+    assert(tensor_index != -1);
+    const auto tensor = _reader->tensors()[tensor_index];
+    assert(tensor != nullptr);
+
+    auto tensor_it = _tensor_to_data.find(tensor);
+    if (tensor_it != _tensor_to_data.end())
+    {
+      auto *data = _tensor_to_data.at(tensor);
+      _memory_manager->release_memory(data);
+      _tensor_to_data.erase(tensor_it);
+    }
+  }
+}
+
+uint8_t *RuntimeGraph::configureGraphInput(int32_t input_index)
+{
+  resetOutputTensorsData();
+
+  const auto tensor_index = _reader->inputs()[input_index];
+  assert(tensor_index != -1);
+  const auto tensor = _reader->tensors()[tensor_index];
+  assert(tensor != nullptr);
+
+  if (_tensor_to_data.find(tensor) != _tensor_to_data.end())
+  {
+    auto *data = _tensor_to_data.at(tensor);
+    _memory_manager->release_memory(data);
+  }
+
+  auto *data = _memory_manager->allocate_memory(tensor);
+  _tensor_to_data[tensor] = data;
+
+  return data;
+}
+
+// Store an externally owned input buffer for the given graph input (no copy is made).
+// TODO maybe remove it
+void RuntimeGraph::configureGraphInput(int32_t input_index, uint8_t *data)
+{
+  resetOutputTensorsData();
+
+  const auto tensor_index = _reader->inputs()[input_index];
+  assert(tensor_index != -1);
+  const auto tensor = _reader->tensors()[tensor_index];
+  assert(tensor != nullptr);
+
+  if (_tensor_to_data.find(tensor) != _tensor_to_data.end())
+  {
+    auto *data_prev = _tensor_to_data.at(tensor);
+    _memory_manager->release_memory(data_prev);
+  }
+  _tensor_to_data[tensor] = data;
+}
+
+int32_t RuntimeGraph::getInputDataSizeByIndex(int32_t input_index)
+{
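+  // For example (illustrative shapes): with indices of shape [2, 1], updates of
+  // shape [2, 4] and output of shape [5, 4]: outer_dims = 1, indices_nd = 1,
+  // n_slices = 2, slice_size = 4 and dims_to_count = {4}; each update row is added
+  // into the output row selected by its index.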
+  const auto tensor_index = _reader->inputs()[input_index];
+  assert(tensor_index != -1);
+  const auto tensor = _reader->tensors()[tensor_index];
+  assert(tensor != nullptr);
+
+  return Tensor::num_elements(tensor) * size(Tensor::element_type(tensor));
+}
+
+int32_t RuntimeGraph::getOutputDataSizeByIndex(int32_t output_index)
+{
+  const auto tensor_index = _reader->outputs()[output_index];
+  assert(tensor_index != -1);
+  const auto tensor = _reader->tensors()[tensor_index];
+  assert(tensor != nullptr);
+
+  return Tensor::num_elements(tensor) * size(Tensor::element_type(tensor));
+}
+
+uint8_t *RuntimeGraph::getOutputDataByIndex(int32_t output_index)
+{
+  const auto tensor_index = _reader->outputs()[output_index];
+  assert(tensor_index != -1);
+  const auto tensor = _reader->tensors()[tensor_index];
+  assert(tensor != nullptr);
+
+  assert(_tensor_to_data.find(tensor) != _tensor_to_data.end());
+
+  return _tensor_to_data[tensor];
+}
+
+uint8_t *RuntimeGraph::getDataByTensor(const circle::Tensor *raw_tensor)
+{
+  if (raw_tensor == nullptr)
+    return nullptr;
+
+  if (_tensor_to_data.find(raw_tensor) == _tensor_to_data.end())
+  {
+    return nullptr;
+  }
+
+  return _tensor_to_data.at(raw_tensor);
+}
+
+void RuntimeGraph::makeInplaceOperation(const circle::Tensor *src_tensor,
+                                        const circle::Tensor *dst_tensor)
+{
+  if (src_tensor == nullptr or dst_tensor == nullptr)
+    return;
+
+  auto src_it = _tensor_to_data.find(src_tensor);
+
+  assert(src_it != _tensor_to_data.end() && "Failed makeInplaceOperation");
+
+  auto *data = _tensor_to_data[src_tensor];
+
+  _tensor_to_data.erase(src_it);
+
+  assert(_tensor_to_data.find(dst_tensor) == _tensor_to_data.end() &&
+         "Failed makeInplaceOperation");
+  _tensor_to_data[dst_tensor] = data;
+}
+
+uint8_t *RuntimeGraph::getConstDataByTensor(const circle::Tensor *raw_tensor)
+{
+  if (raw_tensor == nullptr)
+    return nullptr;
+
+  auto const &buffer = wrap(_reader->buffers()[raw_tensor->buffer()]->data());
+
+  return const_cast<uint8_t *>(buffer.data());
+}
+
+const circle::Tensor *RuntimeGraph::getCircleTensorByIndex(int32_t index)
+{
+  if (index < 0)
+    return nullptr;
+
+  const auto raw_tensor = _reader->tensors()[index];
+
+  return raw_tensor;
+}
+
+void RuntimeGraph::configure()
+{
+  KernelConfigureRegistry kernel_configure;
+
+  for (uint32_t i = 0; i < _reader->operators().size(); ++i)
+  {
+    const auto op = _reader->operators().at(i);
+    assert(op != nullptr);
+
+    const auto opcode = _reader->builtin_code(op);
+
+    kernel_configure.configure_kernel(op, opcode, this);
+  }
+
+  if (not _is_valid)
+    buildAllocDeallocPlan();
+
+  _is_valid = true;
+}
+
+void RuntimeGraph::execute()
+{
+  if (not _is_valid)
+    configure();
+
+  KernelExecuteRegistry kernel_executor;
+
+  for (uint32_t i = 0; i < _reader->operators().size(); ++i)
+  {
+    const auto op = _reader->operators().at(i);
+    assert(op != nullptr);
+
+    const auto opcode = _reader->builtin_code(op);
+
+    allocate(i);
+
+    bool is_inplace = false;
+
+    if (_inplace_op_indexes.find(i) != _inplace_op_indexes.end())
+      is_inplace = true;
+
+    kernel_executor.execute_kernel(op, opcode, this, is_inplace);
+
+    deallocate(i);
+  }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/core/RuntimeGraph.h b/onert-micro/luci-interpreter/src/core/RuntimeGraph.h
new file mode 100644 (file)
index 0000000..b64e8dc
--- /dev/null
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
+#define LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
+
+#include "luci_interpreter/core/Tensor.h"
+#ifdef USE_STATIC_ALLOC
+#include "memory_managers/StaticMemoryManager.h"
+#else
+#include "memory_managers/SimpleMemoryManager.h"
+#endif // USE_STATIC_ALLOC
+
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+
+#include <memory>
+#include <vector>
+#include <unordered_map>
+#include <unordered_set>
+
+namespace luci_interpreter
+{
+
+class RuntimeModule;
+
+#ifdef USE_STATIC_ALLOC
+// TODO: Enable it
+#if 0
+class StaticRuntimeGraph final : public IBaseRuntimeGraph
+{
+public:
+  explicit StaticRuntimeGraph(IMemoryManager *memory_manager, CircleReader *circle_reader);
+  ~StaticRuntimeGraph() final;
+
+  void configureGraphInputs() final;
+  void execute() final;
+  void configure() final;
+
+  void configure_kernels() final;
+};
+#endif
+#else
+
+class RuntimeGraph
+{
+public:
+  explicit RuntimeGraph(SimpleMemoryManager *memory_manager, CircleReader *circle_reader);
+  ~RuntimeGraph();
+
+  Tensor *addTensor(const circle::Tensor *raw_tensor, std::unique_ptr<Tensor> &&tensor);
+
+  const circle::Tensor *getCircleTensorByIndex(int32_t index);
+
+  void makeInplaceOperation(const circle::Tensor *src_tensor, const circle::Tensor *dst_tensor);
+
+  uint8_t *getDataByTensor(const circle::Tensor *raw_tensor);
+  uint8_t *getConstDataByTensor(const circle::Tensor *raw_tensor);
+
+  uint8_t *configureGraphInput(int32_t input_index);
+  void configureGraphInput(int32_t input_index, uint8_t *data);
+
+  int32_t getInputDataSizeByIndex(int32_t input_index);
+  int32_t getOutputDataSizeByIndex(int32_t output_index);
+
+  uint8_t *getOutputDataByIndex(int32_t output_index);
+
+  void addInplaceOpIndex(uint32_t index) { _inplace_op_indexes.insert(index); }
+
+  void execute();
+  void configure();
+
+  void invalidate() { _is_valid = false; }
+  bool isValid() const { return _is_valid; }
+
+private:
+  void buildAllocDeallocPlan();
+  void allocate(size_t kernel_index);
+  void deallocate(size_t kernel_index);
+
+  void resetOutputTensorsData();
+
+private:
+  SimpleMemoryManager *_memory_manager;
+  CircleReader *_reader;
+
+  std::unordered_map<const circle::Tensor *, uint8_t *> _tensor_to_data;
+  std::unordered_set<uint32_t> _inplace_op_indexes;
+
+  bool _is_valid = false;
+
+  // Per-kernel plans: tensors to allocate before a given op and tensors to release after it
+  std::vector<std::vector<const circle::Tensor *>> _alloc_plan;
+  std::vector<std::vector<const circle::Tensor *>> _dealloc_plan;
+};
+
+#endif // USE_STATIC_ALLOC
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
diff --git a/onert-micro/luci-interpreter/src/core/RuntimeModule.h b/onert-micro/luci-interpreter/src/core/RuntimeModule.h
new file mode 100644 (file)
index 0000000..5dbe5a9
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
+#define LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
+
+#include "core/RuntimeGraph.h"
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+
+#include <memory>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+#ifdef USE_STATIC_ALLOC
+using BaseRuntimeGraph = StaticRuntimeGraph;
+using MemoryManager = StaticMemoryManager;
+#else
+using BaseRuntimeGraph = RuntimeGraph;
+using MemoryManager = SimpleMemoryManager;
+#endif // USE_STATIC_ALLOC
+
+class RuntimeModule
+{
+public:
+  RuntimeModule() = default;
+
+  void addGraph(MemoryManager *memory_manager)
+  {
+    _graphs.emplace_back(memory_manager, &_circle_reader);
+  }
+
+  BaseRuntimeGraph *getRuntimeGraphAt(uint32_t pos) { return &_graphs.at(pos); }
+
+  void execute() { getMainGraph()->execute(); }
+
+  CircleReader &getCircleReader() { return _circle_reader; }
+
+  BaseRuntimeGraph *getMainGraph() const { return const_cast<BaseRuntimeGraph *>(&_graphs[0]); }
+
+private:
+  std::vector<BaseRuntimeGraph> _graphs;
+
+  CircleReader _circle_reader;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
diff --git a/onert-micro/luci-interpreter/src/core/reader/CMakeLists.txt b/onert-micro/luci-interpreter/src/core/reader/CMakeLists.txt
new file mode 100644 (file)
index 0000000..0de1731
--- /dev/null
@@ -0,0 +1,12 @@
+set(MICRO_READER_SOURCE
+        "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/reader/CircleMicroReader.h"
+        "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/reader/CircleMicroReaderHelper.h"
+        "CircleMicroReader.cpp"
+        "CircleMicroReaderHelper.cpp"
+        )
+
+add_library("luci_micro_circle_reader${READER_SUFFIX}" STATIC ${MICRO_READER_SOURCE})
+target_link_libraries("luci_micro_circle_reader${READER_SUFFIX}" PUBLIC luci_micro_circle_schema)
+
+target_include_directories("luci_micro_circle_reader${READER_SUFFIX}" PUBLIC "${GENERATED_INCLUDE_DIR}")
+target_include_directories("luci_micro_circle_reader${READER_SUFFIX}" PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
diff --git a/onert-micro/luci-interpreter/src/core/reader/CircleMicroReader.cpp b/onert-micro/luci-interpreter/src/core/reader/CircleMicroReader.cpp
new file mode 100644 (file)
index 0000000..332e464
--- /dev/null
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+#include "luci_interpreter/core/reader/CircleMicroReaderHelper.h"
+
+#include <algorithm>
+
+namespace luci_interpreter
+{
+
+// TODO Check whether this can be removed
+DataType luci_datatype(const circle::TensorType type)
+{
+  switch (type)
+  {
+    case circle::TensorType_FLOAT32:
+      return DataType::FLOAT32;
+    case circle::TensorType_FLOAT16:
+      return DataType::FLOAT16;
+    case circle::TensorType_INT32:
+      return DataType::S32;
+    case circle::TensorType_UINT8:
+      return DataType::U8;
+    case circle::TensorType_INT64:
+      return DataType::S64;
+    case circle::TensorType_BOOL:
+      return DataType::BOOL;
+    case circle::TensorType_INT16:
+      return DataType::S16;
+    case circle::TensorType_COMPLEX64:
+      break;
+    case circle::TensorType_INT8:
+      return DataType::S8;
+    default:
+      break;
+  }
+  assert(false);
+  return DataType::Unknown;
+}
+
+FusedActFunc luci_actfunc(const circle::ActivationFunctionType type)
+{
+  switch (type)
+  {
+    case circle::ActivationFunctionType::ActivationFunctionType_NONE:
+      return FusedActFunc::NONE;
+    case circle::ActivationFunctionType::ActivationFunctionType_RELU:
+      return FusedActFunc::RELU;
+    case circle::ActivationFunctionType::ActivationFunctionType_RELU_N1_TO_1:
+      return FusedActFunc::RELU_N1_TO_1;
+    case circle::ActivationFunctionType::ActivationFunctionType_RELU6:
+      return FusedActFunc::RELU6;
+    case circle::ActivationFunctionType::ActivationFunctionType_TANH:
+      return FusedActFunc::TANH;
+    case circle::ActivationFunctionType::ActivationFunctionType_SIGN_BIT:
+      return FusedActFunc::SIGN_BIT;
+    default:
+      break;
+  }
+  assert(false);
+  return FusedActFunc::UNDEFINED;
+}
+
+Padding luci_padding(const circle::Padding padding)
+{
+  switch (padding)
+  {
+    case circle::Padding::Padding_SAME:
+      return Padding::SAME;
+    case circle::Padding::Padding_VALID:
+      return Padding::VALID;
+  }
+  assert(false);
+  return Padding::UNDEFINED;
+}
+
+MirrorPadMode luci_mirrorpad_mode(const circle::MirrorPadMode mode)
+{
+  switch (mode)
+  {
+    case circle::MirrorPadMode::MirrorPadMode_REFLECT:
+      return MirrorPadMode::REFLECT;
+    case circle::MirrorPadMode::MirrorPadMode_SYMMETRIC:
+      return MirrorPadMode::SYMMETRIC;
+  }
+  assert(false);
+  return MirrorPadMode::UNDEFINED;
+}
+
+circle::BuiltinOperator CircleReader::builtin_code(const circle::Operator *op) const
+{
+  assert(op != nullptr);
+
+  const auto op_codes = opcodes();
+  uint32_t index = op->opcode_index();
+  assert(index < op_codes.size());
+  const auto opcode = op_codes[index];
+  assert(opcode != nullptr);
+
+  return circle::builtin_code_neutral(opcode);
+}
+
+bool CircleReader::parse(const circle::Model *model)
+{
+  assert(model != nullptr);
+
+  // for direct pointer access
+  _model = model;
+
+  return true;
+}
+
+bool CircleReader::select_subgraph(uint32_t sgindex)
+{
+  if (num_subgraph() <= sgindex)
+  {
+    assert(false);
+    return false;
+  }
+
+  // for direct pointer access
+  auto subgraphs = _model->subgraphs();
+  assert(subgraphs != nullptr);
+
+  _current_subgraph = subgraphs->Get(sgindex);
+  assert(_current_subgraph != nullptr);
+
+  return true;
+}
+
+template <typename T>
+VectorWrapper<T>::VectorWrapper(const flatbuffers::Vector<T> *ptr) : _vector(ptr)
+{
+  // Do nothing
+}
+
+template <typename T> uint32_t VectorWrapper<T>::size() const
+{
+  return null() ? 0 : _vector->size();
+}
+
+template <typename T> const T *VectorWrapper<T>::data() const
+{
+  return null() ? nullptr : _vector->data();
+}
+
+template <typename T> typename VectorWrapper<T>::iterator VectorWrapper<T>::begin() const
+{
+  return null() ? iterator(nullptr, 0) : _vector->begin();
+}
+
+template <typename T> typename VectorWrapper<T>::iterator VectorWrapper<T>::end() const
+{
+  return null() ? begin() : _vector->end();
+}
+
+template <typename T> typename VectorWrapper<T>::value_type VectorWrapper<T>::at(uint32_t i) const
+{
+  if (i >= size())
+  {
+    // TODO find better error message
+    assert(false && "Out-of-bounds access to vector element");
+  }
+
+  return _vector->Get(i);
+}
+
+template <typename T>
+typename VectorWrapper<T>::value_type VectorWrapper<T>::operator[](uint32_t i) const
+{
+  return at(i);
+}
+
+template <typename T> bool VectorWrapper<T>::null() const { return _vector == nullptr; }
+template <typename T> bool VectorWrapper<T>::empty() const { return size() == 0; }
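+
+// Behaviour sketch (illustrative only, derived from the definitions above): a
+// wrapper built from a null flatbuffers vector stays safe to query:
+//
+//   VectorWrapper<int32_t> empty_wrap(nullptr);
+//   empty_wrap.null();   // true
+//   empty_wrap.size();   // 0
+//   empty_wrap.empty();  // true
+//   for (auto v : empty_wrap) { }  // never entered: begin() == end()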
+
+#define REGISTER_WRAPPER(T) template class VectorWrapper<T>
+REGISTER_WRAPPER(flatbuffers::Offset<circle::SubGraph>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Buffer>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Tensor>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Operator>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::OperatorCode>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Metadata>);
+REGISTER_WRAPPER(int32_t);
+REGISTER_WRAPPER(uint8_t);
+#undef REGISTER_WRAPPER
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/core/reader/CircleMicroReaderHelper.cpp b/onert-micro/luci-interpreter/src/core/reader/CircleMicroReaderHelper.cpp
new file mode 100644 (file)
index 0000000..8cc0058
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/core/reader/CircleMicroReaderHelper.h"
+
+namespace circle
+{
+
+::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode)
+{
+  assert(opcode != nullptr);
+  if (opcode->deprecated_builtin_code() == 127)
+  {
+    assert(opcode->builtin_code() >= 127);
+    return opcode->builtin_code();
+  }
+  // The value 255 (-1) did not exist in schema v0.3
+  assert(opcode->deprecated_builtin_code() != -1);
+  return static_cast<::circle::BuiltinOperator>(opcode->deprecated_builtin_code());
+}
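+
+// Worked example of the rule above (illustrative only):
+//  - deprecated_builtin_code == 127 means "use builtin_code", so an opcode with
+//    builtin_code == 130 resolves to operator 130 (an extended opcode).
+//  - any other value is the answer itself, e.g. deprecated_builtin_code == 4
+//    resolves to operator 4.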
+
+bool is_valid(const ::circle::OperatorCode *opcode)
+{
+  ::circle::BuiltinOperator code = opcode->builtin_code();
+  return (::circle::BuiltinOperator_MIN <= code && code <= ::circle::BuiltinOperator_MAX);
+}
+
+bool is_custom(const ::circle::OperatorCode *opcode)
+{
+  ::circle::BuiltinOperator code = opcode->builtin_code();
+  return (code == ::circle::BuiltinOperator_CUSTOM);
+}
+
+const char *tensor_type(const ::circle::Tensor *tensor)
+{
+  return ::circle::EnumNameTensorType(tensor->type());
+}
+
+} // namespace circle
diff --git a/onert-micro/luci-interpreter/src/kernels/Add.cpp b/onert-micro/luci-interpreter/src/kernels/Add.cpp
new file mode 100644 (file)
index 0000000..07b6c20
--- /dev/null
@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Add.h"
+
+#include "kernels/BinaryOpCommon.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/add.h>
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params)
+  : KernelWithParams<AddParams>({input1, input2}, {output}, params)
+{
+}
+
+void Add::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+  if (input1()->element_type() == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
+                           input2()->zero_points().size() == 1);
+    LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
+                           output()->zero_point() == 0);
+  }
+
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Add::execute() const
+{
+  switch (input1()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    case DataType::S16:
+      evalQuantizedS16();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void Add::evalFloat() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<float>(params, _params.activation);
+
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastAdd4DSlow(
+      params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+      getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<float>(input1()),
+                               getTensorShape(input2()), getTensorData<float>(input2()),
+                               getTensorShape(output()), getTensorData<float>(output()));
+  }
+}
+
+template <typename T> void Add::evalInteger() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<T>(params, _params.activation);
+
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastAdd4DSlow(
+      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<T>(input1()),
+                               getTensorShape(input2()), getTensorData<T>(input2()),
+                               getTensorShape(output()), getTensorData<T>(output()));
+  }
+}
+
+void Add::evalQuantized() const
+{
+  const auto input1_scale = static_cast<double>(input1()->scale());
+  const auto input2_scale = static_cast<double>(input2()->scale());
+  const auto output_scale = static_cast<double>(output()->scale());
+
+  const int left_shift = 20;
+  const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
+  const double real_input1_multiplier = input1_scale / twice_max_input_scale;
+  const double real_input2_multiplier = input2_scale / twice_max_input_scale;
+  const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
+
+  int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
+  int input1_shift{}, input2_shift{}, output_shift{};
+  quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
+  quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
+  quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::ArithmeticParams params{};
+  params.left_shift = left_shift;
+  // The kernel expects inputs' zero points to be negated.
+  params.input1_offset = -input1()->zero_point(); // Note the '-'.
+  params.input1_multiplier = input1_multiplier;
+  params.input1_shift = input1_shift;
+  params.input2_offset = -input2()->zero_point(); // Note the '-'.
+  params.input2_multiplier = input2_multiplier;
+  params.input2_shift = input2_shift;
+  params.output_offset = output()->zero_point();
+  params.output_multiplier = output_multiplier;
+  params.output_shift = output_shift;
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastAdd4DSlow(
+      params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+      getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
+                               getTensorShape(input2()), getTensorData<uint8_t>(input2()),
+                               getTensorShape(output()), getTensorData<uint8_t>(output()));
+  }
+}
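+
+// Numeric sketch of the fixed-point setup above (values chosen purely for
+// illustration): with input1_scale = 0.5, input2_scale = 0.25 and
+// output_scale = 0.1, twice_max_input_scale = 1.0, so
+//   real_input1_multiplier = 0.5, real_input2_multiplier = 0.25,
+//   real_output_multiplier = 1.0 / ((1 << 20) * 0.1) ~= 9.54e-6,
+// and quantizeMultiplierSmallerThanOneExp() turns each value into an int32
+// multiplier and a shift.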
+
+void Add::evalQuantizedS16() const
+{
+  const auto input1_scale = static_cast<double>(input1()->scale());
+  const auto input2_scale = static_cast<double>(input2()->scale());
+  const auto output_scale = static_cast<double>(output()->scale());
+
+  constexpr int left_shift = 12;
+  const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
+  const double real_input1_multiplier = input1_scale / twice_max_input_scale;
+  const double real_input2_multiplier = input2_scale / twice_max_input_scale;
+  const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
+
+  int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
+  int input1_shift{}, input2_shift{}, output_shift{};
+  quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
+  quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
+  quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  auto fn = [input1_multiplier, input1_shift, //
+             input2_multiplier, input2_shift, //
+             output_multiplier, output_shift, //
+             activation_min, activation_max](int16_t input1_val, int16_t input2_val) {
+    const int32_t shifted_input1_val = static_cast<int32_t>(input1_val) << left_shift;
+    const int32_t shifted_input2_val = static_cast<int32_t>(input2_val) << left_shift;
+    const int32_t scaled_input1_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+      shifted_input1_val, input1_multiplier, input1_shift);
+    const int32_t scaled_input2_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+      shifted_input2_val, input2_multiplier, input2_shift);
+    const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+    const int32_t raw_output = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+      raw_sum, output_multiplier, output_shift);
+    const int32_t clamped_output = std::min(activation_max, std::max(activation_min, raw_output));
+    return static_cast<int16_t>(clamped_output);
+  };
+
+  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
+                        getTensorShape(input2()), getTensorData<int16_t>(input2()),
+                        getTensorShape(output()), getTensorData<int16_t>(output()), fn);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/ArgMax.cpp b/onert-micro/luci-interpreter/src/kernels/ArgMax.cpp
new file mode 100644 (file)
index 0000000..f51476d
--- /dev/null
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ArgMax.h"
+#include "kernels/Utils.h"
+#include "PALArgMax.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ArgMax::ArgMax(const Tensor *input, const Tensor *axis, Tensor *output, const ArgMaxParams &params)
+  : KernelWithParams<ArgMaxParams>({input, axis}, {output}, params)
+{
+}
+
+void ArgMax::configure()
+{
+  assert(axis()->element_type() == DataType::S32 || axis()->element_type() == DataType::S64);
+  assert(input()->shape().num_dims() >= 1);
+  const Shape &input_shape = input()->shape();
+  const int num_dims = input_shape.num_dims();
+  Shape output_shape(num_dims - 1);
+
+  // If the axis value is negative, update it by adding input_shape's num_dims.
+  // If the updated value is still negative, assert.
+  assert(axis()->shape().num_elements() == 1);
+  int axis_value = getTensorData<int32_t>(axis())[0];
+  if (axis_value < 0)
+    axis_value = axis_value + num_dims;
+  assert(axis_value >= 0);
+
+  int j = 0;
+  for (int i = 0; i < num_dims; i++)
+  {
+    if (i == axis_value)
+      continue;
+    output_shape.dim(j++) = input_shape.dim(i);
+  }
+
+  assert(output()->element_type() == _params.output_type);
+
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(output_shape);
+}
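+
+// Shape sketch (illustrative): for an input of shape [2, 3, 4] and axis = -1,
+// axis_value becomes 2 and that dimension is dropped, giving an output shape
+// of [2, 3].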
+
+void ArgMax::execute() const
+{
+
+#define TF_LITE_ARG_MAX(data_type, axis_type, output_type)                                    \
+  luci_interpreter_pal::ArgMinMax(getTensorShape(input()), getTensorData<data_type>(input()), \
+                                  getTensorData<axis_type>(axis()), getTensorShape(output()), \
+                                  getTensorData<output_type>(output()), std::greater<data_type>())
+  if (axis()->element_type() == DataType::S32)
+  {
+    switch (_params.output_type)
+    {
+      case DataType::S32:
+        switch (input()->element_type())
+        {
+          case DataType::FLOAT32:
+            TF_LITE_ARG_MAX(float, int32_t, int32_t);
+            break;
+          case DataType::U8:
+            TF_LITE_ARG_MAX(uint8_t, int32_t, int32_t);
+            break;
+          default:
+            assert(false && "Unsupported input type.");
+        }
+        break;
+      case DataType::S64:
+        switch (input()->element_type())
+        {
+          case DataType::FLOAT32:
+            TF_LITE_ARG_MAX(float, int32_t, int64_t);
+            break;
+          case DataType::U8:
+            TF_LITE_ARG_MAX(uint8_t, int32_t, int64_t);
+            break;
+          default:
+            assert(false && "Unsupported input type.");
+        }
+        break;
+      default:
+        assert(false && "Unsupported output type.");
+    }
+  }
+  else
+  {
+    switch (_params.output_type)
+    {
+      case DataType::S32:
+        switch (input()->element_type())
+        {
+          case DataType::FLOAT32:
+            TF_LITE_ARG_MAX(float, int64_t, int32_t);
+            break;
+          case DataType::U8:
+            TF_LITE_ARG_MAX(uint8_t, int64_t, int32_t);
+            break;
+          default:
+            assert(false && "Unsupported input type.");
+        }
+        break;
+      case DataType::S64:
+        switch (input()->element_type())
+        {
+          case DataType::FLOAT32:
+            TF_LITE_ARG_MAX(float, int64_t, int64_t);
+            break;
+          case DataType::U8:
+            TF_LITE_ARG_MAX(uint8_t, int64_t, int64_t);
+            break;
+          default:
+            assert(false && "Unsupported input type.");
+        }
+        break;
+      default:
+        assert(false && "Unsupported output type.");
+    }
+  }
+#undef TF_LITE_ARG_MAX
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/AveragePool2D.cpp b/onert-micro/luci-interpreter/src/kernels/AveragePool2D.cpp
new file mode 100644 (file)
index 0000000..498334c
--- /dev/null
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/AveragePool2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALAveragePool2d.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad,
+                             const Pool2DParams &params)
+  : KernelWithParams<Pool2DParams>({input}, {output, scratchpad}, params)
+{
+}
+
+void AveragePool2D::configure()
+{
+  if (input()->element_type() != output()->element_type())
+  {
+    assert(false && "Input Tensor and Output Tensor Type must be same");
+  }
+  if (input()->shape().num_dims() != 4)
+  {
+    assert(false && "Input Tensor Shape must be 4-D");
+  }
+  const Shape &input_shape = input()->shape();
+
+  const int32_t batches = input_shape.dim(0);
+  const int32_t input_height = input_shape.dim(1);
+  const int32_t input_width = input_shape.dim(2);
+  const int32_t depth = input_shape.dim(3);
+
+  const int32_t output_height =
+    computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height);
+  const int32_t output_width =
+    computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
+
+  _padding_height =
+    computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
+  _padding_width =
+    computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
+  if (input()->element_type() == DataType::U8)
+  {
+    LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+    LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
+  }
+  else if (input()->element_type() == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+  }
+  else if (input()->element_type() == DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+    LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
+  }
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize({batches, output_height, output_width, depth});
+
+  auto scratchpad = getOutputTensors()[1];
+  luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(),
+                                              getTensorShape(input()), getTensorShape(output()));
+}
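+
+// Shape sketch (illustrative, assuming the usual TFLite padding arithmetic in
+// computeOutputSize): a [1, 4, 4, 1] input with a 2x2 filter, stride 2 and
+// VALID padding produces a [1, 2, 2, 1] output.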
+
+void AveragePool2D::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    case DataType::S16:
+      evalSInt16();
+      break;
+    case DataType::S8:
+      evalSInt8();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void AveragePool2D::evalFloat() const
+{
+  float activation_min{};
+  float activation_max{};
+  calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+  tflite::PoolParams params{};
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.filter_height = _params.filter_height;
+  params.filter_width = _params.filter_width;
+  params.float_activation_min = activation_min;
+  params.float_activation_max = activation_max;
+
+  tflite::reference_ops::AveragePool(params, getTensorShape(input()), getTensorData<float>(input()),
+                                     getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void AveragePool2D::evalQuantized() const
+{
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::PoolParams params{};
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.filter_height = _params.filter_height;
+  params.filter_width = _params.filter_width;
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  tflite::reference_ops::AveragePool(params, getTensorShape(input()),
+                                     getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                     getTensorData<uint8_t>(output()));
+}
+
+void AveragePool2D::evalSInt8() const
+{
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+  tflite::PoolParams params{};
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.filter_height = _params.filter_height;
+  params.filter_width = _params.filter_width;
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  auto scratchpad = getOutputTensors()[1];
+  int8_t *scratchpad_data = nullptr;
+  if (scratchpad->is_allocatable())
+    scratchpad_data = scratchpad->data<int8_t>();
+
+  luci_interpreter_pal::AveragePool<int8_t>(
+    params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(output()),
+    getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
+}
+
+void AveragePool2D::evalSInt16() const
+{
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::PoolParams params{};
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.filter_height = _params.filter_height;
+  params.filter_width = _params.filter_width;
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  tflite::reference_integer_ops::AveragePool(
+    params, getTensorShape(input()), getTensorData<int16_t>(input()), //
+    getTensorShape(output()), getTensorData<int16_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/BatchMatMul.cpp b/onert-micro/luci-interpreter/src/kernels/BatchMatMul.cpp
new file mode 100644 (file)
index 0000000..065e444
--- /dev/null
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchMatMul.h"
+#include "kernels/Utils.h"
+
+#include "PALBatchMatMul.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose.h>
+
+namespace
+{
+
+tflite::RuntimeShape SwapRowColumnDims(const tflite::RuntimeShape &shape)
+{
+  tflite::RuntimeShape swapped_shape(shape);
+  const int32_t dims = shape.DimensionsCount();
+  swapped_shape.SetDim(dims - 2, shape.Dims(dims - 1));
+  swapped_shape.SetDim(dims - 1, shape.Dims(dims - 2));
+  return swapped_shape;
+}
+
+} // namespace
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+BatchMatMul::BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp,
+                         Tensor *y_tmp, const BatchMatMulParams &params)
+  : KernelWithParams({x, y}, {output, x_tmp, y_tmp}, params)
+{
+}
+
+void BatchMatMul::configure()
+{
+  auto lhs = x();
+  auto rhs = y();
+  auto adj_x = params().adj_x;
+  auto adj_y = params().adj_y;
+
+  // TODO Support non-float types
+  if (lhs->element_type() != DataType::FLOAT32 || rhs->element_type() != DataType::FLOAT32)
+    assert(false && "Unsupported type.");
+
+  LUCI_INTERPRETER_CHECK(lhs->element_type() == rhs->element_type());
+
+  auto lhs_rank = lhs->shape().num_dims();
+  auto rhs_rank = rhs->shape().num_dims();
+  LUCI_INTERPRETER_CHECK(lhs_rank >= 2 && lhs_rank <= 4);
+  LUCI_INTERPRETER_CHECK(rhs_rank >= 2 && rhs_rank <= 4);
+
+  auto lhs_scratchpad = temp_lhs();
+  auto rhs_scratchpad = temp_rhs();
+  luci_interpreter_pal::SetupScratchpadTensor(lhs_scratchpad, rhs_scratchpad, getTensorShape(lhs),
+                                              getTensorShape(rhs));
+
+  auto output_rank = std::max(lhs_rank, rhs_rank);
+
+  auto extended_lhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(lhs));
+  auto extended_rhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(rhs));
+
+  // Ensure any batch dimensions obey broadcasting rules.
+  for (int i = 0; i < output_rank - 2; ++i)
+  {
+    const int lhs_dim = extended_lhs_shape.Dims(i);
+    const int rhs_dim = extended_rhs_shape.Dims(i);
+    if (lhs_dim != rhs_dim)
+    {
+      if (lhs_dim != 1)
+      {
+        LUCI_INTERPRETER_CHECK(rhs_dim == 1);
+      }
+    }
+  }
+
+  // Ensure other dimensions work for matrix multiplication.
+  int accum_dim_lhs =
+    adj_x ? extended_lhs_shape.Dims(output_rank - 2) : extended_lhs_shape.Dims(output_rank - 1);
+  int accum_dim_rhs =
+    adj_y ? extended_rhs_shape.Dims(output_rank - 1) : extended_rhs_shape.Dims(output_rank - 2);
+  LUCI_INTERPRETER_CHECK(accum_dim_lhs == accum_dim_rhs);
+
+  Shape output_shape(output_rank);
+  // Fill in any broadcast dimensions.
+  for (int i = 0; i < output_rank - 2; ++i)
+  {
+    const int lhs_dim = extended_lhs_shape.Dims(i);
+    const int rhs_dim = extended_rhs_shape.Dims(i);
+    int broadcast_dim = lhs_dim;
+    if ((lhs_dim != rhs_dim) && (lhs_dim == 1))
+    {
+      broadcast_dim = rhs_dim;
+    }
+    output_shape.dim(i) = broadcast_dim;
+  }
+  // Fill in the matmul dimensions.
+  int lhs_rows_index = adj_x ? output_rank - 1 : output_rank - 2;
+  int rhs_cols_index = adj_y ? output_rank - 2 : output_rank - 1;
+
+  output_shape.dim(output_rank - 2) = extended_lhs_shape.Dims(lhs_rows_index);
+  output_shape.dim(output_rank - 1) = extended_rhs_shape.Dims(rhs_cols_index);
+
+  output()->resize(output_shape);
+}
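+
+// Shape sketch (illustrative): with adj_x = adj_y = false, an lhs of shape
+// [2, 1, 3, 4] and an rhs of shape [1, 5, 4, 6], the batch dimensions
+// broadcast to [2, 5], the accumulation dimension (4) matches, and the output
+// is resized to [2, 5, 3, 6].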
+
+void TransposeRowsColumns(const Tensor *tensor_in, Tensor *tensor_out)
+{
+  tflite::RuntimeShape transposed_shape(getTensorShape(tensor_in));
+  tflite::RuntimeShape shape(getTensorShape(tensor_in));
+  tflite::TransposeParams params;
+  int rank = shape.DimensionsCount();
+  params.perm_count = rank;
+  for (int i = 0; i < rank - 2; ++i)
+  {
+    params.perm[i] = i;
+  }
+  // Transpose the last two dimensions.
+  params.perm[rank - 2] = rank - 1;
+  params.perm[rank - 1] = rank - 2;
+  transposed_shape.SetDim(rank - 1, shape.Dims(rank - 2));
+  transposed_shape.SetDim(rank - 2, shape.Dims(rank - 1));
+  switch (tensor_in->element_type())
+  {
+    case DataType::FLOAT32:
+      tflite::reference_ops::Transpose(params, shape, getTensorData<float>(tensor_in),
+                                       transposed_shape, getTensorData<float>(tensor_out));
+      break;
+    default:
+      assert(false && "Only suppport fp32 BatchMatMul for now.");
+  }
+}
+
+void BatchMatMul::execute() const
+{
+  auto lhs = x();
+  auto rhs = y();
+
+  bool adj_x = params().adj_x;
+  bool adj_y = params().adj_y;
+
+  auto orig_lhs_shape = getTensorShape(lhs);
+  auto orig_rhs_shape = getTensorShape(rhs);
+
+  auto rhs_tensor = adj_y ? rhs : temp_rhs();
+  auto lhs_tensor = adj_x ? temp_lhs() : lhs;
+  if (not adj_y)
+  {
+    TransposeRowsColumns(rhs, temp_rhs());
+  }
+  if (adj_x)
+  {
+    TransposeRowsColumns(lhs, temp_lhs());
+  }
+  tflite::RuntimeShape rhs_shape = adj_y ? orig_rhs_shape : SwapRowColumnDims(orig_rhs_shape);
+  tflite::RuntimeShape lhs_shape = adj_x ? orig_lhs_shape : SwapRowColumnDims(orig_lhs_shape);
+
+  switch (x()->element_type())
+  {
+    case DataType::FLOAT32:
+      luci_interpreter_pal::BatchMatMul(rhs_shape, getTensorData<float>(rhs_tensor), lhs_shape,
+                                        getTensorData<float>(lhs_tensor), getTensorShape(output()),
+                                        getTensorData<float>(output()));
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp b/onert-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp
new file mode 100644 (file)
index 0000000..9ebe289
--- /dev/null
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchToSpaceND.h"
+#include "kernels/Utils.h"
+
+#include "PALBatchToSpaceND.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+namespace
+{
+const int kInputMinDimensionNum = 3;
+const int kInputMaxDimensionNum = 4;
+} // namespace
+
+BatchToSpaceND::BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
+                               Tensor *output)
+  : Kernel({input, block_shape, crops}, {output})
+{
+}
+
+void BatchToSpaceND::configure()
+{
+
+  const auto *block_shape_data = block_shape()->data<int32_t>();
+  const auto *crops_data = crops()->data<int32_t>();
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  int spatial_dims_num = input()->shape().num_dims() - 2;
+
+  LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
+
+  LUCI_INTERPRETER_CHECK(crops()->shape().num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(crops()->shape().dim(0) == spatial_dims_num);
+  LUCI_INTERPRETER_CHECK(crops()->shape().dim(1) == 2);
+  for (int i = 0; i < spatial_dims_num * 2; ++i)
+  {
+    LUCI_INTERPRETER_CHECK(crops_data[i] >= 0);
+  }
+
+  Shape output_shape = Shape(input()->shape().num_dims());
+  int output_batch_size = input()->shape().dim(0);
+  for (int i = 0; i < spatial_dims_num; ++i)
+  {
+    LUCI_INTERPRETER_CHECK(output_batch_size % block_shape_data[i] == 0);
+    output_batch_size = output_batch_size / block_shape_data[i];
+    output_shape.dim(i + 1) =
+      input()->shape().dim(i + 1) * block_shape_data[i] - crops_data[i * 2] - crops_data[i * 2 + 1];
+  }
+
+  output_shape.dim(0) = output_batch_size;
+  output_shape.dim(input()->shape().num_dims() - 1) =
+    input()->shape().dim(input()->shape().num_dims() - 1);
+
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(output_shape);
+}
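+
+// Shape sketch (illustrative): for an input of shape [4, 2, 2, 1],
+// block_shape = [2, 2] and all crops equal to 0, the batch of 4 is folded
+// back into the spatial dimensions, giving an output of shape [1, 4, 4, 1].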
+
+void BatchToSpaceND::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      luci_interpreter_pal::BatchToSpaceND(
+        getTensorShape(input()), getTensorData<float>(input()), getTensorShape(block_shape()),
+        getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
+        getTensorData<int32_t>(crops()), getTensorShape(output()), getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      luci_interpreter_pal::BatchToSpaceND(
+        getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(block_shape()),
+        getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
+        getTensorData<int32_t>(crops()), getTensorShape(output()),
+        getTensorData<uint8_t>(output()));
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Builders.h b/onert-micro/luci-interpreter/src/kernels/Builders.h
new file mode 100644 (file)
index 0000000..b209b2e
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_NODES_BUILDERS_H
+#define LUCI_INTERPRETER_KERNELS_NODES_BUILDERS_H
+
+#include "KernelBuilder.h"
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+#include "core/RuntimeGraph.h"
+
+namespace luci_interpreter
+{
+
+#define REGISTER_KERNEL(builtin_operator, name)                        \
+  void configure_kernel_Circle##name(const circle::Operator *cur_op,   \
+                                     BaseRuntimeGraph *runtime_graph); \
+                                                                       \
+  void execute_kernel_Circle##name(const circle::Operator *cur_op,     \
+                                   BaseRuntimeGraph *runtime_graph, bool is_inplace);
+
+#if USE_GENERATED_LIST
+#include "GeneratedKernelsToBuild.lst"
+#else
+#include "KernelsToBuild.lst"
+#endif
+
+#undef REGISTER_KERNEL
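+
+// For example, a list entry such as REGISTER_KERNEL(ADD, Add) expands to the
+// declarations of configure_kernel_CircleAdd() and execute_kernel_CircleAdd();
+// shown only to illustrate the expansion, the actual entries come from the
+// included *.lst file.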
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_NODES_BUILDERS_H
diff --git a/onert-micro/luci-interpreter/src/kernels/CMakeLists.txt b/onert-micro/luci-interpreter/src/kernels/CMakeLists.txt
new file mode 100644 (file)
index 0000000..8e5c02c
--- /dev/null
@@ -0,0 +1,40 @@
+set(SOURCES
+        BinaryOpCommon.h
+        Utils.h
+        Utils.cpp
+        Builders.h
+        KernelBuilder.h
+        KernelBuilder.cpp)
+
+macro(REGISTER_KERNEL OPERATOR, NODE)
+  list(APPEND SOURCES "${NODE}.cpp")
+endmacro(REGISTER_KERNEL)
+
+include(${KERNEL_REGISTER_FILE})
+
+add_library(${LUCI_INTERPRETER_KERNELS} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+  set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
+
+target_link_libraries(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_CORE})
+
+add_pal_to_target(${LUCI_INTERPRETER_KERNELS})
+
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+macro(REGISTER_KERNEL OPERATOR, NODE)
+  list(APPEND TEST_SOURCES "${NODE}.test.cpp")
+endmacro(REGISTER_KERNEL)
+
+include(${KERNEL_REGISTER_FILE})
+
+list(APPEND TEST_SOURCES TestUtils.h TestUtils.cpp)
+
+GTest_AddTest(${LUCI_INTERPRETER_KERNELS}_test ${TEST_SOURCES})
+target_link_libraries(${LUCI_INTERPRETER_KERNELS}_test ${LUCI_INTERPRETER_KERNELS})
diff --git a/onert-micro/luci-interpreter/src/kernels/Cast.cpp b/onert-micro/luci-interpreter/src/kernels/Cast.cpp
new file mode 100644 (file)
index 0000000..82b187a
--- /dev/null
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Cast.h"
+#include "kernels/Utils.h"
+
+namespace
+{
+
+using namespace luci_interpreter;
+using namespace luci_interpreter::kernels;
+
+template <typename InT, typename OutT>
+void cast_data(const InT *in_data, OutT *out_data, uint32_t elements_count)
+{
+  std::transform(in_data, in_data + elements_count, out_data,
+                 [](InT a) { return static_cast<OutT>(a); });
+}
+
+template <typename InT> void cast_from_pointer_to_tensor(const InT *in_data, Tensor *out_tensor)
+{
+  auto const out_type = out_tensor->element_type();
+  auto const elements_count = out_tensor->shape().num_elements();
+
+  switch (out_type)
+  {
+    case DataType::U8:
+      cast_data(in_data, getTensorData<uint8_t>(out_tensor), elements_count);
+      break;
+    case DataType::U16:
+      cast_data(in_data, getTensorData<uint16_t>(out_tensor), elements_count);
+      break;
+    case DataType::U32:
+      cast_data(in_data, getTensorData<uint32_t>(out_tensor), elements_count);
+      break;
+    case DataType::U64:
+      cast_data(in_data, getTensorData<uint64_t>(out_tensor), elements_count);
+      break;
+    case DataType::S8:
+      cast_data(in_data, getTensorData<int8_t>(out_tensor), elements_count);
+      break;
+    case DataType::S16:
+      cast_data(in_data, getTensorData<int16_t>(out_tensor), elements_count);
+      break;
+    case DataType::S32:
+      cast_data(in_data, getTensorData<int32_t>(out_tensor), elements_count);
+      break;
+    case DataType::S64:
+      cast_data(in_data, getTensorData<int64_t>(out_tensor), elements_count);
+      break;
+    case DataType::FLOAT32:
+      cast_data(in_data, getTensorData<float>(out_tensor), elements_count);
+      break;
+    case DataType::BOOL:
+      cast_data(in_data, getTensorData<bool>(out_tensor), elements_count);
+      break;
+    default:
+      assert(false && "Unsupported output type.");
+  }
+}
+
+void cast_from_tensor_to_tensor(const Tensor *in_tensor, Tensor *out_tensor)
+{
+  auto in_type = in_tensor->element_type();
+
+  switch (in_type)
+  {
+    case DataType::U8:
+      cast_from_pointer_to_tensor(getTensorData<uint8_t>(in_tensor), out_tensor);
+      break;
+    case DataType::U16:
+      cast_from_pointer_to_tensor(getTensorData<uint16_t>(in_tensor), out_tensor);
+      break;
+    case DataType::U32:
+      cast_from_pointer_to_tensor(getTensorData<uint32_t>(in_tensor), out_tensor);
+      break;
+    case DataType::U64:
+      cast_from_pointer_to_tensor(getTensorData<uint64_t>(in_tensor), out_tensor);
+      break;
+    case DataType::S8:
+      cast_from_pointer_to_tensor(getTensorData<int8_t>(in_tensor), out_tensor);
+      break;
+    case DataType::S16:
+      cast_from_pointer_to_tensor(getTensorData<int16_t>(in_tensor), out_tensor);
+      break;
+    case DataType::S32:
+      cast_from_pointer_to_tensor(getTensorData<int32_t>(in_tensor), out_tensor);
+      break;
+    case DataType::S64:
+      cast_from_pointer_to_tensor(getTensorData<int64_t>(in_tensor), out_tensor);
+      break;
+    case DataType::FLOAT32:
+      cast_from_pointer_to_tensor(getTensorData<float>(in_tensor), out_tensor);
+      break;
+    case DataType::BOOL:
+      cast_from_pointer_to_tensor(getTensorData<bool>(in_tensor), out_tensor);
+      break;
+    default:
+      assert(false && "Unsupported input type.");
+  }
+}
+
+} // namespace
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Cast::Cast(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Cast::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() != DataType::Unknown);
+  LUCI_INTERPRETER_CHECK(output()->element_type() != DataType::Unknown);
+
+  const Shape &shape = input()->shape();
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(shape);
+}
+
+void Cast::execute() const
+{
+  assert(input()->shape().num_elements() == output()->shape().num_elements());
+
+  cast_from_tensor_to_tensor(input(), output());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Concatenation.cpp b/onert-micro/luci-interpreter/src/kernels/Concatenation.cpp
new file mode 100644 (file)
index 0000000..a9eff09
--- /dev/null
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/concatenation.h>
+
+namespace luci_interpreter
+{
+
+namespace
+{
+
+template <typename T>
+void evalGeneric(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph, bool)
+{
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(output_index != -1);
+
+  auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  const auto *options = cur_op->builtin_options_as_ConcatenationOptions();
+
+  int axis = options->axis();
+  if (axis < 0)
+    axis += Tensor::num_dims(output);
+
+  const auto input_sizes = cur_op->inputs()->size();
+
+  std::vector<const T *> all_input_data;
+  std::vector<tflite::RuntimeShape> all_shape;
+  std::vector<tflite::RuntimeShape *> all_shape_ptr;
+
+  all_input_data.reserve(input_sizes);
+  all_shape.reserve(input_sizes);
+  all_shape_ptr.reserve(input_sizes);
+
+  for (int32_t i = 0; i < input_sizes; ++i)
+  {
+    auto input_index = cur_op->inputs()->operator[](i);
+    const auto *tensor = runtime_graph->getCircleTensorByIndex(input_index);
+
+    auto *data = reinterpret_cast<const T *>(runtime_graph->getDataByTensor(tensor));
+
+    all_input_data.push_back(data);
+    all_shape.push_back(kernels::getTensorShape(tensor));
+  }
+
+  for (tflite::RuntimeShape &shape : all_shape)
+  {
+    all_shape_ptr.push_back(&shape);
+  }
+
+  auto *output_data = reinterpret_cast<T *>(runtime_graph->getDataByTensor(output));
+
+  // kernels::VectorOfTensors<T, true> inputs(_inputs);
+  tflite::ConcatenationParams params{};
+  params.axis = axis;
+  params.inputs_count = input_sizes;
+  tflite::reference_ops::Concatenation(params, all_shape_ptr.data(), all_input_data.data(),
+                                       kernels::getTensorShape(output), output_data);
+}
+
+} // namespace
+
+void configure_kernel_CircleConcatenation(const circle::Operator *cur_op,
+                                          BaseRuntimeGraph *runtime_graph)
+{
+  const int num_inputs = cur_op->inputs()->size();
+  LUCI_INTERPRETER_CHECK(num_inputs > 0);
+
+  auto input_index = cur_op->inputs()->operator[](0);
+  auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(output_index != -1);
+
+  const auto *t0 = runtime_graph->getCircleTensorByIndex(input_index);
+  const auto *output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  const auto *params = cur_op->builtin_options_as_ConcatenationOptions();
+
+  // TODO: Support concat with fused activation function
+  LUCI_INTERPRETER_CHECK(luci_actfunc(params->fused_activation_function()) == FusedActFunc::NONE);
+
+  int axis = params->axis();
+  if (axis < 0)
+    axis += Tensor::num_dims(t0);
+  LUCI_INTERPRETER_CHECK(axis >= 0 && axis < Tensor::num_dims(t0));
+
+  int32_t sum_axis = Tensor::dim(t0, axis);
+  for (int i = 1; i < num_inputs; ++i)
+  {
+    input_index = cur_op->inputs()->operator[](i);
+    const auto *tensor = runtime_graph->getCircleTensorByIndex(input_index);
+    LUCI_INTERPRETER_CHECK(Tensor::element_type(tensor) == Tensor::element_type(t0));
+    LUCI_INTERPRETER_CHECK(Tensor::num_dims(tensor) == Tensor::num_dims(t0));
+    for (int d = 0; d < Tensor::num_dims(t0); ++d)
+    {
+      if (d == axis)
+      {
+        sum_axis += Tensor::dim(tensor, axis);
+      }
+      else
+      {
+        LUCI_INTERPRETER_CHECK(Tensor::dim(tensor, d) == Tensor::dim(t0, d));
+      }
+    }
+  }
+
+#ifndef DIS_QUANT
+  // If the input tensors are of INT8 type, then the quantization parameters of all input tensors
+  // and the output should be the same
+  for (int i = 1; i < num_inputs; ++i)
+  {
+    input_index = cur_op->inputs()->operator[](i);
+    const auto *tensor = runtime_graph->getCircleTensorByIndex(input_index);
+    if (Tensor::element_type(tensor) == DataType::S8)
+    {
+      LUCI_INTERPRETER_CHECK(Tensor::quantized_dimension(tensor) ==
+                             Tensor::quantized_dimension(output));
+
+      LUCI_INTERPRETER_CHECK(Tensor::zero_points(tensor).size() == Tensor::scales(tensor).size());
+      LUCI_INTERPRETER_CHECK(Tensor::zero_points(tensor) == Tensor::zero_points(output));
+      LUCI_INTERPRETER_CHECK(Tensor::scales(tensor) == Tensor::scales(output));
+    }
+  }
+#endif // DIS_QUANT
+}
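+
+// Axis sketch (illustrative): for two 2-D inputs of shapes [2, 3] and [2, 5]
+// with axis = -1, the axis is normalized to 1 and sum_axis accumulates to
+// 3 + 5 = 8, i.e. the concatenated result has shape [2, 8].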
+
+void execute_kernel_CircleConcatenation(const circle::Operator *cur_op,
+                                        BaseRuntimeGraph *runtime_graph, bool is_inplace)
+{
+  int num_inputs = cur_op->inputs()->size();
+  LUCI_INTERPRETER_CHECK(num_inputs > 0);
+
+  const auto input_index = cur_op->inputs()->operator[](0);
+  assert(input_index != -1);
+  const auto *t0 = runtime_graph->getCircleTensorByIndex(input_index);
+
+  switch (Tensor::element_type(t0))
+  {
+#ifndef DIS_FLOAT
+    case DataType::FLOAT32:
+      evalGeneric<float>(cur_op, runtime_graph, is_inplace);
+      break;
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+    case DataType::S8:
+      evalGeneric<int8_t>(cur_op, runtime_graph, is_inplace);
+      break;
+    case DataType::S32:
+      evalGeneric<int32_t>(cur_op, runtime_graph, is_inplace);
+      break;
+    case DataType::S64:
+      evalGeneric<int64_t>(cur_op, runtime_graph, is_inplace);
+      break;
+#endif
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Concatenation.test.cpp b/onert-micro/luci-interpreter/src/kernels/Concatenation.test.cpp
new file mode 100644 (file)
index 0000000..441ecfe
--- /dev/null
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// TODO enable it
+#if 0
+#include "kernels/Concatenation.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ConcatenationTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ConcatenationTest, Float)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  // Try different 'axis' and expect different results.
+  {
+    params.axis = 0;
+    params.activation = luci::FusedActFunc::NONE;
+
+    Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+    kernel.configure();
+    for (auto t : kernel.getOutputTensors())
+    {
+      _memory_manager->allocate_memory(*t);
+    }
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorData<float>(output_tensor),
+                FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}));
+  }
+  {
+    params.axis = -2; // Same as '0'.
+    params.activation = luci::FusedActFunc::NONE;
+
+    Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+    kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorData<float>(output_tensor),
+                FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}));
+  }
+  {
+    params.axis = 1;
+    params.activation = luci::FusedActFunc::NONE;
+
+    Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+    kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorData<float>(output_tensor),
+                FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}));
+  }
+  {
+    params.axis = -1; // Same as '1'.
+    params.activation = luci::FusedActFunc::NONE;
+
+    Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+    kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorData<float>(output_tensor),
+                FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}));
+  }
+}
+
+TEST_F(ConcatenationTest, Input_Number_Check_NEG)
+{
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Invalid_Axis_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  params.axis = -3;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Mismatching_Input_Type_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<uint8_t> input2_data{7, 8, 9, 10, 11, 12};
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::U8>({2, 3}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 2, 3}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Mismatching_Input_Dimension_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<float> input2_data{7, 8, 9, 10, 11, 12, 13, 14, 15};
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>({3, 3}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Int8_Mismatching_Input_Type_NEG)
+{
+  std::vector<uint8_t> input1_data{1, 2, 3, 4};
+  std::vector<int8_t> input2_data{5, 6, 7, 8};
+  Tensor input1_tensor = makeInputTensor<DataType::U8>({2, 2}, input1_data, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 2}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S8);
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Int8_Mismatching_Input_Output_Quant_Params_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+  int quantized_dimension = 3;
+  std::vector<float> scales{0.1, 0.2, 0.3};
+  std::vector<int32_t> zero_points{1, -1, 1};
+
+  Tensor input1_tensor = makeInputTensor<DataType::S8>(
+    {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input1_data, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S8>(
+    {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S8, scales.at(0), zero_points.at(0));
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Int8_Mismatching_Zero_Point_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4};
+  std::vector<float> input2_data{5, 6, 7, 8};
+  float scale = 0.1;
+  int32_t zero_point_1 = 1;
+  int32_t zero_point_2 = -1;
+
+  Tensor input1_tensor =
+    makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_1, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_2, input2_data, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(DataType::S8, scale, zero_point_1);
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// TODO: Remove this test when concat w/ fused_activation is supported
+TEST_F(ConcatenationTest, With_Fused_Activation_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  params.axis = 1;
+  params.activation = luci::FusedActFunc::RELU;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
+#endif
diff --git a/onert-micro/luci-interpreter/src/kernels/Conv2D.cpp b/onert-micro/luci-interpreter/src/kernels/Conv2D.cpp
new file mode 100644 (file)
index 0000000..7045249
--- /dev/null
@@ -0,0 +1,524 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include "PALConv2d.h"
+
+namespace luci_interpreter
+{
+
+namespace
+{
+
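+// Helpers that derive the explicit top/left padding for the configured padding
+// mode: the output size is computed first (via computeOutputSize) and the
+// padding needed to produce it is then derived from stride, dilation and
+// kernel size.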
+int32_t compute_padding_h(const circle::Tensor *input, const circle::Tensor *filter,
+                          const circle::Conv2DOptions *options)
+{
+  const int32_t input_height = Tensor::dim(input, 1);
+  const int32_t filter_height = Tensor::dim(filter, 1);
+  const int32_t output_height =
+    kernels::computeOutputSize(luci_padding(options->padding()), input_height, filter_height,
+                               options->stride_h(), options->dilation_h_factor());
+
+  const auto padding_height = kernels::computePadding(
+    options->stride_h(), options->dilation_h_factor(), input_height, filter_height, output_height);
+  return padding_height;
+}
+
+int32_t compute_padding_w(const circle::Tensor *input, const circle::Tensor *filter,
+                          const circle::Conv2DOptions *options)
+{
+  const int32_t input_width = Tensor::dim(input, 2);
+  const int32_t filter_width = Tensor::dim(filter, 2);
+  const int32_t output_width =
+    kernels::computeOutputSize(luci_padding(options->padding()), input_width, filter_width,
+                               options->stride_w(), options->dilation_w_factor());
+
+  const auto padding_width = kernels::computePadding(
+    options->stride_w(), options->dilation_w_factor(), input_width, filter_width, output_width);
+
+  return padding_width;
+}
+
+#ifndef DIS_FLOAT
+
+void evalFloat(const circle::Tensor *input, const circle::Tensor *filter,
+               const circle::Tensor *bias, const circle::Tensor *output,
+               const circle::Conv2DOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  float activation_min{};
+  float activation_max{};
+  kernels::calculateActivationRange(luci_actfunc(options->fused_activation_function()),
+                                    &activation_min, &activation_max);
+
+  tflite::ConvParams params{};
+  params.padding_values.height = compute_padding_h(input, filter, options);
+  params.padding_values.width = compute_padding_w(input, filter, options);
+  params.stride_height = options->stride_h();
+  params.stride_width = options->stride_w();
+  params.dilation_height_factor = options->dilation_h_factor();
+  params.dilation_width_factor = options->dilation_w_factor();
+  params.float_activation_min = activation_min;
+  params.float_activation_max = activation_max;
+
+  auto *input_data = runtime_graph->getDataByTensor(input);
+  auto *output_data = runtime_graph->getDataByTensor(output);
+
+  auto *filter_data = runtime_graph->getConstDataByTensor(filter);
+  auto *bias_data = runtime_graph->getConstDataByTensor(bias);
+
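+  // The final (shape, data) arguments are an optional scratchpad buffer;
+  // the float path leaves it unused (empty shape, nullptr).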
+  luci_interpreter_pal::Conv(
+    params, kernels::getTensorShape(input), kernels::getTensorData<float>(input_data),
+    kernels::getTensorShape(filter), kernels::getTensorData<float>(filter_data),
+    kernels::getTensorShape(bias), kernels::getTensorData<float>(bias_data),
+    kernels::getTensorShape(output), kernels::getTensorData<float>(output_data),
+    kernels::getTensorShape(nullptr), nullptr);
+}
+
+#endif // DIS_FLOAT
+
+#ifndef DIS_QUANT
+
+void evalQuantized(const circle::Tensor *input, const circle::Tensor *filter,
+                   const circle::Tensor *bias, const circle::Tensor *output,
+                   const circle::Conv2DOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  const auto input_scale = static_cast<double>(Tensor::scale(input));
+  const auto filter_scale = static_cast<double>(Tensor::scale(filter));
+  const auto output_scale = static_cast<double>(Tensor::scale(output));
+
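+  // Effective rescaling factor that maps the int32 accumulator (in units of
+  // input_scale * filter_scale) into the output scale; it is then encoded as
+  // a fixed-point multiplier plus shift for integer-only arithmetic.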
+  const double real_multiplier = input_scale * filter_scale / output_scale;
+  int32_t output_multiplier{};
+  int output_shift{};
+  kernels::quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
+                                             output, &activation_min, &activation_max);
+
+  tflite::ConvParams params{};
+  params.padding_values.height = compute_padding_h(input, filter, options);
+  params.padding_values.width = compute_padding_w(input, filter, options);
+  params.stride_height = options->stride_h();
+  params.stride_width = options->stride_w();
+  params.dilation_height_factor = options->dilation_h_factor();
+  params.dilation_width_factor = options->dilation_w_factor();
+  // The kernel expects input and filter zero points to be negated.
+  params.input_offset = -Tensor::zero_point(input);    // Note the '-'.
+  params.weights_offset = -Tensor::zero_point(filter); // Note the '-'.
+  params.output_offset = Tensor::zero_point(output);
+  params.output_multiplier = output_multiplier;
+  params.output_shift = output_shift;
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  auto *input_data = runtime_graph->getDataByTensor(input);
+  auto *output_data = runtime_graph->getDataByTensor(output);
+
+  auto *filter_data = runtime_graph->getConstDataByTensor(filter);
+  auto *bias_data = runtime_graph->getConstDataByTensor(bias);
+
+  luci_interpreter_pal::Conv(
+    params, kernels::getTensorShape(input), kernels::getTensorData<uint8_t>(input_data),
+    kernels::getTensorShape(filter), kernels::getTensorData<uint8_t>(filter_data),
+    kernels::getTensorShape(bias), kernels::getTensorData<int32_t>(bias_data),
+    kernels::getTensorShape(output), kernels::getTensorData<uint8_t>(output_data),
+    kernels::getTensorShape(nullptr), nullptr);
+}
+
+void evalQuantizedPerChannel(const circle::Tensor *input, const circle::Tensor *filter,
+                             const circle::Tensor *bias, const circle::Tensor *output,
+                             const circle::Conv2DOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  auto *raw_input_data = runtime_graph->getDataByTensor(input);
+  auto *raw_output_data = runtime_graph->getDataByTensor(output);
+
+  auto *raw_filter_data = runtime_graph->getConstDataByTensor(filter);
+  auto *raw_bias_data = runtime_graph->getConstDataByTensor(bias);
+
+  const auto *input_data = kernels::getTensorData<uint8_t>(raw_input_data);
+  const auto *filter_data = kernels::getTensorData<uint8_t>(raw_filter_data);
+  const auto *bias_data = kernels::getTensorData<int32_t>(raw_bias_data);
+  auto *output_data = kernels::getTensorData<uint8_t>(raw_output_data);
+
+  const int32_t batches = Tensor::dim(input, 0);
+  const int32_t input_height = Tensor::dim(input, 1);
+  const int32_t input_width = Tensor::dim(input, 2);
+  const int32_t input_depth = Tensor::dim(input, 3);
+  const int32_t output_depth = Tensor::dim(filter, 0);
+  const int32_t filter_height = Tensor::dim(filter, 1);
+  const int32_t filter_width = Tensor::dim(filter, 2);
+  const int32_t output_height = Tensor::dim(output, 1);
+  const int32_t output_width = Tensor::dim(output, 2);
+
+  const int32_t stride_height = options->stride_h();
+  const int32_t stride_width = options->stride_w();
+  const int32_t dilation_height_factor = options->dilation_h_factor();
+  const int32_t dilation_width_factor = options->dilation_w_factor();
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
+                                             output, &activation_min, &activation_max);
+
+  const std::vector<double> effective_output_scale = kernels::getQuantizedConvolutionMultiplers(
+    Tensor::scale(input), Tensor::scales(filter), Tensor::scale(output));
+
+  const std::vector<kernels::ChannelQuantMultipliers> multipliers_raw =
+    kernels::quantizeMultipliers(effective_output_scale);
+  kernels::BroadcastableWrapper<kernels::ChannelQuantMultipliers> quant_multipliers(
+    multipliers_raw);
+
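+  // Reference per-channel convolution: accumulate (input - input_zp) *
+  // (filter - filter_zp[channel]) over the receptive field, add the bias,
+  // rescale with the channel's multiplier/shift, add the output zero point,
+  // and clamp to the fused-activation range.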
+  for (int32_t batch = 0; batch < batches; ++batch)
+  {
+    for (int32_t out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (int32_t out_x = 0; out_x < output_width; ++out_x)
+      {
+        for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+        {
+          const int32_t in_y_origin =
+            out_y * stride_height - compute_padding_h(input, filter, options);
+          const int32_t in_x_origin =
+            out_x * stride_width - compute_padding_w(input, filter, options);
+          int32_t acc = 0;
+          for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+          {
+            for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+            {
+              const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+              const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+              if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+              {
+                for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+                {
+                  const uint8_t input_val =
+                    input_data[kernels::calcOffset(input, batch, in_y, in_x, in_c)];
+                  const uint8_t filter_val =
+                    filter_data[kernels::calcOffset(filter, out_c, filter_y, filter_x, in_c)];
+                  acc += static_cast<int32_t>(input_val - Tensor::zero_point(input)) *
+                         static_cast<int32_t>(filter_val - Tensor::zero_points(filter)[out_c]);
+                }
+              }
+            }
+          }
+          if (bias_data)
+          {
+            acc += bias_data[out_c];
+          }
+
+          int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+            acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift);
+
+          scaled_acc += Tensor::zero_point(output);
+          scaled_acc = std::max(scaled_acc, activation_min);
+          scaled_acc = std::min(scaled_acc, activation_max);
+          output_data[kernels::calcOffset(output, batch, out_y, out_x, out_c)] = scaled_acc;
+        }
+      }
+    }
+  }
+}
+
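+// S8 per-channel path: per-output-channel multipliers and shifts are computed
+// here and the convolution itself is delegated to
+// luci_interpreter_pal::ConvPerChannel.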
+void evalQuantizedS8PerChannel(const circle::Tensor *input, const circle::Tensor *filter,
+                               const circle::Tensor *bias, const circle::Tensor *output,
+                               const circle::Conv2DOptions *options,
+                               BaseRuntimeGraph *runtime_graph)
+{
+  int32_t activation_min{};
+  int32_t activation_max{};
+  kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
+                                             output, &activation_min, &activation_max);
+
+  tflite::ConvParams params{};
+  params.padding_values.height = compute_padding_h(input, filter, options);
+  params.padding_values.width = compute_padding_w(input, filter, options);
+  params.stride_height = options->stride_h();
+  params.stride_width = options->stride_w();
+  params.dilation_height_factor = options->dilation_h_factor();
+  params.dilation_width_factor = options->dilation_w_factor();
+  // The kernel expects the input zero point to be negated.
+  params.input_offset = -Tensor::zero_point(input); // Note the '-'.
+  params.weights_offset = 0;                        // Unused in tflite code
+  params.output_offset = Tensor::zero_point(output);
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  const std::vector<double> effective_output_scales = kernels::getQuantizedConvolutionMultiplers(
+    Tensor::scale(input), Tensor::scales(filter), Tensor::scale(output));
+
+  std::vector<kernels::ChannelQuantMultipliers> quant_multipliers =
+    kernels::quantizeMultipliers(effective_output_scales);
+
+  std::vector<int32_t> shifts;
+  std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
+                 [](kernels::ChannelQuantMultipliers cm) { return cm.shift; });
+  std::vector<int32_t> multipliers;
+  std::transform(quant_multipliers.begin(), quant_multipliers.end(),
+                 std::back_inserter(multipliers),
+                 [](kernels::ChannelQuantMultipliers cm) { return cm.multiplier; });
+
+  auto *input_data = runtime_graph->getDataByTensor(input);
+  auto *output_data = runtime_graph->getDataByTensor(output);
+
+  auto *filter_data = runtime_graph->getConstDataByTensor(filter);
+  auto *bias_data = runtime_graph->getConstDataByTensor(bias);
+
+  luci_interpreter_pal::ConvPerChannel(
+    params, multipliers.data(), shifts.data(), kernels::getTensorShape(input),
+    kernels::getTensorData<int8_t>(input_data), kernels::getTensorShape(filter),
+    kernels::getTensorData<int8_t>(filter_data), kernels::getTensorShape(bias),
+    kernels::getTensorData<int32_t>(bias_data), kernels::getTensorShape(output),
+    kernels::getTensorData<int8_t>(output_data), kernels::getTensorShape(nullptr), nullptr);
+}
+
+void evalQuantizedS16(const circle::Tensor *input, const circle::Tensor *filter,
+                      const circle::Tensor *bias, const circle::Tensor *output,
+                      const circle::Conv2DOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  auto *raw_input_data = runtime_graph->getDataByTensor(input);
+  auto *raw_output_data = runtime_graph->getDataByTensor(output);
+
+  auto *raw_filter_data = runtime_graph->getConstDataByTensor(filter);
+  auto *raw_bias_data = runtime_graph->getConstDataByTensor(bias);
+
+  const auto *input_data = kernels::getTensorData<int16_t>(raw_input_data);
+  const auto *filter_data = kernels::getTensorData<int16_t>(raw_filter_data);
+  const auto *bias_data = kernels::getTensorData<int64_t>(raw_bias_data);
+  auto *output_data = kernels::getTensorData<int16_t>(raw_output_data);
+
+  const int32_t batches = Tensor::dim(input, 0);
+  const int32_t input_height = Tensor::dim(input, 1);
+  const int32_t input_width = Tensor::dim(input, 2);
+  const int32_t input_depth = Tensor::dim(input, 3);
+  const int32_t output_depth = Tensor::dim(filter, 0);
+  const int32_t filter_height = Tensor::dim(filter, 1);
+  const int32_t filter_width = Tensor::dim(filter, 2);
+  const int32_t output_height = Tensor::dim(output, 1);
+  const int32_t output_width = Tensor::dim(output, 2);
+
+  const int32_t stride_height = options->stride_h();
+  const int32_t stride_width = options->stride_w();
+  const int32_t dilation_height_factor = options->dilation_h_factor();
+  const int32_t dilation_width_factor = options->dilation_w_factor();
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
+                                             output, &activation_min, &activation_max);
+
+  const std::vector<double> effective_output_scale = kernels::getQuantizedConvolutionMultiplers(
+    Tensor::scale(input), Tensor::scales(filter), Tensor::scale(output));
+
+  const std::vector<kernels::ChannelQuantMultipliers> multipliers_raw =
+    kernels::quantizeMultipliers(effective_output_scale);
+  kernels::BroadcastableWrapper<kernels::ChannelQuantMultipliers> multipliers(multipliers_raw);
+
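+  // Reference S16 convolution: the loop applies no zero-point offsets
+  // (S16 tensors are expected to be symmetrically quantized) and uses a
+  // 64-bit accumulator before rescaling and clamping to the activation range.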
+  for (int32_t batch = 0; batch < batches; ++batch)
+  {
+    for (int32_t out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (int32_t out_x = 0; out_x < output_width; ++out_x)
+      {
+        for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+        {
+          const int32_t in_y_origin =
+            out_y * stride_height - compute_padding_h(input, filter, options);
+          const int32_t in_x_origin =
+            out_x * stride_width - compute_padding_w(input, filter, options);
+          int64_t acc = 0;
+          for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+          {
+            for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+            {
+              const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+              const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+              if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+              {
+                for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+                {
+                  const int16_t input_val =
+                    input_data[kernels::calcOffset(input, batch, in_y, in_x, in_c)];
+                  const int16_t filter_val =
+                    filter_data[kernels::calcOffset(filter, out_c, filter_y, filter_x, in_c)];
+                  acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
+                }
+              }
+            }
+          }
+          if (bias_data)
+          {
+            acc += bias_data[out_c];
+          }
+
+          int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+            acc, multipliers[out_c].multiplier, multipliers[out_c].shift);
+
+          scaled_acc = std::max(scaled_acc, activation_min);
+          scaled_acc = std::min(scaled_acc, activation_max);
+
+          output_data[kernels::calcOffset(output, batch, out_y, out_x, out_c)] = scaled_acc;
+        }
+      }
+    }
+  }
+}
+#endif // DIS_QUANT
+
+} // namespace
+
+void configure_kernel_CircleConv2D(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto filter_index = cur_op->inputs()->operator[](1);
+  const auto bias_index = cur_op->inputs()->operator[](2);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(filter_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  const auto filter = runtime_graph->getCircleTensorByIndex(filter_index);
+  const auto bias = runtime_graph->getCircleTensorByIndex(bias_index);
+  const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  assert(input != nullptr);
+  assert(filter != nullptr);
+
+  auto filter_data = runtime_graph->getConstDataByTensor(filter);
+
+  assert(filter_data != nullptr);
+
+  const auto *options = cur_op->builtin_options_as_Conv2DOptions();
+
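+  // Validate the supported input/filter/bias type combinations: FLOAT32
+  // weights need a FLOAT32 bias, U8/S8 need an S32 bias (S8 additionally
+  // requires per-channel scales and zero filter zero points), and S16 needs
+  // an S64 bias.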
+  if (Tensor::element_type(input) == DataType::FLOAT32 &&
+      Tensor::element_type(filter) == DataType::FLOAT32)
+  {
+    LUCI_INTERPRETER_CHECK(bias == nullptr || Tensor::element_type(bias) == DataType::FLOAT32);
+  }
+#ifndef DIS_QUANT
+  else if (Tensor::element_type(input) == DataType::U8 &&
+           Tensor::element_type(filter) == DataType::U8)
+  {
+    LUCI_INTERPRETER_CHECK(bias == nullptr || Tensor::element_type(bias) == DataType::S32);
+  }
+  else if (Tensor::element_type(input) == DataType::S8 &&
+           Tensor::element_type(filter) == DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(bias == nullptr || Tensor::element_type(bias) == DataType::S32);
+    LUCI_INTERPRETER_CHECK(Tensor::num_dims(filter) == 4);
+    LUCI_INTERPRETER_CHECK(Tensor::scales(filter).size() ==
+                           static_cast<size_t>(Tensor::dim(filter, 0)));
+    for (auto zerop : Tensor::zero_points(filter))
+    {
+      LUCI_INTERPRETER_CHECK(zerop == 0);
+    }
+  }
+  else if (Tensor::element_type(input) == DataType::S16 &&
+           Tensor::element_type(filter) == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(bias == nullptr || Tensor::element_type(bias) == DataType::S64);
+  }
+#endif // DIS_QUANT
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+  LUCI_INTERPRETER_CHECK(Tensor::element_type(output) == Tensor::element_type(input));
+  LUCI_INTERPRETER_CHECK(Tensor::num_dims(input) == 4 && Tensor::num_dims(filter) == 4);
+
+  const int32_t output_depth = Tensor::dim(filter, 0);
+  LUCI_INTERPRETER_CHECK(Tensor::dim(filter, 3) == Tensor::dim(input, 3));
+
+  LUCI_INTERPRETER_CHECK(bias == nullptr ||
+                         (Tensor::num_dims(bias) == 1 && Tensor::dim(bias, 0) == output_depth));
+
+  switch (options->fused_activation_function())
+  {
+    case circle::ActivationFunctionType_NONE:
+    case circle::ActivationFunctionType_RELU:
+    case circle::ActivationFunctionType_RELU6:
+    case circle::ActivationFunctionType_RELU_N1_TO_1:
+      break;
+    default:
+      assert(false && "Unsupported fused activation");
+  }
+}
+
+void execute_kernel_CircleConv2D(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph,
+                                 bool)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto weight_index = cur_op->inputs()->operator[](1);
+  const auto bias_index = cur_op->inputs()->operator[](2);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(weight_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  const auto weights = runtime_graph->getCircleTensorByIndex(weight_index);
+  const auto bias = runtime_graph->getCircleTensorByIndex(bias_index);
+  const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  assert(input != nullptr);
+  assert(weights != nullptr);
+  assert(output != nullptr);
+
+  const auto *options = cur_op->builtin_options_as_Conv2DOptions();
+
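+  // Dispatch on the input element type; the U8 path additionally selects
+  // per-tensor or per-channel evaluation based on the number of filter scales.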
+  switch (Tensor::element_type(input))
+  {
+#ifndef DIS_FLOAT
+    case DataType::FLOAT32:
+      if (Tensor::element_type(weights) == DataType::FLOAT32)
+      {
+        evalFloat(input, weights, bias, output, options, runtime_graph);
+        break;
+      }
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+    case DataType::U8:
+      if (Tensor::scales(weights).size() == 1)
+      {
+        evalQuantized(input, weights, bias, output, options, runtime_graph);
+      }
+      else if (Tensor::scales(weights).size() > 1)
+      {
+        LUCI_INTERPRETER_CHECK(Tensor::num_dims(weights) == 4);
+        LUCI_INTERPRETER_CHECK(Tensor::scales(weights).size() ==
+                               static_cast<size_t>(Tensor::dim(weights, 0)));
+        evalQuantizedPerChannel(input, weights, bias, output, options, runtime_graph);
+      }
+      break;
+    case DataType::S8:
+      evalQuantizedS8PerChannel(input, weights, bias, output, options, runtime_graph);
+      break;
+    case DataType::S16:
+      evalQuantizedS16(input, weights, bias, output, options, runtime_graph);
+      break;
+#endif // DIS_QUANT
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Conv2D.test.cpp b/onert-micro/luci-interpreter/src/kernels/Conv2D.test.cpp
new file mode 100644 (file)
index 0000000..b23f9d4
--- /dev/null
@@ -0,0 +1,710 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO: enable this test
+#if 0
+#include "kernels/Conv2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class Conv2DTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(Conv2DTest, Float)
+{
+  Shape input_shape{1, 4, 3, 2};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{2};
+  std::vector<float> input_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_data{1, 2};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(im2col);
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{
+    11, 16, 7, 20, // row = 0
+    0,  40, 0, 44, // row = 1
+  };
+  std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(Conv2DTest, FloatPointwise)
+{
+  Shape input_shape{1, 2, 2, 2};
+  Shape filter_shape{2, 1, 1, 2};
+  Shape bias_shape{2};
+  std::vector<float> input_data{
+    1, 2, // row = 0, col = 0
+    3, 4, // row = 0, col = 1
+    5, 6, // row = 1, col = 0
+    7, 8, // row = 1, col = 1
+  };
+  std::vector<float> filter_data{
+    -1, 2, // out = 0
+    -3, 4, // out = 1
+  };
+  std::vector<float> bias_data{1, 2};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 1;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(im2col);
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{
+    4, 7,  6,  9,  // row = 0
+    8, 11, 10, 13, // row = 1
+  };
+  std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(Conv2DTest, FloatCheck)
+{
+  Shape input_shape{2, 2, 4, 1};
+  Shape filter_shape{3, 2, 2, 1};
+  Shape bias_shape{3};
+  std::vector<float> input_data{
+    // First batch
+    1, 1, 1, 1, // row = 1
+    2, 2, 2, 2, // row = 2
+    // Second batch
+    1, 2, 3, 4, // row = 1
+    1, 2, 3, 4, // row = 2
+  };
+  std::vector<float> filter_data{
+    1,  2,  3,  4, // first 2x2 filter
+    -1, 1,  -1, 1, // second 2x2 filter
+    -1, -1, 1,  1, // third 2x2 filter
+  };
+  std::vector<float> bias_data{1, 2, 3};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 2;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::NONE;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{
+    18, 2, 5, // first batch, left
+    18, 2, 5, // first batch, right
+    17, 4, 3, // second batch, left
+    37, 4, 3, // second batch, right
+  };
+  std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(Conv2DTest, Uint8)
+{
+  std::vector<float> input_data{
+    // First batch
+    1, 1, 1, 1, // row = 1
+    2, 2, 2, 2, // row = 2
+                // Second batch
+    1, 2, 3, 4, // row = 1
+    1, 2, 3, 4, // row = 2
+  };
+  std::vector<float> filter_data{
+    1,  2,  3,  4, // first 2x2 filter
+    -1, 1,  -1, 1, // second 2x2 filter
+    -1, -1, 1,  1, // third 2x2 filter
+  };
+  std::vector<float> bias_data{1, 2, 3};
+
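+  // quantizationParams<T>(min, max) is a TestUtils helper that derives a
+  // (scale, zero_point) pair covering the given float range for type T.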
+  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+                                  input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::U8>({3, 2, 2, 1}, input_quant_param.first, input_quant_param.second,
+                                  filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>(
+    {3}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::U8, Shape({}), {}, "");
+  Tensor output_tensor =
+    makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 2;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::NONE;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{
+    18, 2, 5, // first batch, left
+    18, 2, 5, // first batch, right
+    17, 4, 3, // second batch, left
+    37, 4, 3, // second batch, right
+  };
+  std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(Conv2DTest, Uint8_CWQ)
+{
+  const int output_channels = 3;
+  std::vector<float> input_data{
+    // First batch
+    1, 1, 1, 1, // row = 1
+    2, 2, 2, 2, // row = 2
+                // Second batch
+    1, 2, 3, 4, // row = 1
+    1, 2, 3, 4, // row = 2
+  };
+  std::vector<float> filter_data{
+    1,  2,  3,  4, // first 2x2 filter
+    -1, 1,  -1, 1, // second 2x2 filter
+    -1, -1, 1,  1, // third 2x2 filter
+  };
+  std::vector<float> bias_data{1, 2, 3};
+  Shape filter_shape{output_channels, 2, 2, 1};
+
+  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 4);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+
+  std::vector<std::pair<float, int32_t>> filter_quant_params;
+  filter_quant_params.push_back(quantizationParams<uint8_t>(0, 4));
+  filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1));
+  filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1));
+
+  std::vector<float> filter_scales;
+  std::vector<int32_t> filter_zerops;
+  for (auto iter : filter_quant_params)
+  {
+    filter_scales.push_back(iter.first);
+    filter_zerops.push_back(iter.second);
+  }
+
+  std::vector<float> bias_scales;
+  for (int i = 0; i < output_channels; ++i)
+    bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+  std::vector<int32_t> zerop(output_channels, 0);
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+                                  input_data, _memory_manager.get());
+  Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops,
+                                                       0, filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+                                                      bias_data, _memory_manager.get());
+  Tensor im2col(DataType::U8, Shape({}), {}, "");
+  Tensor output_tensor =
+    makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 2;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::NONE;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{
+    18, 2, 5, // first batch, left
+    18, 2, 5, // first batch, right
+    17, 4, 3, // second batch, left
+    37, 4, 3, // second batch, right
+  };
+  std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(Conv2DTest, SInt8_CWQ)
+{
+  const int output_channels = 3;
+  std::vector<float> input_data{
+    // First batch
+    1, 1, 1, 1, // row = 1
+    2, 2, 2, 2, // row = 2
+                // Second batch
+    1, 2, 3, 4, // row = 1
+    1, 2, 3, 4, // row = 2
+  };
+  std::vector<float> filter_data{
+    1,  2,  3,  4, // first 2x2 filter
+    -1, 1,  -1, 1, // second 2x2 filter
+    -1, -1, 1,  1, // third 2x2 filter
+  };
+  std::vector<float> bias_data{1, 2, 3};
+  Shape filter_shape{output_channels, 2, 2, 1};
+
+  std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(0, 4);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+
+  std::vector<std::pair<float, int32_t>> filter_quant_params;
+  filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0));
+  filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0));
+  filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0));
+
+  std::vector<float> filter_scales;
+  std::vector<int32_t> filter_zerops;
+  for (auto iter : filter_quant_params)
+  {
+    filter_scales.push_back(iter.first);
+    filter_zerops.push_back(iter.second);
+  }
+
+  std::vector<float> bias_scales;
+  for (int i = 0; i < output_channels; ++i)
+    bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+  std::vector<int32_t> zerop(output_channels, 0);
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+                                  input_data, _memory_manager.get());
+  Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops,
+                                                       0, filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+                                                      bias_data, _memory_manager.get());
+  Tensor im2col(DataType::S8, Shape({}), {}, "");
+  Tensor output_tensor =
+    makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 2;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::NONE;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{
+    18, 2, 5, // first batch, left
+    18, 2, 5, // first batch, right
+    17, 4, 3, // second batch, left
+    37, 4, 3, // second batch, right
+  };
+  std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(Conv2DTest, SInt16)
+{
+  Shape input_shape{1, 4, 3, 2};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{2};
+  std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
+
+  std::vector<float> input_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_data{1, 2};
+  std::vector<float> ref_output_data{
+    11, 16, 7, 20, // row = 0
+    0,  40, 0, 44, // row = 1
+  };
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::S16, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(Conv2DTest, SInt16_CWQ_weights)
+{
+  Shape input_shape{1, 2, 2, 2};  // Batch x H x W x C
+  Shape filter_shape{3, 1, 1, 2}; // Out channels x H x W x In Channels
+  Shape bias_shape{3};
+  std::vector<int32_t> ref_output_shape{1, 2, 2, 3};
+
+  std::vector<float> input_data{
+    1, 2, // row = 0, col 0
+    3, 4, // row = 0, col 1
+    5, 6, // row = 1, col 0
+    7, 8, // row = 1, col 1
+  };
+  std::vector<float> filter_data{
+    4, -3, // out = 0
+    1, -3, // out = 1
+    5, -3, // out = 2
+  };
+  std::vector<float> bias_data{1, 10, 5};
+  std::vector<float> ref_output_data{
+    0, 5, 4,  // row 0, col 0
+    1, 1, 8,  // row 0, col 1
+    3, 0, 12, // row 1, col 0
+    5, 0, 16, // row 1, col 1
+  };
+
+  float input_scale = 0.25f;
+  float output_scale = 0.05f;
+  std::vector<float> filter_scales = {0.25f, 0.2f, 0.1f};
+  std::vector<float> bias_scales;
+  for (int i = 0; i < filter_scales.size(); ++i)
+    bias_scales.push_back(filter_scales[i] * input_scale);
+  std::vector<int32_t> zerop = {0, 0, 0};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get());
+  Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0,
+                                                        filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
+                                                      _memory_manager.get());
+  Tensor im2col(DataType::S16, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 1;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(Conv2DTest, Unsupported_Type_Configure_NEG)
+{
+  Shape input_shape{1, 4, 3, 2};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{2};
+  std::vector<int32_t> input_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_data{1, 2};
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Conv2DTest, Invalid_Bias_Type_NEG)
+{
+  Shape input_shape{1, 4, 3, 2};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{2};
+  std::vector<float> input_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<uint8_t> bias_data{1, 2};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Conv2DTest, Invalid_Bias_Data_NEG)
+{
+  Shape input_shape{1, 4, 3, 2};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{3};
+  std::vector<float> input_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_data{1, 2, 3};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Conv2DTest, Invalid_Input_Shape_NEG)
+{
+  Shape input_shape{1, 4, 6, 1};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{2};
+  std::vector<float> input_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_data{1, 2};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Conv2DTest, Invalid_fused_act_tanh_NEG)
+{
+  Shape input_shape{1, 4, 3, 2};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{2};
+  std::vector<float> input_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_data{1, 2};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::TANH;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
+#endif
diff --git a/onert-micro/luci-interpreter/src/kernels/DepthToSpace.cpp b/onert-micro/luci-interpreter/src/kernels/DepthToSpace.cpp
new file mode 100644 (file)
index 0000000..937958b
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthToSpace.h"
+#include "Utils.h"
+#include "PALDepthToSpace.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+DepthToSpace::DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params)
+  : KernelWithParams<DepthToSpaceParams>({input}, {output}, params)
+{
+}
+
+void DepthToSpace::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 ||
+                         output()->element_type() == DataType::U8);
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  const int block_size = params().block_size;
+  const int32_t input_height = input()->shape().dim(1);
+  const int32_t input_width = input()->shape().dim(2);
+  const int32_t input_channels = input()->shape().dim(3);
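+  // DepthToSpace rearranges data from the channel dimension into block_size x block_size
+  // spatial blocks: H and W grow by block_size while C shrinks by block_size^2.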
+  int32_t output_height = input_height * block_size;
+  int32_t output_width = input_width * block_size;
+  int32_t output_channels = input_channels / block_size / block_size;
+
+  LUCI_INTERPRETER_CHECK(input_height == output_height / block_size);
+  LUCI_INTERPRETER_CHECK(input_width == output_width / block_size);
+  LUCI_INTERPRETER_CHECK(input_channels == output_channels * block_size * block_size);
+
+  Shape output_shape(4);
+  output_shape.dim(0) = input()->shape().dim(0);
+  output_shape.dim(1) = output_height;
+  output_shape.dim(2) = output_width;
+  output_shape.dim(3) = output_channels;
+
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(output_shape);
+}
+
+void DepthToSpace::execute() const
+{
+  tflite::DepthToSpaceParams op_params;
+  op_params.block_size = params().block_size;
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
+                                         getTensorData<float>(input()), getTensorShape(output()),
+                                         getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
+                                         getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                         getTensorData<uint8_t>(output()));
+      break;
+    default:
+      assert(false && "Unsupported Type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp b/onert-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
new file mode 100644 (file)
index 0000000..201eaf3
--- /dev/null
@@ -0,0 +1,450 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/DepthwiseConv2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALDepthwiseConv2d.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias,
+                                 Tensor *output, Tensor *scratchpad,
+                                 const DepthwiseConv2DParams &params)
+  : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output, scratchpad}, params)
+{
+}
+
+void DepthwiseConv2D::configure()
+{
+  // TensorFlow Lite (as of v2.2.0) supports the following combinations of types:
+  //     | input filter bias  output |
+  // ----+---------------------------+
+  // (1) | float float  float float  |
+  // (2) | float int8   float float  | hybrid
+  // (3) | uint8 uint8  int32 uint8  | quantized
+  // (4) | int8  int8   int32 int8   | quantized per channel
+  // (5) | int16 int8   int64 int16  | quantized per channel 16x8
+  //
+  // We only support (1), (3) and (4) for now, and additionally the following:
+  //     | input filter bias  output |
+  // ----+---------------------------+
+  // (6) | int16 int16  int64 int16  |
+  //
+  if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
+  {
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
+  }
+  else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
+  {
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+  }
+  else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+    LUCI_INTERPRETER_CHECK(static_cast<uint32_t>(filter()->shape().dim(3)) ==
+                           filter()->scales().size());
+    for (auto zerop : filter()->zero_points())
+    {
+      LUCI_INTERPRETER_CHECK(zerop == 0);
+    }
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+  }
+  else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+  LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());
+
+  const Shape &input_shape = input()->shape();
+  const Shape &filter_shape = filter()->shape();
+  LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
+
+  const int32_t batches = input_shape.dim(0);
+  const int32_t input_height = input_shape.dim(1);
+  const int32_t input_width = input_shape.dim(2);
+  // Filter format: [1, H, W, O].
+  LUCI_INTERPRETER_CHECK(filter_shape.dim(0) == 1);
+  const int32_t filter_height = filter_shape.dim(1);
+  const int32_t filter_width = filter_shape.dim(2);
+  const int32_t channels_out = filter_shape.dim(3);
+
+  LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
+                                               bias()->shape().dim(0) == channels_out));
+
+  const int32_t output_height =
+    computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
+                      _params.dilation_height_factor);
+  const int32_t output_width =
+    computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
+                      _params.dilation_width_factor);
+
+  _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
+                                   input_height, filter_height, output_height);
+  _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width,
+                                  filter_width, output_width);
+
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize({batches, output_height, output_width, channels_out});
+
+  tflite::DepthwiseParams params{};
+
+  params.dilation_height_factor = _params.dilation_height_factor;
+  params.dilation_width_factor = _params.dilation_width_factor;
+
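+  // Let the PAL size the scratchpad tensor if the selected kernel needs a temporary buffer.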
+  auto scratchpad = getOutputTensors()[1];
+  luci_interpreter_pal::SetupScratchpadTensor(scratchpad, params, input()->element_type(),
+                                              getTensorShape(input()), getTensorShape(filter()),
+                                              getTensorShape(output()));
+}
+
+void DepthwiseConv2D::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      if (filter()->element_type() == DataType::FLOAT32)
+      {
+        evalFloat();
+        break;
+      }
+      assert(false && "Unsupported type.");
+    case DataType::U8:
+      if (filter()->scales().size() == 1)
+      {
+        evalQuantized();
+      }
+      else if (filter()->scales().size() > 1)
+      {
+        LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+        LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+                               static_cast<size_t>(filter()->shape().dim(3)));
+        evalQuantizedPerChannel();
+      }
+      break;
+    case DataType::S8:
+      evalQuantizedS8PerChannel();
+      break;
+    case DataType::S16:
+      evalQuantizedS16();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void DepthwiseConv2D::evalFloat() const
+{
+  float activation_min{};
+  float activation_max{};
+  calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+  tflite::DepthwiseParams params{};
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.dilation_height_factor = _params.dilation_height_factor;
+  params.dilation_width_factor = _params.dilation_width_factor;
+  params.depth_multiplier = _params.depth_multiplier;
+  params.float_activation_min = activation_min;
+  params.float_activation_max = activation_max;
+
+  tflite::reference_ops::DepthwiseConv(
+    params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
+    getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
+    getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void DepthwiseConv2D::evalQuantizedPerChannel() const
+{
+  const auto *input_data = getTensorData<uint8_t>(input());
+  const auto *filter_data = getTensorData<uint8_t>(filter());
+  const auto *bias_data = getTensorData<int32_t>(bias());
+  auto *output_data = getTensorData<uint8_t>(output());
+
+  const Shape &input_shape = input()->shape();
+  const Shape &filter_shape = filter()->shape();
+  const Shape &output_shape = output()->shape();
+
+  const int32_t batches = input_shape.dim(0);
+  const int32_t input_height = input_shape.dim(1);
+  const int32_t input_width = input_shape.dim(2);
+  const int32_t input_depth = input_shape.dim(3);
+  const int32_t filter_height = filter_shape.dim(1);
+  const int32_t filter_width = filter_shape.dim(2);
+  const int32_t output_height = output_shape.dim(1);
+  const int32_t output_width = output_shape.dim(2);
+
+  const int32_t stride_height = _params.stride_height;
+  const int32_t stride_width = _params.stride_width;
+  const int32_t dilation_height_factor = _params.dilation_height_factor;
+  const int32_t dilation_width_factor = _params.dilation_width_factor;
+  const int32_t depth_multiplier = _params.depth_multiplier;
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
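+  // Requantization: the effective per-channel scale is
+  // input_scale * filter_scale[c] / output_scale, decomposed below into a fixed-point
+  // multiplier and shift for integer-only arithmetic.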
+  const std::vector<double> effective_output_scales =
+    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+  std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
+    quantizeMultipliers(effective_output_scales);
+  BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
+
+  for (int batch = 0; batch < batches; ++batch)
+  {
+    for (int out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (int out_x = 0; out_x < output_width; ++out_x)
+      {
+        for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+        {
+          for (int m = 0; m < depth_multiplier; ++m)
+          {
+            const int output_channel = m + in_channel * depth_multiplier;
+            const int in_x_origin = (out_x * stride_width) - _padding_width;
+            const int in_y_origin = (out_y * stride_height) - _padding_height;
+            int32_t acc = 0;
+            for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+            {
+              for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+              {
+                const int in_x = in_x_origin + dilation_width_factor * filter_x;
+                const int in_y = in_y_origin + dilation_height_factor * filter_y;
+                // Zero padding by omitting the areas outside the image.
+                const bool is_point_inside_image =
+                  (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
+                if (is_point_inside_image)
+                {
+                  int32_t input_val =
+                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_channel)];
+                  int32_t filter_val =
+                    filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)];
+                  acc += (filter_val - filter()->zero_points()[output_channel]) *
+                         (input_val - input()->zero_point());
+                }
+              }
+            }
+            if (bias_data)
+            {
+              acc += bias_data[output_channel];
+            }
+            int32_t output_multiplier = quant_multipliers[output_channel].multiplier;
+            int output_shift = quant_multipliers[output_channel].shift;
+            int32_t scaled_acc =
+              tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+            scaled_acc += output()->zero_point();
+            scaled_acc = std::max(scaled_acc, activation_min);
+            scaled_acc = std::min(scaled_acc, activation_max);
+            output_data[calcOffset(output_shape, batch, out_y, out_x, output_channel)] =
+              static_cast<uint8_t>(scaled_acc);
+          }
+        }
+      }
+    }
+  }
+}
+
+void DepthwiseConv2D::evalQuantized() const
+{
+  const auto input_scale = static_cast<double>(input()->scale());
+  const auto filter_scale = static_cast<double>(filter()->scale());
+  const auto output_scale = static_cast<double>(output()->scale());
+
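+  // Per-tensor requantization: the real multiplier is converted by quantizeMultiplier()
+  // into a 32-bit fixed-point multiplier plus a power-of-two shift.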
+  const double real_multiplier = input_scale * filter_scale / output_scale;
+  int32_t output_multiplier{};
+  int output_shift{};
+  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::DepthwiseParams params{};
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.dilation_height_factor = _params.dilation_height_factor;
+  params.dilation_width_factor = _params.dilation_width_factor;
+  params.depth_multiplier = _params.depth_multiplier;
+  // The kernel expects input and filter zero points to be negated.
+  params.input_offset = -input()->zero_point();    // Note the '-'.
+  params.weights_offset = -filter()->zero_point(); // Note the '-'.
+  params.output_offset = output()->zero_point();
+  params.output_multiplier = output_multiplier;
+  params.output_shift = output_shift;
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  tflite::reference_ops::DepthwiseConv(
+    params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
+    getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+    getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+void DepthwiseConv2D::evalQuantizedS8PerChannel() const
+{
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::DepthwiseParams params{};
+
+  params.padding_type = tflite::PaddingType::kSame;
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.dilation_height_factor = _params.dilation_height_factor;
+  params.dilation_width_factor = _params.dilation_width_factor;
+  params.depth_multiplier = _params.depth_multiplier;
+  // The kernel expects input and filter zero points to be negated.
+  params.input_offset = -input()->zero_point(); // Note the '-'.
+  params.weights_offset = 0;
+  params.output_offset = output()->zero_point();
+  params.output_multiplier = 1; // unused in tflite code
+  params.output_shift = 0;      // unused in tflite code
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  const std::vector<double> effective_output_scales =
+    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+  std::vector<ChannelQuantMultipliers> quant_multipliers =
+    quantizeMultipliers(effective_output_scales);
+
+  std::vector<int32_t> shifts;
+  std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
+                 [](ChannelQuantMultipliers cm) { return cm.shift; });
+  std::vector<int32_t> multipliers;
+  std::transform(quant_multipliers.begin(), quant_multipliers.end(),
+                 std::back_inserter(multipliers),
+                 [](ChannelQuantMultipliers cm) { return cm.multiplier; });
+
+  auto scratchpad = getOutputTensors()[1];
+  int8_t *scratchpad_data = nullptr;
+  if (scratchpad->is_allocatable())
+    scratchpad_data = scratchpad->data<int8_t>();
+
+  luci_interpreter_pal::DepthwiseConvPerChannel<int8_t>(
+    params, multipliers.data(), shifts.data(), getTensorShape(input()),
+    getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
+    getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
+    getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
+}
+
+void DepthwiseConv2D::evalQuantizedS16() const
+{
+  const auto *input_data = getTensorData<int16_t>(input());
+  const auto *filter_data = getTensorData<int16_t>(filter());
+  const auto *bias_data = getTensorData<int64_t>(bias());
+  auto *output_data = getTensorData<int16_t>(output());
+
+  const Shape &input_shape = input()->shape();
+  const Shape &filter_shape = filter()->shape();
+  const Shape &output_shape = output()->shape();
+
+  const int32_t batches = input_shape.dim(0);
+  const int32_t input_height = input_shape.dim(1);
+  const int32_t input_width = input_shape.dim(2);
+  const int32_t input_depth = input_shape.dim(3);
+  const int32_t filter_height = filter_shape.dim(1);
+  const int32_t filter_width = filter_shape.dim(2);
+  const int32_t output_height = output_shape.dim(1);
+  const int32_t output_width = output_shape.dim(2);
+
+  const int32_t stride_height = _params.stride_height;
+  const int32_t stride_width = _params.stride_width;
+  const int32_t dilation_height_factor = _params.dilation_height_factor;
+  const int32_t dilation_width_factor = _params.dilation_width_factor;
+  const int32_t depth_multiplier = _params.depth_multiplier;
+
+  const std::vector<double> effective_output_scales =
+    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+  std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
+    quantizeMultipliers(effective_output_scales);
+
+  BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
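+  // BroadcastableWrapper lets a single per-tensor multiplier stand in for every channel
+  // when the filter carries only one scale.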
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  for (int32_t batch = 0; batch < batches; ++batch)
+  {
+    for (int32_t out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (int32_t out_x = 0; out_x < output_width; ++out_x)
+      {
+        for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+        {
+          for (int32_t m = 0; m < depth_multiplier; ++m)
+          {
+            const int32_t out_c = m + in_c * depth_multiplier;
+            const int32_t in_y_origin = out_y * stride_height - _padding_height;
+            const int32_t in_x_origin = out_x * stride_width - _padding_width;
+            int64_t acc = 0;
+            for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+            {
+              for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+              {
+                const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+                const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+                if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+                {
+                  const int16_t input_val =
+                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+                  const int16_t filter_val =
+                    filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, out_c)];
+                  acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
+                }
+              }
+            }
+            if (bias_data != nullptr)
+            {
+              acc += bias_data[out_c];
+            }
+
+            int32_t output_multiplier = quant_multipliers[out_c].multiplier;
+            int output_shift = quant_multipliers[out_c].shift;
+            int32_t scaled_acc =
+              tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+
+            scaled_acc = std::max(scaled_acc, activation_min);
+            scaled_acc = std::min(scaled_acc, activation_max);
+
+            output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+          }
+        }
+      }
+    }
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Dequantize.cpp b/onert-micro/luci-interpreter/src/kernels/Dequantize.cpp
new file mode 100644 (file)
index 0000000..a097342
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Dequantize.h"
+#include "kernels/Utils.h"
+#include "PALDequantize.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Dequantize::Dequantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Dequantize::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8 ||
+                         input()->element_type() == DataType::U8 ||
+                         input()->element_type() == DataType::S16);
+
+  LUCI_INTERPRETER_CHECK(input()->scales().size() == 1);
+
+  if (input()->element_type() == DataType::S16)
+    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
+
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+void Dequantize::execute() const
+{
+  tflite::DequantizationParams op_params;
+  op_params.zero_point = input()->zero_point();
+  op_params.scale = input()->scale();
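+  // Dequantization: real_value = scale * (quantized_value - zero_point).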
+
+  switch (input()->element_type())
+  {
+    case DataType::U8:
+    {
+      luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+                                       getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                       getTensorData<float>(output()));
+      break;
+    }
+    case DataType::S8:
+    {
+      luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+                                       getTensorData<int8_t>(input()), getTensorShape(output()),
+                                       getTensorData<float>(output()));
+      break;
+    }
+    case DataType::S16:
+    {
+      luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+                                       getTensorData<int16_t>(input()), getTensorShape(output()),
+                                       getTensorData<float>(output()));
+      break;
+    }
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Div.cpp b/onert-micro/luci-interpreter/src/kernels/Div.cpp
new file mode 100644 (file)
index 0000000..efa90f8
--- /dev/null
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Div.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/div.h>
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Div::Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams &params)
+  : KernelWithParams<DivParams>({input1, input2}, {output}, params)
+{
+}
+
+void Div::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Div::execute() const
+{
+  switch (input1()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void Div::evalFloat() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<float>(params, _params.activation);
+
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastDivSlow(
+      params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+      getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<float>(input1()),
+                               getTensorShape(input2()), getTensorData<float>(input2()),
+                               getTensorShape(output()), getTensorData<float>(output()));
+  }
+}
+
+template <typename T> void Div::evalInteger() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<T>(params, _params.activation);
+
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastDivSlow(
+      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<T>(input1()),
+                               getTensorShape(input2()), getTensorData<T>(input2()),
+                               getTensorShape(output()), getTensorData<T>(output()));
+  }
+}
+
+void Div::evalQuantized() const
+{
+  const auto input1_scale = static_cast<double>(input1()->scale());
+  const auto input2_scale = static_cast<double>(input2()->scale());
+  const auto output_scale = static_cast<double>(output()->scale());
+
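+  // real_out = real1 / real2, so the output requantization multiplier is
+  // input1_scale / (input2_scale * output_scale).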
+  const double real_output_multiplier = input1_scale / (input2_scale * output_scale);
+
+  int32_t output_multiplier{};
+  int output_shift{};
+
+  quantizeMultiplier(real_output_multiplier, &output_multiplier, &output_shift);
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::ArithmeticParams params{};
+
+  params.input1_offset = -input1()->zero_point(); // Note the '-'.
+  params.input2_offset = -input2()->zero_point(); // Note the '-'.
+  params.output_offset = output()->zero_point();
+  params.output_multiplier = output_multiplier;
+  params.output_shift = output_shift;
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastDivSlow(
+      params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+      getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
+                               getTensorShape(input2()), getTensorData<uint8_t>(input2()),
+                               getTensorShape(output()), getTensorData<uint8_t>(output()));
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Elu.cpp b/onert-micro/luci-interpreter/src/kernels/Elu.cpp
new file mode 100644 (file)
index 0000000..0f0df02
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Elu.h"
+#include "kernels/Utils.h"
+
+#include "PALElu.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Elu::Elu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Elu::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+void Elu::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      luci_interpreter_pal::Elu(getTensorShape(input()), getTensorData<float>(input()),
+                                getTensorShape(output()), getTensorData<float>(output()));
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Equal.cpp b/onert-micro/luci-interpreter/src/kernels/Equal.cpp
new file mode 100644 (file)
index 0000000..b7cf8e5
--- /dev/null
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Equal.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Equal::Equal(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void Equal::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+  if (x()->element_type() == DataType::U8)
+  {
+    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+  }
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void Equal::execute() const
+{
+  switch (x()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void Equal::evalFloat() const
+{
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowEqual(op_params, getTensorShape(x()), x_data,
+                                                getTensorShape(y()), y_data,
+                                                getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::Equal(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+                                 y_data, getTensorShape(output()), output_data);
+  }
+}
+
+template <typename T> void Equal::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                                         getTensorShape(y()), y_data,
+                                                         getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::EqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                          getTensorShape(y()), y_data, getTensorShape(output()),
+                                          output_data);
+  }
+}
+
+void Equal::evalQuantized() const
+{
+  const auto x_data = getTensorData<uint8_t>(x());
+  const auto y_data = getTensorData<uint8_t>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
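+  // Both inputs are left-shifted and rescaled with their own multipliers so the
+  // comparison happens on a common, higher-precision scale.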
+  op_params.left_shift = 8;
+  op_params.input1_offset = -x()->zero_point(); // Note the '-'
+  op_params.input1_shift = _x_shift;
+  op_params.input1_multiplier = _x_multiplier;
+  op_params.input2_offset = -y()->zero_point(); // Note the '-'
+  op_params.input2_shift = _y_shift;
+  op_params.input2_multiplier = _y_multiplier;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowEqualWithScaling(op_params, getTensorShape(x()), x_data,
+                                                           getTensorShape(y()), y_data,
+                                                           getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::EqualWithScaling(op_params, getTensorShape(x()), x_data,
+                                            getTensorShape(y()), y_data, getTensorShape(output()),
+                                            output_data);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Exp.cpp b/onert-micro/luci-interpreter/src/kernels/Exp.cpp
new file mode 100644 (file)
index 0000000..ceb54e7
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Exp.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/exp.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Exp::Exp(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Exp::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+void Exp::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void Exp::evalFloat() const
+{
+  const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+  tflite::reference_ops::Exp(getTensorData<float>(input()), size, getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/ExpandDims.cpp b/onert-micro/luci-interpreter/src/kernels/ExpandDims.cpp
new file mode 100644 (file)
index 0000000..6c7e20b
--- /dev/null
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+
+void configure_kernel_CircleExpandDims(const circle::Operator *cur_op,
+                                       BaseRuntimeGraph *runtime_graph)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto axis_index = cur_op->inputs()->operator[](1);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(axis_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  const auto axis = runtime_graph->getCircleTensorByIndex(axis_index);
+  auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  assert(input != nullptr);
+  assert(axis != nullptr);
+  assert(output != nullptr);
+
+  auto axis_data = runtime_graph->getConstDataByTensor(axis);
+
+  int32_t axis_value = 0;
+
+  switch (Tensor::element_type(axis))
+  {
+    case DataType::S32:
+      axis_value = *reinterpret_cast<int32_t *>(axis_data);
+      break;
+    case DataType::S64:
+      axis_value = static_cast<int32_t>(*reinterpret_cast<int64_t *>(axis_data));
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+
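+  // A negative axis counts from the end; after adjustment it must lie in [0, num_dims(input)].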
+  if (axis_value < 0)
+  {
+    axis_value += Tensor::num_dims(input) + 1;
+  }
+
+  LUCI_INTERPRETER_CHECK(axis_value <= Tensor::num_dims(input) and axis_value >= 0);
+}
+
+void execute_kernel_CircleExpandDims(const circle::Operator *cur_op,
+                                     BaseRuntimeGraph *runtime_graph, bool is_inplace)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  if (is_inplace)
+  {
+    runtime_graph->makeInplaceOperation(input, output);
+    return;
+  }
+
+  // Just copy input to output
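+  // (ExpandDims changes only the shape metadata, so the element data is copied verbatim.)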
+  const auto input_data = runtime_graph->getDataByTensor(input);
+  auto output_data = runtime_graph->getDataByTensor(output);
+
+  assert(input_data != nullptr);
+  assert(output_data != nullptr);
+
+  const size_t element_size = getDataTypeSize(Tensor::element_type(input));
+  const int32_t num_elements = Tensor::num_elements(input);
+  std::memcpy(output_data, input_data, num_elements * element_size);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp b/onert-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp
new file mode 100644 (file)
index 0000000..1ebbcd8
--- /dev/null
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO enable it
+#if 0
+#include "kernels/ExpandDims.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ExpandDimsTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ExpandDimsTest, PositiveAxis)
+{
+  std::vector<int32_t> input_data{-1, 1, -2, 2};
+  std::initializer_list<int32_t> input_shape = {2, 2};
+
+  std::initializer_list<int32_t> axis_value = {0};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2}));
+}
+
+TEST_F(ExpandDimsTest, NegAxis)
+{
+  std::vector<int32_t> input_data{-1, 1, -2, 2};
+  std::initializer_list<int32_t> input_shape = {2, 2};
+
+  std::initializer_list<int32_t> axis_value = {-1};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 1}));
+}
+
+TEST_F(ExpandDimsTest, InvalidAxisType_NEG)
+{
+  std::vector<int32_t> input_data{-1, 1, -2, 2};
+  std::initializer_list<int32_t> input_shape = {2, 2};
+
+  std::initializer_list<float> axis_value = {1.0};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::FLOAT32>({1}, axis_value, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ExpandDimsTest, InvalidAxisValue_NEG)
+{
+  std::vector<int32_t> input_data{-1, 1, -2, 2};
+  std::initializer_list<int32_t> input_shape = {2, 2};
+
+  std::initializer_list<int32_t> axis_value = {3};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
+#endif
diff --git a/onert-micro/luci-interpreter/src/kernels/Fill.cpp b/onert-micro/luci-interpreter/src/kernels/Fill.cpp
new file mode 100644 (file)
index 0000000..addab74
--- /dev/null
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Fill.h"
+#include "kernels/Utils.h"
+#include "PALFill.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Fill::Fill(const Tensor *dims, const Tensor *value, Tensor *output)
+  : Kernel({dims, value}, {output})
+{
+}
+
+template <typename T> void Fill::configureShape()
+{
+  const auto dims_data = getTensorData<T>(dims());
+  Shape output_shape(dims()->shape().dim(0));
+
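+  // Each element of the 1-D dims tensor gives the size of one output dimension.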
+  for (int i = 0; i < output_shape.num_dims(); ++i)
+  {
+    T data = dims_data[i];
+    if (data < 0)
+      assert(false && "Fill dimensions must be >= 0");
+
+    output_shape.dim(i) = data;
+  }
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(output_shape);
+}
+
+void Fill::configure()
+{
+  const auto dims_shape = dims()->shape();
+  const auto value_shape = value()->shape();
+
+  // Make sure the 1st input tensor is 1-D
+  LUCI_INTERPRETER_CHECK(dims_shape.num_dims() == 1);
+
+  // Make sure the 1st input tensor is int32 or int64
+  LUCI_INTERPRETER_CHECK(dims()->element_type() == DataType::S32 or
+                         dims()->element_type() == DataType::S64);
+
+  // Make sure the 2nd input tensor is a scalar
+  LUCI_INTERPRETER_CHECK(value_shape.num_dims() == 0);
+
+  // Check zero point and scale for S16 and S8
+  if (value()->element_type() == DataType::S16 or value()->element_type() == DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(value()->scale() == output()->scale());
+    LUCI_INTERPRETER_CHECK(value()->zero_point() == output()->zero_point());
+
+    if (value()->element_type() == DataType::S16)
+      LUCI_INTERPRETER_CHECK(value()->zero_point() == 0);
+  }
+  // Resize output
+  switch (dims()->element_type())
+  {
+    case DataType::S32:
+      configureShape<int32_t>();
+      break;
+    case DataType::S64:
+      configureShape<int64_t>();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void Fill::execute() const
+{
+  switch (output()->element_type())
+  {
+    case DataType::S8:
+      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int8_t>(value()),
+                                  getTensorShape(output()), getTensorData<int8_t>(output()));
+      break;
+    case DataType::S16:
+      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int16_t>(value()),
+                                  getTensorShape(output()), getTensorData<int16_t>(output()));
+      break;
+    case DataType::S32:
+      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int32_t>(value()),
+                                  getTensorShape(output()), getTensorData<int32_t>(output()));
+      break;
+    case DataType::S64:
+      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int64_t>(value()),
+                                  getTensorShape(output()), getTensorData<int64_t>(output()));
+      break;
+    case DataType::FLOAT32:
+      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<float>(value()),
+                                  getTensorShape(output()), getTensorData<float>(output()));
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Floor.cpp b/onert-micro/luci-interpreter/src/kernels/Floor.cpp
new file mode 100644 (file)
index 0000000..f7871b5
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Floor.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/floor.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Floor::Floor(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Floor::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+void Floor::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void Floor::evalFloat() const
+{
+  tflite::reference_ops::Floor(getTensorShape(input()), getTensorData<float>(input()),
+                               getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/FloorDiv.cpp b/onert-micro/luci-interpreter/src/kernels/FloorDiv.cpp
new file mode 100644 (file)
index 0000000..6a8631a
--- /dev/null
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FloorDiv.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+FloorDiv::FloorDiv(const Tensor *x, const Tensor *y, Tensor *output)
+  : Kernel({x, y}, {output})
+{
+}
+
+void FloorDiv::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == output()->element_type());
+  LUCI_INTERPRETER_CHECK(y()->element_type() == output()->element_type());
+
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void FloorDiv::execute() const
+{
+  switch (x()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void FloorDiv::evalFloat() const
+{
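+  // The division is performed in double precision before flooring to limit rounding error.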
+  auto FloorDivFunc = [](float x, float y) -> float {
+    return std::floor(static_cast<double>(x) / static_cast<double>(y));
+  };
+
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+
+  // Check the denominator
+  for (int i = 0; i < getTensorShape(y()).FlatSize(); ++i)
+  {
+    LUCI_INTERPRETER_CHECK(y_data[i] != 0);
+  }
+
+  if (x()->shape() != y()->shape())
+  {
+    tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
+      getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      getTensorData<float>(output()), FloorDivFunc);
+  }
+  else
+  {
+    tflite::reference_ops::BinaryFunction<float, float, float>(
+      getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      getTensorData<float>(output()), FloorDivFunc);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/FullyConnected.cpp b/onert-micro/luci-interpreter/src/kernels/FullyConnected.cpp
new file mode 100644 (file)
index 0000000..1f695af
--- /dev/null
@@ -0,0 +1,264 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include "PALFullyConnected.h"
+
+namespace luci_interpreter
+{
+
+namespace
+{
+void evalFloat(const circle::Tensor *input, const circle::Tensor *weights,
+               const circle::Tensor *bias, const circle::Tensor *output,
+               const circle::FullyConnectedOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  float activation_min{};
+  float activation_max{};
+  kernels::calculateActivationRange(luci_actfunc(options->fused_activation_function()),
+                                    &activation_min, &activation_max);
+
+  tflite::FullyConnectedParams params{};
+  params.float_activation_min = activation_min;
+  params.float_activation_max = activation_max;
+  params.weights_format = tflite::FullyConnectedWeightsFormat::kDefault;
+
+  auto *input_data = runtime_graph->getDataByTensor(input);
+  auto *output_data = runtime_graph->getDataByTensor(output);
+
+  auto *weights_data = runtime_graph->getConstDataByTensor(weights);
+  auto *bias_data = runtime_graph->getConstDataByTensor(bias);
+
+  assert(input_data != nullptr);
+  assert(weights_data != nullptr);
+  assert(output_data != nullptr);
+
+  tflite::reference_ops::FullyConnected(
+    params, kernels::getTensorShape(input), kernels::getTensorData<float>(input_data),
+    kernels::getTensorShape(weights), kernels::getTensorData<float>(weights_data),
+    kernels::getTensorShape(bias), kernels::getTensorData<float>(bias_data),
+    kernels::getTensorShape(output), kernels::getTensorData<float>(output_data));
+}
+
+#ifndef DIS_QUANT
+void evalQuantized(const circle::Tensor *input, const circle::Tensor *weights,
+                   const circle::Tensor *bias, const circle::Tensor *output,
+                   const circle::FullyConnectedOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  double real_multiplier = 0.0;
+  int output_shift;
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+  int32_t output_multiplier;
+  real_multiplier = kernels::getQuantizedConvolutionMultipler(
+    Tensor::scale(input), Tensor::scale(weights), Tensor::scale(output));
+  kernels::quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+  kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
+                                             output, &output_activation_min,
+                                             &output_activation_max);
+
+  int32_t input_offset = -Tensor::zero_point(input);
+  int32_t filter_offset = -Tensor::zero_point(weights);
+  int32_t output_offset = Tensor::zero_point(output);
+
+  tflite::FullyConnectedParams op_params{};
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.lhs_cacheable = false;
+  op_params.rhs_cacheable = false;
+
+  auto *input_data = runtime_graph->getDataByTensor(input);
+  auto *output_data = runtime_graph->getDataByTensor(output);
+
+  auto *weights_data = runtime_graph->getConstDataByTensor(weights);
+  auto *bias_data = runtime_graph->getConstDataByTensor(bias);
+
+  assert(input_data != nullptr);
+  assert(weights_data != nullptr);
+  assert(output_data != nullptr);
+
+  tflite::reference_ops::FullyConnected(
+    op_params, kernels::getTensorShape(input), kernels::getTensorData<uint8_t>(input_data),
+    kernels::getTensorShape(weights), kernels::getTensorData<uint8_t>(weights_data),
+    kernels::getTensorShape(bias), kernels::getTensorData<int32_t>(bias_data),
+    kernels::getTensorShape(output), kernels::getTensorData<uint8_t>(output_data));
+}
+
+void evalQuantizedS8(const circle::Tensor *input, const circle::Tensor *weights,
+                     const circle::Tensor *bias, const circle::Tensor *output,
+                     const circle::FullyConnectedOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  double real_multiplier = 0.0;
+  int output_shift;
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+  int32_t output_multiplier;
+  real_multiplier = kernels::getQuantizedConvolutionMultipler(
+    Tensor::scale(input), Tensor::scale(weights), Tensor::scale(output));
+  kernels::quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+  kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
+                                             output, &output_activation_min,
+                                             &output_activation_max);
+
+  int32_t input_offset = -Tensor::zero_point(input);
+  int32_t filter_offset = -Tensor::zero_point(weights);
+  int32_t output_offset = Tensor::zero_point(output);
+
+  tflite::FullyConnectedParams op_params{};
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.lhs_cacheable = false;
+  op_params.rhs_cacheable = false;
+
+  auto *input_data = runtime_graph->getDataByTensor(input);
+  auto *output_data = runtime_graph->getDataByTensor(output);
+
+  auto *weights_data = runtime_graph->getConstDataByTensor(weights);
+  auto *bias_data = runtime_graph->getConstDataByTensor(bias);
+
+  assert(input_data != nullptr);
+  assert(weights_data != nullptr);
+  assert(output_data != nullptr);
+
+  luci_interpreter_pal::FullyConnected<int8_t>(
+    op_params, kernels::getTensorShape(input), kernels::getTensorData<int8_t>(input_data),
+    kernels::getTensorShape(weights), kernels::getTensorData<int8_t>(weights_data),
+    kernels::getTensorShape(bias), kernels::getTensorData<int32_t>(bias_data),
+    kernels::getTensorShape(output), kernels::getTensorData<int8_t>(output_data));
+}
+#endif // DIS_QUANT
+
+} // namespace
+
+// TODO: think about how to remove the unused parameter
+void configure_kernel_CircleFullyConnected(const circle::Operator *cur_op,
+                                           BaseRuntimeGraph *runtime_graph)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto weight_index = cur_op->inputs()->operator[](1);
+  const auto bias_index = cur_op->inputs()->operator[](2);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(weight_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  const auto weights = runtime_graph->getCircleTensorByIndex(weight_index);
+  const auto bias = runtime_graph->getCircleTensorByIndex(bias_index);
+  const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  assert(input != nullptr);
+  assert(weights != nullptr);
+  assert(output != nullptr);
+
+#ifndef DIS_FLOAT
+  if (Tensor::element_type(weights) == DataType::FLOAT32)
+  {
+    LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(Tensor::element_type(output) == DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(!bias || Tensor::element_type(bias) == DataType::FLOAT32);
+  }
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+  else if (Tensor::element_type(weights) == DataType::U8)
+  {
+    LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == DataType::U8);
+    LUCI_INTERPRETER_CHECK(Tensor::element_type(output) == DataType::U8);
+    LUCI_INTERPRETER_CHECK(!bias || Tensor::element_type(bias) == DataType::S32);
+  }
+  else if (Tensor::element_type(weights) == DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == DataType::S8);
+    LUCI_INTERPRETER_CHECK(Tensor::element_type(output) == DataType::S8);
+    LUCI_INTERPRETER_CHECK(!bias || Tensor::element_type(bias) == DataType::S32);
+  }
+#endif // DIS_QUANT
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+
+  LUCI_INTERPRETER_CHECK(Tensor::num_dims(weights) == 2);
+  LUCI_INTERPRETER_CHECK(bias == nullptr || Tensor::num_elements(bias) == Tensor::dim(weights, 0));
+  LUCI_INTERPRETER_CHECK(Tensor::num_elements(input) % Tensor::dim(weights, 1) == 0);
+
+  if (bias)
+    LUCI_INTERPRETER_CHECK(Tensor::num_elements(bias) == Tensor::dim(weights, 0));
+
+  const auto *options = cur_op->builtin_options_as_FullyConnectedOptions();
+
+  // TODO: support keep_num_dims == true
+  assert(options->keep_num_dims() == false);
+}
+
+// TODO: think about how to remove the unused parameter
+void execute_kernel_CircleFullyConnected(const circle::Operator *cur_op,
+                                         BaseRuntimeGraph *runtime_graph, bool)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto weight_index = cur_op->inputs()->operator[](1);
+  const auto bias_index = cur_op->inputs()->operator[](2);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(weight_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  const auto weights = runtime_graph->getCircleTensorByIndex(weight_index);
+  const auto bias = runtime_graph->getCircleTensorByIndex(bias_index);
+  const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  assert(input != nullptr);
+  assert(weights != nullptr);
+  assert(output != nullptr);
+
+  const auto *options = cur_op->builtin_options_as_FullyConnectedOptions();
+
+  switch (Tensor::element_type(input))
+  {
+#ifndef DIS_QUANT
+    case DataType::U8:
+      evalQuantized(input, weights, bias, output, options, runtime_graph);
+      break;
+    case DataType::S8:
+      evalQuantizedS8(input, weights, bias, output, options, runtime_graph);
+      break;
+#endif // DIS_QUANT
+#ifndef DIS_FLOAT
+    case DataType::FLOAT32:
+      evalFloat(input, weights, bias, output, options, runtime_graph);
+      break;
+#endif // DIS_FLOAT
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace luci_interpreter
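
The float path above delegates to tflite::reference_ops::FullyConnected, which for a 2-D
weight tensor of shape [units, depth] computes
output[b][u] = clamp(bias[u] + sum_d input[b][d] * weights[u][d]) over the activation range;
the quantized paths do the same in integer arithmetic, rescaling the accumulator with the
output_multiplier/output_shift pair derived from input_scale * weights_scale / output_scale.
A rough standalone sketch of the float computation (illustrative only; the names below are
not from this patch):

    #include <algorithm>
    #include <vector>

    // Naive fully-connected reference; weights are row-major [units, depth],
    // matching the 2-D weight check in configure_kernel_CircleFullyConnected().
    std::vector<float> fully_connected(const std::vector<float> &input, int batches, int depth,
                                       const std::vector<float> &weights, int units,
                                       const std::vector<float> &bias, float act_min,
                                       float act_max)
    {
      std::vector<float> output(batches * units);
      for (int b = 0; b < batches; ++b)
        for (int u = 0; u < units; ++u)
        {
          float acc = bias.empty() ? 0.0f : bias[u];
          for (int d = 0; d < depth; ++d)
            acc += input[b * depth + d] * weights[u * depth + d];
          output[b * units + u] = std::min(std::max(acc, act_min), act_max);
        }
      return output;
    }
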
diff --git a/onert-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp b/onert-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp
new file mode 100644 (file)
index 0000000..43fa994
--- /dev/null
@@ -0,0 +1,262 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// TODO: re-enable these tests
+#if 0
+#include "kernels/FullyConnected.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
+           std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
+           std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+}
+
+template <>
+void Check<int8_t>(std::initializer_list<int32_t> input_shape,
+                   std::initializer_list<int32_t> weights_shape,
+                   std::initializer_list<int32_t> bias_shape,
+                   std::initializer_list<int32_t> output_shape,
+                   std::initializer_list<float> input_data,
+                   std::initializer_list<float> weights_data,
+                   std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  const float quantized_tolerance = getTolerance(-127, 128, 255);
+  std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-63.5, 64);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::S8>(weights_shape, input_quant_param.first, input_quant_param.second,
+                                  weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
+                                   bias_data, memory_manager.get());
+  Tensor output_tensor =
+    makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(output_data, quantized_tolerance));
+}
+
+template <>
+void Check<uint8_t>(
+  std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
+  std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
+  std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
+  std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  const float quantized_tolerance = getTolerance(-127, 128, 255);
+  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::U8>(weights_shape, input_quant_param.first, input_quant_param.second,
+                                  weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
+                                   bias_data, memory_manager.get());
+  Tensor output_tensor =
+    makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(output_data, quantized_tolerance));
+}
+
+template <typename T> class FullyConnectedTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(FullyConnectedTest, DataTypes);
+
+TYPED_TEST(FullyConnectedTest, Simple)
+{
+  Check<TypeParam>({3, 2, 2, 1}, {3, 6}, {3}, {2, 3},
+                   {
+                     -3, -5, 5, 4, 9, -2,  // batch = 0
+                     -3, -2, -4, 9, -8, 1, // batch = 1
+                   },
+                   {
+                     -3, -7, 4, -4, -6, 4, // unit = 0
+                     3, 5, 2, 3, -3, -8,   // unit = 1
+                     -3, 7, 4, 9, 0, -5,   // unit = 2
+                   },
+                   {-1, -5, -8},
+                   {
+                     0, 0, 32,   // batch = 0
+                     22, 11, 47, // batch = 1
+                   });
+}
+
+TEST(FullyConnectedTest, InvalidBiasType_NEG)
+{
+  Shape input_shape{3, 2, 2, 1};
+  std::vector<float> input_data{
+    -3, -5, 5,  4, 9,  -2, // batch = 0
+    -3, -2, -4, 9, -8, 1,  // batch = 1
+  };
+  Shape weights_shape{3, 6};
+  std::vector<float> weights_data{
+    -3, -7, 4, -4, -6, 4,  // unit = 0
+    3,  5,  2, 3,  -3, -8, // unit = 1
+    -3, 7,  4, 9,  0,  -5, // unit = 2
+  };
+  Shape bias_shape{3};
+  std::vector<int32_t> bias_data{-1, -5, -8};
+
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(FullyConnectedTest, InvalidWeightShapeDim_NEG)
+{
+  Shape input_shape{3, 2, 2, 1};
+  std::vector<float> input_data{
+    -3, -5, 5,  4, 9,  -2, // batch = 0
+    -3, -2, -4, 9, -8, 1,  // batch = 1
+  };
+  Shape weights_shape{1, 3, 6};
+  std::vector<float> weights_data{
+    -3, -7, 4, -4, -6, 4,  // unit = 0
+    3,  5,  2, 3,  -3, -8, // unit = 1
+    -3, 7,  4, 9,  0,  -5, // unit = 2
+  };
+  Shape bias_shape{3};
+  std::vector<float> bias_data{-1, -5, -8};
+
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(FullyConnectedTest, BiasElementNumWeightDimMismatch_NEG)
+{
+  Shape input_shape{3, 2, 2, 1};
+  std::vector<float> input_data{
+    -3, -5, 5,  4, 9,  -2, // batch = 0
+    -3, -2, -4, 9, -8, 1,  // batch = 1
+  };
+  Shape weights_shape{6, 3};
+  std::vector<float> weights_data{
+    -3, -7, 4,  // unit = 0
+    -4, -6, 4,  // unit = 1
+    3,  5,  2,  // unit = 2
+    3,  -3, -8, // unit = 3
+    -3, 7,  4,  // unit = 4
+    9,  0,  -5, // unit = 5
+  };
+  Shape bias_shape{3};
+  std::vector<float> bias_data{-1, -5, -8};
+
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
+#endif
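
The disabled tests build their tensors with quantizationParams<T>(min, max) from
kernels/TestUtils.h. Assuming the usual TFLite affine-quantization convention (an assumption,
since the helper itself is not part of this patch), the scale and zero point used above can
be reproduced as follows:

    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <limits>

    // Illustrative re-derivation only; the real helper also nudges the zero point
    // so that it stays inside the quantized range.
    template <typename T> void print_quant_params(float rmin, float rmax)
    {
      const float qmin = std::numeric_limits<T>::min();
      const float qmax = std::numeric_limits<T>::max();
      const float scale = (rmax - rmin) / (qmax - qmin);
      const int zero_point = static_cast<int>(std::round(qmin - rmin / scale));
      std::printf("scale=%g zero_point=%d\n", scale, zero_point);
    }

    int main()
    {
      print_quant_params<uint8_t>(-63.5f, 64.0f); // scale=0.5, zero_point=127
      print_quant_params<int8_t>(-63.5f, 64.0f);  // scale=0.5, zero_point=-1
      return 0;
    }
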
diff --git a/onert-micro/luci-interpreter/src/kernels/Gather.cpp b/onert-micro/luci-interpreter/src/kernels/Gather.cpp
new file mode 100644 (file)
index 0000000..d26b718
--- /dev/null
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Gather.h"
+#include "kernels/Utils.h"
+#include "PALGather.h"
+
+#include <cassert>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Gather::Gather(const Tensor *params, const Tensor *indices, Tensor *output,
+               const GatherParams &gparams)
+  : KernelWithParams<GatherParams>({params, indices}, {output}, gparams)
+{
+}
+
+void Gather::configure()
+{
+  if (params()->element_type() == DataType::FLOAT32)
+  {
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+
+  LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32 ||
+                         indices()->element_type() == DataType::S64);
+
+  // Refer to tensorflow/lite/kernels/gather.cc
+
+  const Shape &params_shape = params()->shape();
+  const Shape &indices_shape = indices()->shape();
+
+  int axis = _params.axis;
+  if (axis < 0)
+  {
+    axis += params_shape.num_dims();
+  }
+  LUCI_INTERPRETER_CHECK(0 <= axis && axis < params_shape.num_dims());
+
+  int batch_dims = _params.batch_dims;
+  // batch_dims should be in the range [-rank(indices), rank(indices)].
+  // A negative batch_dims is offset by the rank of the indices tensor.
+  if (batch_dims < 0)
+  {
+    batch_dims += indices_shape.num_dims();
+  }
+  LUCI_INTERPRETER_CHECK(batch_dims <= axis);
+  LUCI_INTERPRETER_CHECK(0 <= batch_dims && batch_dims < params_shape.num_dims());
+  LUCI_INTERPRETER_CHECK(batch_dims <= indices_shape.num_dims());
+  for (int i = 0; i < batch_dims; ++i)
+  {
+    LUCI_INTERPRETER_CHECK(params_shape.dim(i) == indices_shape.dim(i));
+  }
+
+  const int num_dimensions = params_shape.num_dims() + indices_shape.num_dims() - 1 - batch_dims;
+
+  Shape output_shape(num_dimensions);
+  int output_index = 0;
+  for (int i = 0; i < axis; ++i)
+  {
+    output_shape.dim(output_index++) = params_shape.dim(i);
+  }
+  for (int i = batch_dims; i < indices_shape.num_dims(); ++i)
+  {
+    output_shape.dim(output_index++) = indices_shape.dim(i);
+  }
+  for (int i = axis + 1; i < params_shape.num_dims(); ++i)
+  {
+    output_shape.dim(output_index++) = params_shape.dim(i);
+  }
+  // TODO: enable this resize only for kernels with dynamic shapes
+  output()->resize(output_shape);
+}
+
+void Gather::execute() const
+{
+  switch (params()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void Gather::evalFloat() const
+{
+  assert(indices()->element_type() == DataType::S32 || indices()->element_type() == DataType::S64);
+
+  const auto params_data = getTensorData<float>(params());
+  auto output_data = getTensorData<float>(output());
+
+  tflite::GatherParams tparams;
+  tparams.axis = _params.axis;
+  tparams.batch_dims = _params.batch_dims;
+
+  if (indices()->element_type() == DataType::S32)
+  {
+    const auto indices_data = getTensorData<int32_t>(indices());
+
+    luci_interpreter_pal::Gather<float, int32_t>(tparams, getTensorShape(params()), params_data,
+                                                 getTensorShape(indices()), indices_data,
+                                                 getTensorShape(output()), output_data);
+  }
+  else
+  {
+    const auto indices_data = getTensorData<int64_t>(indices());
+
+    luci_interpreter_pal::Gather<float, int64_t>(tparams, getTensorShape(params()), params_data,
+                                                 getTensorShape(indices()), indices_data,
+                                                 getTensorShape(output()), output_data);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
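
Gather::configure() assembles the output shape as
params[0 .. axis) ++ indices[batch_dims .. rank(indices)) ++ params[axis+1 .. rank(params)).
A small sketch of that computation with a worked example (shapes chosen purely for
illustration):

    #include <vector>

    // Mirrors the shape loop in Gather::configure() above.
    std::vector<int> gather_output_shape(const std::vector<int> &params_shape,
                                         const std::vector<int> &indices_shape, int axis,
                                         int batch_dims)
    {
      std::vector<int> out;
      for (int i = 0; i < axis; ++i)
        out.push_back(params_shape[i]);
      for (size_t i = batch_dims; i < indices_shape.size(); ++i)
        out.push_back(indices_shape[i]);
      for (size_t i = axis + 1; i < params_shape.size(); ++i)
        out.push_back(params_shape[i]);
      return out; // e.g. params {2,5,3}, indices {4}, axis=1, batch_dims=0 -> {2,4,3}
    }
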
diff --git a/onert-micro/luci-interpreter/src/kernels/Greater.cpp b/onert-micro/luci-interpreter/src/kernels/Greater.cpp
new file mode 100644 (file)
index 0000000..9d77780
--- /dev/null
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Greater.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Greater::Greater(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void Greater::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+  if (x()->element_type() == DataType::U8)
+  {
+    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+  }
+  // TODO: enable this resize only for kernels with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void Greater::execute() const
+{
+  switch (x()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void Greater::evalFloat() const
+{
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreater(op_params, getTensorShape(x()), x_data,
+                                                  getTensorShape(y()), y_data,
+                                                  getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::Greater(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+                                   y_data, getTensorShape(output()), output_data);
+  }
+}
+
+template <typename T> void Greater::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterNoScaling(op_params, getTensorShape(x()), x_data,
+                                                           getTensorShape(y()), y_data,
+                                                           getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterNoScaling(op_params, getTensorShape(x()), x_data,
+                                            getTensorShape(y()), y_data, getTensorShape(output()),
+                                            output_data);
+  }
+}
+
+void Greater::evalQuantized() const
+{
+  const auto x_data = getTensorData<uint8_t>(x());
+  const auto y_data = getTensorData<uint8_t>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.left_shift = 8;
+  op_params.input1_offset = -x()->zero_point(); // Note the '-'
+  op_params.input1_shift = _x_shift;
+  op_params.input1_multiplier = _x_multiplier;
+  op_params.input2_offset = -y()->zero_point(); // Note the '-'
+  op_params.input2_shift = _y_shift;
+  op_params.input2_multiplier = _y_multiplier;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterWithScaling(op_params, getTensorShape(x()), x_data,
+                                                             getTensorShape(y()), y_data,
+                                                             getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterWithScaling(op_params, getTensorShape(x()), x_data,
+                                              getTensorShape(y()), y_data, getTensorShape(output()),
+                                              output_data);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
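
In the quantized path the two uint8 inputs generally have different scales and zero points,
so the kernel passes per-input offsets, multipliers and shifts (plus left_shift = 8) to the
TFLite *WithScaling helpers, which bring both operands onto a common fixed-point scale before
comparing. Conceptually the result is the same as comparing the dequantized values; a float
sketch of that equivalence (illustrative, not the fixed-point code TFLite actually runs):

    #include <cstdint>

    // real_value = scale * (quantized - zero_point); comparing two quantized
    // tensors is equivalent to comparing their dequantized values.
    inline bool greater_dequantized(uint8_t x, float x_scale, int32_t x_zero_point,
                                    uint8_t y, float y_scale, int32_t y_zero_point)
    {
      return x_scale * (static_cast<int32_t>(x) - x_zero_point) >
             y_scale * (static_cast<int32_t>(y) - y_zero_point);
    }
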
diff --git a/onert-micro/luci-interpreter/src/kernels/GreaterEqual.cpp b/onert-micro/luci-interpreter/src/kernels/GreaterEqual.cpp
new file mode 100644 (file)
index 0000000..8f37160
--- /dev/null
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/GreaterEqual.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+GreaterEqual::GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output)
+  : Kernel({x, y}, {output})
+{
+}
+
+void GreaterEqual::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+  if (x()->element_type() == DataType::U8)
+  {
+    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+  }
+  // TODO: enable this resize only for kernels with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void GreaterEqual::execute() const
+{
+  switch (x()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void GreaterEqual::evalFloat() const
+{
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterEqual(op_params, getTensorShape(x()), x_data,
+                                                       getTensorShape(y()), y_data,
+                                                       getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+                                        y_data, getTensorShape(output()), output_data);
+  }
+}
+
+template <typename T> void GreaterEqual::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
+      op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                                 getTensorShape(y()), y_data,
+                                                 getTensorShape(output()), output_data);
+  }
+}
+
+void GreaterEqual::evalQuantized() const
+{
+  const auto x_data = getTensorData<uint8_t>(x());
+  const auto y_data = getTensorData<uint8_t>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.left_shift = 8;
+  op_params.input1_offset = -x()->zero_point(); // Note the '-'
+  op_params.input1_shift = _x_shift;
+  op_params.input1_multiplier = _x_multiplier;
+  op_params.input2_offset = -y()->zero_point(); // Note the '-'
+  op_params.input2_shift = _y_shift;
+  op_params.input2_multiplier = _y_multiplier;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
+      op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterEqualWithScaling(op_params, getTensorShape(x()), x_data,
+                                                   getTensorShape(y()), y_data,
+                                                   getTensorShape(output()), output_data);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/InstanceNorm.cpp b/onert-micro/luci-interpreter/src/kernels/InstanceNorm.cpp
new file mode 100644 (file)
index 0000000..577dc64
--- /dev/null
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/InstanceNorm.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/common.h>
+#include <cmath>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+InstanceNorm::InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta,
+                           Tensor *output, const InstanceNormParams &params)
+  : KernelWithParams<InstanceNormParams>({input, gamma, beta}, {output}, params)
+{
+}
+
+void InstanceNorm::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  LUCI_INTERPRETER_CHECK(gamma()->element_type() == input()->element_type());
+  LUCI_INTERPRETER_CHECK(gamma()->shape().num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(gamma()->shape().dim(0) == input()->shape().dim(3) ||
+                         gamma()->shape().dim(0) == 1);
+  LUCI_INTERPRETER_CHECK(beta()->element_type() == input()->element_type());
+  LUCI_INTERPRETER_CHECK(beta()->shape().num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(beta()->shape().dim(0) == input()->shape().dim(3) ||
+                         beta()->shape().dim(0) == 1);
+  // TODO: enable this resize only for kernels with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+void InstanceNorm::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void InstanceNorm::evalFloat() const
+{
+  float activation_min, activation_max;
+  calculateActivationRange(params().activation, &activation_min, &activation_max);
+  auto input_shape = getTensorShape(input());
+  auto output_shape = getTensorShape(output());
+  const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+  const int32_t heights = tflite::MatchingDim(input_shape, 1, output_shape, 1);
+  const int32_t widths = tflite::MatchingDim(input_shape, 2, output_shape, 2);
+  const int32_t channels = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+  const float *input_data = getTensorData<float>(input());
+  const float *gamma_data = getTensorData<float>(gamma());
+  auto gamma_shape = getTensorShape(gamma());
+  bool single_gamma = gamma_shape.DimensionsCount() == 1 && gamma_shape.Dims(0) == 1;
+  const float *beta_data = getTensorData<float>(beta());
+  auto beta_shape = getTensorShape(beta());
+  bool single_beta = beta_shape.DimensionsCount() == 1 && beta_shape.Dims(0) == 1;
+  float *output_data = getTensorData<float>(output());
+  for (int32_t batch = 0; batch < batches; batch++)
+  {
+    for (int32_t channel = 0; channel < channels; channel++)
+    {
+      double sum = 0.0;
+      double square_sum = 0.0;
+      int32_t size = heights * widths;
+      for (int32_t height = 0; height < heights; height++)
+      {
+        for (int32_t width = 0; width < widths; width++)
+        {
+          double input_val = input_data[tflite::Offset(input_shape, batch, height, width, channel)];
+          sum += input_val;
+          square_sum += (input_val * input_val);
+        }
+      }
+      double mean = sum / size;
+      double var = square_sum / size - mean * mean;
+
+      double gamma = single_gamma ? gamma_data[0] : gamma_data[channel];
+      double beta = single_beta ? beta_data[0] : beta_data[channel];
+      double a = gamma / (std::sqrt(var + params().epsilon));
+      double b = -mean * a + beta;
+
+      for (int32_t height = 0; height < heights; height++)
+      {
+        for (int32_t width = 0; width < widths; width++)
+        {
+          double input_value =
+            input_data[tflite::Offset(output_shape, batch, height, width, channel)];
+          double output_value = input_value * a + b;
+          output_data[tflite::Offset(output_shape, batch, height, width, channel)] =
+            tflite::ActivationFunctionWithMinMax((float)output_value, activation_min,
+                                                 activation_max);
+        }
+      }
+    }
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
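
The per-(batch, channel) loops above implement the usual instance-normalization formula
out = gamma * (in - mean) / sqrt(var + eps) + beta, folded into the affine form
a = gamma / sqrt(var + eps), b = beta - mean * a. A compact sketch of the same computation
over one flattened H*W slice (illustrative only):

    #include <cmath>
    #include <vector>

    // Normalizes one (batch, channel) slice of H*W values, as evalFloat() does.
    void instance_norm_slice(std::vector<float> &slice, float gamma, float beta, float epsilon)
    {
      double sum = 0.0, square_sum = 0.0;
      for (float v : slice)
      {
        sum += v;
        square_sum += static_cast<double>(v) * v;
      }
      const double mean = sum / slice.size();
      const double var = square_sum / slice.size() - mean * mean;
      const double a = gamma / std::sqrt(var + epsilon);
      const double b = -mean * a + beta;
      for (float &v : slice)
        v = static_cast<float>(v * a + b);
    }
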
diff --git a/onert-micro/luci-interpreter/src/kernels/KernelBuilder.cpp b/onert-micro/luci-interpreter/src/kernels/KernelBuilder.cpp
new file mode 100644 (file)
index 0000000..fc9c09c
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelBuilder.h"
+#include "Builders.h"
+
+namespace luci_interpreter
+{
+
+KernelConfigureRegistry::KernelConfigureRegistry()
+{
+#define REGISTER_KERNEL(builtin_operator, name)                                          \
+  register_kernel_configure(circle::BuiltinOperator::BuiltinOperator_##builtin_operator, \
+                            configure_kernel_Circle##name);
+
+#if USE_GENERATED_LIST
+#include "GeneratedKernelsToBuild.lst"
+#else
+#include "KernelsToBuild.lst"
+#endif
+
+#undef REGISTER_KERNEL
+}
+
+void KernelConfigureRegistry::configure_kernel(const circle::Operator *cur_op,
+                                               circle::BuiltinOperator opcode,
+                                               BaseRuntimeGraph *runtime_graph)
+{
+  auto specific_configure_func = get_kernel_configure_func(opcode);
+  if (specific_configure_func == nullptr)
+    assert(false && "Unsupported operator");
+
+  specific_configure_func(cur_op, runtime_graph);
+}
+
+KernelExecuteRegistry::KernelExecuteRegistry()
+{
+#define REGISTER_KERNEL(builtin_operator, name)                                        \
+  register_kernel_execute(circle::BuiltinOperator::BuiltinOperator_##builtin_operator, \
+                          execute_kernel_Circle##name);
+
+#if USE_GENERATED_LIST
+#include "GeneratedKernelsToBuild.lst"
+#else
+#include "KernelsToBuild.lst"
+#endif
+
+#undef REGISTER_KERNEL
+}
+
+void KernelExecuteRegistry::execute_kernel(const circle::Operator *cur_op,
+                                           circle::BuiltinOperator opcode,
+                                           BaseRuntimeGraph *runtime_graph, bool is_inplace)
+{
+  auto specific_execute_func = get_kernel_execute_func(opcode);
+  if (specific_execute_func == nullptr)
+    assert(false && "Unsupported operator");
+
+  specific_execute_func(cur_op, runtime_graph, is_inplace);
+}
+
+} // namespace luci_interpreter
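
Each REGISTER_KERNEL(OP, Name) entry pulled in from KernelsToBuild.lst (or the generated
list) is expanded by the macros above into one registration call per registry. Assuming the
list contains an entry such as REGISTER_KERNEL(FULLY_CONNECTED, FullyConnected) (the list
itself is not shown in this patch), the expansion would be:

    // Inside KernelConfigureRegistry::KernelConfigureRegistry():
    register_kernel_configure(circle::BuiltinOperator::BuiltinOperator_FULLY_CONNECTED,
                              configure_kernel_CircleFullyConnected);

    // Inside KernelExecuteRegistry::KernelExecuteRegistry():
    register_kernel_execute(circle::BuiltinOperator::BuiltinOperator_FULLY_CONNECTED,
                            execute_kernel_CircleFullyConnected);
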
diff --git a/onert-micro/luci-interpreter/src/kernels/KernelBuilder.h b/onert-micro/luci-interpreter/src/kernels/KernelBuilder.h
new file mode 100644 (file)
index 0000000..c73cf05
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNEL_KERNELBUILDER_H
+#define LUCI_INTERPRETER_KERNEL_KERNELBUILDER_H
+
+#include "core/RuntimeGraph.h"
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+
+#include <memory>
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+namespace
+{
+#ifdef USE_STATIC_ALLOC
+using BaseRuntimeGraph = StaticRuntimeGraph;
+#else
+using BaseRuntimeGraph = RuntimeGraph;
+#endif
+} // namespace
+
+class KernelConfigureRegistry
+{
+public:
+  using KernelConfigureFunc = void(const circle::Operator *, BaseRuntimeGraph *);
+
+  KernelConfigureRegistry();
+
+  void configure_kernel(const circle::Operator *cur_op, circle::BuiltinOperator opcode,
+                        BaseRuntimeGraph *runtime_graph);
+
+private:
+  std::unordered_map<int32_t, KernelConfigureFunc *> _operator_configure;
+
+private:
+  KernelConfigureFunc *get_kernel_configure_func(circle::BuiltinOperator opcode) const
+  {
+    // Use find() rather than at() so configure_kernel() can report an unsupported
+    // operator instead of the lookup itself throwing/aborting.
+    const auto it = _operator_configure.find(size_t(opcode));
+    return it == _operator_configure.end() ? nullptr : it->second;
+  }
+
+  void register_kernel_configure(circle::BuiltinOperator id, KernelConfigureFunc *func)
+  {
+    _operator_configure[size_t(id)] = func;
+  }
+};
+
+class KernelExecuteRegistry
+{
+public:
+  using KernelExecuteFunc = void(const circle::Operator *, BaseRuntimeGraph *, bool);
+
+  KernelExecuteRegistry();
+
+  void execute_kernel(const circle::Operator *cur_op, circle::BuiltinOperator opcode,
+                      BaseRuntimeGraph *runtime_graph, bool is_inplace);
+
+private:
+  std::unordered_map<int32_t, KernelExecuteFunc *> _operator_execute;
+
+private:
+  KernelExecuteFunc *get_kernel_execute_func(circle::BuiltinOperator opcode) const
+  {
+    // Use find() rather than at() so execute_kernel() can report an unsupported
+    // operator instead of the lookup itself throwing/aborting.
+    const auto it = _operator_execute.find(size_t(opcode));
+    return it == _operator_execute.end() ? nullptr : it->second;
+  }
+
+  void register_kernel_execute(circle::BuiltinOperator id, KernelExecuteFunc *func)
+  {
+    _operator_execute[size_t(id)] = func;
+  }
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNEL_KERNELBUILDER_H
diff --git a/onert-micro/luci-interpreter/src/kernels/L2Normalize.cpp b/onert-micro/luci-interpreter/src/kernels/L2Normalize.cpp
new file mode 100644 (file)
index 0000000..97c9db8
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/L2Normalize.h"
+#include "kernels/Utils.h"
+
+#include "PALL2Normalize.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+L2Normalize::L2Normalize(const Tensor *input, Tensor *output, const L2NormParams &params)
+  : KernelWithParams<L2NormParams>({input}, {output}, params)
+{
+}
+
+void L2Normalize::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= 4);
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 ||
+                         output()->element_type() == DataType::U8);
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  if (output()->element_type() == DataType::U8)
+  {
+    LUCI_INTERPRETER_CHECK(output()->scale() == (1. / 128.));
+    LUCI_INTERPRETER_CHECK(output()->zero_point() == 128);
+  }
+  LUCI_INTERPRETER_CHECK(params().activation == Activation::NONE);
+  // TODO: enable this resize only for kernels with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+void L2Normalize::execute() const
+{
+  switch (output()->element_type())
+  {
+    case DataType::FLOAT32:
+      eval<float>(0);
+      break;
+    case DataType::U8:
+      eval<uint8_t>(input()->zero_point());
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+template <typename T> void L2Normalize::eval(int32_t zero_point) const
+{
+  tflite::L2NormalizationParams op_params{};
+  op_params.input_zero_point = zero_point;
+  luci_interpreter_pal::L2Normalization(op_params, getTensorShape(input()),
+                                        getTensorData<T>(input()), getTensorShape(output()),
+                                        getTensorData<T>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
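
L2 normalization rescales the innermost dimension so that its Euclidean norm becomes 1:
out_i = in_i / sqrt(sum_j in_j^2). A minimal float sketch of what the PAL call performs for
each innermost row (illustrative; the epsilon guard is an assumption here):

    #include <algorithm>
    #include <cmath>

    // Normalizes one innermost row of `depth` elements in place.
    void l2_normalize_row(float *row, int depth, float epsilon = 1e-6f)
    {
      float sum_sq = 0.0f;
      for (int i = 0; i < depth; ++i)
        sum_sq += row[i] * row[i];
      const float inv_norm = 1.0f / std::sqrt(std::max(sum_sq, epsilon));
      for (int i = 0; i < depth; ++i)
        row[i] *= inv_norm;
    }
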
diff --git a/onert-micro/luci-interpreter/src/kernels/L2Pool2D.cpp b/onert-micro/luci-interpreter/src/kernels/L2Pool2D.cpp
new file mode 100644 (file)
index 0000000..e465c22
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/L2Pool2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALL2Pool2D.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+L2Pool2D::L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
+  : KernelWithParams<Pool2DParams>({input}, {output}, params)
+{
+}
+
+void L2Pool2D::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  int batches = input()->shape().dim(0);
+  int height = input()->shape().dim(1);
+  int width = input()->shape().dim(2);
+  int channels_out = input()->shape().dim(3);
+
+  // Matching GetWindowedOutputSize in TensorFlow.
+  auto padding = params().padding;
+  int out_width, out_height;
+  out_width = computeOutputSize(padding, width, params().filter_width, params().stride_width, 1);
+  out_height =
+    computeOutputSize(padding, height, params().filter_height, params().stride_height, 1);
+  _padding_width =
+    computePadding(params().stride_width, 1, width, params().filter_width, out_width);
+  _padding_height =
+    computePadding(params().stride_height, 1, height, params().filter_height, out_height);
+
+  LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32);
+  // TODO: enable this resize only for kernels with dynamic shapes
+  output()->resize({batches, out_height, out_width, channels_out});
+}
+
+void L2Pool2D::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      float activation_min, activation_max;
+      calculateActivationRange(params().activation, &activation_min, &activation_max);
+      tflite::PoolParams op_params;
+      op_params.stride_height = params().stride_height;
+      op_params.stride_width = params().stride_width;
+      op_params.filter_height = params().filter_height;
+      op_params.filter_width = params().filter_width;
+      op_params.padding_values.height = _padding_height;
+      op_params.padding_values.width = _padding_width;
+      op_params.float_activation_min = activation_min;
+      op_params.float_activation_max = activation_max;
+      luci_interpreter_pal::L2Pool(op_params, getTensorShape(input()),
+                                   getTensorData<float>(input()), getTensorShape(output()),
+                                   getTensorData<float>(output()));
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
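
L2 pooling emits, for every window, the square root of the mean of the squared inputs under
that window, clamped to the activation range. A simplified 1-D sketch with VALID padding
(illustrative; the real kernel works on 4-D NHWC tensors with the padding computed in
configure()):

    #include <cmath>
    #include <vector>

    // 1-D L2 pooling: out[o] = sqrt(mean(in[o*stride .. o*stride+filter-1]^2)).
    std::vector<float> l2_pool_1d(const std::vector<float> &in, int filter, int stride)
    {
      std::vector<float> out;
      for (int start = 0; start + filter <= static_cast<int>(in.size()); start += stride)
      {
        float sum_sq = 0.0f;
        for (int i = 0; i < filter; ++i)
          sum_sq += in[start + i] * in[start + i];
        out.push_back(std::sqrt(sum_sq / filter));
      }
      return out;
    }
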
diff --git a/onert-micro/luci-interpreter/src/kernels/LeakyRelu.cpp b/onert-micro/luci-interpreter/src/kernels/LeakyRelu.cpp
new file mode 100644 (file)
index 0000000..d13fef2
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LeakyRelu.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
+
+#include "PALLeakyRelu.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+LeakyRelu::LeakyRelu(const Tensor *input, Tensor *output, const LeakyReluParams &params)
+  : KernelWithParams<LeakyReluParams>({input}, {output}, params)
+{
+}
+
+void LeakyRelu::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  if (input()->element_type() == DataType::U8)
+  {
+    double alpha_multiplier = input()->scale() * params().alpha / output()->scale();
+    quantizeMultiplier(alpha_multiplier, &_output_multiplier_alpha, &_output_shift_alpha);
+    double identity_multiplier = input()->scale() / output()->scale();
+    quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
+  }
+  // TODO: enable this resize only for kernels with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+void LeakyRelu::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void LeakyRelu::evalFloat() const
+{
+  tflite::LeakyReluParams op_params{};
+  op_params.alpha = params().alpha;
+  luci_interpreter_pal::LeakyRelu(op_params, getTensorShape(input()), getTensorData<float>(input()),
+                                  getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void LeakyRelu::evalQuantized() const
+{
+  tflite::LeakyReluParams op_params{};
+  op_params.input_offset = input()->zero_point();
+  op_params.output_offset = output()->zero_point();
+  op_params.output_multiplier_alpha = _output_multiplier_alpha;
+  op_params.output_shift_alpha = _output_shift_alpha;
+  op_params.output_multiplier_identity = _output_multiplier_identity;
+  op_params.output_shift_identity = _output_shift_identity;
+
+  tflite::reference_ops::QuantizeLeakyRelu(
+    op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(output()),
+    getTensorData<uint8_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
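
The float path is the standard leaky ReLU, f(x) = x for x >= 0 and alpha * x otherwise; the
quantized path precomputes two multiplier/shift pairs because the identity branch and the
alpha branch need different rescaling between the input and output quantization parameters.
A one-line float sketch for reference:

    // f(x) = x if x >= 0, alpha * x otherwise.
    inline float leaky_relu(float x, float alpha) { return x >= 0.0f ? x : alpha * x; }
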
diff --git a/onert-micro/luci-interpreter/src/kernels/Less.cpp b/onert-micro/luci-interpreter/src/kernels/Less.cpp
new file mode 100644 (file)
index 0000000..1d7bbd6
--- /dev/null
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Less.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Less::Less(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void Less::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+  if (x()->element_type() == DataType::U8)
+  {
+    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+  }
+  // TODO: enable this resize only for kernels with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void Less::execute() const
+{
+  switch (x()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void Less::evalFloat() const
+{
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowLess(op_params, getTensorShape(x()), x_data,
+                                               getTensorShape(y()), y_data,
+                                               getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::Less(op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data,
+                                getTensorShape(output()), output_data);
+  }
+}
+
+template <typename T> void Less::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowLessNoScaling(op_params, getTensorShape(x()), x_data,
+                                                        getTensorShape(y()), y_data,
+                                                        getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::LessNoScaling(op_params, getTensorShape(x()), x_data,
+                                         getTensorShape(y()), y_data, getTensorShape(output()),
+                                         output_data);
+  }
+}
+
+void Less::evalQuantized() const
+{
+  const auto x_data = getTensorData<uint8_t>(x());
+  const auto y_data = getTensorData<uint8_t>(y());
+  auto output_data = getTensorData<bool>(output());
+
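+  // Quantized comparison: both inputs are left-shifted by 8 bits and rescaled with the
+  // multiplier/shift pairs computed in configure(), so the raw uint8 values are compared in a
+  // common fixed-point domain; the zero points are removed via the negated offsets below.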
+  tflite::ComparisonParams op_params;
+  op_params.left_shift = 8;
+  op_params.input1_offset = -x()->zero_point(); // Note the '-'
+  op_params.input1_shift = _x_shift;
+  op_params.input1_multiplier = _x_multiplier;
+  op_params.input2_offset = -y()->zero_point(); // Note the '-'
+  op_params.input2_shift = _y_shift;
+  op_params.input2_multiplier = _y_multiplier;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowLessWithScaling(op_params, getTensorShape(x()), x_data,
+                                                          getTensorShape(y()), y_data,
+                                                          getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::LessWithScaling(op_params, getTensorShape(x()), x_data,
+                                           getTensorShape(y()), y_data, getTensorShape(output()),
+                                           output_data);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/LessEqual.cpp b/onert-micro/luci-interpreter/src/kernels/LessEqual.cpp
new file mode 100644 (file)
index 0000000..d750893
--- /dev/null
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LessEqual.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+LessEqual::LessEqual(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void LessEqual::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+  if (x()->element_type() == DataType::U8)
+  {
+    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+  }
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void LessEqual::execute() const
+{
+  switch (x()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void LessEqual::evalFloat() const
+{
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowLessEqual(op_params, getTensorShape(x()), x_data,
+                                                    getTensorShape(y()), y_data,
+                                                    getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::LessEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+                                     y_data, getTensorShape(output()), output_data);
+  }
+}
+
+template <typename T> void LessEqual::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowLessEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                                             getTensorShape(y()), y_data,
+                                                             getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::LessEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                              getTensorShape(y()), y_data, getTensorShape(output()),
+                                              output_data);
+  }
+}
+
+void LessEqual::evalQuantized() const
+{
+  const auto x_data = getTensorData<uint8_t>(x());
+  const auto y_data = getTensorData<uint8_t>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.left_shift = 8;
+  op_params.input1_offset = -x()->zero_point(); // Note the '-'
+  op_params.input1_shift = _x_shift;
+  op_params.input1_multiplier = _x_multiplier;
+  op_params.input2_offset = -y()->zero_point(); // Note the '-'
+  op_params.input2_shift = _y_shift;
+  op_params.input2_multiplier = _y_multiplier;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowLessEqualWithScaling(
+      op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      output_data);
+  }
+  else
+  {
+    tflite::reference_ops::LessEqualWithScaling(op_params, getTensorShape(x()), x_data,
+                                                getTensorShape(y()), y_data,
+                                                getTensorShape(output()), output_data);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp b/onert-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp
new file mode 100644 (file)
index 0000000..bf08db0
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LocalResponseNormalization.h"
+
+#include "kernels/Utils.h"
+
+#include "PALLocalResponseNormalization.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+LocalResponseNormalization::LocalResponseNormalization(
+  const Tensor *input, Tensor *output, const LocalResponseNormalizationParams &params)
+  : KernelWithParams<LocalResponseNormalizationParams>({input}, {output}, params)
+{
+}
+
+void LocalResponseNormalization::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+void LocalResponseNormalization::execute() const
+{
+  switch (output()->element_type())
+  {
+    case DataType::FLOAT32:
+      tflite::LocalResponseNormalizationParams op_params;
+      op_params.range = params().radius;
+      op_params.bias = params().bias;
+      op_params.alpha = params().alpha;
+      op_params.beta = params().beta;
+      luci_interpreter_pal::LocalResponseNormalization(
+        op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(output()),
+        getTensorData<float>(output()));
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/LogSoftmax.cpp b/onert-micro/luci-interpreter/src/kernels/LogSoftmax.cpp
new file mode 100644 (file)
index 0000000..b467cb0
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogSoftmax.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/log_softmax.h>
+
+#include "PALLogSoftmax.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogSoftmax::LogSoftmax(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void LogSoftmax::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  if (input()->element_type() == DataType::U8)
+  {
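+    // Log-softmax outputs lie in (-inf, 0]; the uint8 reference kernel expects the fixed
+    // quantization scale 16/256 with zero point 255, i.e. a representable range of [-15.9375, 0].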
+    LUCI_INTERPRETER_CHECK(output()->scale() == 16. / 256);
+    LUCI_INTERPRETER_CHECK(output()->zero_point() == 255);
+
+    tflite::SoftmaxParams params{};
+
+    params.table = _table;
+    params.beta = 1.0;
+    luci_interpreter_pal::PopulateSoftmaxLookupTable(&params, input()->scale(), params.beta);
+  }
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+void LogSoftmax::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void LogSoftmax::evalFloat() const
+{
+  tflite::SoftmaxParams params{};
+  tflite::reference_ops::LogSoftmax(params, getTensorShape(input()), getTensorData<float>(input()),
+                                    getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void LogSoftmax::evalQuantized() const
+{
+  const auto input_shape = getTensorShape(input());
+  const auto output_shape = getTensorShape(output());
+  const auto input_scale = input()->scale();
+  uint8_t *output_data = getTensorData<uint8_t>(output());
+  const uint8_t *input_data = getTensorData<uint8_t>(input());
+  const float beta = 1.0;
+
+  tflite::SoftmaxParams params{};
+
+  params.table = const_cast<float *>(_table);
+  params.zero_point = output()->zero_point();
+  params.scale = output()->scale();
+
+  luci_interpreter_pal::InitializeParams(&params, input_scale, beta);
+  luci_interpreter_pal::LogSoftmax(params, input_scale, input_shape, input_data, output_shape,
+                                   output_data);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/LogicalAnd.cpp b/onert-micro/luci-interpreter/src/kernels/LogicalAnd.cpp
new file mode 100644 (file)
index 0000000..65e0b50
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalAnd.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogicalAnd::LogicalAnd(const Tensor *input1, const Tensor *input2, Tensor *output)
+  : Kernel({input1, input2}, {output})
+{
+}
+
+void LogicalAnd::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void LogicalAnd::execute() const
+{
+  switch (input1()->element_type())
+  {
+    case DataType::BOOL:
+      evalLogicalAnd();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+inline void LogicalAnd::evalLogicalAnd() const
+{
+  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<bool>(input1()),
+                        getTensorShape(input2()), getTensorData<bool>(input2()),
+                        getTensorShape(output()), getTensorData<bool>(output()),
+                        [](bool x, bool y) { return x && y; });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/LogicalNot.cpp b/onert-micro/luci-interpreter/src/kernels/LogicalNot.cpp
new file mode 100644 (file)
index 0000000..4ba4499
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalNot.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogicalNot::LogicalNot(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void LogicalNot::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+void LogicalNot::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::BOOL:
+      evalLogicalNot();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+inline void LogicalNot::evalLogicalNot() const
+{
+  const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+  bool *output_data = getTensorData<bool>(output());
+  const bool *input_data = getTensorData<bool>(input());
+  for (int i = 0; i < size; ++i)
+  {
+    output_data[i] = !input_data[i];
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/LogicalOr.cpp b/onert-micro/luci-interpreter/src/kernels/LogicalOr.cpp
new file mode 100644 (file)
index 0000000..ecb8ee4
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalOr.h"
+
+#include "kernels/Utils.h"
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogicalOr::LogicalOr(const Tensor *input1, const Tensor *input2, Tensor *output)
+  : Kernel({input1, input2}, {output})
+{
+}
+
+void LogicalOr::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == DataType::BOOL);
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void LogicalOr::execute() const
+{
+  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<bool>(input1()),
+                        getTensorShape(input2()), getTensorData<bool>(input2()),
+                        getTensorShape(output()), getTensorData<bool>(output()),
+                        [](bool x, bool y) { return x || y; });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Logistic.cpp b/onert-micro/luci-interpreter/src/kernels/Logistic.cpp
new file mode 100644 (file)
index 0000000..631c060
--- /dev/null
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/logistic.h>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+#ifndef DIS_FLOAT
+void evalFloat(const circle::Tensor *input, const circle::Tensor *output, bool is_inplace,
+               BaseRuntimeGraph *runtime_graph)
+{
+  const float *input_data = reinterpret_cast<const float *>(runtime_graph->getDataByTensor(input));
+  float *output_data = reinterpret_cast<float *>(runtime_graph->getDataByTensor(output));
+
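+  // When the operation is marked in-place, results are written over the input buffer and the
+  // runtime graph is notified below so the two tensors can share that buffer.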
+  if (is_inplace)
+  {
+    output_data = const_cast<float *>(input_data);
+  }
+
+  assert(input_data != nullptr);
+  assert(output_data != nullptr);
+
+  tflite::reference_ops::Logistic(kernels::getTensorShape(input), input_data,
+                                  kernels::getTensorShape(output), output_data);
+  if (is_inplace)
+  {
+    runtime_graph->makeInplaceOperation(input, output);
+  }
+}
+#endif // DIS_FLOAT
+
+#ifndef DIS_QUANT
+void evalQuantized(const circle::Tensor *input, const circle::Tensor *output, bool is_inplace,
+                   BaseRuntimeGraph *runtime_graph)
+{
+  const int8_t *input_data =
+    reinterpret_cast<const int8_t *>(runtime_graph->getDataByTensor(input));
+  int8_t *output_data = reinterpret_cast<int8_t *>(runtime_graph->getDataByTensor(output));
+  if (is_inplace)
+    output_data = const_cast<int8_t *>(input_data);
+
+  tflite::reference_ops::Logistic(kernels::getTensorShape(input), input_data, Tensor::scale(input),
+                                  Tensor::zero_point(input), kernels::getTensorShape(output),
+                                  output_data, Tensor::scale(output), Tensor::zero_point(output));
+  if (is_inplace)
+  {
+    runtime_graph->makeInplaceOperation(input, output);
+  }
+}
+#endif // DIS_QUANT
+
+} // namespace
+
+void configure_kernel_CircleLogistic(const circle::Operator *cur_op,
+                                     BaseRuntimeGraph *runtime_graph)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  assert(input != nullptr);
+  assert(output != nullptr);
+
+  LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == Tensor::element_type(output));
+
+#ifndef DIS_QUANT
+  if (Tensor::element_type(input) == DataType::U8)
+  {
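+    // Sigmoid outputs lie in (0, 1), so the uint8 output is expected to use the fixed scale
+    // 1/256 covering that range.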
+    LUCI_INTERPRETER_CHECK(Tensor::scale(output) == 1. / 256);
+  }
+#endif // DIS_QUANT
+}
+
+void execute_kernel_CircleLogistic(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph,
+                                   bool is_inplace)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  assert(input != nullptr);
+  assert(output != nullptr);
+
+  switch (Tensor::element_type(input))
+  {
+#ifndef DIS_FLOAT
+    case DataType::FLOAT32:
+      evalFloat(input, output, is_inplace, runtime_graph);
+      break;
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+    case DataType::S8:
+      evalQuantized(input, output, is_inplace, runtime_graph);
+      break;
+#endif // DIS_QUANT
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Logistic.test.cpp b/onert-micro/luci-interpreter/src/kernels/Logistic.test.cpp
new file mode 100644 (file)
index 0000000..951dd3d
--- /dev/null
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// TODO: enable this test
+#if 0
+#include "kernels/Logistic.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<getElementType<T>()>(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(getElementType<T>());
+
+  Logistic kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+                    std::initializer_list<int32_t> output_shape,
+                    std::initializer_list<float> input_data,
+                    std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  std::pair<float, int32_t> input_quant_param =
+    quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
+
+  Logistic kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(output_data, output_tensor.scale() * 2));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <typename T> class LogisticTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(LogisticTest, DataTypes);
+
+TYPED_TEST(LogisticTest, Simple)
+{
+  Check<TypeParam>(
+    {89}, {89},
+    {-10.0000000000, -9.7727272727, -9.5454545455, -9.3181818182, -9.0909090909, -8.8636363636,
+     -8.6363636364,  -8.4090909091, -8.1818181818, -7.9545454545, -7.7272727273, -7.5000000000,
+     -7.2727272727,  -7.0454545455, -6.8181818182, -6.5909090909, -6.3636363636, -6.1363636364,
+     -5.9090909091,  -5.6818181818, -5.4545454545, -5.2272727273, -5.0000000000, -4.7727272727,
+     -4.5454545455,  -4.3181818182, -4.0909090909, -3.8636363636, -3.6363636364, -3.4090909091,
+     -3.1818181818,  -2.9545454545, -2.7272727273, -2.5000000000, -2.2727272727, -2.0454545455,
+     -1.8181818182,  -1.5909090909, -1.3636363636, -1.1363636364, -0.9090909091, -0.6818181818,
+     -0.4545454545,  -0.2272727273, 0.0000000000,  0.2272727273,  0.4545454545,  0.6818181818,
+     0.9090909091,   1.1363636364,  1.3636363636,  1.5909090909,  1.8181818182,  2.0454545455,
+     2.2727272727,   2.5000000000,  2.7272727273,  2.9545454545,  3.1818181818,  3.4090909091,
+     3.6363636364,   3.8636363636,  4.0909090909,  4.3181818182,  4.5454545455,  4.7727272727,
+     5.0000000000,   5.2272727273,  5.4545454545,  5.6818181818,  5.9090909091,  6.1363636364,
+     6.3636363636,   6.5909090909,  6.8181818182,  7.0454545455,  7.2727272727,  7.5000000000,
+     7.7272727273,   7.9545454545,  8.1818181818,  8.4090909091,  8.6363636364,  8.8636363636,
+     9.0909090909,   9.3181818182,  9.5454545455,  9.7727272727,  10.0000000000},
+    {0.0000453979, 0.0000569815, 0.0000715205, 0.0000897689, 0.0001126729, 0.0001414198,
+     0.0001774998, 0.0002227827, 0.0002796147, 0.0003509396, 0.0004404502, 0.0005527786,
+     0.0006937345, 0.0008706021, 0.0010925128, 0.0013709094, 0.0017201256, 0.0021581065,
+     0.0027073042, 0.0033957870, 0.0042586071, 0.0053394826, 0.0066928509, 0.0083863576,
+     0.0105038445, 0.0131488902, 0.0164489307, 0.0205599431, 0.0256715863, 0.0320125562,
+     0.0398556989, 0.0495221198, 0.0613831074, 0.0758581800, 0.0934070047, 0.1145124805,
+     0.1396521834, 0.1692560327, 0.2036499335, 0.2429886272, 0.2871859014, 0.3358556241,
+     0.3882805886, 0.4434251301, 0.5000000000, 0.5565748699, 0.6117194114, 0.6641443759,
+     0.7128140986, 0.7570113728, 0.7963500665, 0.8307439673, 0.8603478166, 0.8854875195,
+     0.9065929953, 0.9241418200, 0.9386168926, 0.9504778802, 0.9601443011, 0.9679874438,
+     0.9743284137, 0.9794400569, 0.9835510693, 0.9868511098, 0.9894961555, 0.9916136424,
+     0.9933071491, 0.9946605174, 0.9957413929, 0.9966042130, 0.9972926958, 0.9978418935,
+     0.9982798744, 0.9986290906, 0.9989074872, 0.9991293979, 0.9993062655, 0.9994472214,
+     0.9995595498, 0.9996490604, 0.9997203853, 0.9997772173, 0.9998225002, 0.9998585802,
+     0.9998873271, 0.9999102311, 0.9999284795, 0.9999430185, 0.9999546021});
+}
+
+TEST(LogisticTest, InvalidInputOutputType_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Shape input_shape = {1};
+  std::vector<float> input_data{10};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
+
+  Logistic kernel(&input_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(LogisticTest, InvalidQuantParam_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Shape input_shape = {2};
+  std::vector<float> input_data{-10, 10};
+  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-10, 10);
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 255, 0);
+
+  Logistic kernel(&input_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
+#endif
diff --git a/onert-micro/luci-interpreter/src/kernels/MaxPool2D.cpp b/onert-micro/luci-interpreter/src/kernels/MaxPool2D.cpp
new file mode 100644 (file)
index 0000000..865e4f8
--- /dev/null
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter
+{
+
+namespace
+{
+
+#ifndef DIS_FLOAT
+
+void evalFloat(const circle::Tensor *input, const circle::Tensor *output,
+               const circle::Pool2DOptions *options, BaseRuntimeGraph *runtime_graph)
+{
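+  // Derive the output spatial size and the implicit SAME/VALID padding from the pooling options;
+  // the reference kernel expects explicit padding values in PoolParams.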
+  const int32_t input_height = Tensor::dim(input, 1);
+  const int32_t input_width = Tensor::dim(input, 2);
+
+  const int32_t output_height = kernels::computeOutputSize(
+    luci_padding(options->padding()), input_height, options->filter_height(), options->stride_h());
+  const int32_t output_width = kernels::computeOutputSize(
+    luci_padding(options->padding()), input_width, options->filter_width(), options->stride_w());
+
+  const auto padding_height = kernels::computePadding(options->stride_h(), 1, input_height,
+                                                      options->filter_height(), output_height);
+  const auto padding_width = kernels::computePadding(options->stride_w(), 1, input_width,
+                                                     options->filter_width(), output_width);
+
+  const auto *input_data = runtime_graph->getDataByTensor(input);
+  auto *output_data = runtime_graph->getDataByTensor(output);
+
+  float activation_min{};
+  float activation_max{};
+  kernels::calculateActivationRange(luci_actfunc(options->fused_activation_function()),
+                                    &activation_min, &activation_max);
+  tflite::PoolParams params{};
+  params.padding_values.height = padding_height;
+  params.padding_values.width = padding_width;
+  params.stride_height = options->stride_h();
+  params.stride_width = options->stride_w();
+  params.filter_height = options->filter_height();
+  params.filter_width = options->filter_width();
+  params.float_activation_min = activation_min;
+  params.float_activation_max = activation_max;
+
+  tflite::reference_ops::MaxPool(
+    params, kernels::getTensorShape(input), kernels::getTensorData<float>(input_data),
+    kernels::getTensorShape(output), kernels::getTensorData<float>(output_data));
+}
+
+#endif // DIS_FLOAT
+
+#ifndef DIS_QUANT
+void evalQuantized(const circle::Tensor *input, const circle::Tensor *output,
+                   const circle::Pool2DOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  int32_t activation_min{};
+  int32_t activation_max{};
+  kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
+                                             output, &activation_min, &activation_max);
+
+  // Compute padding
+  const int32_t input_height = Tensor::dim(input, 1);
+  const int32_t input_width = Tensor::dim(input, 2);
+
+  const int32_t output_height = kernels::computeOutputSize(
+    luci_padding(options->padding()), input_height, options->filter_height(), options->stride_h());
+  const int32_t output_width = kernels::computeOutputSize(
+    luci_padding(options->padding()), input_width, options->filter_width(), options->stride_w());
+
+  const auto padding_height = kernels::computePadding(options->stride_h(), 1, input_height,
+                                                      options->filter_height(), output_height);
+  const auto padding_width = kernels::computePadding(options->stride_w(), 1, input_width,
+                                                     options->filter_width(), output_width);
+
+  tflite::PoolParams params{};
+  params.padding_values.height = padding_height;
+  params.padding_values.width = padding_width;
+  params.stride_height = options->stride_h();
+  params.stride_width = options->stride_w();
+  params.filter_height = options->filter_height();
+  params.filter_width = options->filter_width();
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  const auto *input_data = runtime_graph->getDataByTensor(input);
+  auto *output_data = runtime_graph->getDataByTensor(output);
+
+  tflite::reference_ops::MaxPool(
+    params, kernels::getTensorShape(input), kernels::getTensorData<uint8_t>(input_data),
+    kernels::getTensorShape(output), kernels::getTensorData<uint8_t>(output_data));
+}
+
+void evalSInt16(const circle::Tensor *input, const circle::Tensor *output,
+                const circle::Pool2DOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  int32_t activation_min{};
+  int32_t activation_max{};
+  kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
+                                             output, &activation_min, &activation_max);
+
+  // Compute padding
+  const int32_t input_height = Tensor::dim(input, 1);
+  const int32_t input_width = Tensor::dim(input, 2);
+
+  const int32_t output_height = kernels::computeOutputSize(
+    luci_padding(options->padding()), input_height, options->filter_height(), options->stride_h());
+  const int32_t output_width = kernels::computeOutputSize(
+    luci_padding(options->padding()), input_width, options->filter_width(), options->stride_w());
+
+  const auto padding_height = kernels::computePadding(options->stride_h(), 1, input_height,
+                                                      options->filter_height(), output_height);
+  const auto padding_width = kernels::computePadding(options->stride_w(), 1, input_width,
+                                                     options->filter_width(), output_width);
+
+  tflite::PoolParams params{};
+  params.padding_values.height = padding_height;
+  params.padding_values.width = padding_width;
+  params.stride_height = options->stride_h();
+  params.stride_width = options->stride_w();
+  params.filter_height = options->filter_height();
+  params.filter_width = options->filter_width();
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  const auto *input_data = runtime_graph->getDataByTensor(input);
+  auto *output_data = runtime_graph->getDataByTensor(output);
+
+  tflite::reference_integer_ops::MaxPool(
+    params, kernels::getTensorShape(input), kernels::getTensorData<int16_t>(input_data),
+    kernels::getTensorShape(output), kernels::getTensorData<int16_t>(output_data));
+}
+
+#endif // DIS_QUANT
+
+} // namespace
+
+void configure_kernel_CircleMaxPool2D(const circle::Operator *cur_op,
+                                      BaseRuntimeGraph *runtime_graph)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == Tensor::element_type(output));
+  assert(Tensor::num_dims(input) == 4);
+
+#ifndef DIS_QUANT
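+  // Max pooling only selects existing values, so the quantized variants require the output to
+  // keep the input's quantization parameters (matching scale and zero point).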
+  if (Tensor::element_type(input) == DataType::U8)
+  {
+    LUCI_INTERPRETER_CHECK(std::abs(Tensor::scale(output) - Tensor::scale(input)) <= 1.0e-6);
+    LUCI_INTERPRETER_CHECK(Tensor::zero_point(output) == Tensor::zero_point(input));
+  }
+  else if (Tensor::element_type(input) == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(std::abs(Tensor::scale(output) - Tensor::scale(input)) <= 1.0e-6);
+    LUCI_INTERPRETER_CHECK(Tensor::zero_point(input) == 0 && Tensor::zero_point(output) == 0);
+  }
+#endif // DIS_QUANT
+}
+
+void execute_kernel_CircleMaxPool2D(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph,
+                                    bool)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  const auto *options = cur_op->builtin_options_as_Pool2DOptions();
+
+  switch (Tensor::element_type(input))
+  {
+#ifndef DIS_FLOAT
+    case DataType::FLOAT32:
+      evalFloat(input, output, options, runtime_graph);
+      break;
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+    case DataType::U8:
+      evalQuantized(input, output, options, runtime_graph);
+      break;
+    case DataType::S16:
+      evalSInt16(input, output, options, runtime_graph);
+      break;
+#endif // DIS_QUANT
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp b/onert-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp
new file mode 100644 (file)
index 0000000..d59c5f0
--- /dev/null
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// TODO: enable this test
+#if 0
+#include "kernels/MaxPool2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MaxPool2DTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MaxPool2DTest, Float)
+{
+  Shape input_shape{1, 3, 5, 1};
+  std::vector<float> input_data{
+    1,  -1, 0,  -2, 2,  //
+    -7, -6, -5, -4, -3, //
+    5,  4,  3,  6,  7,  //
+  };
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Pool2DParams params{};
+  params.padding = Padding::VALID;
+  params.filter_height = 2;
+  params.filter_width = 3;
+  params.stride_height = 1;
+  params.stride_width = 2;
+  params.activation = Activation::RELU6;
+
+  MaxPool2D kernel(&input_tensor, &output_tensor, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{
+    1, 2, //
+    5, 6, //
+  };
+  std::initializer_list<int32_t> ref_output_shape{1, 2, 2, 1};
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MaxPool2DTest, Uint8)
+{
+  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375, 15.9375);
+  std::vector<float> input_data{
+    0,  -6, 12, 4, //
+    -3, -2, 10, 7, //
+  };
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+  Pool2DParams params{};
+  params.padding = Padding::VALID;
+  params.filter_height = 2;
+  params.filter_width = 2;
+  params.stride_height = 2;
+  params.stride_width = 2;
+  params.activation = Activation::RELU6;
+
+  MaxPool2D kernel(&input_tensor, &output_tensor, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{0.0, 6.0};
+  std::initializer_list<int32_t> ref_output_shape{1, 1, 2, 1};
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MaxPool2DTest, SInt16)
+{
+  Shape input_shape{1, 3, 5, 1};
+  std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+  std::vector<float> input_data{
+    1,  -1, 0,  -2, 2,  //
+    -7, -6, -5, -4, -3, //
+    5,  4,  3,  6,  7,  //
+  };
+  std::vector<float> ref_output_data{
+    1, 2, //
+    5, 6, //
+  };
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>(input_shape, 0.2, 0, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0);
+
+  Pool2DParams params{};
+  params.padding = Padding::VALID;
+  params.filter_height = 2;
+  params.filter_width = 3;
+  params.stride_height = 1;
+  params.stride_width = 2;
+  params.activation = Activation::RELU6;
+
+  MaxPool2D kernel(&input_tensor, &output_tensor, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
+#endif
diff --git a/onert-micro/luci-interpreter/src/kernels/Maximum.cpp b/onert-micro/luci-interpreter/src/kernels/Maximum.cpp
new file mode 100644 (file)
index 0000000..1a7ee4c
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Maximum.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Maximum::Maximum(const Tensor *input1, const Tensor *input2, Tensor *output)
+  : Kernel({input1, input2}, {output})
+{
+}
+
+void Maximum::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Maximum::execute() const
+{
+  switch (input1()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalMaximum<float>();
+      break;
+    case DataType::U8:
+      evalMaximum<uint8_t>();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+template <typename T> inline void Maximum::evalMaximum() const
+{
+  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+                        getTensorShape(input2()), getTensorData<T>(input2()),
+                        getTensorShape(output()), getTensorData<T>(output()),
+                        [](T x, T y) { return std::max(x, y); });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Mean.cpp b/onert-micro/luci-interpreter/src/kernels/Mean.cpp
new file mode 100644 (file)
index 0000000..4128aa6
--- /dev/null
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mean.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reduce.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+static void resolveAxes(const int32_t *axes_data, int num_axes, tflite::MeanParams *params)
+{
+  params->axis_count = num_axes;
+  for (int i = 0; i < num_axes; ++i)
+  {
+    params->axis[i] = static_cast<int16>(axes_data[i]);
+  }
+  for (int i = num_axes; i < 4; ++i)
+  {
+    params->axis[i] = 1;
+  }
+}
+
+// Returns the number of axes that will be reduced. Removes duplicates.
+static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
+{
+  int reduction_count = num_axes;
+  for (int i = 0; i < num_axes; ++i)
+  {
+    int current = axes_data[i] >= 0 ? axes_data[i] : axes_data[i] + input_num_dims;
+    assert(current >= 0 && current < input_num_dims);
+    for (int j = 0; j < i; j++)
+    {
+      int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims;
+      // This checks for duplicate axes
+      if (current == previous)
+      {
+        --reduction_count;
+        break;
+      }
+    }
+  }
+  return reduction_count;
+}
+
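+// Computes the reduced output shape: with keep_dims the reduced axes collapse to 1, otherwise
+// they are dropped entirely (duplicate axes are only counted once, see getAxisReductionCount).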
+static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
+                            bool keep_dims)
+{
+  int input_num_dims = input_shape.num_dims();
+  if (input_num_dims == 0)
+  {
+    return Shape(0);
+  }
+
+  if (keep_dims)
+  {
+    Shape output_shape(input_num_dims);
+    for (int idx = 0; idx < input_num_dims; ++idx)
+    {
+      bool is_axis = false;
+      for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+      {
+        if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+        {
+          is_axis = true;
+          break;
+        }
+      }
+      if (is_axis)
+      {
+        output_shape.dim(idx) = 1;
+      }
+      else
+      {
+        output_shape.dim(idx) = input_shape.dim(idx);
+      }
+    }
+    return output_shape;
+  }
+  else
+  {
+    int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims);
+    Shape output_shape(input_num_dims - num_reduce_axes);
+    int num_skip_axes = 0;
+    for (int idx = 0; idx < input_num_dims; ++idx)
+    {
+      bool is_axis = false;
+      for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+      {
+        if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+        {
+          ++num_skip_axes;
+          is_axis = true;
+          break;
+        }
+      }
+      if (!is_axis)
+      {
+        output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx);
+      }
+    }
+    return output_shape;
+  }
+}
+
+Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+           Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params)
+  : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes, temp_sum},
+                                    params)
+{
+}
+
+void Mean::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
+  if (input()->element_type() == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+  }
+
+  const Shape &input_shape = input()->shape();
+  int input_num_dims = input_shape.num_dims();
+
+  const auto *axes_data = getTensorData<int32_t>(axes());
+  int num_axes = axes()->shape().num_elements();
+  assert(num_axes <= 4);
+  // TODO: enable this only for kernels with dynamic shapes
+  Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims);
+  output()->resize(output_shape);
+
+  tflite::MeanParams params{};
+  resolveAxes(axes_data, num_axes, &params);
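+  // The specialized 4D mean over the H/W axes (keep_dims with axes {1, 2}) writes directly to the
+  // output, so the temp_index / resolved_axes / temp_sum scratch tensors are only allocated for
+  // the generic reduction path.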
+  _need_temporaries = !(
+    _params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+    ((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1)));
+  if (_need_temporaries)
+  {
+    auto temp_index = getOutputTensors()[1];
+    auto resolved_axes = getOutputTensors()[2];
+    auto temp_sum = getOutputTensors()[3];
+
+    temp_index->resize(Shape(input_num_dims));
+    resolved_axes->resize(Shape(num_axes));
+    temp_sum->resize(output()->shape());
+  }
+  else
+  {
+    auto temp_index = getOutputTensors()[1];
+    auto resolved_axes = getOutputTensors()[2];
+    auto temp_sum = getOutputTensors()[3];
+
+    temp_index->set_allocatable(false);
+    resolved_axes->set_allocatable(false);
+    temp_sum->set_allocatable(false);
+  }
+}
+
+void Mean::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    case DataType::S16:
+      evalQuantizedS16();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void Mean::evalFloat() const
+{
+  const Shape &input_shape = input()->shape();
+  int input_num_dims = input_shape.num_dims();
+  const auto *axes_data = getTensorData<int32_t>(axes());
+  int num_axes = axes()->shape().num_elements();
+
+  tflite::MeanParams params{};
+  resolveAxes(axes_data, num_axes, &params);
+
+  auto temp_index = getOutputTensors()[1];
+  auto resolved_axes = getOutputTensors()[2];
+  auto temp_sum = getOutputTensors()[3];
+
+  // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+  if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+      ((params.axis[0] == 1 && params.axis[1] == 2) ||
+       (params.axis[0] == 2 && params.axis[1] == 1)))
+  {
+    tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<float>(input()),
+                                getTensorShape(output()), getTensorData<float>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Mean(getTensorData<float>(input()), getTensorShape(input()).DimsData(),
+                                input()->shape().num_dims(), getTensorData<float>(output()),
+                                getTensorShape(output()).DimsData(), output()->shape().num_dims(),
+                                axes_data, num_axes, _params.keep_dims,
+                                getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+                                getTensorData<float>(temp_sum));
+  }
+}
+
+void Mean::evalQuantized() const
+{
+  const Shape &input_shape = input()->shape();
+  int input_num_dims = input_shape.num_dims();
+  const auto *axes_data = getTensorData<int32_t>(axes());
+  int num_axes = axes()->shape().num_elements();
+
+  tflite::MeanParams params{};
+  resolveAxes(axes_data, num_axes, &params);
+
+  auto temp_index = getOutputTensors()[1];
+  auto resolved_axes = getOutputTensors()[2];
+  auto temp_sum = getOutputTensors()[3];
+
+  // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+  if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+      ((params.axis[0] == 1 && params.axis[1] == 2) ||
+       (params.axis[0] == 2 && params.axis[1] == 1)))
+  {
+    tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+                                input()->zero_point(), input()->scale(), getTensorShape(output()),
+                                getTensorData<uint8_t>(output()), output()->zero_point(),
+                                output()->scale());
+  }
+  else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale())
+  {
+    tflite::reference_ops::Mean(getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
+                                input()->shape().num_dims(), getTensorData<uint8_t>(output()),
+                                getTensorShape(output()).DimsData(), output()->shape().num_dims(),
+                                axes_data, num_axes, _params.keep_dims,
+                                getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+                                getTensorData<int>(temp_sum));
+  }
+  else
+  {
+    tflite::reference_ops::QuantizedMeanOrSum<>(
+      getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(),
+      getTensorShape(input()).DimsData(), input()->shape().num_dims(),
+      getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
+      getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
+      _params.keep_dims, getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+      getTensorData<int>(temp_sum),
+      /*compute_sum=*/false);
+  }
+}
+
+void Mean::evalQuantizedS16() const
+{
+  const auto *input_data = getTensorData<int16_t>(input());
+  auto *output_data = getTensorData<int16_t>(output());
+
+  const Shape &input_shape = input()->shape();
+  const Shape &output_shape = output()->shape();
+
+  const auto *axes_data = getTensorData<int32_t>(axes());
+  const int num_axes = axes()->shape().num_elements();
+
+  constexpr int32_t output_min = -std::numeric_limits<int16_t>::max();
+  constexpr int32_t output_max = std::numeric_limits<int16_t>::max();
+
+  // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+  if (_params.keep_dims && input_shape.num_dims() == 4 && num_axes == 2 &&
+      ((axes_data[0] == 1 && axes_data[1] == 2) || (axes_data[0] == 2 && axes_data[1] == 1)))
+  {
+    const int32_t batches = input_shape.dim(0);
+    const int32_t input_height = input_shape.dim(1);
+    const int32_t input_width = input_shape.dim(2);
+    const int32_t depth = input_shape.dim(3);
+    assert(output_shape.num_dims() == 4);
+    assert(output_shape.dim(0) == batches);
+    assert(output_shape.dim(1) == 1);
+    assert(output_shape.dim(2) == 1);
+    assert(output_shape.dim(3) == depth);
+
+    const double real_multiplier =
+      static_cast<double>(input()->scale()) / static_cast<double>(output()->scale());
+
+    int32_t output_multiplier{};
+    int output_shift{};
+    quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+    const int32_t num_elements_in_axes = input_height * input_width;
+
+    for (int32_t batch = 0; batch < batches; ++batch)
+    {
+      for (int32_t c = 0; c < depth; ++c)
+      {
+        int32_t acc = 0;
+        for (int32_t in_y = 0; in_y < input_height; ++in_y)
+        {
+          for (int32_t in_x = 0; in_x < input_width; ++in_x)
+          {
+            acc += input_data[calcOffset(input_shape, batch, in_y, in_x, c)];
+          }
+        }
+        int32_t scaled_acc =
+          tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+        // Divide by the number of elements rounding to the nearest integer.
+        scaled_acc = scaled_acc > 0
+                       ? (scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes
+                       : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes;
+
+        scaled_acc = std::max(scaled_acc, output_min);
+        scaled_acc = std::min(scaled_acc, output_max);
+
+        output_data[calcOffset(output_shape, batch, 0, 0, c)] = scaled_acc;
+      }
+    }
+  }
+  else
+  {
+    assert(false && "Unsupported configuration.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
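
A minimal standalone sketch (plain C++, independent of the luci-interpreter types) of the round-to-nearest division applied to scaled_acc in Mean::evalQuantizedS16 above; the helper name round_div is illustrative only.

#include <cstdint>
#include <cstdio>

// Round-to-nearest integer division, mirroring the scaled_acc adjustment above.
int32_t round_div(int32_t acc, int32_t n)
{
  return acc > 0 ? (acc + n / 2) / n : (acc - n / 2) / n;
}

int main()
{
  // Averaging 7 elements whose scaled sum is 25 (or -25): 25/7 = 3.57... rounds to 4.
  std::printf("%d %d\n", round_div(25, 7), round_div(-25, 7)); // prints "4 -4"
  return 0;
}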
diff --git a/onert-micro/luci-interpreter/src/kernels/Minimum.cpp b/onert-micro/luci-interpreter/src/kernels/Minimum.cpp
new file mode 100644 (file)
index 0000000..f74e6c0
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Minimum.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Minimum::Minimum(const Tensor *input1, const Tensor *input2, Tensor *output)
+  : Kernel({input1, input2}, {output})
+{
+}
+
+void Minimum::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+  // TODO: enable this resize only for kernels with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Minimum::execute() const
+{
+  switch (input1()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalMinimum<float>();
+      break;
+    case DataType::U8:
+      evalMinimum<uint8_t>();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+template <typename T> inline void Minimum::evalMinimum() const
+{
+  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+                        getTensorShape(input2()), getTensorData<T>(input2()),
+                        getTensorShape(output()), getTensorData<T>(output()),
+                        [](T x, T y) { return std::min(x, y); });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/MirrorPad.cpp b/onert-micro/luci-interpreter/src/kernels/MirrorPad.cpp
new file mode 100644 (file)
index 0000000..d9e60b0
--- /dev/null
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MirrorPad.h"
+
+#include "kernels/Utils.h"
+
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+MirrorPad::MirrorPad(const Tensor *input, const Tensor *paddings, Tensor *output,
+                     const MirrorPadParams &params)
+  : KernelWithParams<MirrorPadParams>({input, paddings}, {output}, params)
+{
+}
+
+void MirrorPad::configure()
+{
+  const Shape &input_shape = input()->shape();
+  const int num_dims = input_shape.num_dims();
+
+  if (num_dims > 4)
+    assert(false && "Unsupported number of dimensions.");
+
+  assert(output()->element_type() == input()->element_type());
+  assert(paddings()->element_type() == DataType::S32);
+  // Paddings shape should be [N, 2].
+  assert(paddings()->shape().num_dims() == 2);
+  assert(paddings()->shape().dim(0) == num_dims);
+  assert(paddings()->shape().dim(1) == 2);
+
+  Shape output_shape(num_dims);
+  const auto *paddings_data = getTensorData<int32_t>(paddings());
+  for (int i = 0; i < num_dims; ++i)
+  {
+    const int32_t padding_before = paddings_data[i * 2];
+    const int32_t padding_after = paddings_data[i * 2 + 1];
+    assert(padding_before >= 0 && padding_after >= 0);
+    output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
+  }
+  // TODO: enable this resize only for kernels with dynamic shapes
+  output()->resize(output_shape);
+}
+
+template <typename T>
+inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
+                          Tensor &output);
+
+void MirrorPad::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+    {
+      MirrorPadImpl<float>(*input(), *paddings(), params().mode, *output());
+      break;
+    }
+    case DataType::U8:
+    {
+      assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
+      assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
+
+      MirrorPadImpl<uint8_t>(*input(), *paddings(), params().mode, *output());
+      break;
+    }
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+template <typename T>
+inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
+                          Tensor &output)
+{
+  auto const input_dims = input.shape().num_dims();
+  auto const input_data = input.data<T>();
+  auto const paddings_data = paddings.data<int32_t>();
+  auto const output_data = output.data<T>();
+
+  auto const input_b = input_dims > 3 ? input.shape().dim(input_dims - 4) : 1;
+  auto const input_h = input_dims > 2 ? input.shape().dim(input_dims - 3) : 1;
+  auto const input_w = input_dims > 1 ? input.shape().dim(input_dims - 2) : 1;
+  auto const input_d = input.shape().dim(input_dims - 1);
+
+  auto const input_h_offset = input_d * input_w;
+  auto const input_b_offset = input_h_offset * input_h;
+
+  auto const output_b = input_dims > 3 ? output.shape().dim(input_dims - 4) : 1;
+  auto const output_h = input_dims > 2 ? output.shape().dim(input_dims - 3) : 1;
+  auto const output_w = input_dims > 1 ? output.shape().dim(input_dims - 2) : 1;
+  auto const output_d = output.shape().dim(input_dims - 1);
+
+  auto const left_b_pad = paddings_data[2 * (input_dims - 4)];
+  auto const left_h_pad = paddings_data[2 * (input_dims - 3)];
+  auto const left_w_pad = paddings_data[2 * (input_dims - 2)];
+  auto const left_d_pad = paddings_data[2 * (input_dims - 1)];
+
+  auto const right_b_pad = paddings_data[2 * (input_dims - 4) + 1];
+  auto const right_h_pad = paddings_data[2 * (input_dims - 3) + 1];
+  auto const right_w_pad = paddings_data[2 * (input_dims - 2) + 1];
+  auto const right_d_pad = paddings_data[2 * (input_dims - 1) + 1];
+
+  const auto positive_mod = [](auto a, auto b) { return (a % b + b) % b; };
+  const auto offset_index = [input_d, input_h_offset, input_b_offset](auto d, auto w, auto h,
+                                                                      auto b) {
+    return d + w * input_d + h * input_h_offset + b * input_b_offset;
+  };
+
+  const auto symmetric_dim = [&positive_mod](auto i, auto left_pad, auto input) {
+    bool reflected = (((i < left_pad ? i + 1 - input : i) - left_pad) / input & 1) == 1;
+    return positive_mod(reflected ? input + left_pad - i - 1 : i - left_pad, input);
+  };
+
+  const T *in_ptr = input_data;
+  T *out_ptr = output_data;
+
+  for (int32_t b = 0; b < output_b; ++b)
+  {
+    for (int32_t h = 0; h < output_h; ++h)
+    {
+      for (int32_t w = 0; w < output_w; ++w)
+      {
+        for (int32_t d = 0; d < output_d; ++d)
+        {
+          if (b < left_b_pad || b >= output_b - right_b_pad || //
+              h < left_h_pad || h >= output_h - right_h_pad || //
+              w < left_w_pad || w >= output_w - right_w_pad || //
+              d < left_d_pad || d >= output_d - right_d_pad)
+          {
+            if (mode == MirrorPadMode::REFLECT)
+            {
+              *out_ptr++ = input_data[offset_index(
+                positive_mod(d - left_d_pad, input_d), positive_mod(w - left_w_pad, input_w),
+                positive_mod(h - left_h_pad, input_h), positive_mod(b - left_b_pad, input_b))];
+            }
+            else
+            {
+              *out_ptr++ = input_data[offset_index(
+                symmetric_dim(d, left_d_pad, input_d), symmetric_dim(w, left_w_pad, input_w),
+                symmetric_dim(h, left_h_pad, input_h), symmetric_dim(b, left_b_pad, input_b))];
+            }
+          }
+          else
+          {
+            *out_ptr++ = *in_ptr++;
+          }
+        }
+      }
+    }
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
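
A small standalone check (plain C++, one dimension only, not the kernel itself) of the symmetric_dim index mapping defined in MirrorPadImpl above; values assume a 3-element row padded by 2 on the left in SYMMETRIC mode.

#include <cstdio>

int positive_mod(int a, int b) { return (a % b + b) % b; }

// Same per-dimension mapping as the symmetric_dim lambda in MirrorPadImpl.
int symmetric_dim(int i, int left_pad, int input)
{
  bool reflected = (((i < left_pad ? i + 1 - input : i) - left_pad) / input & 1) == 1;
  return positive_mod(reflected ? input + left_pad - i - 1 : i - left_pad, input);
}

int main()
{
  const int input[3] = {1, 2, 3};
  // SYMMETRIC padding of {1, 2, 3} with 2 elements on the left yields 2 1 1 2 3.
  for (int i = 0; i < 5; ++i)
    std::printf("%d ", input[symmetric_dim(i, /*left_pad=*/2, /*input=*/3)]);
  std::printf("\n");
  return 0;
}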
diff --git a/onert-micro/luci-interpreter/src/kernels/Mul.cpp b/onert-micro/luci-interpreter/src/kernels/Mul.cpp
new file mode 100644 (file)
index 0000000..bac9fe3
--- /dev/null
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mul.h"
+
+#include "kernels/BinaryOpCommon.h"
+#include "kernels/Utils.h"
+
+#include "PALMul.h"
+
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Mul::Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams &params)
+  : KernelWithParams<MulParams>({input1, input2}, {output}, params)
+{
+}
+
+void Mul::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == input1()->element_type());
+  if (input1()->element_type() == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
+                           input2()->zero_points().size() == 1);
+    LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
+                           output()->zero_point() == 0);
+  }
+  // TODO: enable this resize only for kernels with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Mul::execute() const
+{
+  switch (input1()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
+    case DataType::S16:
+      evalQuantizedS16();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void Mul::evalFloat() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<float>(params, _params.activation);
+
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    luci_interpreter_pal::BroadcastMul4DSlow(
+      params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+      getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+  }
+  else
+  {
+    luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
+                              getTensorShape(input2()), getTensorData<float>(input2()),
+                              getTensorShape(output()), getTensorData<float>(output()));
+  }
+}
+
+template <typename T> void Mul::evalInteger() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<T>(params, _params.activation);
+
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    luci_interpreter_pal::BroadcastMul4DSlow(
+      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+  }
+  else
+  {
+    luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<T>(input1()),
+                              getTensorShape(input2()), getTensorData<T>(input2()),
+                              getTensorShape(output()), getTensorData<T>(output()));
+  }
+}
+
+void Mul::evalQuantizedS16() const
+{
+  const auto input1_scale = static_cast<double>(input1()->scale());
+  const auto input2_scale = static_cast<double>(input2()->scale());
+  const auto output_scale = static_cast<double>(output()->scale());
+
+  const double real_multiplier = input1_scale * input2_scale / output_scale;
+
+  int32_t output_multiplier;
+  int output_shift;
+  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  auto fn = [output_multiplier, output_shift, activation_min, activation_max](int16_t input1_val,
+                                                                              int16_t input2_val) {
+    int32_t output = static_cast<int32_t>(input1_val) * static_cast<int32_t>(input2_val);
+    output = tflite::MultiplyByQuantizedMultiplier(output, output_multiplier, output_shift);
+    output = std::max(output, activation_min);
+    output = std::min(output, activation_max);
+    return static_cast<int16_t>(output);
+  };
+
+  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
+                        getTensorShape(input2()), getTensorData<int16_t>(input2()),
+                        getTensorShape(output()), getTensorData<int16_t>(output()), fn);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
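
A floating-point reference, assuming symmetric S16 quantization (all zero points 0), of the rescaling that the output_multiplier/output_shift pair in Mul::evalQuantizedS16 encodes; mul_s16 is an illustrative name, not a kernel API.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int16_t mul_s16(int16_t a, int16_t b, double s1, double s2, double s_out)
{
  // Product of quantized values, rescaled by s1 * s2 / s_out and clamped to the int16 range.
  const double real = static_cast<double>(a) * b * (s1 * s2 / s_out);
  const double clamped = std::min(32767.0, std::max(-32768.0, std::round(real)));
  return static_cast<int16_t>(clamped);
}

int main()
{
  // 0.5 * 0.25 with all scales 1/1024: quantized inputs 512 and 256, expected output 128 (0.125).
  std::printf("%d\n", mul_s16(512, 256, 1.0 / 1024, 1.0 / 1024, 1.0 / 1024)); // prints 128
  return 0;
}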
diff --git a/onert-micro/luci-interpreter/src/kernels/Neg.cpp b/onert-micro/luci-interpreter/src/kernels/Neg.cpp
new file mode 100644 (file)
index 0000000..961148a
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Neg.h"
+#include "kernels/Utils.h"
+
+#include "PALNeg.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Neg::Neg(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Neg::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  // TODO: enable this resize only for kernels with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+void Neg::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void Neg::evalFloat() const
+{
+  luci_interpreter_pal::Negate(getTensorShape(input()), getTensorData<float>(input()),
+                               getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/NotEqual.cpp b/onert-micro/luci-interpreter/src/kernels/NotEqual.cpp
new file mode 100644 (file)
index 0000000..8c3983e
--- /dev/null
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/NotEqual.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+NotEqual::NotEqual(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void NotEqual::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+  if (x()->element_type() == DataType::U8)
+  {
+    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+  }
+  // TODO: enable this resize only for kernels with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void NotEqual::execute() const
+{
+  switch (x()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void NotEqual::evalFloat() const
+{
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowNotEqual(op_params, getTensorShape(x()), x_data,
+                                                   getTensorShape(y()), y_data,
+                                                   getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::NotEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+                                    y_data, getTensorShape(output()), output_data);
+  }
+}
+
+template <typename T> void NotEqual::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowNotEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                                            getTensorShape(y()), y_data,
+                                                            getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::NotEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                             getTensorShape(y()), y_data, getTensorShape(output()),
+                                             output_data);
+  }
+}
+
+void NotEqual::evalQuantized() const
+{
+  const auto x_data = getTensorData<uint8_t>(x());
+  const auto y_data = getTensorData<uint8_t>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.left_shift = 8;
+  op_params.input1_offset = -x()->zero_point(); // Note the '-'
+  op_params.input1_shift = _x_shift;
+  op_params.input1_multiplier = _x_multiplier;
+  op_params.input2_offset = -y()->zero_point(); // Note the '-'
+  op_params.input2_shift = _y_shift;
+  op_params.input2_multiplier = _y_multiplier;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowNotEqualWithScaling(
+      op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      output_data);
+  }
+  else
+  {
+    tflite::reference_ops::NotEqualWithScaling(op_params, getTensorShape(x()), x_data,
+                                               getTensorShape(y()), y_data,
+                                               getTensorShape(output()), output_data);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/OneHot.cpp b/onert-micro/luci-interpreter/src/kernels/OneHot.cpp
new file mode 100644 (file)
index 0000000..fa99895
--- /dev/null
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/OneHot.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+template <typename T>
+void OneHotComputeImpl(const Tensor *indices_tensor, const Tensor *on_value_tensor,
+                       const Tensor *off_value_tensor, int32_t depth, int32_t axis,
+                       Tensor *output_tensor)
+{
+  // define input shape and correct axis
+  auto const &input_shape = indices_tensor->shape();
+  axis = axis == -1 ? input_shape.num_dims() : axis;
+
+  // TODO support other integer input types
+  auto const *indices = getTensorData<int32_t>(indices_tensor);
+  auto const on_value = getTensorData<T>(on_value_tensor)[0];
+  auto const off_value = getTensorData<T>(off_value_tensor)[0];
+  auto *output = getTensorData<T>(output_tensor);
+
+  // prefix_dim_size == # of elements before the axis
+  // depth == # of elements per axis
+  // suffix_dim_size == # of elements after the axis
+  auto prefix_dim_size = 1;
+  for (int32_t i = 0; i < axis; ++i)
+  {
+    prefix_dim_size *= input_shape.dim(i);
+  }
+  assert(prefix_dim_size > 0);
+  auto const suffix_dim_size = input_shape.num_elements() / prefix_dim_size;
+
+  // View the indices as a matrix of size:
+  //     prefix_dim_size x suffix_dim_size
+  // View the output as a matrix of size:
+  //     prefix_dim_size x depth x suffix_dim_size
+  // Then the output is:
+  //     output(i, j, k) == (indices(i, k) == j) ? on : off
+  for (int32_t i = 0; i < prefix_dim_size; ++i)
+    for (int32_t j = 0; j < depth; ++j)
+      for (int32_t k = 0; k < suffix_dim_size; ++k, ++output)
+        *output = indices[i * suffix_dim_size + k] == j ? on_value : off_value;
+}
+
+} // namespace
+
+OneHot::OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
+               const Tensor *off_value, Tensor *output, const OneHotParams &params)
+  : KernelWithParams<OneHotParams>({indices, depth, on_value, off_value}, {output}, params)
+{
+  // Do nothing
+}
+
+void OneHot::configure()
+{
+  // check types
+  LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32);
+  LUCI_INTERPRETER_CHECK(depth()->element_type() == DataType::S32);
+  LUCI_INTERPRETER_CHECK(on_value()->element_type() == off_value()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == on_value()->element_type());
+
+  // check shape dependent parameters
+  LUCI_INTERPRETER_CHECK(on_value()->shape().num_elements() == 1);
+  LUCI_INTERPRETER_CHECK(off_value()->shape().num_elements() == 1);
+  LUCI_INTERPRETER_CHECK(depth()->shape().num_elements() == 1);
+  LUCI_INTERPRETER_CHECK(params().axis >= -1 && params().axis <= indices()->shape().num_dims());
+
+  // define parameters that affect the output shape
+  auto const depth_value = getTensorData<int32_t>(depth())[0];
+  auto const &input_shape = indices()->shape();
+  auto const input_dims = input_shape.num_dims();
+  auto const axis = params().axis == -1 ? input_dims : params().axis;
+
+  // define output shape
+  Shape output_shape(input_shape.num_dims() + 1);
+  {
+    for (int32_t d = 0; d < axis; ++d)
+      output_shape.dim(d) = input_shape.dim(d);
+
+    output_shape.dim(axis) = depth_value;
+
+    for (int32_t d = axis + 1; d < output_shape.num_dims(); ++d)
+      output_shape.dim(d) = input_shape.dim(d - 1);
+  }
+  // TODO: enable this resize only for kernels with dynamic shapes
+  // reshape output
+  output()->resize(output_shape);
+}
+
+void OneHot::execute() const
+{
+  auto const depth_value = getTensorData<int32_t>(depth())[0];
+  auto const axis = params().axis;
+
+  switch (output()->element_type())
+  {
+    case DataType::FLOAT32:
+      OneHotComputeImpl<float>(indices(), on_value(), off_value(), depth_value, axis, output());
+      break;
+    case DataType::U8:
+      OneHotComputeImpl<uint8_t>(indices(), on_value(), off_value(), depth_value, axis, output());
+      break;
+    case DataType::S16:
+      OneHotComputeImpl<int16_t>(indices(), on_value(), off_value(), depth_value, axis, output());
+      break;
+    default:
+      // TODO Support other data types
+      assert(false && "Not supported, yet!");
+      break;
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
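
A tiny standalone illustration (plain C++, not the kernel itself) of the prefix x depth x suffix indexing described in the OneHotComputeImpl comment above, for a rank-1 indices tensor and axis -1.

#include <cstdio>

int main()
{
  const int indices[4] = {0, 2, 1, 3}; // shape [4]; with axis -1 the output shape is [4, depth]
  const int depth = 4;
  const float on = 1.f, off = 0.f;
  float output[4 * 4];

  // For this shape and axis, prefix_dim_size == 4 and suffix_dim_size == 1.
  float *out = output;
  for (int i = 0; i < 4; ++i)       // prefix
    for (int j = 0; j < depth; ++j) // depth
      *out++ = (indices[i] == j) ? on : off;

  for (int i = 0; i < 4; ++i)
  {
    for (int j = 0; j < depth; ++j)
      std::printf("%.0f ", output[i * depth + j]);
    std::printf("\n"); // row i has a single 1 at column indices[i]
  }
  return 0;
}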
diff --git a/onert-micro/luci-interpreter/src/kernels/PRelu.cpp b/onert-micro/luci-interpreter/src/kernels/PRelu.cpp
new file mode 100644 (file)
index 0000000..3d64215
--- /dev/null
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PRelu.h"
+
+#include "kernels/BinaryOpCommon.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
+#include <tensorflow/lite/kernels/internal/reference/prelu.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+PRelu::PRelu(const Tensor *input, const Tensor *alpha, Tensor *output)
+  : Kernel({input, alpha}, {output})
+{
+}
+
+PRelu::~PRelu()
+{
+  // Destructor is declared so that the vector of quantized alpha data is released properly
+}
+
+void PRelu::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  LUCI_INTERPRETER_CHECK(alpha()->element_type() == output()->element_type());
+  LUCI_INTERPRETER_CHECK(input()->scales().size() <= 1);
+  LUCI_INTERPRETER_CHECK(output()->scales().size() <= 1);
+
+  if (input()->element_type() == DataType::U8)
+  {
+    LUCI_INTERPRETER_CHECK(alpha()->scales().size() <= 1); // remove when CWQ kernel arrives
+    _alpha_multipliers.resize(1);
+    double alpha_multiplier = input()->scale() * alpha()->scale() / output()->scale();
+    quantizeMultiplier(alpha_multiplier, &_alpha_multipliers[0].multiplier,
+                       &_alpha_multipliers[0].shift);
+    double identity_multiplier = input()->scale() / output()->scale();
+    quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
+  }
+  else if (input()->element_type() == DataType::S16)
+  {
+    // Common check for correctness of quant params
+    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+    for (size_t channel = 0; channel < alpha()->zero_points().size(); ++channel)
+    {
+      LUCI_INTERPRETER_CHECK(alpha()->zero_points()[channel] == 0);
+    }
+    // PRelu specific checks for CWQ
+    LUCI_INTERPRETER_CHECK(alpha()->quantized_dimension() == alpha()->shape().num_dims() - 1);
+    LUCI_INTERPRETER_CHECK(static_cast<int32_t>(alpha()->scales().size()) ==
+                           alpha()->shape().dim(alpha()->quantized_dimension()));
+    LUCI_INTERPRETER_CHECK(alpha()->shape().num_elements() ==
+                           input()->shape().dim(input()->shape().num_dims() - 1));
+
+    // all dimension of alpha except last one should be size 1
+    for (int dim = 0; dim < alpha()->shape().num_dims() - 1; ++dim)
+    {
+      LUCI_INTERPRETER_CHECK(alpha()->shape().dim(dim) == 1);
+    }
+
+    std::vector<double> real_multipliers =
+      getQuantizedConvolutionMultiplers(input()->scale(), alpha()->scales(), output()->scale());
+
+    _alpha_multipliers = quantizeMultipliers(real_multipliers);
+
+    double identity_multiplier = input()->scale() / output()->scale();
+    quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
+  }
+  // TODO: enable this resize only for kernels with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(input()->shape(), alpha()->shape()));
+}
+
+void PRelu::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    case DataType::S16:
+      evalQuantizedS16();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void PRelu::evalFloat() const
+{
+  const auto input_data = getTensorData<float>(input());
+  const auto alpha_data = getTensorData<float>(alpha());
+  const auto size = getTensorShape(input()).FlatSize();
+  auto output_data = getTensorData<float>(output());
+
+  auto PReluFunc = [](float input, float alpha) { return input >= 0.0 ? input : input * alpha; };
+
+  if (input()->shape() != alpha()->shape())
+  {
+    tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
+      getTensorShape(input()), getTensorData<float>(input()), getTensorShape(alpha()),
+      getTensorData<float>(alpha()), getTensorShape(output()), getTensorData<float>(output()),
+      PReluFunc);
+  }
+  else
+  {
+    for (auto i = decltype(size){0}; i < size; ++i)
+    {
+      if (input_data[i] >= 0)
+        output_data[i] = input_data[i];
+      else
+        output_data[i] = input_data[i] * alpha_data[i];
+    }
+  }
+}
+
+void PRelu::evalQuantized() const
+{
+  tflite::PreluParams op_params{};
+
+  op_params.input_offset = -input()->zero_point(); // Note the '-'.
+  op_params.alpha_offset = -alpha()->zero_point(); // Note the '-'.
+  op_params.output_offset = output()->zero_point();
+  op_params.output_shift_1 = _output_shift_identity;
+  op_params.output_multiplier_1 = _output_multiplier_identity;
+  op_params.output_shift_2 = _alpha_multipliers[0].shift;
+  op_params.output_multiplier_2 = _alpha_multipliers[0].multiplier;
+
+  if (input()->shape() != alpha()->shape())
+  {
+    tflite::reference_ops::BroadcastPrelu4DSlow(
+      op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
+      getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Prelu<uint8_t>(
+      op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
+      getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+  }
+}
+
+static inline int16_t evalElemS16PRelu(int16_t input_val, int16_t alpha_val,
+                                       const ChannelQuantMultipliers &identity_mult,
+                                       const ChannelQuantMultipliers &alpha_mult)
+{
+  constexpr int32_t quantized_min = std::numeric_limits<int16_t>::min();
+  constexpr int32_t quantized_max = std::numeric_limits<int16_t>::max();
+
+  const int32_t output_val =
+    input_val >= 0
+      ? tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val),
+                                              identity_mult.multiplier, identity_mult.shift)
+      : tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val * alpha_val),
+                                              alpha_mult.multiplier, alpha_mult.shift);
+  const int32_t clamped_output = std::min(quantized_max, std::max(quantized_min, output_val));
+  return clamped_output;
+}
+
+void PRelu::evalQuantizedS16() const
+{
+  // Note that this kernel assumes alpha is CWQ
+  tflite::RuntimeShape input_shape = getTensorShape(input());
+  const int16_t *input_data = input()->data<int16_t>();
+  const int16_t *alpha_data = alpha()->data<int16_t>();
+  int16_t *output_data = output()->data<int16_t>();
+
+  const ChannelQuantMultipliers pos_mult{_output_shift_identity, _output_multiplier_identity};
+
+  const int last_dim = input()->shape().num_dims() - 1;
+
+  int32_t outer_dims_size = 1;
+  for (int i = 0; i < last_dim; ++i)
+    outer_dims_size *= input_shape.Dims(i);
+  int32_t quant_dim_size = input_shape.Dims(last_dim);
+
+  for (int32_t outer_dims = 0; outer_dims < outer_dims_size; ++outer_dims)
+    for (int32_t quant_channel = 0; quant_channel < quant_dim_size; ++quant_channel)
+    {
+      const ChannelQuantMultipliers &neg_mult = _alpha_multipliers[quant_channel];
+      size_t offset = static_cast<size_t>(outer_dims) * static_cast<size_t>(quant_dim_size);
+      offset += quant_channel;
+
+      output_data[offset] =
+        evalElemS16PRelu(input_data[offset], alpha_data[quant_channel], pos_mult, neg_mult);
+    }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
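
The float path above reduces to the element-wise rule below; a two-value check of the same PReluFunc logic, with no broadcasting or quantization involved.

#include <cstdio>

float prelu(float x, float alpha) { return x >= 0.f ? x : x * alpha; }

int main()
{
  // Positive inputs pass through unchanged, negative inputs are scaled by alpha.
  std::printf("%.2f %.2f\n", prelu(2.0f, 0.25f), prelu(-2.0f, 0.25f)); // prints "2.00 -0.50"
  return 0;
}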
diff --git a/onert-micro/luci-interpreter/src/kernels/Pack.cpp b/onert-micro/luci-interpreter/src/kernels/Pack.cpp
new file mode 100644 (file)
index 0000000..62228f1
--- /dev/null
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pack.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Pack::Pack(std::vector<const Tensor *> inputs, Tensor *output, const PackParams &params)
+  : KernelWithParams<PackParams>(std::move(inputs), {output}, params)
+{
+}
+
+void Pack::configure()
+{
+  LUCI_INTERPRETER_CHECK(_inputs.size() == static_cast<uint32_t>(params().values_count));
+  const Tensor *t0 = _inputs[0];
+  const int dimension_size = t0->shape().num_dims() + 1;
+  int axis = params().axis;
+  if (axis < 0)
+  {
+    axis += dimension_size;
+  }
+  LUCI_INTERPRETER_CHECK(axis >= 0 && axis <= t0->shape().num_dims());
+
+  if (t0->element_type() != DataType::S32 && t0->element_type() != DataType::FLOAT32 &&
+      t0->element_type() != DataType::U8 && t0->element_type() != DataType::S8 &&
+      t0->element_type() != DataType::S16 && t0->element_type() != DataType::S64)
+  {
+    assert(false && "Unsupported type.");
+  }
+
+  for (uint32_t i = 1; i < _inputs.size(); ++i)
+  {
+    const Tensor *tensor = _inputs[i];
+    LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type());
+    LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims());
+    for (int d = 0; d < t0->shape().num_dims(); ++d)
+    {
+      LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d));
+    }
+  }
+
+  Shape output_shape(dimension_size);
+  int i = 0;
+  for (int index = 0; index < dimension_size; ++index)
+  {
+    if (index == axis)
+    {
+      output_shape.dim(index) = params().values_count;
+    }
+    else
+    {
+      output_shape.dim(index) = t0->shape().dim(i++);
+    }
+  }
+
+  if (t0->element_type() == DataType::U8 || t0->element_type() == DataType::S8 ||
+      t0->element_type() == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(output()->zero_point() == t0->zero_point());
+    LUCI_INTERPRETER_CHECK(output()->scale() == t0->scale());
+    // Guarantee that the input and output quantization params match, since
+    // re-quantization while packing quantized tensors is not supported.
+    for (int i = 0; i < params().values_count; i++)
+    {
+      LUCI_INTERPRETER_CHECK(_inputs[i]->zero_point() == t0->zero_point());
+      LUCI_INTERPRETER_CHECK(_inputs[i]->scale() == t0->scale());
+    }
+  }
+  // TODO: enable this resize only for kernels with dynamic shapes
+  output()->resize(output_shape);
+}
+
+void Pack::execute() const
+{
+  switch (_inputs[0]->element_type())
+  {
+    case DataType::FLOAT32:
+      evalGeneric<float>();
+      break;
+    case DataType::U8:
+      evalGeneric<uint8_t>();
+      break;
+    case DataType::S8:
+      evalGeneric<int8_t>();
+      break;
+    case DataType::S16:
+      evalGeneric<int16_t>();
+      break;
+    case DataType::S32:
+      evalGeneric<int32_t>();
+      break;
+    case DataType::S64:
+      evalGeneric<int64_t>();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+template <typename T> void Pack::evalGeneric() const
+{
+  const Tensor *t0 = _inputs[0];
+  const int dimension_size = t0->shape().num_dims() + 1;
+  int axis = params().axis;
+  if (axis < 0)
+  {
+    axis += dimension_size;
+  }
+
+  VectorOfTensors<T, true> inputs(_inputs);
+  tflite::PackParams params{};
+  params.axis = axis;
+  params.inputs_count = _inputs.size();
+  tflite::reference_ops::Pack<T>(params, inputs.shapes(), inputs.data(), getTensorShape(output()),
+                                 getTensorData<T>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Pad.cpp b/onert-micro/luci-interpreter/src/kernels/Pad.cpp
new file mode 100644 (file)
index 0000000..6b45265
--- /dev/null
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pad.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/pad.h>
+
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Pad::Pad(const Tensor *input, const Tensor *paddings, Tensor *output)
+  : Kernel({input, paddings}, {output})
+{
+}
+
+void Pad::configure()
+{
+  const Shape &input_shape = input()->shape();
+  const int num_dims = input_shape.num_dims();
+
+  if (num_dims > 4)
+    assert(false && "Unsupported number of dimensions.");
+
+  assert(output()->element_type() == input()->element_type());
+  assert(paddings()->element_type() == DataType::S32);
+  // Paddings shape should be [N, 2].
+  assert(paddings()->shape().num_dims() == 2);
+  assert(paddings()->shape().dim(0) == num_dims);
+  assert(paddings()->shape().dim(1) == 2);
+
+  Shape output_shape(num_dims);
+  const auto *paddings_data = getTensorData<int32_t>(paddings());
+  for (int i = 0; i < num_dims; ++i)
+  {
+    const int32_t padding_before = paddings_data[i * 2];
+    const int32_t padding_after = paddings_data[i * 2 + 1];
+    assert(padding_before >= 0 && padding_after >= 0);
+    output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
+  }
+  // TODO: enable this resize only for kernels with dynamic shapes
+  output()->resize(output_shape);
+}
+
+void Pad::execute() const
+{
+  const int num_dims = input()->shape().num_dims();
+
+  tflite::PadParams params{};
+  params.left_padding_count = num_dims;
+  params.right_padding_count = num_dims;
+
+  const auto *paddings_data = getTensorData<int32_t>(paddings());
+  for (int i = num_dims - 1; i >= 0; --i)
+  {
+    params.left_padding[i] = paddings_data[i * 2];
+    params.right_padding[i] = paddings_data[i * 2 + 1];
+  }
+
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+    {
+      const float pad_value = 0.0f;
+      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()),
+                                 &pad_value, getTensorShape(output()),
+                                 getTensorData<float>(output()));
+      break;
+    }
+    case DataType::U8:
+    {
+      assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
+      assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
+      const auto pad_value = static_cast<uint8_t>(output()->zero_point());
+      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+                                 &pad_value, getTensorShape(output()),
+                                 getTensorData<uint8_t>(output()));
+      break;
+    }
+    case DataType::S8:
+    {
+      assert(output()->zero_point() >= std::numeric_limits<int8_t>::min());
+      assert(output()->zero_point() <= std::numeric_limits<int8_t>::max());
+      const auto pad_value = static_cast<int8_t>(output()->zero_point());
+      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<int8_t>(input()),
+                                 &pad_value, getTensorShape(output()),
+                                 getTensorData<int8_t>(output()));
+      break;
+    }
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
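
A minimal sketch (plain C++, hypothetical shapes) of how the [N, 2] paddings tensor consumed by Pad::configure maps to the output shape: column 0 is the padding before a dimension, column 1 the padding after it.

#include <cstdio>

int main()
{
  const int input_shape[2] = {2, 3};
  const int paddings[2][2] = {{1, 1}, {0, 2}}; // {{before_d0, after_d0}, {before_d1, after_d1}}

  int output_shape[2];
  for (int i = 0; i < 2; ++i)
    output_shape[i] = input_shape[i] + paddings[i][0] + paddings[i][1];

  std::printf("output shape: %d x %d\n", output_shape[0], output_shape[1]); // prints "4 x 5"
  return 0;
}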
diff --git a/onert-micro/luci-interpreter/src/kernels/PadV2.cpp b/onert-micro/luci-interpreter/src/kernels/PadV2.cpp
new file mode 100644 (file)
index 0000000..c141e3d
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PadV2.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/pad.h>
+
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+PadV2::PadV2(const Tensor *input, const Tensor *paddings, const Tensor *constant_values,
+             Tensor *output)
+  : Kernel({input, paddings, constant_values}, {output})
+{
+}
+
+void PadV2::configure()
+{
+  const Shape &input_shape = input()->shape();
+  const int num_dims = input_shape.num_dims();
+
+  if (num_dims > 4)
+    assert(false && "Unsupported number of dimensions.");
+
+  assert(output()->element_type() == input()->element_type());
+  assert(paddings()->element_type() == DataType::S32);
+  assert(constant_values()->element_type() == output()->element_type());
+  // Paddings shape should be [N, 2].
+  assert(paddings()->shape().num_dims() == 2);
+  assert(paddings()->shape().dim(0) == num_dims);
+  assert(paddings()->shape().dim(1) == 2);
+  // Constant values elements number should be 1.
+  assert(constant_values()->shape().num_elements() == 1);
+
+  Shape output_shape(num_dims);
+  const auto *paddings_data = getTensorData<int32_t>(paddings());
+  for (int i = 0; i < num_dims; ++i)
+  {
+    const int32_t padding_before = paddings_data[i * 2];
+    const int32_t padding_after = paddings_data[i * 2 + 1];
+    assert(padding_before >= 0 && padding_after >= 0);
+    output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
+  }
+  // TODO: enable this resize only for kernels with dynamic shapes
+  output()->resize(output_shape);
+}
+
+void PadV2::execute() const
+{
+  const int num_dims = input()->shape().num_dims();
+
+  tflite::PadParams params{};
+  params.left_padding_count = num_dims;
+  params.right_padding_count = num_dims;
+
+  const auto *paddings_data = getTensorData<int32_t>(paddings());
+  for (int i = num_dims - 1; i >= 0; --i)
+  {
+    params.left_padding[i] = paddings_data[i * 2];
+    params.right_padding[i] = paddings_data[i * 2 + 1];
+  }
+
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+    {
+      const auto pad_value = getTensorData<float>(constant_values())[0];
+      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()),
+                                 &pad_value, getTensorShape(output()),
+                                 getTensorData<float>(output()));
+      break;
+    }
+    case DataType::U8:
+    {
+      assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
+      assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
+      const auto pad_value = getTensorData<uint8_t>(constant_values())[0];
+      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+                                 &pad_value, getTensorShape(output()),
+                                 getTensorData<uint8_t>(output()));
+      break;
+    }
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Pow.cpp b/onert-micro/luci-interpreter/src/kernels/Pow.cpp
new file mode 100644 (file)
index 0000000..6345bf3
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pow.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Pow::Pow(const Tensor *input1, const Tensor *input2, Tensor *output)
+  : Kernel({input1, input2}, {output})
+{
+}
+
+void Pow::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+  // TODO: enable this resize only for kernels with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Pow::execute() const
+{
+  switch (input1()->element_type())
+  {
+    case DataType::FLOAT32:
+      eval<float>();
+      break;
+    case DataType::S32:
+      eval<int32_t>();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+template <typename T> void Pow::eval() const
+{
+  tflite::ArithmeticParams params{};
+
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastPow4DSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+                                              getTensorShape(input2()), getTensorData<T>(input2()),
+                                              getTensorShape(output()), getTensorData<T>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Pow(getTensorShape(input1()), getTensorData<T>(input1()),
+                               getTensorShape(input2()), getTensorData<T>(input2()),
+                               getTensorShape(output()), getTensorData<T>(output()));
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Quantize.cpp b/onert-micro/luci-interpreter/src/kernels/Quantize.cpp
new file mode 100644 (file)
index 0000000..9f622d0
--- /dev/null
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Quantize.h"
+#include "kernels/Utils.h"
+#include "PALQuantize.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+template <typename input_dtype> void call_requantize(const Tensor *input, Tensor *output)
+{
+  int32_t multiplier;
+  int shift;
+
+  const double effective_output_scale = input->scale() / output->scale();
+  quantizeMultiplier(effective_output_scale, &multiplier, &shift);
+
+  const auto input_shape = getTensorShape(input);
+  const auto output_shape = getTensorShape(output);
+  const auto size = tflite::MatchingFlatSize(input_shape, output_shape);
+
+  const auto input_data = getTensorData<input_dtype>(input);
+
+  switch (output->element_type())
+  {
+    case DataType::S8:
+      luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+                                       output->zero_point(), getTensorData<int8_t>(output));
+      break;
+    case DataType::U8:
+      luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+                                       output->zero_point(), getTensorData<uint8_t>(output));
+      break;
+    case DataType::S16:
+      luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+                                       output->zero_point(), getTensorData<int16_t>(output));
+      break;
+    default:
+      assert(false && "Unsupported quantized type, yet!");
+  }
+}
+
+} // namespace
+
+Quantize::Quantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Quantize::configure()
+{
+
+  if (input()->element_type() == DataType::S16)
+    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
+
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+    {
+      LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::U8 ||
+                             output()->element_type() == DataType::S8 ||
+                             output()->element_type() == DataType::S16);
+      break;
+    }
+    case DataType::S16:
+    case DataType::S8:
+    case DataType::U8:
+    {
+      LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S8 ||
+                             output()->element_type() == DataType::U8 ||
+                             output()->element_type() == DataType::S16);
+      if (output()->element_type() == DataType::S16)
+      {
+        LUCI_INTERPRETER_CHECK(output()->zero_point() == 0);
+      }
+      break;
+    }
+    default:
+      assert(false && "Unsupported type");
+  }
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+void Quantize::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+    {
+      tflite::QuantizationParams op_params;
+      op_params.zero_point = output()->zero_point();
+      op_params.scale = output()->scale();
+      const auto input_data = getTensorData<float>(input());
+
+      switch (output()->element_type())
+      {
+        case DataType::S8:
+        {
+          luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+                                         getTensorShape(output()), getTensorData<int8_t>(output()));
+          break;
+        }
+        case DataType::U8:
+        {
+          luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+                                         getTensorShape(output()),
+                                         getTensorData<uint8_t>(output()));
+          break;
+        }
+        case DataType::S16:
+        {
+          luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+                                         getTensorShape(output()),
+                                         getTensorData<int16_t>(output()));
+          break;
+        }
+        default:
+          assert(false && "Unsupported type.");
+      }
+      break;
+    }
+    case DataType::S16:
+    {
+      call_requantize<int16_t>(input(), output());
+      break;
+    }
+    case DataType::S8:
+    {
+      call_requantize<int8_t>(input(), output());
+      break;
+    }
+    case DataType::U8:
+    {
+      call_requantize<uint8_t>(input(), output());
+      break;
+    }
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Relu.cpp b/onert-micro/luci-interpreter/src/kernels/Relu.cpp
new file mode 100644 (file)
index 0000000..9cd1556
--- /dev/null
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Relu.h"
+#include "kernels/Utils.h"
+
+#include "PALRelu.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Relu::Relu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Relu::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  if (input()->element_type() == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+  }
+
+  if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
+  {
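+    // Precompute the fixed-point multiplier/shift used by the quantized paths to
+    // rescale input values into the output scale.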
+    double multiplier = input()->scale() / output()->scale();
+    quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift);
+  }
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+void Relu::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    case DataType::S16:
+      evalQuantizedS16();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void Relu::evalFloat() const
+{
+  const auto input_data = getTensorData<float>(input());
+  const auto input_shape = getTensorShape(input());
+  auto output_data = getTensorData<float>(output());
+  auto output_shape = getTensorShape(output());
+
+  luci_interpreter_pal::Relu(input_shape, input_data, output_shape, output_data);
+}
+
+void Relu::evalQuantized() const
+{
+  tflite::ReluParams params;
+  params.input_offset = input()->zero_point();
+  params.output_offset = output()->zero_point();
+  params.output_multiplier = _output_multiplier;
+  params.output_shift = _output_shift;
+
+  params.quantized_activation_min =
+    std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
+  params.quantized_activation_max = static_cast<int32_t>(std::numeric_limits<uint8_t>::max());
+
+  luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+                              getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+void Relu::evalQuantizedS16() const
+{
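+  // S16 path: rescale each element with the precomputed multiplier/shift, then clamp
+  // the result to [0, int16 max].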
+  const auto *input_data = getTensorData<int16_t>(input());
+  auto *output_data = getTensorData<int16_t>(output());
+
+  constexpr int32_t output_min = 0;
+  constexpr int32_t output_max = std::numeric_limits<int16_t>::max();
+
+  const int32_t num_elements = input()->shape().num_elements();
+
+  for (int32_t i = 0; i < num_elements; ++i)
+  {
+    const int32_t input_val = input_data[i];
+    int32_t output_val =
+      tflite::MultiplyByQuantizedMultiplier(input_val, _output_multiplier, _output_shift);
+    output_val = std::max(output_val, output_min);
+    output_val = std::min(output_val, output_max);
+    output_data[i] = static_cast<int16_t>(output_val);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Relu6.cpp b/onert-micro/luci-interpreter/src/kernels/Relu6.cpp
new file mode 100644 (file)
index 0000000..c2072e3
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Relu6.h"
+#include "kernels/Utils.h"
+
+#include "PALRelu6.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Relu6::Relu6(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Relu6::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  if (input()->element_type() == DataType::U8)
+  {
+    double multiplier = input()->scale() / output()->scale();
+    quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift);
+  }
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+void Relu6::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void Relu6::evalFloat() const
+{
+  const auto input_data = getTensorData<float>(input());
+  const auto input_shape = getTensorShape(input());
+  auto output_data = getTensorData<float>(output());
+  auto output_shape = getTensorShape(output());
+
+  luci_interpreter_pal::Relu6(input_shape, input_data, output_shape, output_data);
+}
+
+void Relu6::evalQuantized() const
+{
+  tflite::ReluParams params;
+  params.input_offset = input()->zero_point();
+  params.output_offset = output()->zero_point();
+  params.output_multiplier = _output_multiplier;
+  params.output_shift = _output_shift;
+
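+  // The clamp range represents [0, 6] in the quantized domain: the upper bound is the
+  // output offset plus 6 expressed in output-scale units.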
+  params.quantized_activation_min =
+    std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
+  params.quantized_activation_max =
+    std::min(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()),
+             params.output_offset + static_cast<int32_t>(roundf(6.f / output()->scale())));
+
+  luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+                              getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Reshape.cpp b/onert-micro/luci-interpreter/src/kernels/Reshape.cpp
new file mode 100644 (file)
index 0000000..ba47df4
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include <cassert>
+#include <cstring>
+
+namespace luci_interpreter
+{
+
+void configure_kernel_CircleReshape(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+  // Do nothing
+}
+
+void execute_kernel_CircleReshape(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph,
+                                  bool is_inplace)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  if (is_inplace)
+  {
+    runtime_graph->makeInplaceOperation(input, output);
+    return;
+  }
+
+  const auto input_data = (runtime_graph->getDataByTensor(input));
+  auto output_data = (runtime_graph->getDataByTensor(output));
+
+  assert(input_data != nullptr);
+  assert(output_data != nullptr);
+
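+  // Reshape does not change the underlying element layout, so a flat byte copy of the
+  // input buffer is sufficient.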
+  const size_t element_size = getDataTypeSize(Tensor::element_type(input));
+  const int32_t num_elements = Tensor::num_elements(input);
+  std::memcpy(output_data, input_data, num_elements * element_size);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Reshape.test.cpp b/onert-micro/luci-interpreter/src/kernels/Reshape.test.cpp
new file mode 100644 (file)
index 0000000..745f6ca
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#if 0
+#include "kernels/Reshape.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ReshapeTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// TODO Test types other than FLOAT32.
+
+TEST_F(ReshapeTest, Regular)
+{
+  Shape input_shape{1, 2, 2, 3};
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+  Shape shape_shape{2};
+  std::vector<int32_t> shape_data{3, 4};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor shape_tensor =
+    makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
+}
+
+TEST_F(ReshapeTest, UnknownDimension)
+{
+  Shape input_shape{2, 1, 2, 3};
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+  Shape shape_shape{3};
+  std::vector<int32_t> shape_data{2, -1, 2};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor shape_tensor =
+    makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
+#endif
diff --git a/onert-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp b/onert-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp
new file mode 100644 (file)
index 0000000..8d07ea7
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeBilinear.h"
+
+#include "kernels/Utils.h"
+
+#include "PALResizeBilinear.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ResizeBilinear::ResizeBilinear(const Tensor *input, const Tensor *size, Tensor *output,
+                               const ResizeBilinearParams &params)
+  : KernelWithParams<ResizeBilinearParams>({input, size}, {output}, params)
+{
+}
+
+void ResizeBilinear::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+  LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32);
+  if (params().half_pixel_centers && params().align_corners)
+    assert(false && "If half_pixel_centers is True, align_corners must be False.");
+  LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2);
+  Shape output_shape(4);
+  output_shape.dim(0) = input()->shape().dim(0);
+  output_shape.dim(1) = getTensorData<int32_t>(size())[0];
+  output_shape.dim(2) = getTensorData<int32_t>(size())[1];
+  output_shape.dim(3) = input()->shape().dim(3);
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(output_shape);
+}
+
+void ResizeBilinear::execute() const
+{
+  tflite::ResizeBilinearParams op_params{};
+  op_params.align_corners = params().align_corners;
+  op_params.half_pixel_centers = params().half_pixel_centers;
+  switch (output()->element_type())
+  {
+    case DataType::FLOAT32:
+      luci_interpreter_pal::ResizeBilinear(
+        op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(size()),
+        getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      luci_interpreter_pal::ResizeBilinear(
+        op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
+        getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp b/onert-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp
new file mode 100644 (file)
index 0000000..57ca4d5
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeNearestNeighbor.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+#include "PALResizeNearestNeighbor.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ResizeNearestNeighbor::ResizeNearestNeighbor(const Tensor *input, const Tensor *size,
+                                             Tensor *output,
+                                             const ResizeNearestNeighborParams &params)
+  : KernelWithParams<ResizeNearestNeighborParams>({input, size}, {output}, params)
+{
+}
+
+void ResizeNearestNeighbor::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+  LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32);
+  LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2);
+  Shape output_shape(4);
+  output_shape.dim(0) = input()->shape().dim(0);
+  output_shape.dim(1) = getTensorData<int32_t>(size())[0];
+  output_shape.dim(2) = getTensorData<int32_t>(size())[1];
+  output_shape.dim(3) = input()->shape().dim(3);
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(output_shape);
+}
+
+void ResizeNearestNeighbor::execute() const
+{
+  tflite::ResizeNearestNeighborParams op_params{};
+  op_params.align_corners = params().align_corners;
+  op_params.half_pixel_centers = params().half_pixel_centers;
+  switch (output()->element_type())
+  {
+    case DataType::FLOAT32:
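+      // Note: float data is passed as int32_t here; the reference kernel only moves
+      // element values (no arithmetic on them) and both types are 4 bytes wide, so the
+      // reinterpretation is safe.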
+      tflite::reference_ops::ResizeNearestNeighbor(
+        op_params, getTensorShape(input()), getTensorData<int32_t>(input()), getTensorShape(size()),
+        getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<int32_t>(output()));
+      break;
+    case DataType::U8:
+      luci_interpreter_pal::ResizeNearestNeighbor(
+        op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
+        getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/ReverseV2.cpp b/onert-micro/luci-interpreter/src/kernels/ReverseV2.cpp
new file mode 100644 (file)
index 0000000..76eadbd
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReverseV2.h"
+#include "kernels/Utils.h"
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+ReverseV2::ReverseV2(const Tensor *input, const Tensor *axes, Tensor *output)
+  : Kernel({input, axes}, {output})
+{
+}
+
+void ReverseV2::configure()
+{
+  assert(axes()->shape().num_dims() == 1);
+  assert(input()->shape().num_dims() >= axes()->shape().num_elements());
+  if (input()->element_type() != DataType::S32 && input()->element_type() != DataType::FLOAT32 &&
+      input()->element_type() != DataType::U8 && input()->element_type() != DataType::S16 &&
+      input()->element_type() != DataType::S64)
+  {
+    assert(false && "Unsupported input type.");
+  }
+  if (axes()->element_type() != DataType::S32)
+  {
+    assert(false && "Unsupported axes type.");
+  }
+  if (axes()->shape().num_elements() > 1)
+  {
+    assert(false && "Current implementation does not support more than 1 axis.");
+  }
+  int axis_value = getTensorData<int32_t>(axes())[0];
+  if (axis_value < 0 || axis_value >= input()->shape().num_dims())
+  {
+    assert(false && "Invalid axes value");
+  }
+  assert(input()->element_type() == output()->element_type());
+
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+void ReverseV2::execute() const
+{
+  int axis_value = getTensorData<int32_t>(axes())[0];
+  switch (output()->element_type())
+  {
+    case DataType::FLOAT32:
+      tflite::reference_ops::Reverse<float>(axis_value, getTensorShape(input()),
+                                            getTensorData<float>(input()), getTensorShape(output()),
+                                            getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      tflite::reference_ops::Reverse<uint8_t>(
+        axis_value, getTensorShape(input()), getTensorData<uint8_t>(input()),
+        getTensorShape(output()), getTensorData<uint8_t>(output()));
+      break;
+    default:
+      assert(false && "Unsupported output type");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Rsqrt.cpp b/onert-micro/luci-interpreter/src/kernels/Rsqrt.cpp
new file mode 100644 (file)
index 0000000..c45c3e4
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Rsqrt.h"
+#include "kernels/Utils.h"
+
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Rsqrt::Rsqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Rsqrt::configure()
+{
+  if (input()->element_type() != output()->element_type())
+  {
+    assert(false && "Input/output tensor data type mismatch.");
+  }
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+void Rsqrt::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void Rsqrt::evalFloat() const
+{
+  auto in = getTensorData<float>(input());
+  auto out = getTensorData<float>(output());
+  auto size = getTensorShape(input()).FlatSize();
+  for (auto i = in; i != in + size; ++i)
+  {
+    *out = 1.f / std::sqrt(*i);
+    ++out;
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/SVDF.cpp b/onert-micro/luci-interpreter/src/kernels/SVDF.cpp
new file mode 100644 (file)
index 0000000..a0ff302
--- /dev/null
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SVDF.h"
+#include "kernels/Utils.h"
+#include "PALSVDF.h"
+
+#include <tensorflow/lite/kernels/internal/quantization_util.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+TfLiteFusedActivation get_tflite_activation(Activation activation)
+{
+  switch (activation)
+  {
+    case FusedActFunc::RELU:
+      return kTfLiteActRelu;
+    case FusedActFunc::RELU6:
+      return kTfLiteActRelu6;
+    case FusedActFunc::RELU_N1_TO_1:
+      return kTfLiteActReluN1To1;
+    case FusedActFunc::TANH:
+      return kTfLiteActTanh;
+    case FusedActFunc::SIGN_BIT:
+      return kTfLiteActSignBit;
+    case FusedActFunc::NONE:
+      return kTfLiteActNone;
+    default:
+      assert(false && "Unsupported activation type");
+  }
+}
+} // namespace
+
+SVDF::SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
+           const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
+           Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
+           Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
+           const SVDFParams &params)
+  : KernelWithParams<SVDFParams>({input, weight_feature, weight_time, bias, input_activation_state},
+                                 {output, scratchpad_activation_state, scratchpad_1, scratchpad_2,
+                                  scratchpad_3, scratchpad_4, scratchpad_5, scratchpad_6},
+                                 params)
+{
+  // Do nothing
+}
+
+void SVDF::configure()
+{
+  const Shape &input_shape = input()->shape();
+  const Shape &weight_features_shape = weight_feature()->shape();
+  const Shape &weight_time_shape = weight_time()->shape();
+
+  // Validate Input Tensor:
+  LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32 ||
+                         input()->element_type() == DataType::S8);
+  LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 2);
+
+  // Validate inputs and output types
+  if (input()->element_type() == DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == DataType::S8);
+    LUCI_INTERPRETER_CHECK(weight_time()->element_type() == DataType::S16 ||
+                           weight_time()->element_type() == DataType::S8);
+    if (bias())
+      LUCI_INTERPRETER_CHECK(bias()->element_type() == DataType::S32);
+
+    LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == DataType::S16 ||
+                           input_activation_state()->element_type() == DataType::S8);
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S8);
+
+    // Note: TFLite currently supports only the RELU activation for integer SVDF
+    LUCI_INTERPRETER_CHECK(params().activation == FusedActFunc::RELU);
+  }
+  else if (weight_feature()->element_type() == DataType::FLOAT32)
+  {
+    LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(weight_time()->element_type() == DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == DataType::FLOAT32);
+    if (bias())
+      LUCI_INTERPRETER_CHECK(bias()->element_type() == DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+  }
+  else if ((weight_feature()->element_type() == DataType::U8 ||
+            weight_feature()->element_type() == DataType::S8) &&
+           input()->element_type() == DataType::FLOAT32)
+  {
+    // TODO: support hybrid SVDF op
+    assert(false && "Hybrid type is not currently supported");
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+
+  // Check that the tensor parameters are consistent with one another and with the
+  // input configuration.
+  const int rank = params().svdf_rank;
+  const int batch_size = input_shape.dim(0);
+  const int num_filters = weight_features_shape.dim(0);
+  LUCI_INTERPRETER_CHECK(rank != 0);
+  LUCI_INTERPRETER_CHECK(num_filters % rank == 0);
+
+  const int num_units = num_filters / rank;
+  const int memory_size = weight_time_shape.dim(1);
+
+  // Validate Weight_Feature Input Tensor:
+  LUCI_INTERPRETER_CHECK(weight_features_shape.num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(weight_features_shape.dim(1) == input_shape.dim(1));
+
+  // Validate Weight_Time Input Tensor:
+  LUCI_INTERPRETER_CHECK(weight_time_shape.num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(weight_time_shape.dim(0) == num_filters);
+
+  // Validate Bias
+  if (bias())
+    LUCI_INTERPRETER_CHECK(bias()->shape().dim(0) == num_units);
+
+  // Validate Input Activation State
+  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(0) == batch_size);
+  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(1) == memory_size * num_filters);
+
+  // Resize scratchpad_state to input_activation_state
+  auto scratchpad_activation_state = getOutputTensors()[1];
+  scratchpad_activation_state->resize({batch_size, memory_size * num_filters});
+
+  // TODO: enable this only for kernels with dynamic shapes
+  // Resize output tensor
+  output()->resize({batch_size, num_units});
+
+  luci_interpreter_pal::SetupScratchpadTensor(
+    input()->element_type(), weight_feature()->element_type(), getOutputTensors()[2],
+    getOutputTensors()[3], getOutputTensors()[4], getOutputTensors()[5], getOutputTensors()[6],
+    getOutputTensors()[7], input_shape, weight_time_shape, batch_size, num_filters, num_units);
+}
+
+void SVDF::execute() const
+{
+  switch (weight_feature()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S8:
+    {
+      if (input()->element_type() == DataType::S8)
+        evalInteger();
+      else
+        // TODO: support hybrid SVDF op
+        assert(false && "Hybrid type is not currently supported");
+      break;
+    }
+    default:
+      assert(false && "Unsupported type");
+  }
+}
+
+void SVDF::evalInteger() const
+{
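+  // Fold the two rescaling steps (input x weight_feature -> activation state, and
+  // state x weight_time -> output) into fixed-point multiplier/shift pairs.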
+  const auto effective_scale_1 = static_cast<double>(input()->scale() * weight_feature()->scale() /
+                                                     input_activation_state()->scale());
+  const auto effective_scale_2 = static_cast<double>(input_activation_state()->scale() *
+                                                     weight_time()->scale() / output()->scale());
+
+  int32_t effective_scale_1_a;
+  int effective_scale_1_b;
+  int32_t effective_scale_2_a;
+  int effective_scale_2_b;
+
+  tflite::QuantizeMultiplier(effective_scale_1, &effective_scale_1_a, &effective_scale_1_b);
+  tflite::QuantizeMultiplier(effective_scale_2, &effective_scale_2_a, &effective_scale_2_b);
+
+  TfLiteSVDFParams params_svdf{};
+  params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+  params_svdf.rank = params().svdf_rank;
+  params_svdf.activation = get_tflite_activation(params().activation);
+
+  auto scratchpad_activation_state = getOutputTensors()[1];
+  // Note: the activation_state input variable tensor is expected to be reset to zero;
+  // it is also expected that this variable tensor has no backing buffer.
+  auto scratchpad_data = getTensorData<int16_t>(scratchpad_activation_state);
+  std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
+
+  auto scratchpad = getOutputTensors()[2];
+  auto output_temp = getOutputTensors()[3];
+
+  int32_t input_zp = input()->zero_point();
+  int32_t output_zp = output()->zero_point();
+  luci_interpreter_pal::IntegerSVDF(
+    params_svdf, getTensorShape(input()), getTensorData<int8_t>(input()),
+    getTensorShape(weight_feature()), getTensorData<int8_t>(weight_feature()),
+    getTensorShape(weight_time()), getTensorData<int16_t>(weight_time()), getTensorShape(bias()),
+    getTensorData<int32_t>(bias()), scratchpad_data, getTensorShape(output()),
+    getTensorData<int8_t>(output()), getTensorData<int32_t>(scratchpad),
+    getTensorData<int32_t>(output_temp), effective_scale_1_a, effective_scale_1_b,
+    effective_scale_2_a, effective_scale_2_b, input_zp, output_zp);
+}
+
+void SVDF::evalFloat() const
+{
+  TfLiteSVDFParams params_svdf{};
+  params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+  params_svdf.rank = params().svdf_rank;
+  params_svdf.activation = get_tflite_activation(params().activation);
+
+  auto scratchpad_activation_state = getOutputTensors()[1];
+  // Note: the activation_state input variable tensor is expected to be reset to zero;
+  // it is also expected that this variable tensor has no backing buffer.
+  auto scratchpad_data = getTensorData<float>(scratchpad_activation_state);
+  std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
+
+  auto scratchpad_1 = getOutputTensors()[2];
+
+  luci_interpreter_pal::FloatSVDF(
+    params_svdf, getTensorShape(input()), getTensorData<float>(input()),
+    getTensorShape(weight_feature()), getTensorData<float>(weight_feature()),
+    getTensorShape(weight_time()), getTensorData<float>(weight_time()), getTensorShape(bias()),
+    getTensorData<float>(bias()), getTensorData<float>(scratchpad_1), scratchpad_data,
+    getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Shape.cpp b/onert-micro/luci-interpreter/src/kernels/Shape.cpp
new file mode 100644 (file)
index 0000000..87c12d2
--- /dev/null
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Shape.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ShapeKernel::ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params)
+  : KernelWithParams<ShapeParams>({input}, {output}, params)
+{
+}
+
+void ShapeKernel::configure()
+{
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S32 or
+                         output()->element_type() == DataType::S64);
+  const auto input_shape = input()->shape();
+
+  Shape output_shape(1);
+  output_shape.dim(0) = input_shape.num_dims();
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(output_shape);
+}
+
+void ShapeKernel::execute() const
+{
+  switch (params().out_type)
+  {
+    case DataType::S32:
+      evalInt<int32_t>();
+      break;
+    case DataType::S64:
+      evalInt<int64_t>();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+template <typename T> void ShapeKernel::evalInt() const
+{
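+  // The output is a rank-1 tensor holding one entry per input dimension.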
+  const auto input_shape = input()->shape();
+
+  auto output_data = getTensorData<T>(output());
+
+  for (int i = 0; i < input_shape.num_dims(); ++i)
+  {
+    output_data[i] = input_shape.dim(i);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Slice.cpp b/onert-micro/luci-interpreter/src/kernels/Slice.cpp
new file mode 100644 (file)
index 0000000..cb41e26
--- /dev/null
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Slice.h"
+#include "Utils.h"
+#include "PALSlice.h"
+
+#include <cassert>
+#include <cstring>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+const int max_dim = 4;
+
+Slice::Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output)
+  : Kernel({input, begin, size}, {output})
+{
+}
+
+template <typename T>
+Shape calculateOutputShape(const Tensor *input, const Tensor *begin, const Tensor *size)
+{
+  Shape output_shape = Shape(input->shape().num_dims());
+  for (int idx = 0; idx < input->shape().num_dims(); idx++)
+  {
+    T size_value = getTensorData<T>(size)[idx];
+    if (size_value < 0)
+    {
+      if (size_value != -1)
+      {
+        assert(false && "Invalid size.");
+      }
+      size_value = input->shape().dim(idx) - getTensorData<T>(begin)[idx];
+    }
+    else
+    {
+      if (input->shape().dim(idx) < getTensorData<T>(begin)[idx] + size_value)
+      {
+        assert(false && "Invalid begin and size.");
+      }
+    }
+    output_shape.dim(idx) = static_cast<int>(size_value);
+  }
+  return output_shape;
+}
+
+template <typename T>
+void getBeginAndSizeVectors(int dimensions, const Tensor *begin, const Tensor *size,
+                            std::vector<int> *begins, std::vector<int> *sizes)
+{
+  for (int idx = dimensions - 1; idx >= 0; --idx)
+  {
+    begins->push_back(getTensorData<T>(begin)[idx]);
+    sizes->push_back(getTensorData<T>(size)[idx]);
+  }
+}
+
+void Slice::configure()
+{
+  assert(input()->element_type() == output()->element_type());
+  assert(begin()->element_type() == DataType::S32 || begin()->element_type() == DataType::S64);
+  assert(size()->element_type() == DataType::S32 || size()->element_type() == DataType::S64);
+  assert(begin()->shape().num_dims() == 1);
+  assert(size()->shape().num_dims() == 1);
+  assert(input()->shape().num_dims() <= max_dim);
+  // TODO: enable this only for kernels with dynamic shapes
+  if (begin()->element_type() == DataType::S32)
+  {
+    output()->resize(calculateOutputShape<int32_t>(input(), begin(), size()));
+  }
+  else if (begin()->element_type() == DataType::S64)
+  {
+    output()->resize(calculateOutputShape<int64_t>(input(), begin(), size()));
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+void Slice::execute() const
+{
+  std::vector<int> begins;
+  begins.reserve(max_dim);
+  std::vector<int> sizes;
+  sizes.reserve(max_dim);
+  if (begin()->element_type() == DataType::S32)
+  {
+    getBeginAndSizeVectors<int32_t>(input()->shape().num_dims(), begin(), size(), &begins, &sizes);
+  }
+  else if (begin()->element_type() == DataType::S64)
+  {
+    getBeginAndSizeVectors<int64_t>(input()->shape().num_dims(), begin(), size(), &begins, &sizes);
+  }
+  else
+  {
+    assert(false && "Unsupported begin type.");
+  }
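+  // Pad up to 4 dimensions; the vectors above were filled in reverse order, so these
+  // entries become begin 0 / size 1 for the missing leading dimensions.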
+  for (int i = input()->shape().num_dims(); i < max_dim; ++i)
+  {
+    begins.push_back(0);
+    sizes.push_back(1);
+  }
+
+  assert(begins.size() == 4);
+  assert(sizes.size() == 4);
+  tflite::SliceParams op_params{};
+  op_params.begin_count = 4;
+  op_params.size_count = 4;
+  for (int i = 0; i < 4; i++)
+  {
+    op_params.begin[i] = begins[3 - i];
+    op_params.size[i] = sizes[3 - i];
+  }
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      luci_interpreter_pal::Slice(op_params, getTensorShape(input()), getTensorData<float>(input()),
+                                  getTensorShape(output()), getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      luci_interpreter_pal::Slice(op_params, getTensorShape(input()),
+                                  getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                  getTensorData<uint8_t>(output()));
+      break;
+    case DataType::S8:
+      luci_interpreter_pal::Slice(op_params, getTensorShape(input()),
+                                  getTensorData<int8_t>(input()), getTensorShape(output()),
+                                  getTensorData<int8_t>(output()));
+      break;
+    default:
+      assert(false && "Unsupported input type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Softmax.cpp b/onert-micro/luci-interpreter/src/kernels/Softmax.cpp
new file mode 100644 (file)
index 0000000..651040b
--- /dev/null
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+#include "PALSoftmax.h"
+
+namespace luci_interpreter
+{
+
+namespace
+{
+
+#ifndef DIS_FLOAT
+void evalFloat(const circle::Tensor *input, const circle::Tensor *output,
+               const circle::SoftmaxOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  const auto *input_data = runtime_graph->getDataByTensor(input);
+  auto *output_data = runtime_graph->getDataByTensor(output);
+
+  tflite::SoftmaxParams op_params{};
+  op_params.beta = options->beta();
+
+  tflite::reference_ops::Softmax(
+    op_params, kernels::getTensorShape(input), kernels::getTensorData<float>(input_data),
+    kernels::getTensorShape(output), kernels::getTensorData<float>(output_data));
+}
+#endif // DIS_FLOAT
+
+#ifndef DIS_QUANT
+template <typename T>
+void evalQuantized(const circle::Tensor *input, const circle::Tensor *output,
+                   const circle::SoftmaxOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  // TODO: enable this path once the quantized softmax is implemented
+  assert(false && "Not implemented yet");
+
+  const auto *input_data = runtime_graph->getDataByTensor(input);
+  auto *output_data = runtime_graph->getDataByTensor(output);
+
+  tflite::SoftmaxParams op_params{};
+
+  luci_interpreter_pal::InitializeParams(&op_params, Tensor::scale(input), options->beta());
+  luci_interpreter_pal::Softmax(
+    op_params, kernels::getTensorShape(input), kernels::getTensorData<T>(input_data),
+    kernels::getTensorShape(output), kernels::getTensorData<T>(output_data));
+}
+#endif
+
+} // namespace
+
+void configure_kernel_CircleSoftmax(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  assert(input != nullptr);
+  assert(output != nullptr);
+
+  LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == Tensor::element_type(output));
+  LUCI_INTERPRETER_CHECK(Tensor::num_dims(input) >= 1);
+
+#ifndef DIS_QUANT
+  if (Tensor::element_type(input) == DataType::U8 || Tensor::element_type(input) == DataType::S8)
+  {
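+    // U8 softmax requires an output zero point of 0, while S8 requires it to be
+    // std::numeric_limits<int8_t>::min().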
+    LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == DataType::S8 ||
+                           Tensor::zero_point(output) == 0);
+    LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == DataType::U8 ||
+                           Tensor::zero_point(output) == std::numeric_limits<int8_t>::min());
+  }
+#endif
+}
+
+void execute_kernel_CircleSoftmax(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph,
+                                  bool)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  assert(input != nullptr);
+  assert(output != nullptr);
+
+  const auto *options = cur_op->builtin_options_as_SoftmaxOptions();
+
+  switch (Tensor::element_type(input))
+  {
+#ifndef DIS_FLOAT
+    case DataType::FLOAT32:
+      evalFloat(input, output, options, runtime_graph);
+      break;
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+    case DataType::S8:
+      evalQuantized<int8_t>(input, output, options, runtime_graph);
+      break;
+    case DataType::U8:
+      evalQuantized<uint8_t>(input, output, options, runtime_graph);
+      break;
+#endif // DIS_QUANT
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Softmax.test.cpp b/onert-micro/luci-interpreter/src/kernels/Softmax.test.cpp
new file mode 100644 (file)
index 0000000..f026e89
--- /dev/null
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// TODO: enable these tests
+#if 0
+#include "kernels/Softmax.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> constexpr loco::DataType toLocoDataType();
+
+template <> constexpr loco::DataType toLocoDataType<float>() { return loco::DataType::FLOAT32; }
+
+template <> constexpr loco::DataType toLocoDataType<uint8_t>() { return loco::DataType::U8; }
+
+template <> constexpr loco::DataType toLocoDataType<int8_t>() { return loco::DataType::S8; }
+
+template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<toLocoDataType<T>()>(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(toLocoDataType<T>());
+
+  SoftmaxParams params{};
+  params.beta = 0.1;
+
+  Softmax kernel(&input_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  std::pair<float, int32_t> input_quant_param =
+    quantizationParams<T>(std::min<float>(std::min<float>(input_data), 0.f),
+                          std::max<float>(std::max<float>(input_data), 0.f));
+  std::pair<float, int32_t> output_quant_param =
+    quantizationParams<T>(std::min<float>(std::min<float>(output_data), 0.f),
+                          std::max<float>(std::max<float>(output_data), 0.f));
+  Tensor input_tensor = makeInputTensor<toLocoDataType<T>()>(input_shape, input_quant_param.first,
+                                                             input_quant_param.second, input_data,
+                                                             memory_manager.get());
+  Tensor output_tensor =
+    makeOutputTensor(toLocoDataType<T>(), output_quant_param.first, output_quant_param.second);
+
+  SoftmaxParams params{};
+  params.beta = 0.1;
+
+  Softmax kernel(&input_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(output_data, output_tensor.scale()));
+}
+
+template <typename T> class SoftmaxTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(SoftmaxTest, DataTypes);
+
+TYPED_TEST(SoftmaxTest, Simple)
+{
+  Check<TypeParam>({2, 1, 2, 3}, {2, 1, 2, 3},
+                   {
+                     5, -9, 8,  //
+                     -7, 2, -4, //
+                     1, -2, 9,  //
+                     3, -6, -1, //
+                   },
+                   {
+                     0.38514, 0.09497, 0.51989, //
+                     0.20792, 0.51141, 0.28067, //
+                     0.25212, 0.18678, 0.56110, //
+                     0.48149, 0.19576, 0.32275, //
+                   });
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
+#endif
diff --git a/onert-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp b/onert-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp
new file mode 100644 (file)
index 0000000..0a5b447
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToBatchND.h"
+#include "kernels/Utils.h"
+
+#include "PALSpaceToBatchND.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+const int kInputMinDimensionNum = 3;
+const int kInputMaxDimensionNum = 4;
+
+} // namespace
+
+SpaceToBatchND::SpaceToBatchND(const Tensor *input, const Tensor *block_shape,
+                               const Tensor *paddings, Tensor *output)
+  : Kernel({input, block_shape, paddings}, {output})
+{
+}
+
+void SpaceToBatchND::configure()
+{
+  const auto *block_shape_data = block_shape()->data<int32_t>();
+  const auto *paddings_data = paddings()->data<int32_t>();
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  int spatial_dims_num = input()->shape().num_dims() - 2;
+
+  LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
+
+  LUCI_INTERPRETER_CHECK(paddings()->shape().num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(paddings()->shape().dim(0) == spatial_dims_num);
+  LUCI_INTERPRETER_CHECK(paddings()->shape().dim(1) == 2);
+
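+  // Each padded spatial dimension is divided by its block size, the batch dimension is
+  // multiplied by the product of the block sizes, and the channel dimension is kept.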
+  Shape output_shape = Shape(input()->shape().num_dims());
+  int output_batch_size = input()->shape().dim(0);
+  for (int i = 0; i < spatial_dims_num; ++i)
+  {
+    int final_dim_size =
+      (input()->shape().dim(i + 1) + paddings_data[i * 2] + paddings_data[i * 2 + 1]);
+    LUCI_INTERPRETER_CHECK(final_dim_size % block_shape_data[i] == 0);
+    output_shape.dim(i + 1) = final_dim_size / block_shape_data[i];
+    output_batch_size = output_batch_size * block_shape_data[i];
+  }
+  output_shape.dim(0) = output_batch_size;
+  output_shape.dim(input()->shape().num_dims() - 1) =
+    input()->shape().dim(input()->shape().num_dims() - 1);
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(output_shape);
+}
+
+void SpaceToBatchND::execute() const
+{
+  tflite::SpaceToBatchParams op_params{};
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      op_params.output_offset = 0;
+      luci_interpreter_pal::SpaceToBatchND(
+        op_params, getTensorShape(input()), getTensorData<float>(input()),
+        getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
+        getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
+        getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      op_params.output_offset = output()->zero_point();
+      luci_interpreter_pal::SpaceToBatchND(
+        op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+        getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
+        getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
+        getTensorData<uint8_t>(output()));
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp b/onert-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp
new file mode 100644 (file)
index 0000000..06cc5fa
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SpaceToDepth.h"
+#include "Utils.h"
+#include "PALSpaceToDepth.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SpaceToDepth::SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams &params)
+  : KernelWithParams<SpaceToDepthParams>({input}, {output}, params)
+{
+}
+
+void SpaceToDepth::configure()
+{
+  assert(input()->shape().num_dims() == 4);
+  assert(output()->element_type() == DataType::FLOAT32 ||
+         output()->element_type() == DataType::U8 || output()->element_type() == DataType::S8 ||
+         output()->element_type() == DataType::S32 || output()->element_type() == DataType::S64);
+  assert(input()->element_type() == output()->element_type());
+
+  const int block_size = params().block_size;
+  const int32_t input_height = input()->shape().dim(1);
+  const int32_t input_width = input()->shape().dim(2);
+  int32_t output_height = input_height / block_size;
+  int32_t output_width = input_width / block_size;
+
+  assert(input_height == output_height * block_size);
+  assert(input_width == output_width * block_size);
+
+  Shape output_shape(4);
+  output_shape.dim(0) = input()->shape().dim(0);
+  output_shape.dim(1) = output_height;
+  output_shape.dim(2) = output_width;
+  output_shape.dim(3) = input()->shape().dim(3) * block_size * block_size;
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(output_shape);
+}
+
+void SpaceToDepth::execute() const
+{
+  tflite::SpaceToDepthParams op_params{};
+  op_params.block_size = params().block_size;
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()),
+                                         getTensorData<float>(input()), getTensorShape(output()),
+                                         getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()),
+                                         getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                         getTensorData<uint8_t>(output()));
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
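
For reference, the SPACE_TO_DEPTH shape rule used by the configure() above reduces height and width by block_size and multiplies channels by block_size squared. A minimal sketch, assuming a fixed rank-4 NHWC shape (names are illustrative only):

// Sketch of SPACE_TO_DEPTH shape inference for NHWC tensors.
#include <array>
#include <cassert>
#include <cstdint>

std::array<int32_t, 4> spaceToDepthShape(const std::array<int32_t, 4> &in, int32_t block_size)
{
  assert(in[1] % block_size == 0 && in[2] % block_size == 0);
  return {in[0],                           // batch unchanged
          in[1] / block_size,              // height shrinks by block_size
          in[2] / block_size,              // width shrinks by block_size
          in[3] * block_size * block_size}; // channels absorb the block
}
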
diff --git a/onert-micro/luci-interpreter/src/kernels/Split.cpp b/onert-micro/luci-interpreter/src/kernels/Split.cpp
new file mode 100644 (file)
index 0000000..87ec028
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Split.h"
+
+#include "Utils.h"
+
+#include "PALSplit.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Split::Split(const Tensor *axis, const Tensor *input, std::vector<Tensor *> outputs)
+  : Kernel({axis, input}, std::move(outputs))
+{
+}
+
+void Split::configure()
+{
+  assert(axis()->shape().num_elements() == 1);
+  _axis_value = getTensorData<int32_t>(axis())[0];
+  if (_axis_value < 0)
+    _axis_value += input()->shape().num_dims();
+  assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims());
+
+  const int32_t input_size = input()->shape().dim(_axis_value);
+  assert(input_size % _outputs.size() == 0);
+  const int32_t slice_size = input_size / _outputs.size();
+  // TODO: enable this only for kernels with dynamic shapes
+  Shape output_shape = input()->shape();
+  output_shape.dim(_axis_value) = slice_size;
+  for (Tensor *output : _outputs)
+  {
+    output->resize(output_shape);
+  }
+}
+
+void Split::execute() const
+{
+  tflite::SplitParams params{};
+  params.num_split = _outputs.size();
+  params.axis = _axis_value;
+
+#define TF_LITE_SPLIT(scalar)                                                                    \
+  {                                                                                              \
+    VectorOfTensors<scalar, false> all_outputs(_outputs);                                        \
+    luci_interpreter_pal::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \
+                                all_outputs.shapes(), all_outputs.data());                       \
+  }
+
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      TF_LITE_SPLIT(float);
+      break;
+    case DataType::U8:
+      TF_LITE_SPLIT(uint8_t);
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+#undef TF_LITE_SPLIT
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
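
The SPLIT configure() above divides the axis extent evenly across the outputs, so every output receives the same slice_size. A minimal sketch of that shape rule, assuming plain std::vector shapes (illustrative only):

// Sketch of SPLIT shape inference: the axis extent is divided evenly across outputs.
#include <cassert>
#include <cstdint>
#include <vector>

std::vector<std::vector<int32_t>> splitShapes(const std::vector<int32_t> &input_shape,
                                              int axis, int num_splits)
{
  if (axis < 0)
    axis += static_cast<int>(input_shape.size()); // negative axes count from the end
  assert(input_shape[axis] % num_splits == 0);    // SPLIT requires an even division

  std::vector<int32_t> slice_shape = input_shape;
  slice_shape[axis] = input_shape[axis] / num_splits;
  return std::vector<std::vector<int32_t>>(num_splits, slice_shape);
}
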
diff --git a/onert-micro/luci-interpreter/src/kernels/SplitV.cpp b/onert-micro/luci-interpreter/src/kernels/SplitV.cpp
new file mode 100644 (file)
index 0000000..0e3a541
--- /dev/null
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SplitV.h"
+
+#include "Utils.h"
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SplitV::SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis,
+               std::vector<Tensor *> outputs)
+  : Kernel({input, size_splits, axis}, std::move(outputs))
+{
+}
+
+void SplitV::configure()
+{
+  assert(axis()->shape().num_elements() == 1);
+  _axis_value = getTensorData<int32_t>(axis())[0];
+  if (_axis_value < 0)
+    _axis_value += input()->shape().num_dims();
+  assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims());
+
+  auto num_split = static_cast<int32_t>(_outputs.size());
+  auto sizes_data = getTensorData<int32_t>(size_splits());
+
+  assert(size_splits()->shape().num_dims() == 1);
+
+  int32_t sum = 0;
+  const auto num_dims_size_splits = size_splits()->shape().dim(0);
+  int32_t count_neg_dim = 0;
+
+  for (int32_t i = 0; i < num_dims_size_splits - 1; ++i)
+  {
+    if (sizes_data[i] != -1)
+    {
+      sum += sizes_data[i];
+    }
+    else
+    {
+      count_neg_dim++;
+    }
+  }
+  assert(count_neg_dim < 2);
+  assert(size_splits()->shape().num_elements() == num_split);
+
+  // TODO: enable this only for kernels with dynamic shapes
+  auto output_shape = input()->shape();
+  for (int32_t i = 0; i < num_split; ++i)
+  {
+    if (sizes_data[i] == -1)
+    {
+      output_shape.dim(_axis_value) = input()->shape().dim(_axis_value) - sum;
+    }
+    else
+    {
+      output_shape.dim(_axis_value) = sizes_data[i];
+    }
+    _outputs[i]->resize(output_shape);
+  }
+}
+
+void SplitV::execute() const
+{
+  tflite::SplitParams params{};
+  params.num_split = _outputs.size();
+  params.axis = _axis_value;
+
+#define TF_LITE_SPLIT(scalar)                                                                     \
+  {                                                                                               \
+    VectorOfTensors<scalar, false> all_outputs(_outputs);                                         \
+    tflite::optimized_ops::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \
+                                 all_outputs.shapes(), all_outputs.data());                       \
+  }
+
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      TF_LITE_SPLIT(float);
+      break;
+    case DataType::U8:
+      TF_LITE_SPLIT(uint8_t);
+      break;
+    case DataType::S16:
+      TF_LITE_SPLIT(int16_t);
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+#undef TF_LITE_SPLIT
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
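
Unlike SPLIT, SPLIT_V takes explicit per-output sizes, and a single -1 entry absorbs whatever is left of the axis extent; that is what the sum/count_neg_dim bookkeeping in the configure() above implements. A minimal sketch of that resolution step, assuming TFLite's documented SPLIT_V semantics (illustrative only):

// Sketch of SPLIT_V size resolution: at most one entry of size_splits may be -1,
// and it absorbs whatever remains of the axis extent.
#include <cassert>
#include <cstdint>
#include <vector>

std::vector<int32_t> resolveSplitVSizes(std::vector<int32_t> sizes, int32_t axis_extent)
{
  int32_t known_sum = 0;
  int neg_count = 0;
  for (int32_t s : sizes)
  {
    if (s == -1)
      ++neg_count;
    else
      known_sum += s;
  }
  assert(neg_count <= 1); // only one wildcard is allowed

  for (int32_t &s : sizes)
    if (s == -1)
      s = axis_extent - known_sum; // the wildcard takes the remainder

  return sizes;
}
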
diff --git a/onert-micro/luci-interpreter/src/kernels/Sqrt.cpp b/onert-micro/luci-interpreter/src/kernels/Sqrt.cpp
new file mode 100644 (file)
index 0000000..eed50df
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sqrt.h"
+#include "kernels/Utils.h"
+
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Sqrt::Sqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Sqrt::configure()
+{
+  if (input()->element_type() != output()->element_type())
+  {
+    assert(false && "Input/output tensor data type mismatch.");
+  }
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+void Sqrt::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void Sqrt::evalFloat() const
+{
+  auto in = getTensorData<float>(input());
+  auto out = getTensorData<float>(output());
+  auto size = getTensorShape(input()).FlatSize();
+  for (auto i = in; i != in + size; ++i)
+  {
+    *out = std::sqrt(*i);
+    ++out;
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Square.cpp b/onert-micro/luci-interpreter/src/kernels/Square.cpp
new file mode 100644 (file)
index 0000000..6386b91
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Square.h"
+#include "kernels/Utils.h"
+
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Square::Square(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Square::configure()
+{
+  if (input()->element_type() != output()->element_type())
+  {
+    assert(false && "Input/output tensor data type mismatch.");
+  }
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+void Square::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void Square::evalFloat() const
+{
+  auto in = getTensorData<float>(input());
+  auto out = getTensorData<float>(output());
+  auto size = getTensorShape(input()).FlatSize();
+  for (auto i = in; i != in + size; ++i)
+  {
+    *out = (*i) * (*i);
+    ++out;
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/SquaredDifference.cpp b/onert-micro/luci-interpreter/src/kernels/SquaredDifference.cpp
new file mode 100644 (file)
index 0000000..27f395a
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SquaredDifference.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SquaredDifference::SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output)
+  : Kernel({input1, input2}, {output})
+{
+}
+
+void SquaredDifference::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type())
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type())
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void SquaredDifference::execute() const
+{
+  switch (input1()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalSquaredDifference<float>();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+template <typename T> inline void SquaredDifference::evalSquaredDifference() const
+{
+  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+                        getTensorShape(input2()), getTensorData<T>(input2()),
+                        getTensorShape(output()), getTensorData<T>(output()), [](T x, T y) {
+                          const T difference = x - y;
+                          return difference * difference;
+                        });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Squeeze.cpp b/onert-micro/luci-interpreter/src/kernels/Squeeze.cpp
new file mode 100644 (file)
index 0000000..9736dce
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Squeeze.h"
+
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Squeeze::Squeeze(const Tensor *input, Tensor *output, const SqueezeParams &params)
+  : KernelWithParams<SqueezeParams>({input}, {output}, params)
+{
+}
+
+void Squeeze::configure()
+{
+  int input_num_dims = input()->shape().num_dims();
+  int num_squeeze_dims = params().squeeze_dims.size();
+  assert(input_num_dims <= 8);
+  bool should_squeeze[8] = {false};
+  int num_squeezed_dims = 0;
+  if (num_squeeze_dims == 0)
+  {
+    for (int idx = 0; idx < input_num_dims; ++idx)
+    {
+      if (input()->shape().dim(idx) == 1)
+      {
+        should_squeeze[idx] = true;
+        ++num_squeezed_dims;
+      }
+    }
+  }
+  else
+  {
+    for (int idx = 0; idx < num_squeeze_dims; ++idx)
+    {
+      int current = params().squeeze_dims[idx] < 0 ? params().squeeze_dims[idx] + input_num_dims
+                                                   : params().squeeze_dims[idx];
+      assert(current >= 0 && current < input_num_dims && input()->shape().dim(current) == 1);
+      if (!should_squeeze[current])
+        ++num_squeezed_dims;
+      should_squeeze[current] = true;
+    }
+  }
+  // TODO: enable this only for kernels with dynamic shapes
+  Shape output_shape(input_num_dims - num_squeezed_dims);
+  for (int in_idx = 0, out_idx = 0; in_idx < input_num_dims; ++in_idx)
+  {
+    if (!should_squeeze[in_idx])
+    {
+      output_shape.dim(out_idx++) = input()->shape().dim(in_idx);
+    }
+  }
+  output()->resize(output_shape);
+}
+
+void Squeeze::execute() const
+{
+  assert(input()->shape().num_elements() == output()->shape().num_elements());
+
+  const auto *input_data = input()->data<void>();
+  auto *output_data = output()->data<void>();
+  std::memcpy(output_data, input_data,
+              getDataTypeSize(input()->element_type()) * input()->shape().num_elements());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
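
The SQUEEZE configure() above decides which dimensions to drop: every size-1 dimension when squeeze_dims is empty, otherwise only the (possibly negative) requested indices. A minimal sketch of that selection, assuming plain std::vector shapes (illustrative only):

// Sketch of SQUEEZE shape inference: drop size-1 dimensions, either all of them
// (empty squeeze_dims) or only the requested ones.
#include <cassert>
#include <cstdint>
#include <vector>

std::vector<int32_t> squeezeShape(const std::vector<int32_t> &input_shape,
                                  const std::vector<int32_t> &squeeze_dims)
{
  const int rank = static_cast<int>(input_shape.size());
  std::vector<bool> drop(rank, false);

  if (squeeze_dims.empty())
  {
    for (int i = 0; i < rank; ++i)
      drop[i] = (input_shape[i] == 1); // squeeze every size-1 dimension
  }
  else
  {
    for (int32_t d : squeeze_dims)
    {
      if (d < 0)
        d += rank; // negative indices count from the end
      assert(d >= 0 && d < rank && input_shape[d] == 1);
      drop[d] = true;
    }
  }

  std::vector<int32_t> output_shape;
  for (int i = 0; i < rank; ++i)
    if (!drop[i])
      output_shape.push_back(input_shape[i]);
  return output_shape;
}
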
diff --git a/onert-micro/luci-interpreter/src/kernels/StridedSlice.cpp b/onert-micro/luci-interpreter/src/kernels/StridedSlice.cpp
new file mode 100644 (file)
index 0000000..654fd3c
--- /dev/null
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/StridedSlice.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/strided_slice.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+StridedSlice::StridedSlice(const Tensor *input, const Tensor *begin, const Tensor *end,
+                           const Tensor *strides, Tensor *output, const StridedSliceParams &params)
+  : KernelWithParams<StridedSliceParams>({input, begin, end, strides}, {output}, params)
+{
+}
+
+void StridedSlice::configure()
+{
+  assert(begin()->shape().num_dims() == 1);
+  assert(end()->shape().num_dims() == 1);
+  assert(strides()->shape().num_dims() == 1);
+  assert(input()->element_type() == output()->element_type());
+  assert(begin()->element_type() == DataType::S32);
+  assert(end()->element_type() == DataType::S32);
+  assert(strides()->element_type() == DataType::S32);
+  assert(input()->shape().num_dims() <= 4);
+  if (params().ellipsis_mask != 0)
+  {
+    assert(false && "ellipsis_mask is not implemented yet.");
+  }
+  if (params().new_axis_mask != 0)
+  {
+    assert(false && "new_axis_mask is not implemented yet.");
+  }
+  if (input()->element_type() == DataType::U8)
+  {
+    assert(input()->scale() == output()->scale());
+    assert(input()->zero_point() == output()->zero_point());
+  }
+  tflite::StridedSliceParams op_params{};
+  op_params.start_indices_count = input()->shape().num_dims();
+  op_params.stop_indices_count = input()->shape().num_dims();
+  op_params.strides_count = input()->shape().num_dims();
+
+  for (int i = 0; i < input()->shape().num_dims(); i++)
+  {
+    op_params.start_indices[i] = getTensorData<int32_t>(begin())[i];
+    op_params.stop_indices[i] = getTensorData<int32_t>(end())[i];
+    op_params.strides[i] = getTensorData<int32_t>(strides())[i];
+  }
+  op_params.begin_mask = params().begin_mask;
+  op_params.ellipsis_mask = 0;
+  op_params.end_mask = params().end_mask;
+  op_params.new_axis_mask = 0;
+  op_params.shrink_axis_mask = params().shrink_axis_mask;
+  std::vector<int32_t> output_shape_vector;
+  for (int i = 0; i < input()->shape().num_dims(); i++)
+  {
+    int idx = input()->shape().num_dims() - i - 1;
+    int32_t stride = getTensorData<int32_t>(strides())[idx];
+    assert(stride != 0);
+    int32_t begin = ::tflite::strided_slice::StartForAxis(op_params, getTensorShape(input()), idx);
+    int32_t end =
+      ::tflite::strided_slice::StopForAxis(op_params, getTensorShape(input()), idx, begin);
+
+    const bool shrink_axis = params().shrink_axis_mask & (1 << idx);
+    if (shrink_axis)
+    {
+      end = begin + 1;
+    }
+
+    int32_t dim_shape = std::ceil((end - begin) / static_cast<float>(stride));
+    dim_shape = dim_shape < 0 ? 0 : dim_shape;
+    if (!shrink_axis)
+    {
+      output_shape_vector.push_back(dim_shape);
+    }
+  }
+  // TODO: enable this only for kernels with dynamic shapes
+  Shape output_shape = Shape(output_shape_vector.size());
+  for (size_t i = 0; i < output_shape_vector.size(); i++)
+  {
+    output_shape.dim(i) = output_shape_vector[output_shape_vector.size() - i - 1];
+  }
+  output()->resize(output_shape);
+}
+
+void StridedSlice::execute() const
+{
+  tflite::StridedSliceParams op_params{};
+  op_params.start_indices_count = input()->shape().num_dims();
+  op_params.stop_indices_count = input()->shape().num_dims();
+  op_params.strides_count = input()->shape().num_dims();
+
+  for (int i = 0; i < input()->shape().num_dims(); i++)
+  {
+    op_params.start_indices[i] = getTensorData<int32_t>(begin())[i];
+    op_params.stop_indices[i] = getTensorData<int32_t>(end())[i];
+    op_params.strides[i] = getTensorData<int32_t>(strides())[i];
+  }
+  op_params.begin_mask = params().begin_mask;
+  op_params.ellipsis_mask = 0;
+  op_params.end_mask = params().end_mask;
+  op_params.new_axis_mask = 0;
+  op_params.shrink_axis_mask = params().shrink_axis_mask;
+
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
+                                          getTensorData<float>(input()), getTensorShape(output()),
+                                          getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
+                                          getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                          getTensorData<uint8_t>(output()));
+      break;
+    case DataType::S32:
+      tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
+                                          getTensorData<int32_t>(input()), getTensorShape(output()),
+                                          getTensorData<int32_t>(output()));
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
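
In the configure() above, each output dimension is sized as ceil((end - begin) / stride), clamped at zero, after begin/end have been resolved per axis; an axis marked in shrink_axis_mask contributes exactly one element and is then omitted from the output shape. A minimal sketch of that per-axis computation (illustrative only):

// Sketch of how a single STRIDED_SLICE output dimension is sized once begin/end
// have been resolved for an axis.
#include <algorithm>
#include <cmath>
#include <cstdint>

int32_t stridedSliceDimSize(int32_t begin, int32_t end, int32_t stride, bool shrink_axis)
{
  if (shrink_axis)
    end = begin + 1; // a shrunk axis yields a single element; the caller drops the axis

  const int32_t size =
    static_cast<int32_t>(std::ceil((end - begin) / static_cast<float>(stride)));
  return std::max(size, 0); // empty slices clamp to zero
}
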
diff --git a/onert-micro/luci-interpreter/src/kernels/Sub.cpp b/onert-micro/luci-interpreter/src/kernels/Sub.cpp
new file mode 100644 (file)
index 0000000..7b02c1e
--- /dev/null
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sub.h"
+#include "kernels/Utils.h"
+
+#include "PALSub.h"
+
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams &params)
+  : KernelWithParams<SubParams>({input1, input2}, {output}, params)
+{
+}
+
+void Sub::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type())
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type())
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Sub::execute() const
+{
+  switch (input1()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void Sub::evalFloat() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<float>(params, _params.activation);
+
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastSubSlow(
+      params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+      getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+  }
+  else
+  {
+    luci_interpreter_pal::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()),
+                              getTensorShape(input2()), getTensorData<float>(input2()),
+                              getTensorShape(output()), getTensorData<float>(output()));
+  }
+}
+
+template <typename T> void Sub::evalInteger() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<T>(params, _params.activation);
+
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastSubSlow(
+      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<T>(input1()),
+                               getTensorShape(input2()), getTensorData<T>(input2()),
+                               getTensorShape(output()), getTensorData<T>(output()));
+  }
+}
+
+void Sub::evalQuantized() const
+{
+  const auto input1_scale = static_cast<double>(input1()->scale());
+  const auto input2_scale = static_cast<double>(input2()->scale());
+  const auto output_scale = static_cast<double>(output()->scale());
+
+  const int left_shift = 20;
+  const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
+  const double real_input1_multiplier = input1_scale / twice_max_input_scale;
+  const double real_input2_multiplier = input2_scale / twice_max_input_scale;
+  const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
+
+  int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
+  int input1_shift{}, input2_shift{}, output_shift{};
+  quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
+  quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
+  quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::ArithmeticParams params{};
+  params.left_shift = left_shift;
+  // The kernel expects inputs' zero points to be negated.
+  params.input1_offset = -input1()->zero_point(); // Note the '-'.
+  params.input1_multiplier = input1_multiplier;
+  params.input1_shift = input1_shift;
+  params.input2_offset = -input2()->zero_point(); // Note the '-'.
+  params.input2_multiplier = input2_multiplier;
+  params.input2_shift = input2_shift;
+  params.output_offset = output()->zero_point();
+  params.output_multiplier = output_multiplier;
+  params.output_shift = output_shift;
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastSubSlow(
+      params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+      getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
+                               getTensorShape(input2()), getTensorData<uint8_t>(input2()),
+                               getTensorShape(output()), getTensorData<uint8_t>(output()));
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
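
The quantized path in evalQuantized() above rescales both inputs onto a common fixed-point grid of (2 * max(scale1, scale2)) / 2^20 before subtracting, then rescales the result to the output scale; the three real-valued multipliers it derives are expected to be below one, which is why quantizeMultiplierSmallerThanOneExp() applies. A minimal sketch of that scale bookkeeping with made-up example scales (illustrative only; it prints the real-valued multipliers rather than the integer multiplier/shift pairs):

// Sketch of the fixed-point scale bookkeeping used by the quantized Sub path.
#include <algorithm>
#include <cstdio>

int main()
{
  const double input1_scale = 0.5, input2_scale = 0.25, output_scale = 0.75; // example scales
  const int left_shift = 20;

  const double twice_max_input_scale = 2.0 * std::max(input1_scale, input2_scale);
  const double real_input1_multiplier = input1_scale / twice_max_input_scale;
  const double real_input2_multiplier = input2_scale / twice_max_input_scale;
  const double real_output_multiplier =
    twice_max_input_scale / ((1 << left_shift) * output_scale);

  // All three values are below one, so each can be encoded as a Q31 multiplier plus shift.
  std::printf("in1 %.6f in2 %.6f out %.9f\n", real_input1_multiplier, real_input2_multiplier,
              real_output_multiplier);
  return 0;
}
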
diff --git a/onert-micro/luci-interpreter/src/kernels/Tanh.cpp b/onert-micro/luci-interpreter/src/kernels/Tanh.cpp
new file mode 100644 (file)
index 0000000..069320e
--- /dev/null
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Tanh.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/tanh.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Tanh::Tanh(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Tanh::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  if (input()->element_type() == DataType::U8)
+  {
+    populateLookupTable();
+  }
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+void Tanh::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void Tanh::evalFloat() const
+{
+  tflite::reference_ops::Tanh(getTensorShape(input()), getTensorData<float>(input()),
+                              getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void Tanh::evalQuantized() const
+{
+  const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+  uint8_t *output_data = getTensorData<uint8_t>(output());
+  const uint8_t *input_data = getTensorData<uint8_t>(input());
+  for (int i = 0; i < size; ++i)
+  {
+    output_data[i] = getTableValue(input_data[i]);
+  }
+}
+
+void Tanh::populateLookupTable()
+{
+  const auto input_scale = static_cast<double>(input()->scale());
+  const auto input_zero_point = static_cast<int32_t>(input()->zero_point());
+  const auto output_scale = static_cast<double>(output()->scale());
+  const auto output_zero_point = static_cast<int32_t>(output()->zero_point());
+  const float inverse_scale = 1 / output_scale;
+  int32_t maxval = std::numeric_limits<uint8_t>::max();
+  int32_t minval = std::numeric_limits<uint8_t>::min();
+  for (int32_t val = minval; val <= maxval; ++val)
+  {
+    const float dequantized = input_scale * (val - input_zero_point);
+    const float transformed = std::tanh(dequantized);
+    const float rescaled = std::round(transformed * inverse_scale);
+    const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
+    setTableValue(static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)),
+                  static_cast<uint8_t>(val));
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
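
populateLookupTable() above precomputes tanh for every possible uint8 input: each quantized value is dequantized with the input scale and zero point, passed through std::tanh, and requantized with the output scale and zero point. A minimal sketch of the same table construction, assuming the table is a plain 256-entry array rather than the kernel's table storage (illustrative only):

// Sketch of building a uint8 tanh lookup table from input/output quantization params.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>

void buildTanhTable(float input_scale, int32_t input_zero_point, float output_scale,
                    int32_t output_zero_point, uint8_t table[256])
{
  const float inverse_scale = 1.0f / output_scale;
  const int32_t minval = std::numeric_limits<uint8_t>::min();
  const int32_t maxval = std::numeric_limits<uint8_t>::max();
  for (int32_t val = minval; val <= maxval; ++val)
  {
    const float dequantized = input_scale * static_cast<float>(val - input_zero_point);
    const float transformed = std::tanh(dequantized);
    const float rescaled = std::round(transformed * inverse_scale);
    const int32_t quantized = static_cast<int32_t>(rescaled) + output_zero_point;
    table[val] = static_cast<uint8_t>(std::max(minval, std::min(maxval, quantized)));
  }
}
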
diff --git a/onert-micro/luci-interpreter/src/kernels/TestUtils.cpp b/onert-micro/luci-interpreter/src/kernels/TestUtils.cpp
new file mode 100644 (file)
index 0000000..78caa37
--- /dev/null
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace testing
+{
+
+using ::testing::FloatNear;
+using ::testing::Matcher;
+
+Tensor makeOutputTensor(DataType element_type) { return Tensor(element_type, {}, {}, ""); }
+
+Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point)
+{
+  return Tensor(element_type, {}, {{scale}, {zero_point}}, "");
+}
+
+std::vector<float> dequantizeTensorData(const Tensor &tensor)
+{
+  if (tensor.element_type() == DataType::U8)
+  {
+    std::vector<uint8_t> data = extractTensorData<uint8_t>(tensor);
+    return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point());
+  }
+  if (tensor.element_type() == DataType::S8)
+  {
+    std::vector<int8_t> data = extractTensorData<int8_t>(tensor);
+    return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point());
+  }
+  else if (tensor.element_type() == DataType::S16)
+  {
+    // S16 quantization is symmetric, so zero point should be zero.
+    for (auto zp : tensor.zero_points())
+    {
+      (void)zp;
+      assert(zp == 0);
+    }
+
+    std::vector<int16_t> data = extractTensorData<int16_t>(tensor);
+    if (tensor.scales().size() == 1)
+    {
+      return dequantize(data.data(), data.size(), tensor.scale(), 0);
+    }
+
+    // quantized_dimension splits the shape into two parts:
+    // inner dimensions, which hold contiguous data sharing a single quantization scale,
+    // and outer dimensions, which hold the remaining dimensions.
+    const Shape shape = tensor.shape();
+    const int32_t quantized_dimension = tensor.quantized_dimension();
+    assert(quantized_dimension < shape.num_dims());
+    size_t outer_dims_size = 1;
+    int32_t quant_dim_size = shape.dim(quantized_dimension);
+    size_t inner_dims_size = 1;
+    assert(quant_dim_size == tensor.scales().size());
+
+    for (int i = 0; i < quantized_dimension; ++i)
+      outer_dims_size *= shape.dim(i);
+    for (int i = quantized_dimension + 1; i < shape.num_dims(); ++i)
+      inner_dims_size *= shape.dim(i);
+
+    assert(shape.num_elements() == outer_dims_size * quant_dim_size * inner_dims_size);
+
+    std::vector<float> dequantized_data;
+    dequantized_data.reserve(shape.num_elements());
+    for (size_t outer_it = 0; outer_it < outer_dims_size; ++outer_it)
+      for (int32_t channel = 0; channel < quant_dim_size; ++channel)
+      {
+        float scale = tensor.scales()[channel];
+        size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel);
+        std::vector<float> part_dequantized_data =
+          dequantize(data.data() + offset, inner_dims_size, scale, 0);
+        dequantized_data.insert(dequantized_data.end(), part_dequantized_data.begin(),
+                                part_dequantized_data.end());
+      }
+    return dequantized_data;
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values, float max_abs_error)
+{
+  std::vector<Matcher<float>> matchers;
+  matchers.reserve(values.size());
+  for (const float v : values)
+  {
+    matchers.emplace_back(FloatNear(v, max_abs_error));
+  }
+  return ElementsAreArray(matchers);
+}
+
+std::vector<int32_t> extractTensorShape(const Tensor &tensor)
+{
+  std::vector<int32_t> result;
+  int dims = tensor.shape().num_dims();
+  for (int i = 0; i < dims; i++)
+  {
+    result.push_back(tensor.shape().dim(i));
+  }
+  return result;
+}
+
+} // namespace testing
+} // namespace kernels
+} // namespace luci_interpreter
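
The S16 branch of dequantizeTensorData() above views the shape as [outer, quantized_dim, inner] and dequantizes one contiguous run of inner elements per channel with that channel's scale. A minimal sketch of the same walk, assuming symmetric quantization (zero point 0) and plain std:: containers (illustrative only):

// Sketch of per-channel dequantization over a [outer, channels, inner] view of the shape.
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<float> dequantizePerChannel(const std::vector<int16_t> &data,
                                        const std::vector<int32_t> &shape, int quantized_dim,
                                        const std::vector<float> &scales)
{
  size_t outer = 1, inner = 1;
  for (int i = 0; i < quantized_dim; ++i)
    outer *= shape[i];
  for (int i = quantized_dim + 1; i < static_cast<int>(shape.size()); ++i)
    inner *= shape[i];
  const size_t channels = scales.size();

  std::vector<float> out;
  out.reserve(data.size());
  for (size_t o = 0; o < outer; ++o)
    for (size_t c = 0; c < channels; ++c)
    {
      const size_t offset = inner * (channels * o + c); // start of this channel's run
      for (size_t i = 0; i < inner; ++i)
        out.push_back(static_cast<float>(data[offset + i]) * scales[c]);
    }
  return out;
}
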
diff --git a/onert-micro/luci-interpreter/src/kernels/Transpose.cpp b/onert-micro/luci-interpreter/src/kernels/Transpose.cpp
new file mode 100644 (file)
index 0000000..c7396ae
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Transpose.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Transpose::Transpose(const Tensor *input, const Tensor *perm, Tensor *output)
+  : Kernel({input, perm}, {output})
+{
+}
+
+void Transpose::configure()
+{
+  // Transpose op only supports 1D-4D input arrays.
+  int dims = input()->shape().num_dims();
+  const int32_t *perm_data = getTensorData<int32_t>(perm());
+
+  assert(input()->shape().num_dims() <= 4);
+  assert(input()->element_type() == output()->element_type());
+
+  assert(perm()->shape().num_dims() == 1);
+  assert(perm()->shape().dim(0) == dims);
+
+  Shape output_shape(dims);
+  for (int i = 0; i < dims; i++)
+  {
+    assert(perm_data[i] < dims && perm_data[i] >= 0);
+    output_shape.dim(i) = input()->shape().dim(perm_data[i]);
+  }
+  // TODO: enable this only for kernels with dynamic shapes
+
+  output()->resize(output_shape);
+}
+
+void Transpose::execute() const
+{
+  tflite::TransposeParams params{};
+  const int32_t *perm_data = getTensorData<int32_t>(perm());
+  const int32_t size = perm()->shape().dim(0);
+  params.perm_count = size;
+  for (int i = 0; i < size; i++)
+    params.perm[i] = perm_data[i];
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      tflite::reference_ops::Transpose(params, getTensorShape(input()),
+                                       getTensorData<float>(input()), getTensorShape(output()),
+                                       getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      tflite::reference_ops::Transpose(params, getTensorShape(input()),
+                                       getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                       getTensorData<uint8_t>(output()));
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/TransposeConv.cpp b/onert-micro/luci-interpreter/src/kernels/TransposeConv.cpp
new file mode 100644 (file)
index 0000000..f8483ea
--- /dev/null
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TransposeConv.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose_conv.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
+                             const Tensor *bias, Tensor *output, Tensor *scratch_tensor,
+                             const TransposeConvParams &params)
+  : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias},
+                                          {output, scratch_tensor}, params)
+{
+}
+
+TransposeConv::~TransposeConv()
+{
+  // Define destructor here, to delete vector of quantized multipliers properly
+}
+
+void TransposeConv::configure()
+{
+  assert(output_shape()->shape().num_dims() == 1);
+  assert(input()->shape().num_dims() == 4);
+  assert(filter()->shape().num_dims() == 4);
+  assert(input()->element_type() == DataType::FLOAT32 || input()->element_type() == DataType::U8 ||
+         input()->element_type() == DataType::S16);
+  assert(input()->element_type() == output()->element_type());
+  assert(input()->shape().dim(3) == filter()->shape().dim(3));
+
+  const int num_dims = output_shape()->shape().dim(0);
+  Shape out_shape(num_dims);
+  const auto *shape_data = getTensorData<int32_t>(output_shape());
+  for (int i = 0; i < num_dims; i++)
+    out_shape.dim(i) = shape_data[i];
+  // TODO: enable this only for kernels with dynamic shapes
+  output()->resize(out_shape);
+
+  const int32_t filter_height = filter()->shape().dim(1);
+  const int32_t filter_width = filter()->shape().dim(2);
+  const int32_t output_height = out_shape.dim(1);
+  const int32_t output_width = out_shape.dim(2);
+
+  const int32_t unused_output_height =
+    computeOutputSize(params().padding, output_height, filter_height, params().stride_height, 1);
+  const int32_t unused_output_width =
+    computeOutputSize(params().padding, output_width, filter_width, params().stride_width, 1);
+
+  _padding_height =
+    computePadding(params().stride_height, 1, output_height, filter_height, unused_output_height);
+  _padding_width =
+    computePadding(params().stride_width, 1, output_width, filter_width, unused_output_width);
+
+  if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
+  {
+    auto scratch_tensor = getOutputTensors()[1];
+    scratch_tensor->resize(output()->shape());
+    const std::vector<double> real_multipliers =
+      getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+    _quant_multipliers = quantizeMultipliers(real_multipliers);
+  }
+  else
+  {
+    auto scratch_tensor = getOutputTensors()[1];
+    scratch_tensor->set_allocatable(false);
+  }
+}
+
+void TransposeConv::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::U8:
+      if (filter()->scales().size() == 1)
+      {
+        evalQuantized();
+      }
+      else if (filter()->scales().size() > 1)
+      {
+        LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+        LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+                               static_cast<size_t>(filter()->shape().dim(0)));
+        evalQuantizedPerChannel();
+      }
+      break;
+    case DataType::S16:
+      evalQuantizedS16();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+void TransposeConv::evalFloat() const
+{
+  tflite::ConvParams op_params{};
+  op_params.padding_type = tflite::PaddingType::kSame;
+  op_params.padding_values.height = _padding_height;
+  op_params.padding_values.width = _padding_width;
+  op_params.stride_height = params().stride_height;
+  op_params.stride_width = params().stride_width;
+  tflite::reference_ops::TransposeConv(op_params,                                                //
+                                       getTensorShape(input()), getTensorData<float>(input()),   //
+                                       getTensorShape(filter()), getTensorData<float>(filter()), //
+                                       getTensorShape(bias()), getTensorData<float>(bias()),     //
+                                       getTensorShape(output()), getTensorData<float>(output()), //
+                                       tflite::RuntimeShape(), nullptr);
+}
+
+void TransposeConv::evalQuantized() const
+{
+  tflite::ConvParams op_params{};
+  op_params.padding_type = tflite::PaddingType::kSame;
+  op_params.padding_values.height = _padding_height;
+  op_params.padding_values.width = _padding_width;
+  op_params.stride_height = params().stride_height;
+  op_params.stride_width = params().stride_width;
+  // The kernel expects input and filter zero points to be negated.
+  op_params.input_offset = -input()->zero_point();    // Note the '-'.
+  op_params.weights_offset = -filter()->zero_point(); // Note the '-'.
+  op_params.output_offset = output()->zero_point();
+  op_params.output_multiplier = _quant_multipliers[0].multiplier;
+  op_params.output_shift = _quant_multipliers[0].shift;
+  op_params.quantized_activation_min = std::numeric_limits<uint8_t>::min();
+  op_params.quantized_activation_max = std::numeric_limits<uint8_t>::max();
+
+  auto scratch_tensor = getOutputTensors()[1];
+
+  tflite::reference_ops::TransposeConv(
+    op_params,                                                  //
+    getTensorShape(input()), getTensorData<uint8_t>(input()),   //
+    getTensorShape(filter()), getTensorData<uint8_t>(filter()), //
+    getTensorShape(bias()), getTensorData<int32_t>(bias()),     //
+    getTensorShape(output()), getTensorData<uint8_t>(output()), //
+    tflite::RuntimeShape(), nullptr,                            //
+    getTensorData<int32_t>(scratch_tensor));
+}
+
+void TransposeConv::evalQuantizedPerChannel() const
+{
+  const auto *input_data = getTensorData<uint8_t>(input());
+  const auto *filter_data = getTensorData<uint8_t>(filter());
+  const auto *bias_data = getTensorData<int32_t>(bias());
+  auto *output_data = getTensorData<uint8_t>(output());
+
+  auto scratch_tensor = getOutputTensors()[1];
+  auto *scratch_data = getTensorData<int32_t>(scratch_tensor);
+
+  const Shape &input_shape = input()->shape();
+  const Shape &filter_shape = filter()->shape();
+  const Shape &output_shape = output()->shape();
+
+  const int32_t batches = input_shape.dim(0);
+  const int32_t input_height = input_shape.dim(1);
+  const int32_t input_width = input_shape.dim(2);
+  const int32_t input_depth = input_shape.dim(3);
+  const int32_t output_depth = filter_shape.dim(0);
+  const int32_t filter_height = filter_shape.dim(1);
+  const int32_t filter_width = filter_shape.dim(2);
+  const int32_t output_height = output_shape.dim(1);
+  const int32_t output_width = output_shape.dim(2);
+
+  const int32_t stride_height = _params.stride_height;
+  const int32_t stride_width = _params.stride_width;
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
+
+  std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int32_t));
+
+  BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
+  for (int32_t batch = 0; batch < batches; ++batch)
+  {
+    for (int32_t in_y = 0; in_y < input_height; ++in_y)
+    {
+      for (int32_t in_x = 0; in_x < input_width; ++in_x)
+      {
+        for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+        {
+          const int32_t out_y_origin = in_y * stride_height - _padding_height;
+          const int32_t out_x_origin = in_x * stride_width - _padding_width;
+          for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+          {
+            for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+            {
+              const int32_t out_x = out_x_origin + filter_x;
+              const int32_t out_y = out_y_origin + filter_y;
+              if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
+              {
+                for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+                {
+                  const uint8_t input_val =
+                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+                  const uint8_t filter_val =
+                    filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+                  scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
+                    static_cast<int32_t>(input_val - input()->zero_point()) *
+                    static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+    for (int32_t out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (int32_t out_x = 0; out_x < output_width; ++out_x)
+      {
+        for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+        {
+          int32_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
+          if (bias_data)
+          {
+            acc += bias_data[out_c];
+          }
+
+          int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+            acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
+
+          scaled_acc += output()->zero_point();
+          scaled_acc = std::max(scaled_acc, activation_min);
+          scaled_acc = std::min(scaled_acc, activation_max);
+
+          output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+        }
+      }
+    }
+  }
+}
+
+void TransposeConv::evalQuantizedS16() const
+{
+  const auto *input_data = getTensorData<int16_t>(input());
+  const auto *filter_data = getTensorData<int16_t>(filter());
+  const auto *bias_data = getTensorData<int64_t>(bias());
+  auto *output_data = getTensorData<int16_t>(output());
+
+  auto scratch_tensor = getOutputTensors()[1];
+  auto *scratch_data = getTensorData<int64_t>(scratch_tensor);
+
+  const Shape &input_shape = input()->shape();
+  const Shape &filter_shape = filter()->shape();
+  const Shape &output_shape = output()->shape();
+
+  const int32_t batches = input_shape.dim(0);
+  const int32_t input_height = input_shape.dim(1);
+  const int32_t input_width = input_shape.dim(2);
+  const int32_t input_depth = input_shape.dim(3);
+  const int32_t output_depth = filter_shape.dim(0);
+  const int32_t filter_height = filter_shape.dim(1);
+  const int32_t filter_width = filter_shape.dim(2);
+  const int32_t output_height = output_shape.dim(1);
+  const int32_t output_width = output_shape.dim(2);
+
+  const int32_t stride_height = _params.stride_height;
+  const int32_t stride_width = _params.stride_width;
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
+
+  std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int64_t));
+
+  BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
+  for (int32_t batch = 0; batch < batches; ++batch)
+  {
+    for (int32_t in_y = 0; in_y < input_height; ++in_y)
+    {
+      for (int32_t in_x = 0; in_x < input_width; ++in_x)
+      {
+        for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+        {
+          const int32_t out_y_origin = in_y * stride_height - _padding_height;
+          const int32_t out_x_origin = in_x * stride_width - _padding_width;
+          for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+          {
+            for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+            {
+              const int32_t out_x = out_x_origin + filter_x;
+              const int32_t out_y = out_y_origin + filter_y;
+              if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
+              {
+                for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+                {
+                  const int16_t input_val =
+                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+                  const int16_t filter_val =
+                    filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+                  scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
+                    static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+    for (int32_t out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (int32_t out_x = 0; out_x < output_width; ++out_x)
+      {
+        for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+        {
+          int64_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
+          if (bias_data)
+          {
+            acc += bias_data[out_c];
+          }
+          int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+            acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
+
+          scaled_acc = std::max(scaled_acc, activation_min);
+          scaled_acc = std::min(scaled_acc, activation_max);
+
+          output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+        }
+      }
+    }
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
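
Both quantized paths above use the same scatter-accumulate structure: each input element is multiplied by every filter tap and added into the output positions it maps to, accumulating in a wider scratch buffer before the per-channel rescale. A minimal 1-D sketch of that pattern, with padding and quantization omitted (illustrative only):

// Sketch of the transposed-convolution scatter-accumulate pattern, reduced to 1-D.
#include <cstdint>
#include <vector>

std::vector<int64_t> transposeConv1D(const std::vector<int32_t> &input,
                                     const std::vector<int32_t> &filter, int stride,
                                     int output_size)
{
  std::vector<int64_t> output(output_size, 0); // the "scratch" accumulator starts at zero
  for (int in_x = 0; in_x < static_cast<int>(input.size()); ++in_x)
  {
    const int out_x_origin = in_x * stride;
    for (int f = 0; f < static_cast<int>(filter.size()); ++f)
    {
      const int out_x = out_x_origin + f;
      if (out_x >= 0 && out_x < output_size)
        output[out_x] += static_cast<int64_t>(input[in_x]) * filter[f]; // scatter, not gather
    }
  }
  return output;
}
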
diff --git a/onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.cpp b/onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.cpp
new file mode 100644 (file)
index 0000000..57cf9f2
--- /dev/null
@@ -0,0 +1,441 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include "PALUnidirectionalSequenceLSTM.h"
+#include "PALApplyActivationToVector.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+#ifndef DIS_QUANT
+
+bool checkedLog2(const float x, int *log2_result)
+{
+  // Use std::log instead of std::log2 to work around std::log2 being
+  // missing in a toolchain used in some TensorFlow tests as of May 2018.
+  //
+  const float x_log2 = std::log(x) * (1.0f / std::log(2.0f));
+  const float x_log2_rounded = std::round(x_log2);
+  const float x_log2_fracpart = x_log2 - x_log2_rounded;
+
+  *log2_result = static_cast<int>(x_log2_rounded);
+  return std::abs(x_log2_fracpart) < 1e-3f;
+}
+
+// Create parameters for the element-wise multiplications that happen in a) the cell
+// state update and b) the hidden state update.
+// Note that all gate outputs are symmetrically quantized, so only scales are
+// required for the inputs. However, during the hidden state update phase the
+// output is the updated hidden state, which is asymmetrically quantized, so the
+// output may require a zero point.
+lstm::ArithmeticParams createInterGateParams(const float input1_scale, const float input2_scale,
+                                             const float output_scale, const DataType output_type,
+                                             const int output_zp)
+{
+  lstm::ArithmeticParams op_params;
+  if (output_type == DataType::S16)
+  {
+    op_params.quantized_activation_min = std::numeric_limits<int16_t>::min();
+    op_params.quantized_activation_max = std::numeric_limits<int16_t>::max();
+  }
+  else if (output_type == DataType::S8)
+  {
+    op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
+    op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
+  }
+
+  op_params.input1_offset = 0; // symmetric
+  op_params.input2_offset = 0; // symmetric
+  op_params.output_offset = output_zp;
+
+  const double input_product_scale =
+    static_cast<double>(input1_scale) * static_cast<double>(input2_scale);
+  double effective_scale = input_product_scale / static_cast<double>(output_scale);
+  auto output_shift = static_cast<int>(op_params.output_shift);
+  kernels::quantizeMultiplier(effective_scale, &op_params.output_multiplier, &output_shift);
+  op_params.output_shift = output_shift;
+  return op_params;
+}
+
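+// Compute the quantized fully connected parameters for one gate: one set for the
+// input-to-gate projection and one for the recurrent (hidden-state-to-gate) projection.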
+void createGateParams(const circle::Tensor *input, const circle::Tensor *input_weight,
+                      const circle::Tensor *input_bias, const circle::Tensor *hidden_state,
+                      const circle::Tensor *hidden_state_weight,
+                      const float nonlinear_activation_input_scale, const DataType cell_type,
+                      lstm::GateParameters *gate_params)
+{
+  // Input CalculateOpDataFullyConnected
+  {
+    lstm::FullyConnectedParams input_gate_params;
+    double real_multiplier = 0.0;
+    int output_shift;
+    int32_t output_activation_min;
+    int32_t output_activation_max;
+    int32_t output_multiplier;
+    real_multiplier = kernels::getQuantizedConvolutionMultipler(
+      Tensor::scale(input), Tensor::scale(input_weight), nonlinear_activation_input_scale);
+    kernels::quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+    kernels::calculateActivationRangeQuantized(FusedActFunc::NONE, 0,
+                                               nonlinear_activation_input_scale, cell_type,
+                                               &output_activation_min, &output_activation_max);
+
+    input_gate_params.output_shift = output_shift;
+    input_gate_params.output_multiplier = output_multiplier;
+    input_gate_params.quantized_activation_max = output_activation_max;
+    input_gate_params.quantized_activation_min = output_activation_min;
+    input_gate_params.input_offset = -Tensor::zero_point(input);
+    input_gate_params.weights_offset = -Tensor::zero_point(input_weight);
+    input_gate_params.output_offset = 0;
+
+    gate_params->input_fc_params = input_gate_params;
+  }
+
+  // Recurrent CalculateOpDataFullyConnected
+  {
+    lstm::FullyConnectedParams recurrent_gate_params;
+    double real_multiplier = 0.0;
+    int output_shift;
+    int32_t output_activation_min;
+    int32_t output_activation_max;
+    int32_t output_multiplier;
+    real_multiplier = kernels::getQuantizedConvolutionMultipler(Tensor::scale(hidden_state),
+                                                                Tensor::scale(hidden_state_weight),
+                                                                nonlinear_activation_input_scale);
+    kernels::quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+    kernels::calculateActivationRangeQuantized(FusedActFunc::NONE, 0,
+                                               nonlinear_activation_input_scale, cell_type,
+                                               &output_activation_min, &output_activation_max);
+
+    recurrent_gate_params.output_shift = output_shift;
+    recurrent_gate_params.output_multiplier = output_multiplier;
+    recurrent_gate_params.quantized_activation_max = output_activation_max;
+    recurrent_gate_params.quantized_activation_min = output_activation_min;
+    recurrent_gate_params.input_offset = -Tensor::zero_point(hidden_state);
+    recurrent_gate_params.weights_offset = -Tensor::zero_point(hidden_state_weight);
+    recurrent_gate_params.output_offset = 0;
+
+    gate_params->recurrent_fc_params = recurrent_gate_params;
+  }
+}
+
+void prepareGateParamsInteger(lstm::LSTMStruct *lstm_struct,
+                              lstm::LSTMParameters *quant_lstm_params)
+{
+  float nonlinear_input_scale = 0.00024414062; // 2^-12 Q3.12 -> Q0.15
+
+  createGateParams(lstm_struct->input(), lstm_struct->input_to_forget_weights(),
+                   lstm_struct->forget_gate_bias(), lstm_struct->output_state(),
+                   lstm_struct->recurrent_to_forget_weights(), nonlinear_input_scale, DataType::S16,
+                   &quant_lstm_params->forget_gate_parameters);
+
+  createGateParams(lstm_struct->input(), lstm_struct->input_to_input_weights(),
+                   lstm_struct->input_gate_bias(), lstm_struct->output_state(),
+                   lstm_struct->recurrent_to_input_weights(), nonlinear_input_scale, DataType::S16,
+                   &quant_lstm_params->input_gate_parameters);
+
+  // lstm::GateParameters cell_gate_parameters;
+  createGateParams(lstm_struct->input(), lstm_struct->input_to_cell_weights(),
+                   lstm_struct->cell_gate_bias(), lstm_struct->output_state(),
+                   lstm_struct->recurrent_to_cell_weights(), nonlinear_input_scale, DataType::S16,
+                   &quant_lstm_params->cell_gate_parameters);
+
+  // lstm::GateParameters output_gate_parameters;
+  createGateParams(lstm_struct->input(), lstm_struct->input_to_output_weights(),
+                   lstm_struct->output_gate_bias(), lstm_struct->output_state(),
+                   lstm_struct->recurrent_to_output_weights(), nonlinear_input_scale, DataType::S16,
+                   &quant_lstm_params->output_gate_parameters);
+
+  // Inter gate multiplication parameters
+  float nonlinear_output_scale = 0.00003051757; // 2^-15 Q3.12 -> Q0.15
+  float cell_state_scale =
+    Tensor::scale(lstm_struct->cell_state()); // lstm_tensors.CellStateTensor()->params.scale;
+  // forget gate output (nonlinear output) x cell state -> cell state
+  quant_lstm_params->inter_gate_parameters.forget_cell_mul_params = createInterGateParams(
+    nonlinear_output_scale, cell_state_scale, cell_state_scale, DataType::S16, 0);
+
+  // input gate output x cell gate output -> cell state
+  quant_lstm_params->inter_gate_parameters.input_mul_params = createInterGateParams(
+    nonlinear_output_scale, nonlinear_output_scale, cell_state_scale, DataType::S16, 0);
+
+  // tanh output x output gate output -> hidden state (potentially asymmetric)
+  quant_lstm_params->inter_gate_parameters.output_mul_params = createInterGateParams(
+    nonlinear_output_scale, nonlinear_output_scale, Tensor::scale(lstm_struct->output_state()),
+    Tensor::element_type(lstm_struct->output_state()),
+    Tensor::zero_point(lstm_struct->output_state()));
+}
+
+// Create the additional information about the cell state, which includes:
+// cell_state_scale_power: used in the integer nonlinear functions (e.g., tanh)
+// quantized_cell_clip: the cell clip range expressed in the quantized domain
+lstm::CellStateInfo createLstmCellStateInfo(const float cell_state_scale, const float cell_clip)
+{
+  lstm::CellStateInfo cell_state_info;
+  // cell_state_scale_power: 2^-cell_state_scale_power = cell state scale
+  int buffer;
+  checkedLog2(cell_state_scale, &buffer);
+  cell_state_info.cell_state_scale_power = buffer;
+  // Cell state specifics
+  cell_state_info.cell_clip = cell_clip;
+  cell_state_info.quantized_cell_clip = static_cast<int16_t>(std::min(
+    std::max(static_cast<double>(cell_clip) / static_cast<double>(cell_state_scale), -32768.0),
+    32767.0));
+  return cell_state_info;
+}
+
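+// Integer evaluation (int8 activations, int16 cell state): prepare the quantized gate
+// and inter-gate parameters, allocate zero-initialized state and scratch buffers, then
+// delegate to the PAL LSTM implementation.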
+void evalInt8(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph, bool)
+{
+  lstm::LSTMStruct lstm_struct(cur_op, runtime_graph);
+
+  lstm::LSTMParameters quant_lstm_params;
+  prepareGateParamsInteger(&lstm_struct, &quant_lstm_params);
+
+  lstm::CellStateInfo cell_state_info = createLstmCellStateInfo(
+    luci_interpreter::Tensor::scale(lstm_struct.cell_state()), lstm_struct.options->cell_clip());
+
+  const bool time_major = lstm_struct.options->time_major();
+  const auto batch_size =
+    time_major ? Tensor::dim(lstm_struct.input(), 1) : Tensor::dim(lstm_struct.input(), 0);
+  const auto state_dimension = Tensor::dim(lstm_struct.output_state(), 1);
+  const auto cell_state_type_size = getDataTypeSize(Tensor::element_type(lstm_struct.cell_state()));
+
+  auto scratch_0_data =
+    std::make_unique<uint8_t[]>(batch_size * state_dimension * cell_state_type_size);
+  auto scratch_1_data =
+    std::make_unique<uint8_t[]>(batch_size * state_dimension * cell_state_type_size);
+  auto scratch_2_data =
+    std::make_unique<uint8_t[]>(batch_size * state_dimension * cell_state_type_size);
+  auto scratch_3_data =
+    std::make_unique<uint8_t[]>(batch_size * state_dimension * cell_state_type_size);
+
+  // Create the output state tensor and fill it with zeros
+  auto output_state_data =
+    std::make_unique<int8_t[]>(Tensor::num_elements(lstm_struct.output_state()));
+  std::fill_n(output_state_data.get(), Tensor::num_elements(lstm_struct.output_state()), 0);
+
+  // Create the cell state tensor and fill it with zeros
+  auto cell_state_data =
+    std::make_unique<int16_t[]>(Tensor::num_elements(lstm_struct.cell_state()));
+  std::fill_n(cell_state_data.get(), Tensor::num_elements(lstm_struct.cell_state()), 0);
+
+  luci_interpreter_pal::evalLSTM<int8_t, int8_t, int16_t, int32_t>(
+    &lstm_struct, &quant_lstm_params, &cell_state_info, output_state_data.get(),
+    cell_state_data.get(), kernels::getTensorData<int16_t>(scratch_0_data.get()),
+    kernels::getTensorData<int16_t>(scratch_1_data.get()),
+    kernels::getTensorData<int16_t>(scratch_2_data.get()),
+    kernels::getTensorData<int16_t>(scratch_3_data.get()), runtime_graph);
+}
+
+#endif // DIS_QUANT
+
+#ifndef DIS_FLOAT
+lstm::FullyConnectedParams createFcParamsFloat()
+{
+  lstm::FullyConnectedParams op_params;
+  kernels::calculateActivationRange(FusedActFunc::NONE, &op_params.float_activation_min,
+                                    &op_params.float_activation_max);
+  return op_params;
+}
+
+lstm::GateParameters createGateParamsFloat()
+{
+  lstm::GateParameters gate_params;
+
+  gate_params.input_fc_params = createFcParamsFloat();
+  gate_params.recurrent_fc_params = createFcParamsFloat();
+
+  return gate_params;
+}
+
+lstm::CellStateInfo createLstmCellStateInfoFloat(const float cell_clip)
+{
+  lstm::CellStateInfo cell_state_info;
+  cell_state_info.cell_clip = cell_clip;
+  cell_state_info.cell_state_scale_power = 0; // no quantization
+  cell_state_info.quantized_cell_clip = 0;    // no quantization
+  return cell_state_info;
+}
+
+void prepareGateParamsFloat(lstm::LSTMParameters *float_lstm_params)
+{
+  // Gate Parameters
+  float_lstm_params->forget_gate_parameters = createGateParamsFloat();
+  float_lstm_params->input_gate_parameters = createGateParamsFloat();
+  float_lstm_params->cell_gate_parameters = createGateParamsFloat();
+  float_lstm_params->output_gate_parameters = createGateParamsFloat();
+
+  // Inter gate multiplication parameters
+  lstm::ArithmeticParams op_params;
+  kernels::calculateActivationRange(FusedActFunc::NONE, &op_params.float_activation_min,
+                                    &op_params.float_activation_max);
+  float_lstm_params->inter_gate_parameters.forget_cell_mul_params = op_params;
+  float_lstm_params->inter_gate_parameters.input_mul_params = op_params;
+  float_lstm_params->inter_gate_parameters.output_mul_params = op_params;
+}
+
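+// Float evaluation: no quantization parameters are needed beyond the activation range;
+// state and scratch buffers are allocated and zero-initialized before delegating to the
+// PAL LSTM implementation.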
+void evalFloat(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph, bool)
+{
+  lstm::LSTMStruct lstm_struct(cur_op, runtime_graph);
+
+  lstm::CellStateInfo cell_state_info =
+    createLstmCellStateInfoFloat(lstm_struct.options->cell_clip());
+
+  lstm::LSTMParameters lstm_params;
+  prepareGateParamsFloat(&lstm_params);
+
+  const bool time_major = lstm_struct.options->time_major();
+  const auto batch_size =
+    time_major ? Tensor::dim(lstm_struct.input(), 1) : Tensor::dim(lstm_struct.input(), 0);
+  const auto state_dimension = Tensor::dim(lstm_struct.output_state(), 1);
+  const auto cell_state_type_size = getDataTypeSize(Tensor::element_type(lstm_struct.cell_state()));
+
+  auto scratch_0_data =
+    std::make_unique<uint8_t[]>(batch_size * state_dimension * cell_state_type_size);
+  auto scratch_1_data =
+    std::make_unique<uint8_t[]>(batch_size * state_dimension * cell_state_type_size);
+  auto scratch_2_data =
+    std::make_unique<uint8_t[]>(batch_size * state_dimension * cell_state_type_size);
+  auto scratch_3_data =
+    std::make_unique<uint8_t[]>(batch_size * state_dimension * cell_state_type_size);
+
+  // Create the output state tensor and fill it with zeros
+  auto output_state_data =
+    std::make_unique<float[]>(Tensor::num_elements(lstm_struct.output_state()));
+  std::fill_n(output_state_data.get(), Tensor::num_elements(lstm_struct.output_state()), 0);
+
+  // Create the cell state tensor and fill it with zeros
+  auto cell_state_data = std::make_unique<float[]>(Tensor::num_elements(lstm_struct.cell_state()));
+  std::fill_n(cell_state_data.get(), Tensor::num_elements(lstm_struct.cell_state()), 0);
+
+  luci_interpreter_pal::evalLSTM<float, float, float, float>(
+    &lstm_struct, &lstm_params, &cell_state_info, output_state_data.get(), cell_state_data.get(),
+    kernels::getTensorData<float>(scratch_0_data.get()),
+    kernels::getTensorData<float>(scratch_1_data.get()),
+    kernels::getTensorData<float>(scratch_2_data.get()),
+    kernels::getTensorData<float>(scratch_3_data.get()), runtime_graph);
+}
+#endif // DIS_FLOAT
+
+void validateWeightTensorSize(const circle::Tensor *weight_tensor, int dim1_size, int dim2_size)
+{
+  LUCI_INTERPRETER_CHECK(Tensor::num_dims(weight_tensor) == 2);
+  LUCI_INTERPRETER_CHECK(Tensor::dim(weight_tensor, 0) == dim1_size);
+  LUCI_INTERPRETER_CHECK(Tensor::dim(weight_tensor, 1) == dim2_size);
+}
+
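+// Internal tensor indices follow the operator's input order: 1..4 are the input-to-gate
+// weights, 5..8 the recurrent weights, and 12..15 the gate biases (see LSTMStruct in
+// UnidirectionalSequenceLSTM.h).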
+void validateTensorsSize(lstm::LSTMStruct *lstm_struct, const bool time_major)
+{
+  const auto batch_size =
+    time_major ? Tensor::dim(lstm_struct->input(), 1) : Tensor::dim(lstm_struct->input(), 0);
+
+  const auto input_dimension = Tensor::dim(lstm_struct->input(), 2);
+  const auto state_dimension = Tensor::dim(lstm_struct->output_state(), 1);
+
+  // Input FC weights
+  for (int32_t i = 1; i < 5; i++)
+  {
+    validateWeightTensorSize(lstm_struct->get_internal_tensor(i), state_dimension, input_dimension);
+  }
+
+  // Recurrent FC weights
+  for (int32_t i = 5; i < 9; i++)
+  {
+    validateWeightTensorSize(lstm_struct->get_internal_tensor(i), state_dimension, state_dimension);
+  }
+
+  // Biases
+  for (int32_t i = 12; i < 16; i++)
+  {
+    LUCI_INTERPRETER_CHECK(Tensor::num_dims(lstm_struct->get_internal_tensor(i)) == 1);
+    LUCI_INTERPRETER_CHECK(Tensor::dim(lstm_struct->get_internal_tensor(i), 0) == state_dimension);
+  }
+
+  // Check the shape of the input state tensors.
+  // These tensors may be 1D or 2D; either is fine as long as the total size is
+  // correct.
+  LUCI_INTERPRETER_CHECK(Tensor::num_elements(lstm_struct->output_state()) ==
+                         batch_size * state_dimension);
+  LUCI_INTERPRETER_CHECK(Tensor::num_elements(lstm_struct->cell_state()) ==
+                         batch_size * state_dimension);
+
+  // Check the shape of output tensor against that of input tensor
+  LUCI_INTERPRETER_CHECK(Tensor::num_dims(lstm_struct->output()) == 3);
+  LUCI_INTERPRETER_CHECK(Tensor::dim(lstm_struct->input(), 0) ==
+                         Tensor::dim(lstm_struct->output(), 0));
+  LUCI_INTERPRETER_CHECK(Tensor::dim(lstm_struct->input(), 1) ==
+                         Tensor::dim(lstm_struct->output(), 1));
+  LUCI_INTERPRETER_CHECK(Tensor::dim(lstm_struct->output(), 2) == state_dimension);
+}
+
+} // namespace
+
+void configure_kernel_CircleUnidirectionalSequenceLSTM(const circle::Operator *cur_op,
+                                                       BaseRuntimeGraph *runtime_graph)
+{
+  lstm::LSTMStruct lstm_struct(cur_op, runtime_graph);
+
+  LUCI_INTERPRETER_CHECK(Tensor::element_type(lstm_struct.input()) == DataType::FLOAT32 or
+                         Tensor::element_type(lstm_struct.input()) == DataType::S8);
+
+  lstm_struct.validateTensorTypes();
+
+  const bool time_major = lstm_struct.options->time_major();
+
+  validateTensorsSize(&lstm_struct, time_major);
+
+  // No peephole
+  for (int32_t i = 9; i < 12; ++i)
+    LUCI_INTERPRETER_CHECK(lstm_struct.get_internal_tensor(i) == nullptr);
+
+  // No projection
+  for (int32_t i = 16; i < 18; ++i)
+    LUCI_INTERPRETER_CHECK(lstm_struct.get_internal_tensor(i) == nullptr);
+
+  // No internal layer norm
+  for (int32_t i = 20; i < 24; ++i)
+    LUCI_INTERPRETER_CHECK(lstm_struct.get_internal_tensor(i) == nullptr);
+}
+
+void execute_kernel_CircleUnidirectionalSequenceLSTM(const circle::Operator *cur_op,
+                                                     BaseRuntimeGraph *runtime_graph, bool in_place)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  assert(input_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+
+  switch (Tensor::element_type(input))
+  {
+#ifndef DIS_FLOAT
+    case DataType::FLOAT32:
+      evalFloat(cur_op, runtime_graph, in_place);
+      break;
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+    case DataType::S8:
+      evalInt8(cur_op, runtime_graph, in_place);
+      break;
+#endif // DIS_QUANT
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.h b/onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.h
new file mode 100644 (file)
index 0000000..4a24978
--- /dev/null
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_UNIDIRECTIONAL_SEQUENCE_LSTM_H
+#define LUCI_INTERPRETER_KERNELS_UNIDIRECTIONAL_SEQUENCE_LSTM_H
+
+#include "Utils.h"
+
+namespace luci_interpreter
+{
+namespace lstm
+{
+
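+// Convenience wrapper that resolves the 24 operator inputs (and the single output) of
+// UnidirectionalSequenceLSTM into named tensor accessors; optional inputs resolve to
+// nullptr when absent.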
+struct LSTMStruct
+{
+  LSTMStruct() = delete;
+  LSTMStruct(const LSTMStruct &) = delete;
+
+  explicit LSTMStruct(const circle::Operator *cur_op,
+                      luci_interpreter::BaseRuntimeGraph *runtime_graph)
+  {
+    const auto input_index = cur_op->inputs()->operator[](0);
+    const auto input_to_input_weights_index = cur_op->inputs()->operator[](1);
+    const auto input_to_forget_weights_index = cur_op->inputs()->operator[](2);
+    const auto input_to_cell_weights_index = cur_op->inputs()->operator[](3);
+    const auto input_to_output_weights_index = cur_op->inputs()->operator[](4);
+    assert(input_index != -1);
+    // input_to_input_weights_index - optional
+    assert(input_to_forget_weights_index != -1);
+    assert(input_to_cell_weights_index != -1);
+    assert(input_to_output_weights_index != -1);
+    internal_tensors[0] = runtime_graph->getCircleTensorByIndex(input_index);
+    internal_tensors[1] = runtime_graph->getCircleTensorByIndex(input_to_input_weights_index);
+    internal_tensors[2] = runtime_graph->getCircleTensorByIndex(input_to_forget_weights_index);
+    internal_tensors[3] = runtime_graph->getCircleTensorByIndex(input_to_cell_weights_index);
+    internal_tensors[4] = runtime_graph->getCircleTensorByIndex(input_to_output_weights_index);
+
+    const auto recurrent_to_input_weights_index = cur_op->inputs()->operator[](5);
+    const auto recurrent_to_forget_weights_index = cur_op->inputs()->operator[](6);
+    const auto recurrent_to_cell_weights_index = cur_op->inputs()->operator[](7);
+    const auto recurrent_to_output_weights_index = cur_op->inputs()->operator[](8);
+    // recurrent_to_input_weights_index - optional
+    assert(recurrent_to_forget_weights_index != -1);
+    assert(recurrent_to_cell_weights_index != -1);
+    assert(recurrent_to_output_weights_index != -1);
+    internal_tensors[5] = runtime_graph->getCircleTensorByIndex(recurrent_to_input_weights_index);
+    internal_tensors[6] = runtime_graph->getCircleTensorByIndex(recurrent_to_forget_weights_index);
+    internal_tensors[7] = runtime_graph->getCircleTensorByIndex(recurrent_to_cell_weights_index);
+    internal_tensors[8] = runtime_graph->getCircleTensorByIndex(recurrent_to_output_weights_index);
+
+    const auto cell_to_input_weights_index = cur_op->inputs()->operator[](9);
+    const auto cell_to_forget_weights_index = cur_op->inputs()->operator[](10);
+    const auto cell_to_output_weights_index = cur_op->inputs()->operator[](11);
+    // optional cell_to_input_weights_index
+    // optional cell_to_forget_weights_index
+    // optional cell_to_output_weights_index
+    internal_tensors[9] = runtime_graph->getCircleTensorByIndex(cell_to_input_weights_index);
+    internal_tensors[10] = runtime_graph->getCircleTensorByIndex(cell_to_forget_weights_index);
+    internal_tensors[11] = runtime_graph->getCircleTensorByIndex(cell_to_output_weights_index);
+
+    const auto input_gate_bias_index = cur_op->inputs()->operator[](12);
+    const auto forget_gate_bias_index = cur_op->inputs()->operator[](13);
+    const auto cell_gate_bias_index = cur_op->inputs()->operator[](14);
+    const auto output_gate_bias_index = cur_op->inputs()->operator[](15);
+    // optional input_gate_bias_index
+    assert(forget_gate_bias_index != -1);
+    assert(cell_gate_bias_index != -1);
+    assert(output_gate_bias_index != -1);
+    internal_tensors[12] = runtime_graph->getCircleTensorByIndex(input_gate_bias_index);
+    internal_tensors[13] = runtime_graph->getCircleTensorByIndex(forget_gate_bias_index);
+    internal_tensors[14] = runtime_graph->getCircleTensorByIndex(cell_gate_bias_index);
+    internal_tensors[15] = runtime_graph->getCircleTensorByIndex(output_gate_bias_index);
+
+    const auto projection_weights_index = cur_op->inputs()->operator[](16);
+    const auto projection_bias_index = cur_op->inputs()->operator[](17);
+    // optional projection_weights_index
+    // optional projection_bias_index
+    internal_tensors[16] = runtime_graph->getCircleTensorByIndex(projection_weights_index);
+    internal_tensors[17] = runtime_graph->getCircleTensorByIndex(projection_bias_index);
+
+    const auto output_state_index = cur_op->inputs()->operator[](18);
+    const auto cell_state_index = cur_op->inputs()->operator[](19);
+    assert(output_state_index != -1);
+    assert(cell_state_index != -1);
+    internal_tensors[18] = runtime_graph->getCircleTensorByIndex(output_state_index);
+    internal_tensors[19] = runtime_graph->getCircleTensorByIndex(cell_state_index);
+
+    const auto input_layer_norm_coefficients_index = cur_op->inputs()->operator[](20);
+    const auto forget_layer_norm_coefficients_index = cur_op->inputs()->operator[](21);
+    const auto cell_layer_norm_coefficients_index = cur_op->inputs()->operator[](22);
+    const auto output_layer_norm_coefficients_index = cur_op->inputs()->operator[](23);
+    // optional input_layer_norm_coefficients_index
+    // optional forget_layer_norm_coefficients_index
+    // optional cell_layer_norm_coefficients_index
+    // optional output_layer_norm_coefficients_index
+    internal_tensors[20] =
+      runtime_graph->getCircleTensorByIndex(input_layer_norm_coefficients_index);
+    internal_tensors[21] =
+      runtime_graph->getCircleTensorByIndex(forget_layer_norm_coefficients_index);
+    internal_tensors[22] =
+      runtime_graph->getCircleTensorByIndex(cell_layer_norm_coefficients_index);
+    internal_tensors[23] =
+      runtime_graph->getCircleTensorByIndex(output_layer_norm_coefficients_index);
+
+    const auto output_index = cur_op->outputs()->operator[](0);
+    assert(output_index != -1);
+    output_internal = runtime_graph->getCircleTensorByIndex(output_index);
+
+    options = cur_op->builtin_options_as_UnidirectionalSequenceLSTMOptions();
+  }
+
+  void validateTensorTypes()
+  {
+    LUCI_INTERPRETER_CHECK(Tensor::element_type(input()) == Tensor::element_type(output_state()));
+    LUCI_INTERPRETER_CHECK(Tensor::element_type(output()) == Tensor::element_type(input()));
+
+    for (int32_t i = 1; i < 9; ++i)
+    {
+      LUCI_INTERPRETER_CHECK(internal_tensors[i] == nullptr or
+                             Tensor::element_type(input_to_forget_weights()) ==
+                               Tensor::element_type(internal_tensors[i]));
+    }
+
+    for (int32_t i = 12; i < 16; ++i)
+    {
+      LUCI_INTERPRETER_CHECK(internal_tensors[i] == nullptr or
+                             Tensor::element_type(forget_gate_bias()) ==
+                               Tensor::element_type(internal_tensors[i]));
+    }
+  }
+
+  const circle::Tensor *input() { return internal_tensors[0]; };
+
+  const circle::Tensor *input_to_input_weights() { return internal_tensors[1]; };
+  const circle::Tensor *input_to_forget_weights() { return internal_tensors[2]; };
+  const circle::Tensor *input_to_cell_weights() { return internal_tensors[3]; };
+  const circle::Tensor *input_to_output_weights() { return internal_tensors[4]; };
+
+  const circle::Tensor *recurrent_to_input_weights() { return internal_tensors[5]; };
+  const circle::Tensor *recurrent_to_forget_weights() { return internal_tensors[6]; };
+  const circle::Tensor *recurrent_to_cell_weights() { return internal_tensors[7]; };
+  const circle::Tensor *recurrent_to_output_weights() { return internal_tensors[8]; };
+
+  const circle::Tensor *cell_to_input_weights() { return internal_tensors[9]; };
+  const circle::Tensor *cell_to_forget_weights() { return internal_tensors[10]; };
+  const circle::Tensor *cell_to_output_weights() { return internal_tensors[11]; };
+
+  const circle::Tensor *input_gate_bias() { return internal_tensors[12]; };
+  const circle::Tensor *forget_gate_bias() { return internal_tensors[13]; };
+  const circle::Tensor *cell_gate_bias() { return internal_tensors[14]; };
+  const circle::Tensor *output_gate_bias() { return internal_tensors[15]; };
+
+  const circle::Tensor *projection_weights() { return internal_tensors[16]; };
+  const circle::Tensor *projection_bias() { return internal_tensors[17]; };
+
+  const circle::Tensor *output_state() { return internal_tensors[18]; };
+  const circle::Tensor *cell_state() { return internal_tensors[19]; };
+
+  const circle::Tensor *input_layer_norm_coefficients() { return internal_tensors[20]; };
+  const circle::Tensor *forget_layer_norm_coefficients() { return internal_tensors[21]; };
+  const circle::Tensor *cell_layer_norm_coefficients() { return internal_tensors[22]; };
+  const circle::Tensor *output_layer_norm_coefficients() { return internal_tensors[23]; };
+  const circle::Tensor *output() { return output_internal; };
+
+  const circle::UnidirectionalSequenceLSTMOptions *options;
+
+  const circle::Tensor *get_internal_tensor(int i) { return internal_tensors[i]; }
+
+private:
+  const circle::Tensor *output_internal;
+  const circle::Tensor *internal_tensors[24];
+};
+
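+// Per-gate fully connected parameters. output_multiplier and output_shift encode the
+// real rescale factor as a fixed-point multiplier and shift (see quantizeMultiplier);
+// the quantized_activation_* and float_activation_* fields hold the clamp range for the
+// quantized and float paths respectively.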
+struct FullyConnectedParams
+{
+  int32_t input_offset;
+  int32_t weights_offset;
+  int32_t output_offset;
+  int32_t output_multiplier;
+  int32_t output_shift;
+  int32_t quantized_activation_min;
+  int32_t quantized_activation_max;
+  float float_activation_min;
+  float float_activation_max;
+};
+
+struct GateParameters
+{
+  FullyConnectedParams input_fc_params;
+  FullyConnectedParams recurrent_fc_params;
+};
+
+struct ArithmeticParams
+{
+  int32_t input1_offset;
+  int32_t input2_offset;
+  int32_t quantized_activation_min;
+  int32_t quantized_activation_max;
+  int32_t output_offset;
+  int32_t output_multiplier;
+  int32_t output_shift;
+  float float_activation_min;
+  float float_activation_max;
+};
+
+struct InterGateParameters
+{
+  ArithmeticParams forget_cell_mul_params;
+  ArithmeticParams input_mul_params;
+  ArithmeticParams output_mul_params;
+};
+
+struct CellStateInfo
+{
+  float cell_clip;
+  // Clipping range for the cell state. Only a 16-bit cell state is supported
+  // (this could be generalized through templatization).
+  int16_t quantized_cell_clip;
+  // 2^-cell_state_scale_power = cell state scale, required by integer tanh
+  // computation
+  int32_t cell_state_scale_power;
+};
+
+struct LSTMParameters
+{
+  GateParameters forget_gate_parameters;
+  GateParameters input_gate_parameters;
+  GateParameters cell_gate_parameters;
+  GateParameters output_gate_parameters;
+  InterGateParameters inter_gate_parameters;
+};
+
+} // namespace lstm
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_UNIDIRECTIONAL_SEQUENCE_LSTM_H
diff --git a/onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.test.cpp b/onert-micro/luci-interpreter/src/kernels/UnidirectionalSequenceLSTM.test.cpp
new file mode 100644 (file)
index 0000000..df059cf
--- /dev/null
@@ -0,0 +1,565 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/UnidirectionalSequenceLSTM.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class UnidirectionalSequenceLSTMTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// NOTE from NoCifgNoPeepholeNoProjectionNoClippingUnidirectionalLstmTest
+TEST_F(UnidirectionalSequenceLSTMTest, FloatTest)
+{
+  const int32_t n_batch = 1;
+  const int32_t n_input = 2;
+  const int32_t n_cell = 4;
+  const int32_t n_output = 4;
+  const int32_t sequence_length = 3;
+
+  std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589,  -0.34550029,
+                                               0.04266912,  -0.15680569, -0.34856534, 0.43890524};
+
+  std::vector<float> input_to_cell_weights = {-0.50013041, 0.1370284,  0.11810488, 0.2013163,
+                                              -0.20583314, 0.44344562, 0.22077113, -0.29909778};
+
+  std::vector<float> input_to_forget_weights = {0.09701663,  0.20334584, -0.50592935, -0.31343272,
+                                                -0.40032279, 0.44781327, 0.01387155,  -0.35593212};
+
+  std::vector<float> input_to_output_weights = {-0.25065863, -0.28290087, 0.04613829, 0.40525138,
+                                                0.44272184,  0.03897077,  -0.1556896, 0.19487578};
+
+  std::vector<float> input_gate_bias = {0., 0., 0., 0.};
+  std::vector<float> forget_gate_bias = {1., 1., 1., 1.};
+  std::vector<float> cell_gate_bias = {0., 0., 0., 0.};
+  std::vector<float> output_gate_bias = {0., 0., 0., 0.};
+
+  std::vector<float> recurrent_to_input_weights = {
+    -0.0063535,  -0.2042388,  0.31454784,  -0.35746509, 0.28902304, 0.08183324,
+    -0.16555229, 0.02286911,  -0.13566875, 0.03034258,  0.48091322, -0.12528998,
+    0.24077177,  -0.51332325, -0.33502164, 0.10629296};
+
+  std::vector<float> recurrent_to_forget_weights = {
+    -0.48684245, -0.06655136, 0.42224967,  0.2112639,   0.27654213, 0.20864892,
+    -0.07646349, 0.45877004,  0.00141793,  -0.14609534, 0.36447752, 0.09196436,
+    0.28053468,  0.01560611,  -0.20127171, -0.01140004};
+
+  std::vector<float> recurrent_to_cell_weights = {
+    -0.3407414,  0.24443203,  -0.2078532,  0.26320225,  0.05695659, -0.00123841,
+    -0.4744786,  -0.35869038, -0.06418842, -0.13502428, -0.501764,  0.22830659,
+    -0.46367589, 0.26016325,  -0.03894562, -0.16368064};
+
+  std::vector<float> recurrent_to_output_weights = {
+    0.43385774,  -0.17194885, 0.2718237,  0.09215671,  0.24107647, -0.39835793,
+    0.18212086,  0.01301402,  0.48572797, -0.50656658, 0.20047462, -0.20607421,
+    -0.51818722, -0.15390486, 0.0468148,  0.39922136};
+
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Shape input_to_cell_weights_shape{n_cell, n_input};
+  Shape input_to_forget_weights_shape{n_cell, n_input};
+  Shape input_to_output_weights_shape{n_cell, n_input};
+
+  Shape input_gate_bias_shape{n_cell};
+  Shape forget_gate_bias_shape{n_cell};
+  Shape cell_gate_bias_shape{n_cell};
+  Shape output_gate_bias_shape{n_cell};
+
+  Shape recurrent_to_input_weights_shape{n_cell, n_output};
+  Shape recurrent_to_cell_weights_shape{n_cell, n_output};
+  Shape recurrent_to_forget_weights_shape{n_cell, n_output};
+  Shape recurrent_to_output_weights_shape{n_cell, n_output};
+
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+  Tensor input_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_cell_weights_shape, input_to_cell_weights, _memory_manager.get());
+  Tensor input_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_forget_weights_shape, input_to_forget_weights, _memory_manager.get());
+  Tensor input_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_output_weights_shape, input_to_output_weights, _memory_manager.get());
+
+  Tensor input_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_gate_bias_shape, input_gate_bias, _memory_manager.get());
+  Tensor forget_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    forget_gate_bias_shape, forget_gate_bias, _memory_manager.get());
+  Tensor cell_gate_bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(cell_gate_bias_shape, cell_gate_bias, _memory_manager.get());
+  Tensor output_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    output_gate_bias_shape, output_gate_bias, _memory_manager.get());
+
+  Tensor recurrent_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_input_weights_shape, recurrent_to_input_weights, _memory_manager.get());
+  Tensor recurrent_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_cell_weights_shape, recurrent_to_cell_weights, _memory_manager.get());
+  Tensor recurrent_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_forget_weights_shape, recurrent_to_forget_weights, _memory_manager.get());
+  Tensor recurrent_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_output_weights_shape, recurrent_to_output_weights, _memory_manager.get());
+
+  std::vector<float> input_data{2., 3., 3., 4., 1., 1.};
+  Shape input_shape{sequence_length, n_batch, n_input};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+  Shape output_state_shape{n_batch, n_output};
+  Tensor output_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  output_state_tensor.resize(output_state_shape);
+
+  Shape cell_state_shape{n_batch, n_cell};
+  Tensor cell_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  cell_state_tensor.resize(cell_state_shape);
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 0.0;
+  params.proj_clip = 0.0;
+  params.time_major = true;
+  params.asymmetric_quantize_inputs = false;
+
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_forget_weights_tensor,
+    &input_to_cell_weights_tensor, &input_to_output_weights_tensor,
+    &recurrent_to_input_weights_tensor, &recurrent_to_forget_weights_tensor,
+    &recurrent_to_cell_weights_tensor, &recurrent_to_output_weights_tensor, nullptr, nullptr,
+    nullptr, &input_gate_bias_tensor, &forget_gate_bias_tensor, &cell_gate_bias_tensor,
+    &output_gate_bias_tensor, nullptr, nullptr, &output_state_tensor, &cell_state_tensor, nullptr,
+    nullptr, nullptr, nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params);
+
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(output_state_tensor);
+  _memory_manager->allocate_memory(cell_state_tensor);
+  _memory_manager->allocate_memory(scratchpad_1);
+  _memory_manager->allocate_memory(scratchpad_2);
+  _memory_manager->allocate_memory(scratchpad_3);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{-0.02973187, 0.1229473,  0.20885126, -0.15358765,
+                                     -0.03716109, 0.12507336, 0.41193449, -0.20860538,
+                                     -0.15053082, 0.09120187, 0.24278517, -0.12222792};
+
+  std::vector<float> ref_output_shape{sequence_length, n_batch, n_output};
+  const float tolerance = 1e-5;
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(UnidirectionalSequenceLSTMTest, FloatTest_batch)
+{
+  const int32_t n_batch = 1;
+  const int32_t n_input = 2;
+  const int32_t n_cell = 4;
+  const int32_t n_output = 4;
+  const int32_t sequence_length = 3;
+
+  std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589,  -0.34550029,
+                                               0.04266912,  -0.15680569, -0.34856534, 0.43890524};
+
+  std::vector<float> input_to_cell_weights = {-0.50013041, 0.1370284,  0.11810488, 0.2013163,
+                                              -0.20583314, 0.44344562, 0.22077113, -0.29909778};
+
+  std::vector<float> input_to_forget_weights = {0.09701663,  0.20334584, -0.50592935, -0.31343272,
+                                                -0.40032279, 0.44781327, 0.01387155,  -0.35593212};
+
+  std::vector<float> input_to_output_weights = {-0.25065863, -0.28290087, 0.04613829, 0.40525138,
+                                                0.44272184,  0.03897077,  -0.1556896, 0.19487578};
+
+  std::vector<float> input_gate_bias = {0., 0., 0., 0.};
+  std::vector<float> forget_gate_bias = {1., 1., 1., 1.};
+  std::vector<float> cell_gate_bias = {0., 0., 0., 0.};
+  std::vector<float> output_gate_bias = {0., 0., 0., 0.};
+
+  std::vector<float> recurrent_to_input_weights = {
+    -0.0063535,  -0.2042388,  0.31454784,  -0.35746509, 0.28902304, 0.08183324,
+    -0.16555229, 0.02286911,  -0.13566875, 0.03034258,  0.48091322, -0.12528998,
+    0.24077177,  -0.51332325, -0.33502164, 0.10629296};
+
+  std::vector<float> recurrent_to_forget_weights = {
+    -0.48684245, -0.06655136, 0.42224967,  0.2112639,   0.27654213, 0.20864892,
+    -0.07646349, 0.45877004,  0.00141793,  -0.14609534, 0.36447752, 0.09196436,
+    0.28053468,  0.01560611,  -0.20127171, -0.01140004};
+
+  std::vector<float> recurrent_to_cell_weights = {
+    -0.3407414,  0.24443203,  -0.2078532,  0.26320225,  0.05695659, -0.00123841,
+    -0.4744786,  -0.35869038, -0.06418842, -0.13502428, -0.501764,  0.22830659,
+    -0.46367589, 0.26016325,  -0.03894562, -0.16368064};
+
+  std::vector<float> recurrent_to_output_weights = {
+    0.43385774,  -0.17194885, 0.2718237,  0.09215671,  0.24107647, -0.39835793,
+    0.18212086,  0.01301402,  0.48572797, -0.50656658, 0.20047462, -0.20607421,
+    -0.51818722, -0.15390486, 0.0468148,  0.39922136};
+
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Shape input_to_cell_weights_shape{n_cell, n_input};
+  Shape input_to_forget_weights_shape{n_cell, n_input};
+  Shape input_to_output_weights_shape{n_cell, n_input};
+
+  Shape input_gate_bias_shape{n_cell};
+  Shape forget_gate_bias_shape{n_cell};
+  Shape cell_gate_bias_shape{n_cell};
+  Shape output_gate_bias_shape{n_cell};
+
+  Shape recurrent_to_input_weights_shape{n_cell, n_output};
+  Shape recurrent_to_cell_weights_shape{n_cell, n_output};
+  Shape recurrent_to_forget_weights_shape{n_cell, n_output};
+  Shape recurrent_to_output_weights_shape{n_cell, n_output};
+
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+  Tensor input_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_cell_weights_shape, input_to_cell_weights, _memory_manager.get());
+  Tensor input_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_forget_weights_shape, input_to_forget_weights, _memory_manager.get());
+  Tensor input_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_output_weights_shape, input_to_output_weights, _memory_manager.get());
+
+  Tensor input_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_gate_bias_shape, input_gate_bias, _memory_manager.get());
+  Tensor forget_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    forget_gate_bias_shape, forget_gate_bias, _memory_manager.get());
+  Tensor cell_gate_bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(cell_gate_bias_shape, cell_gate_bias, _memory_manager.get());
+  Tensor output_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    output_gate_bias_shape, output_gate_bias, _memory_manager.get());
+
+  Tensor recurrent_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_input_weights_shape, recurrent_to_input_weights, _memory_manager.get());
+  Tensor recurrent_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_cell_weights_shape, recurrent_to_cell_weights, _memory_manager.get());
+  Tensor recurrent_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_forget_weights_shape, recurrent_to_forget_weights, _memory_manager.get());
+  Tensor recurrent_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_output_weights_shape, recurrent_to_output_weights, _memory_manager.get());
+
+  std::vector<float> input_data{2., 3., 3., 4., 1., 1.};
+  Shape input_shape{n_batch, sequence_length, n_input};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+  Shape output_state_shape{n_batch, n_output};
+  Tensor output_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  output_state_tensor.resize(output_state_shape);
+
+  Shape cell_state_shape{n_batch, n_cell};
+  Tensor cell_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  cell_state_tensor.resize(cell_state_shape);
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 0.0;
+  params.proj_clip = 0.0;
+  params.time_major = false;
+  params.asymmetric_quantize_inputs = false;
+
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_forget_weights_tensor,
+    &input_to_cell_weights_tensor, &input_to_output_weights_tensor,
+    &recurrent_to_input_weights_tensor, &recurrent_to_forget_weights_tensor,
+    &recurrent_to_cell_weights_tensor, &recurrent_to_output_weights_tensor, nullptr, nullptr,
+    nullptr, &input_gate_bias_tensor, &forget_gate_bias_tensor, &cell_gate_bias_tensor,
+    &output_gate_bias_tensor, nullptr, nullptr, &output_state_tensor, &cell_state_tensor, nullptr,
+    nullptr, nullptr, nullptr, &output_tensor, &output_state_tensor, &cell_state_tensor,
+    &scratchpad_1, params);
+
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(output_state_tensor);
+  _memory_manager->allocate_memory(cell_state_tensor);
+  _memory_manager->allocate_memory(scratchpad_1);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{-0.02973187, 0.1229473,  0.20885126, -0.15358765,
+                                     -0.03716109, 0.12507336, 0.41193449, -0.20860538,
+                                     -0.15053082, 0.09120187, 0.24278517, -0.12222792};
+
+  std::vector<float> ref_output_shape{n_batch, sequence_length, n_output};
+  const float tolerance = 1e-5;
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(UnidirectionalSequenceLSTMTest, FloatTest_simple)
+{
+  const int32_t n_batch = 1;
+  const int32_t n_input = 1;
+  const int32_t n_cell = 1;
+  const int32_t n_output = 1;
+  const int32_t sequence_length = 1;
+
+  std::vector<float> input_to_input_weights = {0.329067};
+  std::vector<float> input_to_forget_weights = {0.308059};
+  std::vector<float> input_to_cell_weights = {0.152916};
+  std::vector<float> input_to_output_weights = {-0.476033};
+
+  std::vector<float> input_gate_bias = {0.};
+  std::vector<float> forget_gate_bias = {1.};
+  std::vector<float> cell_gate_bias = {0.};
+  std::vector<float> output_gate_bias = {0.};
+
+  std::vector<float> recurrent_to_input_weights = {0.207806};
+  std::vector<float> recurrent_to_forget_weights = {0.028718};
+  std::vector<float> recurrent_to_cell_weights = {-0.182756};
+  std::vector<float> recurrent_to_output_weights = {-0.960517};
+
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Shape input_to_cell_weights_shape{n_cell, n_input};
+  Shape input_to_forget_weights_shape{n_cell, n_input};
+  Shape input_to_output_weights_shape{n_cell, n_input};
+
+  Shape input_gate_bias_shape{n_cell};
+  Shape forget_gate_bias_shape{n_cell};
+  Shape cell_gate_bias_shape{n_cell};
+  Shape output_gate_bias_shape{n_cell};
+
+  Shape recurrent_to_input_weights_shape{n_cell, n_output};
+  Shape recurrent_to_cell_weights_shape{n_cell, n_output};
+  Shape recurrent_to_forget_weights_shape{n_cell, n_output};
+  Shape recurrent_to_output_weights_shape{n_cell, n_output};
+
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+  Tensor input_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_cell_weights_shape, input_to_cell_weights, _memory_manager.get());
+  Tensor input_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_forget_weights_shape, input_to_forget_weights, _memory_manager.get());
+  Tensor input_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_output_weights_shape, input_to_output_weights, _memory_manager.get());
+
+  Tensor input_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_gate_bias_shape, input_gate_bias, _memory_manager.get());
+  Tensor forget_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    forget_gate_bias_shape, forget_gate_bias, _memory_manager.get());
+  Tensor cell_gate_bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(cell_gate_bias_shape, cell_gate_bias, _memory_manager.get());
+  Tensor output_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    output_gate_bias_shape, output_gate_bias, _memory_manager.get());
+
+  Tensor recurrent_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_input_weights_shape, recurrent_to_input_weights, _memory_manager.get());
+  Tensor recurrent_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_cell_weights_shape, recurrent_to_cell_weights, _memory_manager.get());
+  Tensor recurrent_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_forget_weights_shape, recurrent_to_forget_weights, _memory_manager.get());
+  Tensor recurrent_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_output_weights_shape, recurrent_to_output_weights, _memory_manager.get());
+
+  std::vector<float> input_data{0.03653763};
+  Shape input_shape{n_batch, sequence_length, n_input};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+  Shape output_state_shape{n_batch, n_output};
+  Tensor output_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  output_state_tensor.resize(output_state_shape);
+
+  Shape cell_state_shape{n_batch, n_cell};
+  Tensor cell_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  cell_state_tensor.resize(cell_state_shape);
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 10.0;
+  params.proj_clip = 0.0;
+  params.time_major = false;
+  params.asymmetric_quantize_inputs = false;
+
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_forget_weights_tensor,
+    &input_to_cell_weights_tensor, &input_to_output_weights_tensor,
+    &recurrent_to_input_weights_tensor, &recurrent_to_forget_weights_tensor,
+    &recurrent_to_cell_weights_tensor, &recurrent_to_output_weights_tensor, nullptr, nullptr,
+    nullptr, &input_gate_bias_tensor, &forget_gate_bias_tensor, &cell_gate_bias_tensor,
+    &output_gate_bias_tensor, nullptr, nullptr, &output_state_tensor, &cell_state_tensor, nullptr,
+    nullptr, nullptr, nullptr, &output_tensor, &output_state_tensor, &cell_state_tensor,
+    &scratchpad_1, params);
+
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(output_state_tensor);
+  _memory_manager->allocate_memory(cell_state_tensor);
+  _memory_manager->allocate_memory(scratchpad_1);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{0.00139296};
+  std::vector<float> ref_output_shape{n_batch, sequence_length, n_output};
+  const float tolerance = 1e-5;
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(UnidirectionalSequenceLSTMTest, Unsupported_Type_Configure_NEG)
+{
+  const int32_t n_batch = 1;
+  const int32_t n_input = 2;
+  const int32_t n_cell = 4;
+  const int32_t n_output = 4;
+  const int32_t sequence_length = 3;
+
+  std::vector<int8_t> input_data{2, 3, 3, 4, 1, 1}; // int8 input is not supported as of now
+  Shape input_shape{sequence_length, n_batch, n_input};
+  Tensor input_tensor =
+    makeInputTensor<DataType::S8>(input_shape, input_data, _memory_manager.get());
+
+  std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589,  -0.34550029,
+                                               0.04266912,  -0.15680569, -0.34856534, 0.43890524};
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 0.0;
+  params.proj_clip = 0.0;
+  params.time_major = true;
+  params.asymmetric_quantize_inputs = false;
+
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    nullptr, nullptr, nullptr, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, nullptr,
+    nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params);
+
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(UnidirectionalSequenceLSTMTest, Invalid_Input_Shape_NEG)
+{
+  const int32_t n_batch = 1;
+  const int32_t n_input = 2;
+  const int32_t n_cell = 4;
+  const int32_t n_output = 4;
+  const int32_t sequence_length = 3;
+
+  std::vector<float> input_data{2., 3., 3., 4., 1., 1.};
+  Shape input_shape{sequence_length, n_input}; // wrong: the batch dimension is missing
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+  std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589,  -0.34550029,
+                                               0.04266912,  -0.15680569, -0.34856534, 0.43890524};
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 0.0;
+  params.proj_clip = 0.0;
+  params.time_major = true;
+  params.asymmetric_quantize_inputs = false;
+
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    nullptr, nullptr, nullptr, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, nullptr,
+    nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params);
+
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(UnidirectionalSequenceLSTMTest, Invalid_Input_Shape_2_NEG)
+{
+  const int32_t n_batch = 1;
+  const int32_t n_input = 2;
+  const int32_t n_cell = 4;
+  const int32_t n_output = 4;
+  const int32_t sequence_length = 3;
+
+  std::vector<float> input_data{2., 3., 3., 4., 1., 1.};
+  Shape input_shape{sequence_length, n_batch, n_input};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+  std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589,  -0.34550029,
+                                               0.04266912,  -0.15680569, -0.34856534, 0.43890524};
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 0.0;
+  params.proj_clip = 0.0;
+  params.time_major = true;
+  params.asymmetric_quantize_inputs = false;
+
+  // NOTE provide wrongly shaped inputs
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    nullptr, nullptr, nullptr, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, nullptr,
+    nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params);
+
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Unpack.cpp b/onert-micro/luci-interpreter/src/kernels/Unpack.cpp
new file mode 100644 (file)
index 0000000..80f4d1f
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Unpack.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Unpack::Unpack(const Tensor *input, std::vector<Tensor *> outputs, const UnpackParams &params)
+  : KernelWithParams<UnpackParams>({input}, std::move(outputs), params)
+{
+}
+
+void Unpack::configure()
+{
+  const Shape &input_shape = input()->shape();
+
+  int axis = _params.axis;
+  if (axis < 0)
+    axis += input()->shape().num_dims();
+  assert(axis >= 0 && axis < input_shape.num_dims());
+
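+  // Each output tensor has the input shape with the unpack axis removed.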
+  Shape output_shape(input_shape.num_dims() - 1);
+  int out_index = 0;
+  for (int in_index = 0; in_index < input_shape.num_dims(); ++in_index)
+  {
+    if (in_index != axis)
+      output_shape.dim(out_index++) = input_shape.dim(in_index);
+  }
+
+  // TODO: enable this only for kernels with dynamic shapes
+  for (Tensor *output : _outputs)
+  {
+    assert(output->element_type() == input()->element_type());
+    output->resize(output_shape);
+  }
+}
+
+template <typename T> void Unpack::executeImpl() const
+{
+  tflite::UnpackParams params{};
+  params.axis = _params.axis;
+  params.num_split = _outputs.size();
+  VectorOfTensors<T, false> all_outputs(_outputs);
+  tflite::reference_ops::Unpack<T>(params, getTensorShape(input()), getTensorData<T>(input()),
+                                   **all_outputs.shapes(), all_outputs.data());
+}
+
+void Unpack::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      return executeImpl<float>();
+    case DataType::U8:
+      return executeImpl<uint8_t>();
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Utils.cpp b/onert-micro/luci-interpreter/src/kernels/Utils.cpp
new file mode 100644 (file)
index 0000000..0810b82
--- /dev/null
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Utils.h"
+
+#include <cassert>
+#include <cmath>
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+template <typename T>
+void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
+{
+  switch (activation)
+  {
+    case Activation::NONE:
+      *activation_min = std::numeric_limits<T>::lowest();
+      *activation_max = std::numeric_limits<T>::max();
+      break;
+    case Activation::RELU:
+      *activation_min = 0;
+      *activation_max = std::numeric_limits<T>::max();
+      break;
+    case Activation::RELU_N1_TO_1:
+      *activation_min = -1;
+      *activation_max = 1;
+      break;
+    case Activation::RELU6:
+      *activation_min = 0;
+      *activation_max = 6;
+      break;
+    default:
+      assert(false && "Unsupported activation.");
+  }
+}
+
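+// For each row i of the row-major [n_row x n_col] matrix, accumulates
+// scalar * (sum of the row's elements) into output[i].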
+void matrixScalarMultiplyAccumulate(const int8_t *matrix, int32_t scalar, int32_t n_row,
+                                    int32_t n_col, int32_t *output)
+{
+  for (int i = 0; i < n_row; ++i)
+  {
+    int32_t row_sum = 0;
+    for (int j = 0; j < n_col; ++j)
+    {
+      row_sum += *matrix++;
+    }
+    output[i] += row_sum * scalar;
+  }
+}
+
+template void calculateActivationRange(Activation activation, float *activation_min,
+                                       float *activation_max);
+template void calculateActivationRange(Activation activation, int32_t *activation_min,
+                                       int32_t *activation_max);
+template void calculateActivationRange(Activation activation, int64_t *activation_min,
+                                       int64_t *activation_max);
+
+#ifndef DIS_QUANT
+
+static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
+                                                  int32_t zero_point, float scale,
+                                                  int32_t *activation_min, int32_t *activation_max)
+{
+  auto quantize = [scale, zero_point](float x) {
+    return zero_point + static_cast<int32_t>(std::round(x / scale));
+  };
+
+  switch (activation)
+  {
+    case Activation::NONE:
+    case Activation::TANH:
+      *activation_min = qmin;
+      *activation_max = qmax;
+      break;
+    case Activation::RELU:
+      *activation_min = std::max(qmin, quantize(0.0f));
+      *activation_max = qmax;
+      break;
+    case Activation::RELU_N1_TO_1:
+      *activation_min = std::max(qmin, quantize(-1.0f));
+      *activation_max = std::min(qmax, quantize(1.0f));
+      break;
+    case Activation::RELU6:
+      *activation_min = std::max(qmin, quantize(0.0f));
+      *activation_max = std::min(qmax, quantize(6.0f));
+      break;
+    default:
+      assert(false && "Unsupported activation.");
+  }
+}
+
+static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
+                                                  const circle::Tensor *output,
+                                                  int32_t *activation_min, int32_t *activation_max)
+{
+  const float scale = Tensor::scale(output);
+  const int32_t zero_point = Tensor::zero_point(output);
+
+  calculateActivationRangeQuantizedImpl(activation, qmin, qmax, zero_point, scale,
+                                        activation_min, activation_max);
+}
+
+void calculateActivationRangeQuantized(Activation activation, int32_t output_zero_point,
+                                       float output_scale, DataType data_type,
+                                       int32_t *activation_min, int32_t *activation_max)
+{
+  int32_t qmin{};
+  int32_t qmax{};
+  switch (data_type)
+  {
+    case DataType::U8:
+      qmin = 0;
+      qmax = std::numeric_limits<uint8_t>::max();
+      break;
+    case DataType::S8:
+      qmin = -std::numeric_limits<int8_t>::max();
+      qmax = std::numeric_limits<int8_t>::max();
+      break;
+    case DataType::S16:
+      // For now, assume that signed int16 type implies signed symmetric quantization.
+      assert(output_zero_point == 0);
+      qmin = -std::numeric_limits<int16_t>::max();
+      qmax = std::numeric_limits<int16_t>::max();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+
+  calculateActivationRangeQuantizedImpl(activation, qmin, qmax, output_zero_point, output_scale,
+                                        activation_min, activation_max);
+}
+
+void calculateActivationRangeQuantized(Activation activation, const circle::Tensor *output,
+                                       int32_t *activation_min, int32_t *activation_max)
+{
+  assert(Tensor::zero_points(output).size() == 1);
+  const float scale = Tensor::scale(output);
+  const int32_t zero_point = Tensor::zero_point(output);
+  calculateActivationRangeQuantized(activation, zero_point, scale, Tensor::element_type(output),
+                                    activation_min, activation_max);
+}
+
+void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
+{
+  if (double_multiplier == 0.0)
+  {
+    *quantized_multiplier = 0;
+    *shift = 0;
+    return;
+  }
+
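+  // std::frexp splits the multiplier into a significand q in [0.5, 1) and an exponent
+  // (*shift) such that double_multiplier == q * 2^(*shift); q is then rounded to Q0.31.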
+  const double q = std::frexp(double_multiplier, shift);
+  auto q_fixed = static_cast<int64_t>(std::round(q * (int64_t(1) << 31)));
+
+  if (q_fixed == (int64_t(1) << 31))
+  {
+    q_fixed /= 2;
+    ++*shift;
+  }
+  assert(q_fixed <= std::numeric_limits<int32_t>::max());
+  // A shift amount smaller than -31 would cause all bits to be shifted out
+  // and thus all results would be zero. We implement that instead with
+  // q_fixed==0, so as to avoid hitting issues with right-shift
+  // operations with shift amounts greater than 31. Note that this happens
+  // roughly when abs(double_multiplier) < 2^-31 and the present handling means
+  // that we're effectively flushing tiny double_multiplier's to zero.
+  // We could conceivably handle values in the range (roughly) [32, 63]
+  // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
+  // the present handling is just doing 'flush denormals to zero'. We could
+  // reconsider and actually generate nonzero denormals if a need arises.
+  if (*shift < -31)
+  {
+    *shift = 0;
+    q_fixed = 0;
+  }
+  *quantized_multiplier = static_cast<int32_t>(q_fixed);
+}
+
+void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
+                                         int *left_shift)
+{
+  assert(double_multiplier < 1.0);
+  assert(double_multiplier > 0.0);
+  int shift;
+  quantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
+  assert(shift <= 0);
+  *left_shift = shift;
+}
+#endif
+
+tflite::RuntimeShape calculateShapeForBroadcast(const circle::Tensor *input1,
+                                                const circle::Tensor *input2)
+{
+  const int num_input1_dims = Tensor::num_dims(input1);
+  const int num_input2_dims = Tensor::num_dims(input2);
+  const int num_out_dims = std::max(num_input1_dims, num_input2_dims);
+  tflite::RuntimeShape output_shape(num_out_dims);
+
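+  // Dimensions are matched from the innermost (last) one outwards; a missing or size-1
+  // dimension broadcasts against the corresponding dimension of the other input.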
+  for (int i = 0; i < num_out_dims; ++i)
+  {
+    const int32_t input1_dim =
+      i < num_input1_dims ? Tensor::dim(input1, num_input1_dims - i - 1) : 1;
+    const int32_t input2_dim =
+      i < num_input2_dims ? Tensor::dim(input2, num_input2_dims - i - 1) : 1;
+
+    bool need_broadcast = input1_dim != input2_dim;
+    bool can_broadcast = input1_dim == 1 || input2_dim == 1;
+    LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast);
+
+    output_shape.SetDim(num_out_dims - i - 1, std::max(input1_dim, input2_dim));
+  }
+
+  return output_shape;
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/kernels/Utils.h b/onert-micro/luci-interpreter/src/kernels/Utils.h
new file mode 100644 (file)
index 0000000..8d3cd69
--- /dev/null
@@ -0,0 +1,305 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_UTILS_H
+#define LUCI_INTERPRETER_KERNELS_UTILS_H
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <tensorflow/lite/kernels/internal/types.h>
+#include <cassert>
+#include <cstdint>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+using Activation = luci_interpreter::FusedActFunc;
+
+#define LUCI_INTERPRETER_CHECK(cond)                 \
+  if (!(cond))                                       \
+  {                                                  \
+    assert(false && "LUCI_INTERPRETER_CHECK fails"); \
+  }
+
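+// Per-side padding needed so that a filter of the given (dilated) size applied with the given
+// stride produces out_size outputs from in_size inputs; negative padding is clamped to zero.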
+inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
+                              int32_t filter_size, int32_t out_size)
+{
+  const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+  const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
+  return padding > 0 ? padding : 0;
+}
+
+inline int32_t computePaddingWithOffset(int32_t stride, int32_t dilation_rate, int32_t in_size,
+                                        int32_t filter_size, int32_t out_size, int32_t *offset)
+{
+  int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+  int32_t total_padding = ((out_size - 1) * stride + effective_filter_size - in_size);
+  total_padding = total_padding > 0 ? total_padding : 0;
+  *offset = total_padding % 2;
+  return total_padding / 2;
+}
+
+inline int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size,
+                                 int32_t stride, int32_t dilation_rate = 1)
+{
+  const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+  switch (padding)
+  {
+    case Padding::SAME:
+      return (image_size + stride - 1) / stride;
+    case Padding::VALID:
+      return (image_size + stride - effective_filter_size) / stride;
+    default:
+      assert(false);
+      return 0;
+  }
+}
+
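+// Flat offset of element (d0, d1, d2, d3) in a contiguous row-major 4-D tensor (e.g. NHWC).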
+inline int32_t calcOffset(const circle::Tensor *tensor, int32_t d0, int32_t d1, int32_t d2,
+                          int32_t d3)
+{
+
+  return ((d0 * Tensor::dim(tensor, 1) + d1) * Tensor::dim(tensor, 2) + d2) *
+           Tensor::dim(tensor, 3) +
+         d3;
+}
+
+template <typename T>
+void calculateActivationRange(Activation activation, T *activation_min, T *activation_max);
+
+tflite::RuntimeShape calculateShapeForBroadcast(const circle::Tensor *input1,
+                                                const circle::Tensor *input2);
+
+// Helper wrapper to hide broadcast logic
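+// A single-element vector is read with stride 0, so that element is returned for every index
+// (e.g. a per-tensor quantization parameter broadcast over channels).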
+template <typename T> class BroadcastableWrapper
+{
+public:
+  BroadcastableWrapper(const std::vector<T> &v) : _v(v), _stride(v.size() == 1 ? 0 : 1) {}
+
+  T operator[](int idx) { return _v[idx * _stride]; }
+
+private:
+  const std::vector<T> &_v;
+  int _stride;
+};
+
+inline tflite::RuntimeShape getTensorShape(const circle::Tensor *tensor)
+{
+  if (tensor == nullptr)
+    return tflite::RuntimeShape();
+
+  tflite::RuntimeShape runtime_shape(Tensor::num_dims(tensor));
+  for (int i = 0; i < Tensor::num_dims(tensor); ++i)
+  {
+    runtime_shape.SetDim(i, Tensor::dim(tensor, i));
+  }
+  return runtime_shape;
+}
+
+template <typename T> const T *getTensorData(const uint8_t *tensor_data)
+{
+  return tensor_data != nullptr ? reinterpret_cast<const T *>(tensor_data) : nullptr;
+}
+
+template <typename T> T *getTensorData(uint8_t *tensor_data)
+{
+  return tensor_data != nullptr ? reinterpret_cast<T *>(tensor_data) : nullptr;
+}
+
+// A list of tensors in a format that can be used by kernels like split and
+// concatenation.
+template <typename T, bool is_const> class VectorOfTensors
+{
+public:
+  using ElementT = typename std::conditional<is_const, const T, T>::type;
+  using TensorT = typename std::conditional<is_const, const Tensor, Tensor>::type;
+
+  // Build with the tensors in 'tensor_list'.
+  explicit VectorOfTensors(const std::vector<TensorT *> &tensor_list)
+  {
+    const int num_tensors = tensor_list.size();
+
+    all_data_.reserve(num_tensors);
+    all_shape_.reserve(num_tensors);
+    all_shape_ptr_.reserve(num_tensors);
+
+    for (TensorT *tensor : tensor_list)
+    {
+      all_data_.push_back(getTensorData<T>(tensor));
+      all_shape_.push_back(getTensorShape(tensor));
+    }
+
+    // Taking the pointer from inside a std::vector is only OK if the vector is
+    // never modified, so we populate all_shape in the previous loop and then we
+    // are free to grab iterators here.
+    for (tflite::RuntimeShape &shape : all_shape_)
+    {
+      all_shape_ptr_.push_back(&shape);
+    }
+  }
+  // Return a pointer to the data pointers of all tensors in the list. For
+  // example:
+  //   float* const* f = v.data();
+  //   f[0][1] is the second element of the first tensor.
+  ElementT *const *data() const { return all_data_.data(); }
+
+  // Return a pointer to the shape pointers of all tensors in the list. For
+  // example:
+  //   const RuntimeShape* const* d = v.shapes();
+  //   d[1] are the dimensions of the second tensor in the list.
+  const tflite::RuntimeShape *const *shapes() const { return all_shape_ptr_.data(); }
+
+private:
+  std::vector<ElementT *> all_data_;
+  std::vector<tflite::RuntimeShape> all_shape_;
+  std::vector<tflite::RuntimeShape *> all_shape_ptr_;
+};
+
+#ifndef DIS_QUANT
+void calculateActivationRangeQuantized(Activation activation, const circle::Tensor *output,
+                                       int32_t *activation_min, int32_t *activation_max);
+void calculateActivationRangeQuantized(Activation activation, int32_t output_zero_point,
+                                       float output_scale, DataType data_type,
+                                       int32_t *activation_min, int32_t *activation_max);
+
+template <typename T> constexpr bool one_of_types() { return false; }
+
+// Checks if T is equal to one of {U,Other} types
+template <typename T, typename U, typename... Other> constexpr bool one_of_types()
+{
+  return std::is_same<T, U>::value || one_of_types<T, Other...>();
+}
+
+void matrixScalarMultiplyAccumulate(const int8_t *matrix, int32_t scalar, int32_t n_row,
+                                    int32_t n_col, int32_t *output);
+
+/**
+ * Fills activation min and max parameters depending on the given data type and activation.
+ *
+ * T is a template parameter, so after optimization only the branch required for T remains.
+ *
+ * @tparam T data type of the arithmetic operation output tensor
+ * @param p tflite ArithmeticParams to fill
+ * @param act luci_interpreter::Activation of the arithmetic operation
+ */
+template <typename T>
+void fillArithmeticActivationRange(tflite::ArithmeticParams &p, Activation act)
+{
+  static_assert(one_of_types<T, float, int32_t, int64_t>(), "Unsupported dtype");
+
+  if (std::is_same<T, float>::value)
+    calculateActivationRange(act, &p.float_activation_min, &p.float_activation_max);
+  else if (std::is_same<T, int32_t>::value)
+    calculateActivationRange(act, &p.quantized_activation_min, &p.quantized_activation_max);
+  else
+    calculateActivationRange(act, &p.int64_activation_min, &p.int64_activation_max);
+}
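+// Assumed usage sketch (illustrative only): a float binary kernel would typically prepare its
+// params as
+//   tflite::ArithmeticParams op_params{};
+//   fillArithmeticActivationRange<float>(op_params, Activation::RELU);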
+
+// Decompose a double multiplier into a Q0.31 int32 representation of its
+// significand, and shift representation of its exponent.
+//
+// Handles an arbitrary positive multiplier. The 'shift' output-value is
+// basically the 'floating-point exponent' of the multiplier:
+// Negative for a right-shift (when the multiplier is <1), positive for a
+// left-shift (when the multiplier is >1)
+void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
+
+// Decompose a double multiplier into a Q0.31 int32 representation of its
+// significand, and shift representation of NEGATIVE its exponent ---
+// this is intended as a RIGHT-shift.
+//
+// Restricted to the case where the multiplier < 1 (and non-negative).
+void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
+                                         int *left_shift);
+
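+// Effective requantization scale: (input_scale * filter_scale) / output_scale.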
+inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale,
+                                               float output_scale)
+{
+  const double input_product_scale = static_cast<double>(input_scale * filter_scale);
+  LUCI_INTERPRETER_CHECK(input_product_scale >= 0);
+  return input_product_scale / static_cast<double>(output_scale);
+}
+
+// TODO rename getQuantizedConvolutionMultiplers to something more general;
+// it is also used for non-conv operators
+inline std::vector<double> getQuantizedConvolutionMultiplers(float input_scale,
+                                                             const std::vector<float> &filter_scale,
+                                                             float output_scale)
+{
+  std::vector<double> effective_output_scales;
+  size_t n = filter_scale.size();
+  effective_output_scales.reserve(n);
+  for (size_t i = 0; i < n; ++i)
+  {
+    effective_output_scales.push_back(
+      getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale));
+  }
+  return effective_output_scales;
+}
+
+struct ChannelQuantMultipliers
+{
+  int shift;
+  int32_t multiplier;
+  ChannelQuantMultipliers() = default;
+};
+
+inline std::vector<ChannelQuantMultipliers>
+quantizeMultipliers(const std::vector<double> &effective_scale)
+{
+  size_t n = effective_scale.size();
+  std::vector<ChannelQuantMultipliers> params(n);
+  for (size_t i = 0; i < n; ++i)
+  {
+    quantizeMultiplier(effective_scale[i], &params[i].multiplier, &params[i].shift);
+  }
+  return params;
+}
+
+// A list of quantized tensors in a format that can be used by kernels like
+// split and concatenation.
+template <bool is_const> class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t, is_const>
+{
+public:
+  using typename VectorOfTensors<uint8_t, is_const>::TensorT;
+
+  // Build with the tensors in 'tensor_list'.
+  explicit VectorOfQuantizedTensors(const std::vector<TensorT *> &tensor_list)
+    : VectorOfTensors<uint8_t, is_const>(tensor_list)
+  {
+    for (TensorT *tensor : tensor_list)
+    {
+      zero_point_.push_back(tensor->zero_point());
+      scale_.push_back(tensor->scale());
+    }
+  }
+
+  const float *scale() const { return scale_.data(); }
+  const int32_t *zero_point() const { return zero_point_.data(); }
+
+private:
+  std::vector<int32_t> zero_point_;
+  std::vector<float> scale_;
+};
+#endif // DIS_QUANT
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_UTILS_H
diff --git a/onert-micro/luci-interpreter/src/loader/CMakeLists.txt b/onert-micro/luci-interpreter/src/loader/CMakeLists.txt
new file mode 100644 (file)
index 0000000..0ef63e2
--- /dev/null
@@ -0,0 +1,15 @@
+set(SOURCES
+    GraphLoader.h
+    GraphLoader.cpp
+    ModuleLoader.h
+    ModuleLoader.cpp)
+
+add_library(${LUCI_INTERPRETER_LOADER} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+  set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
+
+target_link_libraries(${LUCI_INTERPRETER_LOADER}
+        PUBLIC ${LUCI_INTERPRETER_MEMORY_MANAGER} ${LUCI_INTERPRETER_CORE}
+        PRIVATE ${LUCI_INTERPRETER_KERNELS})
diff --git a/onert-micro/luci-interpreter/src/loader/GraphLoader.cpp b/onert-micro/luci-interpreter/src/loader/GraphLoader.cpp
new file mode 100644 (file)
index 0000000..cb9dd7f
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loader/GraphLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+// TODO: add more operations
+bool isCouldBeEmplaceOperation(circle::BuiltinOperator op)
+{
+  switch (op)
+  {
+    case circle::BuiltinOperator_LOGISTIC:
+    case circle::BuiltinOperator_RESHAPE:
+    case circle::BuiltinOperator_EXPAND_DIMS:
+      return true;
+    default:
+      return false;
+  }
+}
+
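+// A tensor's buffer may be reused in place only if it is consumed by at most one operator.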
+bool isCouldBeEmplaceTensor(CircleReader *reader, const int32_t tensor_index)
+{
+  uint32_t usage_count = 0;
+  for (uint32_t i = 0; i < reader->operators().size(); ++i)
+  {
+    const auto op = reader->operators().at(i);
+    assert(op != nullptr);
+
+    for (int32_t j = 0; j < op->inputs()->size(); ++j)
+    {
+      const auto input_index = op->inputs()->operator[](j);
+      if (input_index == tensor_index)
+        usage_count++;
+
+      if (usage_count > 1)
+        return false;
+    }
+  }
+  return true;
+}
+
+} // namespace
+
+void GraphLoader::checkInplaceOps(CircleReader *reader, RuntimeGraph *runtime_graph)
+{
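+  // Mark an operator as in-place (output can reuse the input buffer) only when the input is
+  // not a graph input, the operator kind supports it, it has a single output, and the input
+  // tensor feeds no other operator.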
+  for (uint32_t i = 0; i < reader->operators().size(); ++i)
+  {
+    const auto *op = reader->operators().at(i);
+    assert(op != nullptr);
+
+    bool is_graph_input = false;
+    for (int32_t j = 0; j < op->inputs()->size(); ++j)
+    {
+      const auto input_index = op->inputs()->operator[](j);
+      if (input_index == -1)
+        continue;
+
+      const auto &inputs_indexes = reader->inputs();
+
+      is_graph_input = (std::find(inputs_indexes.begin(), inputs_indexes.end(), input_index) !=
+                        inputs_indexes.end()) or
+                       is_graph_input;
+
+      if (not is_graph_input and isCouldBeEmplaceOperation(reader->builtin_code(op)) and
+          op->outputs()->size() == 1 and isCouldBeEmplaceTensor(reader, input_index))
+      {
+        runtime_graph->addInplaceOpIndex(i);
+      }
+    }
+  }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/GraphLoader.h b/onert-micro/luci-interpreter/src/loader/GraphLoader.h
new file mode 100644 (file)
index 0000000..3265f11
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
+#define LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
+
+#include "core/RuntimeGraph.h"
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class GraphLoader
+{
+public:
+  static void checkInplaceOps(CircleReader *reader, RuntimeGraph *runtime_graph);
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
diff --git a/onert-micro/luci-interpreter/src/loader/ModuleLoader.cpp b/onert-micro/luci-interpreter/src/loader/ModuleLoader.cpp
new file mode 100644 (file)
index 0000000..0bb38c6
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModuleLoader.h"
+
+#include "GraphLoader.h"
+
+namespace luci_interpreter
+{
+
+void ModuleLoader::load(RuntimeModule *runtime_module, SimpleMemoryManager *memory_manager,
+                        const char *model_data_raw)
+{
+  const circle::Model *model = circle::GetModel(model_data_raw);
+
+  CircleReader &reader = runtime_module->getCircleReader();
+  if (!reader.parse(model))
+    assert(false && "Error during parse");
+
+  for (size_t i = 0; i < reader.num_subgraph(); ++i)
+  {
+    runtime_module->addGraph(memory_manager);
+  }
+
+#ifndef USE_STATIC_ALLOC
+  for (size_t i = 0; i < reader.num_subgraph(); ++i)
+  {
+    if (!reader.select_subgraph(i))
+      assert(false && "Error during select subgraph");
+    auto *runtime_graph = runtime_module->getRuntimeGraphAt(i);
+    // For the dynamic memory manager we can use the in-place optimization
+    GraphLoader::checkInplaceOps(&reader, runtime_graph);
+  }
+#endif // USE_STATIC_ALLOC
+
+  // For the dynamic memory manager we build the memory allocation/deallocation plan and then
+  // configure kernels. For the static memory manager we only configure kernels.
+  for (size_t i = 0; i < reader.num_subgraph(); ++i)
+  {
+    auto *runtime_graph = runtime_module->getRuntimeGraphAt(i);
+#ifdef USE_STATIC_ALLOC
+    runtime_graph->configure_kernels();
+#else
+    runtime_graph->configure();
+#endif // USE_STATIC_ALLOC
+  }
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/ModuleLoader.h b/onert-micro/luci-interpreter/src/loader/ModuleLoader.h
new file mode 100644 (file)
index 0000000..c35c72d
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_MODULELOADER_H
+#define LUCI_INTERPRETER_LOADER_MODULELOADER_H
+
+#include "core/RuntimeModule.h"
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class ModuleLoader
+{
+public:
+  static void load(RuntimeModule *runtime_module, MemoryManager *memory_manager,
+                   const char *model_data_raw);
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_MODULELOADER_H
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Add.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Add.cpp
new file mode 100644 (file)
index 0000000..2a2140b
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Add.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleAdd(std::vector<const Tensor *> &&inputs,
+                                               std::vector<Tensor *> &&outputs,
+                                               const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input1 = inputs.at(0);
+  const Tensor *input2 = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsAddOptions();
+
+  AddParams params{};
+  params.activation = luci_actfunc(options->fused_activation_function);
+
+  return std::make_unique<kernels::Add>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp b/onert-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp
new file mode 100644 (file)
index 0000000..0da72da
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ArgMax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleArgMax(std::vector<const Tensor *> &&inputs,
+                                                  std::vector<Tensor *> &&outputs,
+                                                  const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *axis = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsArgMaxOptions();
+
+  ArgMaxParams params{};
+  params.output_type = static_cast<DataType>(options->output_type);
+
+  return std::make_unique<kernels::ArgMax>(input, axis, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp b/onert-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
new file mode 100644 (file)
index 0000000..9a5af1c
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/AveragePool2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleAveragePool2D(std::vector<const Tensor *> &&inputs,
+                                                         std::vector<Tensor *> &&outputs,
+                                                         const uint32_t op_index,
+                                                         KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsPool2DOptions();
+
+  Pool2DParams params{};
+  params.padding = luci_padding(options->padding);
+  params.filter_height = options->filter_height;
+  params.filter_width = options->filter_width;
+  params.stride_height = options->stride_h;
+  params.stride_width = options->stride_w;
+  params.activation = luci_actfunc(options->fused_activation_function);
+
+  // It is unknown what data will be stored in the scratchpad tensor,
+  // so UINT8 is used as the most general option
+  auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), nullptr);
+  scratchpad->set_data_buffer(nullptr);
+  // TODO move tensors offset initialization to one place
+  // TODO handle with static manager
+  Tensor *tmp = builder.get_runtime_graph()->addTensor(std::move(scratchpad));
+
+  return std::make_unique<kernels::AveragePool2D>(input, output, tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp b/onert-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
new file mode 100644 (file)
index 0000000..7799331
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/BatchMatMul.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleBatchMatMul(std::vector<const Tensor *> &&inputs,
+                                                       std::vector<Tensor *> &&outputs,
+                                                       const uint32_t op_index,
+                                                       KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *lhs = inputs.at(0);
+  const Tensor *rhs = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  auto lhs_scratchpad = std::make_unique<Tensor>(lhs->element_type(), Shape({}), nullptr);
+  lhs_scratchpad->set_data_buffer(nullptr);
+  auto rhs_scratchpad = std::make_unique<Tensor>(rhs->element_type(), Shape({}), nullptr);
+  rhs_scratchpad->set_data_buffer(nullptr);
+  // TODO move tensors offset initialization to one place
+  // TODO handle with StaticManager
+  Tensor *lhs_tmp = builder.get_runtime_graph()->addTensor(std::move(lhs_scratchpad));
+  Tensor *rhs_tmp = builder.get_runtime_graph()->addTensor(std::move(rhs_scratchpad));
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsBatchMatMulOptions();
+
+  BatchMatMulParams params;
+  params.adj_x = options->adjoint_lhs;
+  params.adj_y = options->adjoint_rhs;
+
+  return std::make_unique<kernels::BatchMatMul>(lhs, rhs, output, lhs_tmp, rhs_tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp b/onert-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp
new file mode 100644 (file)
index 0000000..424844a
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/BatchToSpaceND.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleBatchToSpaceND(std::vector<const Tensor *> &&inputs,
+                                                          std::vector<Tensor *> &&outputs,
+                                                          const uint32_t op_index,
+                                                          KernelBuilder &builder)
+{
+  assert(inputs.size() == 3);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *block_shape = inputs.at(1);
+  const Tensor *crops = inputs.at(2);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::BatchToSpaceND>(input, block_shape, crops, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Cast.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Cast.cpp
new file mode 100644 (file)
index 0000000..441dacb
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Cast.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleCast(std::vector<const Tensor *> &&inputs,
+                                                std::vector<Tensor *> &&outputs,
+                                                const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Cast>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp
new file mode 100644 (file)
index 0000000..e2b847a
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Concatenation.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleConcatenation(std::vector<const Tensor *> &&inputs,
+                                                         std::vector<Tensor *> &&outputs,
+                                                         const uint32_t op_index,
+                                                         KernelBuilder &builder)
+{
+  std::vector<const Tensor *> input_tensors(inputs.size());
+  for (uint32_t i = 0; i < inputs.size(); ++i)
+  {
+    input_tensors[i] = inputs.at(i);
+  }
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsConcatenationOptions();
+
+  ConcatenationParams params{};
+  params.axis = options->axis;
+  params.activation = luci_actfunc(options->fused_activation_function);
+
+  return std::make_unique<kernels::Concatenation>(std::move(input_tensors), output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp
new file mode 100644 (file)
index 0000000..1750e8a
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Conv2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleConv2D(std::vector<const Tensor *> &&inputs,
+                                                  std::vector<Tensor *> &&outputs,
+                                                  const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 3);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *filter = inputs.at(1);
+  const Tensor *bias = inputs.at(2);
+  Tensor *output = outputs.at(0);
+
+  // It is unknown what data will be stored in the scratchpad tensor,
+  // so UINT8 is used as the most general option
+  auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), nullptr);
+  scratchpad->set_data_buffer(nullptr);
+  // TODO move tensors offset initialization to one place
+  // TODO handle with StaticManager
+  Tensor *tmp = builder.get_runtime_graph()->addTensor(std::move(scratchpad));
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsConv2DOptions();
+
+  Conv2DParams params{};
+  params.padding = luci_padding(options->padding);
+  params.stride_height = options->stride_h;
+  params.stride_width = options->stride_w;
+  params.dilation_height_factor = options->dilation_h_factor;
+  params.dilation_width_factor = options->dilation_w_factor;
+  params.activation = luci_actfunc(options->fused_activation_function);
+
+  return std::make_unique<kernels::Conv2D>(input, filter, bias, output, tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp b/onert-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp
new file mode 100644 (file)
index 0000000..ebab0cc
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/DepthToSpace.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDepthToSpace(std::vector<const Tensor *> &&inputs,
+                                                        std::vector<Tensor *> &&outputs,
+                                                        const uint32_t op_index,
+                                                        KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsDepthToSpaceOptions();
+
+  DepthToSpaceParams params{};
+  params.block_size = options->block_size;
+
+  return std::make_unique<kernels::DepthToSpace>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp b/onert-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
new file mode 100644 (file)
index 0000000..cebad55
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/DepthwiseConv2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(std::vector<const Tensor *> &&inputs,
+                                                           std::vector<Tensor *> &&outputs,
+                                                           const uint32_t op_index,
+                                                           KernelBuilder &builder)
+{
+  assert(inputs.size() == 3);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *filter = inputs.at(1);
+  const Tensor *bias = inputs.at(2);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsDepthwiseConv2DOptions();
+
+  DepthwiseConv2DParams params{};
+  params.padding = luci_padding(options->padding);
+  params.depth_multiplier = options->depth_multiplier;
+  params.stride_height = options->stride_h;
+  params.stride_width = options->stride_w;
+  params.dilation_height_factor = options->dilation_h_factor;
+  params.dilation_width_factor = options->dilation_w_factor;
+  params.activation = luci_actfunc(options->fused_activation_function);
+
+  // It is unknown what data will be stored in the scratchpad tensor,
+  // so UINT8 is used as the most general option
+  auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), nullptr);
+  scratchpad->set_data_buffer(nullptr);
+  // TODO move tensors offset initialization to one place
+  // TODO handle with StaticManager
+  Tensor *tmp = builder.get_runtime_graph()->addTensor(std::move(scratchpad));
+
+  return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp
new file mode 100644 (file)
index 0000000..06b7518
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Dequantize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDequantize(std::vector<const Tensor *> &&inputs,
+                                                      std::vector<Tensor *> &&outputs,
+                                                      const uint32_t op_index,
+                                                      KernelBuilder &builder)
+{
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Dequantize>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Div.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Div.cpp
new file mode 100644 (file)
index 0000000..3331df9
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Div.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDiv(std::vector<const Tensor *> &&inputs,
+                                               std::vector<Tensor *> &&outputs,
+                                               const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+  const Tensor *input1 = inputs.at(0);
+  const Tensor *input2 = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsDivOptions();
+
+  DivParams params{};
+  params.activation = luci_actfunc(options->fused_activation_function);
+
+  return std::make_unique<kernels::Div>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Elu.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Elu.cpp
new file mode 100644 (file)
index 0000000..2d89328
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Elu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleElu(std::vector<const Tensor *> &&inputs,
+                                               std::vector<Tensor *> &&outputs,
+                                               const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Elu>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Equal.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Equal.cpp
new file mode 100644 (file)
index 0000000..eee5009
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Equal.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleEqual(std::vector<const Tensor *> &&inputs,
+                                                 std::vector<Tensor *> &&outputs,
+                                                 const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *x = inputs.at(0);
+  const Tensor *y = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Equal>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Exp.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Exp.cpp
new file mode 100644 (file)
index 0000000..401b6d0
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Exp.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleExp(std::vector<const Tensor *> &&inputs,
+                                               std::vector<Tensor *> &&outputs,
+                                               const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Exp>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp b/onert-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp
new file mode 100644 (file)
index 0000000..f4745b3
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ExpandDims.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleExpandDims(std::vector<const Tensor *> &&inputs,
+                                                      std::vector<Tensor *> &&outputs,
+                                                      const uint32_t op_index,
+                                                      KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+  const Tensor *input = inputs.at(0);
+  const Tensor *axis = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::ExpandDims>(input, axis, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Fill.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Fill.cpp
new file mode 100644 (file)
index 0000000..cd5a336
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Fill.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFill(std::vector<const Tensor *> &&inputs,
+                                                std::vector<Tensor *> &&outputs,
+                                                const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+  const Tensor *dims = inputs.at(0);
+  const Tensor *value = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Fill>(dims, value, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Floor.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Floor.cpp
new file mode 100644 (file)
index 0000000..59164ce
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Floor.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFloor(std::vector<const Tensor *> &&inputs,
+                                                 std::vector<Tensor *> &&outputs,
+                                                 const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Floor>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp b/onert-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp
new file mode 100644 (file)
index 0000000..7caf123
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/FloorDiv.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFloorDiv(std::vector<const Tensor *> &&inputs,
+                                                    std::vector<Tensor *> &&outputs,
+                                                    const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+  const Tensor *x = inputs.at(0);
+  const Tensor *y = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::FloorDiv>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp b/onert-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp
new file mode 100644 (file)
index 0000000..cb35c0f
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/FullyConnected.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFullyConnected(std::vector<const Tensor *> &&inputs,
+                                                          std::vector<Tensor *> &&outputs,
+                                                          const uint32_t op_index,
+                                                          KernelBuilder &builder)
+{
+  assert(inputs.size() == 3);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *weights = inputs.at(1);
+  const Tensor *bias = inputs.at(2);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsFullyConnectedOptions();
+
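+  // keep_num_dims preserves the input rank in the output instead of flattening it to 2-D.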
+  FullyConnectedParams params{};
+  params.activation = luci_actfunc(options->fused_activation_function);
+  params.keep_num_dims = options->keep_num_dims;
+
+  return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Gather.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Gather.cpp
new file mode 100644 (file)
index 0000000..02cb7e6
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Gather.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGather(std::vector<const Tensor *> &&inputs,
+                                                  std::vector<Tensor *> &&outputs,
+                                                  const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+  const Tensor *params = inputs.at(0);
+  const Tensor *indices = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsGatherOptions();
+
+  GatherParams gparams{};
+  gparams.axis = options->axis;
+  // TODO support batch_dims
+  gparams.batch_dims = 0;
+
+  return std::make_unique<kernels::Gather>(params, indices, output, gparams);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Greater.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Greater.cpp
new file mode 100644 (file)
index 0000000..160911e
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Greater.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGreater(std::vector<const Tensor *> &&inputs,
+                                                   std::vector<Tensor *> &&outputs,
+                                                   const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+  const Tensor *x = inputs.at(0);
+  const Tensor *y = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Greater>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp b/onert-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp
new file mode 100644 (file)
index 0000000..4c19682
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/GreaterEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGreaterEqual(std::vector<const Tensor *> &&inputs,
+                                                        std::vector<Tensor *> &&outputs,
+                                                        const uint32_t op_index,
+                                                        KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+  const Tensor *x = inputs.at(0);
+  const Tensor *y = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::GreaterEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/If.cpp b/onert-micro/luci-interpreter/src/loader/nodes/If.cpp
new file mode 100644 (file)
index 0000000..1c6f12c
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/If.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleIf(std::vector<const Tensor *> &&inputs,
+                                              std::vector<Tensor *> &&outputs,
+                                              const uint32_t op_index, KernelBuilder &builder)
+{
+  // TODO: support IF operation
+  assert(false && "Not yet supported");
+  return nullptr; // unreachable in debug builds; avoids falling off a non-void function in release builds
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp b/onert-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp
new file mode 100644 (file)
index 0000000..1d7f639
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/InstanceNorm.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleInstanceNorm(std::vector<const Tensor *> &&inputs,
+                                                        std::vector<Tensor *> &&outputs,
+                                                        const uint32_t op_index,
+                                                        KernelBuilder &builder)
+{
+  assert(inputs.size() == 3);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *gamma = inputs.at(1);
+  const Tensor *beta = inputs.at(2);
+
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsInstanceNormOptions();
+
+  InstanceNormParams params{};
+  params.epsilon = options->epsilon;
+  params.activation = luci_actfunc(options->fused_activation_function);
+
+  return std::make_unique<kernels::InstanceNorm>(input, gamma, beta, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp b/onert-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp
new file mode 100644 (file)
index 0000000..6435f09
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/L2Normalize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleL2Normalize(std::vector<const Tensor *> &&inputs,
+                                                       std::vector<Tensor *> &&outputs,
+                                                       const uint32_t op_index,
+                                                       KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsL2NormOptions();
+
+  L2NormParams params{};
+  params.activation = luci_actfunc(options->fused_activation_function);
+
+  return std::make_unique<kernels::L2Normalize>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp b/onert-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp
new file mode 100644 (file)
index 0000000..d297525
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/L2Pool2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleL2Pool2D(std::vector<const Tensor *> &&inputs,
+                                                    std::vector<Tensor *> &&outputs,
+                                                    const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsPool2DOptions();
+
+  Pool2DParams params{};
+  params.padding = luci_padding(options->padding);
+  params.filter_height = options->filter_height;
+  params.filter_width = options->filter_width;
+  params.stride_height = options->stride_h;
+  params.stride_width = options->stride_w;
+  params.activation = luci_actfunc(options->fused_activation_function);
+
+  return std::make_unique<kernels::L2Pool2D>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp b/onert-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp
new file mode 100644 (file)
index 0000000..2cb27e2
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LeakyRelu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLeakyRelu(std::vector<const Tensor *> &&inputs,
+                                                     std::vector<Tensor *> &&outputs,
+                                                     const uint32_t op_index,
+                                                     KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsLeakyReluOptions();
+
+  LeakyReluParams params{};
+  params.alpha = options->alpha;
+
+  return std::make_unique<kernels::LeakyRelu>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Less.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Less.cpp
new file mode 100644 (file)
index 0000000..9de9dbe
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Less.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLess(std::vector<const Tensor *> &&inputs,
+                                                std::vector<Tensor *> &&outputs,
+                                                const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *x = inputs.at(0);
+  const Tensor *y = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Less>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp b/onert-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp
new file mode 100644 (file)
index 0000000..ca4f26c
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LessEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLessEqual(std::vector<const Tensor *> &&inputs,
+                                                     std::vector<Tensor *> &&outputs,
+                                                     const uint32_t op_index,
+                                                     KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *x = inputs.at(0);
+  const Tensor *y = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::LessEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp b/onert-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp
new file mode 100644 (file)
index 0000000..71ed634
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LocalResponseNormalization.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel>
+build_kernel_CircleLocalResponseNormalization(std::vector<const Tensor *> &&inputs,
+                                              std::vector<Tensor *> &&outputs,
+                                              const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsLocalResponseNormalizationOptions();
+
+  LocalResponseNormalizationParams params{};
+  params.radius = options->radius;
+  params.bias = options->bias;
+  params.alpha = options->alpha;
+  params.beta = options->beta;
+
+  return std::make_unique<kernels::LocalResponseNormalization>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp b/onert-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp
new file mode 100644 (file)
index 0000000..03e40e9
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogSoftmax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogSoftmax(std::vector<const Tensor *> &&inputs,
+                                                      std::vector<Tensor *> &&outputs,
+                                                      const uint32_t op_index,
+                                                      KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::LogSoftmax>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp b/onert-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp
new file mode 100644 (file)
index 0000000..32677a7
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalAnd.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalAnd(std::vector<const Tensor *> &&inputs,
+                                                      std::vector<Tensor *> &&outputs,
+                                                      const uint32_t op_index,
+                                                      KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input1 = inputs.at(0);
+  const Tensor *input2 = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::LogicalAnd>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp b/onert-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp
new file mode 100644 (file)
index 0000000..43ec1e4
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalNot.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalNot(std::vector<const Tensor *> &&inputs,
+                                                      std::vector<Tensor *> &&outputs,
+                                                      const uint32_t op_index,
+                                                      KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::LogicalNot>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp b/onert-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp
new file mode 100644 (file)
index 0000000..7ce29a8
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalOr.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalOr(std::vector<const Tensor *> &&inputs,
+                                                     std::vector<Tensor *> &&outputs,
+                                                     const uint32_t op_index,
+                                                     KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input1 = inputs.at(0);
+  const Tensor *input2 = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::LogicalOr>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Logistic.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Logistic.cpp
new file mode 100644 (file)
index 0000000..113b5ea
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Logistic.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogistic(std::vector<const Tensor *> &&inputs,
+                                                    std::vector<Tensor *> &&outputs,
+                                                    const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Logistic>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp b/onert-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp
new file mode 100644 (file)
index 0000000..40fff4d
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/MaxPool2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMaxPool2D(std::vector<const Tensor *> &&inputs,
+                                                     std::vector<Tensor *> &&outputs,
+                                                     const uint32_t op_index,
+                                                     KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsPool2DOptions();
+
+  Pool2DParams params{};
+  params.padding = luci_padding(options->padding);
+  params.filter_height = options->filter_height;
+  params.filter_width = options->filter_width;
+  params.stride_height = options->stride_h;
+  params.stride_width = options->stride_w;
+  params.activation = luci_actfunc(options->fused_activation_function);
+
+  return std::make_unique<kernels::MaxPool2D>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Maximum.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Maximum.cpp
new file mode 100644 (file)
index 0000000..1a7930a
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Maximum.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMaximum(std::vector<const Tensor *> &&inputs,
+                                                   std::vector<Tensor *> &&outputs,
+                                                   const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input1 = inputs.at(0);
+  const Tensor *input2 = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Maximum>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Mean.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Mean.cpp
new file mode 100644 (file)
index 0000000..6ef48bc
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Mean.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMean(std::vector<const Tensor *> &&inputs,
+                                                std::vector<Tensor *> &&outputs,
+                                                const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *axis = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
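+  // Mean needs three scratch tensors (temporary indices, resolved axes, partial sums); they are
+  // registered with the runtime graph so their storage is managed together with the other tensors.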
+  auto temp_index_unique = std::make_unique<Tensor>(DataType::S32, Shape({}), nullptr);
+  temp_index_unique->set_data_buffer(nullptr);
+  Tensor *temp_index = builder.get_runtime_graph()->addTensor(std::move(temp_index_unique));
+
+  auto resolved_axes_unique = std::make_unique<Tensor>(DataType::S32, Shape({}), nullptr);
+  resolved_axes_unique->set_data_buffer(nullptr);
+  Tensor *resolved_axes = builder.get_runtime_graph()->addTensor(std::move(resolved_axes_unique));
+
+  auto temp_sum_unique = std::make_unique<Tensor>(input->element_type(), Shape({}), nullptr);
+  temp_sum_unique->set_data_buffer(nullptr);
+  Tensor *temp_sum = builder.get_runtime_graph()->addTensor(std::move(temp_sum_unique));
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsReducerOptions();
+
+  ReducerParams params{};
+  params.keep_dims = options->keep_dims;
+
+  return std::make_unique<kernels::Mean>(input, axis, output, temp_index, resolved_axes, temp_sum,
+                                         params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Minimum.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Minimum.cpp
new file mode 100644 (file)
index 0000000..232ebf0
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Minimum.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMinimum(std::vector<const Tensor *> &&inputs,
+                                                   std::vector<Tensor *> &&outputs,
+                                                   const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input1 = inputs.at(0);
+  const Tensor *input2 = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Minimum>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp b/onert-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp
new file mode 100644 (file)
index 0000000..d96ad25
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/MirrorPad.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMirrorPad(std::vector<const Tensor *> &&inputs,
+                                                     std::vector<Tensor *> &&outputs,
+                                                     const uint32_t op_index,
+                                                     KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *paddings = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsMirrorPadOptions();
+
+  MirrorPadParams params{};
+  params.mode = luci_mirrorpad_mode(options->mode);
+
+  return std::make_unique<kernels::MirrorPad>(input, paddings, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Mul.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Mul.cpp
new file mode 100644 (file)
index 0000000..283f042
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Mul.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMul(std::vector<const Tensor *> &&inputs,
+                                               std::vector<Tensor *> &&outputs,
+                                               const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input1 = inputs.at(0);
+  const Tensor *input2 = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsMulOptions();
+
+  MulParams params{};
+  params.activation = luci_actfunc(options->fused_activation_function);
+
+  return std::make_unique<kernels::Mul>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Neg.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Neg.cpp
new file mode 100644 (file)
index 0000000..a898f41
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Neg.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleNeg(std::vector<const Tensor *> &&inputs,
+                                               std::vector<Tensor *> &&outputs,
+                                               const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Neg>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp b/onert-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp
new file mode 100644 (file)
index 0000000..1c84931
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/NotEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleNotEqual(std::vector<const Tensor *> &&inputs,
+                                                    std::vector<Tensor *> &&outputs,
+                                                    const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *x = inputs.at(0);
+  const Tensor *y = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::NotEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/OneHot.cpp b/onert-micro/luci-interpreter/src/loader/nodes/OneHot.cpp
new file mode 100644 (file)
index 0000000..a1c8a5b
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/OneHot.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleOneHot(std::vector<const Tensor *> &&inputs,
+                                                  std::vector<Tensor *> &&outputs,
+                                                  const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 4);
+
+  const Tensor *indices = inputs.at(0);
+  const Tensor *depth = inputs.at(1);
+  const Tensor *on_value = inputs.at(2);
+  const Tensor *off_value = inputs.at(3);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsOneHotOptions();
+
+  OneHotParams params{};
+  params.axis = options->axis;
+
+  return std::make_unique<kernels::OneHot>(indices, depth, on_value, off_value, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/PRelu.cpp b/onert-micro/luci-interpreter/src/loader/nodes/PRelu.cpp
new file mode 100644 (file)
index 0000000..6adc549
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/PRelu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePRelu(std::vector<const Tensor *> &&inputs,
+                                                 std::vector<Tensor *> &&outputs,
+                                                 const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *alpha = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::PRelu>(input, alpha, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Pack.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Pack.cpp
new file mode 100644 (file)
index 0000000..a92196b
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pack.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePack(std::vector<const Tensor *> &&inputs,
+                                                std::vector<Tensor *> &&outputs,
+                                                const uint32_t op_index, KernelBuilder &builder)
+{
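+  // Pack takes a variable number of inputs; copy every input tensor pointer for the kernel.
+  // values_count from the options is expected to match inputs.size().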
+  std::vector<const Tensor *> input_tensors(inputs.size());
+  for (uint32_t i = 0; i < inputs.size(); ++i)
+  {
+    input_tensors[i] = inputs.at(i);
+  }
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsPackOptions();
+
+  PackParams params{};
+  params.axis = options->axis;
+  params.values_count = options->values_count;
+
+  return std::make_unique<kernels::Pack>(std::move(input_tensors), output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Pad.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Pad.cpp
new file mode 100644 (file)
index 0000000..26aa7e7
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pad.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePad(std::vector<const Tensor *> &&inputs,
+                                               std::vector<Tensor *> &&outputs,
+                                               const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *paddings = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Pad>(input, paddings, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/PadV2.cpp b/onert-micro/luci-interpreter/src/loader/nodes/PadV2.cpp
new file mode 100644 (file)
index 0000000..829c47f
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/PadV2.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePadV2(std::vector<const Tensor *> &&inputs,
+                                                 std::vector<Tensor *> &&outputs,
+                                                 const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 3);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *paddings = inputs.at(1);
+  const Tensor *constant_values = inputs.at(2);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::PadV2>(input, paddings, constant_values, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Pow.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Pow.cpp
new file mode 100644 (file)
index 0000000..005c281
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pow.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePow(std::vector<const Tensor *> &&inputs,
+                                               std::vector<Tensor *> &&outputs,
+                                               const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input1 = inputs.at(0);
+  const Tensor *input2 = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Pow>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Quantize.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Quantize.cpp
new file mode 100644 (file)
index 0000000..4fa7e66
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Quantize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleQuantize(std::vector<const Tensor *> &&inputs,
+                                                    std::vector<Tensor *> &&outputs,
+                                                    const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Quantize>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Relu.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Relu.cpp
new file mode 100644 (file)
index 0000000..27fcb6b
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Relu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRelu(std::vector<const Tensor *> &&inputs,
+                                                std::vector<Tensor *> &&outputs,
+                                                const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Relu>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Relu6.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Relu6.cpp
new file mode 100644 (file)
index 0000000..68dba2c
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Relu6.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRelu6(std::vector<const Tensor *> &&inputs,
+                                                 std::vector<Tensor *> &&outputs,
+                                                 const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Relu6>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Reshape.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Reshape.cpp
new file mode 100644 (file)
index 0000000..45ebf0a
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Reshape.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleReshape(std::vector<const Tensor *> &&inputs,
+                                                   std::vector<Tensor *> &&outputs,
+                                                   const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *shape = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  // NOTE 'newShape' attribute is ignored.
+  return std::make_unique<kernels::Reshape>(input, shape, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp b/onert-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
new file mode 100644 (file)
index 0000000..bd82048
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ResizeBilinear.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleResizeBilinear(std::vector<const Tensor *> &&inputs,
+                                                          std::vector<Tensor *> &&outputs,
+                                                          const uint32_t op_index,
+                                                          KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *size = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsResizeBilinearOptions();
+
+  ResizeBilinearParams params{};
+  params.align_corners = options->align_corners;
+  params.half_pixel_centers = options->half_pixel_centers;
+
+  return std::make_unique<kernels::ResizeBilinear>(input, size, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp b/onert-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
new file mode 100644 (file)
index 0000000..bc59e87
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ResizeNearestNeighbor.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel>
+build_kernel_CircleResizeNearestNeighbor(std::vector<const Tensor *> &&inputs,
+                                         std::vector<Tensor *> &&outputs, const uint32_t op_index,
+                                         KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *size = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsResizeNearestNeighborOptions();
+
+  ResizeNearestNeighborParams params{};
+  params.align_corners = options->align_corners;
+  // TODO Update half_pixel_centers once CircleResizeNearestNeighbor supports it.
+  // The current CircleResizeNearestNeighbor does not carry half_pixel_centers,
+  // so the parameter is fixed to false here and should be revisited when the
+  // IR gains the attribute.
+  params.half_pixel_centers = false;
+
+  return std::make_unique<kernels::ResizeNearestNeighbor>(input, size, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp b/onert-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp
new file mode 100644 (file)
index 0000000..0b23ee0
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ReverseV2.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleReverseV2(std::vector<const Tensor *> &&inputs,
+                                                     std::vector<Tensor *> &&outputs,
+                                                     const uint32_t op_index,
+                                                     KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *axis = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::ReverseV2>(input, axis, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp
new file mode 100644 (file)
index 0000000..87ca438
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Rsqrt.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRsqrt(std::vector<const Tensor *> &&inputs,
+                                                 std::vector<Tensor *> &&outputs,
+                                                 const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Rsqrt>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/SVDF.cpp b/onert-micro/luci-interpreter/src/loader/nodes/SVDF.cpp
new file mode 100644 (file)
index 0000000..8a77459
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SVDF.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSVDF(std::vector<const Tensor *> &&inputs,
+                                                std::vector<Tensor *> &&outputs,
+                                                const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 5);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *feature = inputs.at(1);
+  const Tensor *time = inputs.at(2);
+  const Tensor *bias = inputs.at(3);
+  const Tensor *input_activation_state = inputs.at(4);
+  Tensor *output = outputs.at(0);
+
+  auto scratchpad_tensor =
+    std::make_unique<Tensor>(input_activation_state->element_type(), Shape({}), nullptr);
+  scratchpad_tensor->set_data_buffer(nullptr);
+  Tensor *tmp = builder.get_runtime_graph()->addTensor(std::move(scratchpad_tensor));
+
+  DataType data_type = input->element_type() == DataType::S8 ? DataType::S32 : DataType::FLOAT32;
+
+  scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), nullptr);
+  scratchpad_tensor->set_data_buffer(nullptr);
+  Tensor *tmp_1 = builder.get_runtime_graph()->addTensor(std::move(scratchpad_tensor));
+
+  if (data_type == DataType::FLOAT32 &&
+      (feature->element_type() == DataType::S8 || feature->element_type() == DataType::U8))
+  {
+    data_type = feature->element_type();
+  }
+
+  scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), nullptr);
+  scratchpad_tensor->set_data_buffer(nullptr);
+  Tensor *tmp_2 = builder.get_runtime_graph()->addTensor(std::move(scratchpad_tensor));
+
+  data_type = DataType::FLOAT32;
+
+  scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), nullptr);
+  scratchpad_tensor->set_data_buffer(nullptr);
+  Tensor *tmp_3 = builder.get_runtime_graph()->addTensor(std::move(scratchpad_tensor));
+
+  scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), nullptr);
+  scratchpad_tensor->set_data_buffer(nullptr);
+  Tensor *tmp_4 = builder.get_runtime_graph()->addTensor(std::move(scratchpad_tensor));
+
+  scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), nullptr);
+  scratchpad_tensor->set_data_buffer(nullptr);
+  Tensor *tmp_5 = builder.get_runtime_graph()->addTensor(std::move(scratchpad_tensor));
+
+  scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), nullptr);
+  scratchpad_tensor->set_data_buffer(nullptr);
+  Tensor *tmp_6 = builder.get_runtime_graph()->addTensor(std::move(scratchpad_tensor));
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsSVDFOptions();
+
+  SVDFParams params{};
+  params.activation = luci_actfunc(options->fused_activation_function);
+  params.svdf_rank = options->rank;
+  params.asymmetric_quantize_inputs = options->asymmetric_quantize_inputs;
+
+  return std::make_unique<kernels::SVDF>(input, feature, time, bias, input_activation_state, output,
+                                         tmp, tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6, params);
+}
+
+} // namespace luci_interpreter
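The SVDF builder above repeats the same three steps for each of its seven temporaries: create a placeholder Tensor with an empty shape and no data buffer, then hand it to the runtime graph. A small helper along the following lines could factor that pattern out; the make_scratchpad name and free-function form are assumptions for illustration and are not part of this patch.

#include "Builders.h"

namespace luci_interpreter
{

// Creates a buffer-less placeholder tensor of the given type and transfers
// ownership to the runtime graph, returning the graph-owned pointer.
inline Tensor *make_scratchpad(KernelBuilder &builder, DataType dtype)
{
  auto scratchpad = std::make_unique<Tensor>(dtype, Shape({}), nullptr);
  scratchpad->set_data_buffer(nullptr);
  return builder.get_runtime_graph()->addTensor(std::move(scratchpad));
}

} // namespace luci_interpreter

With such a helper, each temporary above would reduce to a single call like Tensor *tmp = make_scratchpad(builder, data_type);.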
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Shape.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Shape.cpp
new file mode 100644 (file)
index 0000000..69a727f
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Shape.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleShape(std::vector<const Tensor *> &&inputs,
+                                                 std::vector<Tensor *> &&outputs,
+                                                 const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsShapeOptions();
+
+  ShapeParams shape_params{};
+  shape_params.out_type = luci_datatype(options->out_type);
+
+  return std::make_unique<kernels::ShapeKernel>(input, output, shape_params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Slice.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Slice.cpp
new file mode 100644 (file)
index 0000000..e28742b
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Slice.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSlice(std::vector<const Tensor *> &&inputs,
+                                                 std::vector<Tensor *> &&outputs,
+                                                 const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 3);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *begin = inputs.at(1);
+  const Tensor *size = inputs.at(2);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Slice>(input, begin, size, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Softmax.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Softmax.cpp
new file mode 100644 (file)
index 0000000..1957a76
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Softmax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSoftmax(std::vector<const Tensor *> &&inputs,
+                                                   std::vector<Tensor *> &&outputs,
+                                                   const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsSoftmaxOptions();
+
+  SoftmaxParams params{};
+  params.beta = options->beta;
+
+  return std::make_unique<kernels::Softmax>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp b/onert-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
new file mode 100644 (file)
index 0000000..c5553c6
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SpaceToBatchND.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSpaceToBatchND(std::vector<const Tensor *> &&inputs,
+                                                          std::vector<Tensor *> &&outputs,
+                                                          const uint32_t op_index,
+                                                          KernelBuilder &builder)
+{
+  assert(inputs.size() == 3);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *block_shape = inputs.at(1);
+  const Tensor *paddings = inputs.at(2);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::SpaceToBatchND>(input, block_shape, paddings, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp b/onert-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
new file mode 100644 (file)
index 0000000..156976a
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SpaceToDepth.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSpaceToDepth(std::vector<const Tensor *> &&inputs,
+                                                        std::vector<Tensor *> &&outputs,
+                                                        const uint32_t op_index,
+                                                        KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsSpaceToDepthOptions();
+
+  SpaceToDepthParams params{};
+  params.block_size = options->block_size;
+
+  return std::make_unique<kernels::SpaceToDepth>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Split.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Split.cpp
new file mode 100644 (file)
index 0000000..ecec1cb
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Split.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSplit(std::vector<const Tensor *> &&inputs,
+                                                 std::vector<Tensor *> &&outputs,
+                                                 const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *axis = inputs.at(0);
+  const Tensor *input = inputs.at(1);
+  std::vector<Tensor *> output_tensors(outputs.size());
+
+  for (uint32_t i = 0; i < outputs.size(); ++i)
+  {
+    output_tensors[i] = outputs.at(i);
+  }
+
+  // NOTE 'num_splits' attribute is ignored.
+  return std::make_unique<kernels::Split>(axis, input, std::move(output_tensors));
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/SplitV.cpp b/onert-micro/luci-interpreter/src/loader/nodes/SplitV.cpp
new file mode 100644 (file)
index 0000000..a4c0ae2
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SplitV.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSplitV(std::vector<const Tensor *> &&inputs,
+                                                  std::vector<Tensor *> &&outputs,
+                                                  const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 3);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *sizes_data = inputs.at(1);
+  const Tensor *axis = inputs.at(2);
+  std::vector<Tensor *> output_tensors(outputs.size());
+
+  for (uint32_t i = 0; i < outputs.size(); ++i)
+  {
+    output_tensors[i] = outputs.at(i);
+  }
+
+  // NOTE 'num_splits' attribute is ignored.
+  return std::make_unique<kernels::SplitV>(input, sizes_data, axis, std::move(output_tensors));
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp
new file mode 100644 (file)
index 0000000..3eaf234
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Sqrt.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSqrt(std::vector<const Tensor *> &&inputs,
+                                                std::vector<Tensor *> &&outputs,
+                                                const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Sqrt>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Square.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Square.cpp
new file mode 100644 (file)
index 0000000..1afc6cc
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Square.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSquare(std::vector<const Tensor *> &&inputs,
+                                                  std::vector<Tensor *> &&outputs,
+                                                  const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Square>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp b/onert-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp
new file mode 100644 (file)
index 0000000..0a5ba78
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SquaredDifference.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSquaredDifference(std::vector<const Tensor *> &&inputs,
+                                                             std::vector<Tensor *> &&outputs,
+                                                             const uint32_t op_index,
+                                                             KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input1 = inputs.at(0);
+  const Tensor *input2 = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::SquaredDifference>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp
new file mode 100644 (file)
index 0000000..4f0c265
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Squeeze.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSqueeze(std::vector<const Tensor *> &&inputs,
+                                                   std::vector<Tensor *> &&outputs,
+                                                   const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsSqueezeOptions();
+
+  SqueezeParams params{};
+  params.squeeze_dims = options->squeeze_dims;
+
+  return std::make_unique<kernels::Squeeze>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp b/onert-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp
new file mode 100644 (file)
index 0000000..c0a53fc
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/StridedSlice.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleStridedSlice(std::vector<const Tensor *> &&inputs,
+                                                        std::vector<Tensor *> &&outputs,
+                                                        const uint32_t op_index,
+                                                        KernelBuilder &builder)
+{
+  assert(inputs.size() == 4);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *begin = inputs.at(1);
+  const Tensor *end = inputs.at(2);
+  const Tensor *strides = inputs.at(3);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsStridedSliceOptions();
+
+  StridedSliceParams params{};
+  params.begin_mask = options->begin_mask;
+  params.ellipsis_mask = options->ellipsis_mask;
+  params.end_mask = options->end_mask;
+  params.new_axis_mask = options->new_axis_mask;
+  params.shrink_axis_mask = options->shrink_axis_mask;
+
+  return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Sub.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Sub.cpp
new file mode 100644 (file)
index 0000000..79c773b
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Sub.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSub(std::vector<const Tensor *> &&inputs,
+                                               std::vector<Tensor *> &&outputs,
+                                               const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input1 = inputs.at(0);
+  const Tensor *input2 = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsSubOptions();
+
+  SubParams params{};
+  params.activation = luci_actfunc(options->fused_activation_function);
+
+  return std::make_unique<kernels::Sub>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Tanh.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Tanh.cpp
new file mode 100644 (file)
index 0000000..f4aff4c
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Tanh.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTanh(std::vector<const Tensor *> &&inputs,
+                                                std::vector<Tensor *> &&outputs,
+                                                const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Tanh>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Transpose.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Transpose.cpp
new file mode 100644 (file)
index 0000000..83b466d
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Transpose.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTranspose(std::vector<const Tensor *> &&inputs,
+                                                     std::vector<Tensor *> &&outputs,
+                                                     const uint32_t op_index,
+                                                     KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *perm = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Transpose>(input, perm, output);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp b/onert-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp
new file mode 100644 (file)
index 0000000..06ee63e
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/TransposeConv.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTransposeConv(std::vector<const Tensor *> &&inputs,
+                                                         std::vector<Tensor *> &&outputs,
+                                                         const uint32_t op_index,
+                                                         KernelBuilder &builder)
+{
+  assert(inputs.size() == 4);
+
+  const Tensor *input_sizes = inputs.at(0);
+  const Tensor *filter = inputs.at(1);
+  const Tensor *out_backprop = inputs.at(2);
+  const Tensor *bias = inputs.at(3);
+  Tensor *output = outputs.at(0);
+
+  DataType scratch_data_type =
+    input_sizes->element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+
+  auto scratch_tensor = std::make_unique<Tensor>(scratch_data_type, Shape({}), nullptr);
+  scratch_tensor->set_data_buffer(nullptr);
+  Tensor *tmp = builder.get_runtime_graph()->addTensor(std::move(scratch_tensor));
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsTransposeConvOptions();
+
+  TransposeConvParams params{};
+  params.padding = luci_padding(options->padding);
+  params.stride_height = options->stride_h;
+  params.stride_width = options->stride_w;
+
+  return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output,
+                                                  tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/UnidirectionalSequenceLSTM.cpp b/onert-micro/luci-interpreter/src/loader/nodes/UnidirectionalSequenceLSTM.cpp
new file mode 100644 (file)
index 0000000..b66e53f
--- /dev/null
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/UnidirectionalSequenceLSTM.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel>
+build_kernel_CircleUnidirectionalSequenceLSTM(std::vector<const Tensor *> &&inputs,
+                                              std::vector<Tensor *> &&outputs,
+                                              const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 24);
+  const Tensor *input = inputs.at(0);
+  const Tensor *input_to_input_weights = inputs.at(1);
+  const Tensor *input_to_forget_weights = inputs.at(2);
+  const Tensor *input_to_cell_weights = inputs.at(3);
+  const Tensor *input_to_output_weights = inputs.at(4);
+
+  const Tensor *recurrent_to_input_weights = inputs.at(5);
+  const Tensor *recurrent_to_forget_weights = inputs.at(6);
+  const Tensor *recurrent_to_cell_weights = inputs.at(7);
+  const Tensor *recurrent_to_output_weights = inputs.at(8);
+
+  const Tensor *cell_to_input_weights = inputs.at(9);
+  const Tensor *cell_to_forget_weights = inputs.at(10);
+  const Tensor *cell_to_output_weights = inputs.at(11);
+
+  const Tensor *input_gate_bias = inputs.at(12);
+  const Tensor *forget_gate_bias = inputs.at(13);
+  const Tensor *cell_gate_bias = inputs.at(14);
+  const Tensor *output_gate_bias = inputs.at(15);
+
+  const Tensor *projection_weights = inputs.at(16);
+  const Tensor *projection_bias = inputs.at(17);
+
+  Tensor *output_state = const_cast<Tensor *>(inputs.at(18));
+  Tensor *cell_state = const_cast<Tensor *>(inputs.at(19));
+
+  const Tensor *input_layer_norm_coefficients = inputs.at(20);
+  const Tensor *forget_layer_norm_coefficients = inputs.at(21);
+  const Tensor *cell_layer_norm_coefficients = inputs.at(22);
+  const Tensor *output_layer_norm_coefficients = inputs.at(23);
+  Tensor *output = outputs.at(0);
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsUnidirectionalSequenceLSTMOptions();
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = luci_actfunc(options->fused_activation_function);
+  params.cell_clip = options->cell_clip;
+  params.proj_clip = options->proj_clip;
+  params.time_major = options->time_major;
+  params.asymmetric_quantize_inputs = options->asymmetric_quantize_inputs;
+
+  // scratch pad tensor
+  const bool is_integer = input->element_type() == DataType::S8;
+  bool use_layer_norm = (forget_layer_norm_coefficients != nullptr);
+
+  if (is_integer)
+  {
+    if (not use_layer_norm)
+    {
+      params.intermediate_affine_quant =
+        builder.get_runtime_graph()->getIntermediateAffineQuantizations();
+
+      // The integer LSTM needs four 16-bit buffers of size n_batch * n_cell
+      // and one 8-bit buffer of size n_batch * n_cell.
+      auto tmp_1 = std::make_unique<Tensor>(DataType::S16, Shape({}), nullptr);
+      tmp_1->set_data_buffer(nullptr);
+      outputs.push_back(builder.get_runtime_graph()->addTensor(std::move(tmp_1)));
+
+      auto tmp_2 = std::make_unique<Tensor>(DataType::S16, Shape({}), nullptr);
+      tmp_2->set_data_buffer(nullptr);
+      outputs.push_back(builder.get_runtime_graph()->addTensor(std::move(tmp_2)));
+
+      auto tmp_3 = std::make_unique<Tensor>(DataType::S16, Shape({}), nullptr);
+      tmp_3->set_data_buffer(nullptr);
+      outputs.push_back(builder.get_runtime_graph()->addTensor(std::move(tmp_3)));
+
+      auto tmp_4 = std::make_unique<Tensor>(DataType::S16, Shape({}), nullptr);
+      tmp_4->set_data_buffer(nullptr);
+      outputs.push_back(builder.get_runtime_graph()->addTensor(std::move(tmp_4)));
+
+      auto tmp_5 = std::make_unique<Tensor>(
+        DataType::S8, Shape({}),
+        builder.get_runtime_graph()->getIntermediateAffineQuantizations()[0]);
+      tmp_5->set_data_buffer(nullptr);
+      outputs.push_back(builder.get_runtime_graph()->addTensor(std::move(tmp_5)));
+    }
+    else
+    {
+      // TODO support integer LSTM with layer normalization
+      assert(false && "Not supported now");
+    }
+  }
+  else
+  {
+    // NOTE More scratchpads are needed here if hybrid or integer modes are supported.
+    auto sp_output_state =
+      std::make_unique<Tensor>(output_state->element_type(), Shape({}), nullptr);
+    sp_output_state->set_data_buffer(nullptr);
+    outputs.push_back(builder.get_runtime_graph()->addTensor(std::move(sp_output_state)));
+
+    auto sp_cell_state = std::make_unique<Tensor>(cell_state->element_type(), Shape({}), nullptr);
+    sp_cell_state->set_data_buffer(nullptr);
+    outputs.push_back(builder.get_runtime_graph()->addTensor(std::move(sp_cell_state)));
+
+    auto sp_3 = std::make_unique<Tensor>(input->element_type(), Shape({}), nullptr);
+    sp_3->set_data_buffer(nullptr);
+    outputs.push_back(builder.get_runtime_graph()->addTensor(std::move(sp_3)));
+  }
+
+  outputs.push_back(output_state);
+  outputs.push_back(cell_state);
+
+  return std::make_unique<kernels::UnidirectionalSequenceLSTM>(
+    input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights,
+    input_to_output_weights, recurrent_to_input_weights, recurrent_to_forget_weights,
+    recurrent_to_cell_weights, recurrent_to_output_weights, cell_to_input_weights,
+    cell_to_forget_weights, cell_to_output_weights, input_gate_bias, forget_gate_bias,
+    cell_gate_bias, output_gate_bias, projection_weights, projection_bias,
+    input_layer_norm_coefficients, forget_layer_norm_coefficients, cell_layer_norm_coefficients,
+    output_layer_norm_coefficients, std::move(outputs), params);
+}
+
+} // namespace luci_interpreter
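The LSTM builder above grows the outputs vector before moving it into the kernel: the non-layer-norm integer path appends five scratchpads, the float path appends three, and both then append output_state and cell_state. The helper below only restates that bookkeeping as read from the code; it is an illustrative sketch, not part of the patch.

#include <cassert>
#include <cstdint>

// Number of extra tensors appended to `outputs` by
// build_kernel_CircleUnidirectionalSequenceLSTM, per execution path.
inline uint32_t lstm_extra_outputs(bool is_integer, bool use_layer_norm)
{
  // The integer path with layer normalization is rejected by the builder above.
  assert(!(is_integer && use_layer_norm) && "unsupported by the builder above");
  if (is_integer)
    return 5 + 2; // four S16 and one S8 scratchpads, plus output_state and cell_state
  return 3 + 2;   // three float scratchpads, plus output_state and cell_state
}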
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/Unpack.cpp b/onert-micro/luci-interpreter/src/loader/nodes/Unpack.cpp
new file mode 100644 (file)
index 0000000..7f067f4
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Unpack.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleUnpack(std::vector<const Tensor *> &&inputs,
+                                                  std::vector<Tensor *> &&outputs,
+                                                  const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *input = inputs.at(0);
+  std::vector<Tensor *> output_tensors(outputs.size());
+
+  for (uint32_t i = 0; i < outputs.size(); ++i)
+  {
+    output_tensors[i] = outputs.at(i);
+  }
+
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsUnpackOptions();
+
+  UnpackParams params{};
+  params.axis = options->axis;
+
+  // NOTE 'num' attribute is ignored.
+  return std::make_unique<kernels::Unpack>(input, std::move(output_tensors), params);
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/loader/nodes/While.cpp b/onert-micro/luci-interpreter/src/loader/nodes/While.cpp
new file mode 100644 (file)
index 0000000..b1f719e
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/While.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleWhile(std::vector<const Tensor *> &&inputs,
+                                                 std::vector<Tensor *> &&outputs,
+                                                 const uint32_t op_index, KernelBuilder &builder)
+{
+  // TODO: support WHILE operation
+  assert(false && "Not supported now");
+  return nullptr;
+}
+
+} // namespace luci_interpreter
diff --git a/onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.cpp b/onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.cpp
new file mode 100644 (file)
index 0000000..ca1c92b
--- /dev/null
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if 0
+
+#include "BuddyMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+BuddyMemoryManager::BuddyMemoryManager(uint8_t *memory_start, int32_t memSize)
+{
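+  // Round the managed size down to a power of two; the buddy scheme only works with
+  // power-of-two block sizes.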
+  int32_t p = lowerLog2(memSize);
+
+  // We assume that the requested size of memory does not exceed 4 GB
+  assert(p < 32);
+  memSize = 1 << p;
+
+  _start_block = reinterpret_cast<Block *>(memory_start);
+  _start_block->size = memSize - sizeof(Block);
+  _start_block->is_free = true;
+  _start_block->self = _start_block;
+  _num_blocks = 0;
+  _size = _start_block->size;
+
+  for (auto &_free_block : _free_blocks)
+    _free_block = nullptr;
+
+  addToBlocks(_start_block, p);
+}
+
+void BuddyMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+  const size_t element_size = getDataTypeSize(tensor.element_type());
+  const int32_t num_elements = tensor.shape().num_elements();
+  auto size = num_elements * element_size;
+  auto footprint = size + sizeof(Block);
+  auto l = (footprint & (footprint - 1)) == 0
+             ? lowerLog2(footprint)
+             : lowerLog2(footprint) + 1; // round up unless footprint is already a power of two
+
+  while (l < 32 && !_free_blocks[l])
+    l++;
+
+  assert(l < 32);
+
+  Block *tmp;
+  tmp = _free_blocks[l];
+  removeFromBlocks(tmp, l);
+
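+  // Repeatedly split the block in half until the smallest block that still fits the
+  // requested size plus the Block header remains.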
+  while ((tmp->size + sizeof(Block)) / 2 >= size + sizeof(Block))
+  {
+    divideBlock(tmp, l);
+    l--;
+  }
+
+  tmp->is_free = false;
+  tmp->self = tmp;
+  _num_blocks++;
+
+  auto *data = (uint8_t *)(tmp + 1);
+  tensor.set_data_buffer(data);
+}
+
+void BuddyMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+  auto data = tensor.data<void>();
+  auto *tmp = (Block *)((uint8_t *)data - sizeof(Block));
+
+  assert(tmp->self == tmp);
+
+  tmp->is_free = true;
+  addToBlocks(tmp, lowerLog2(tmp->size + sizeof(Block)));
+
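+  // Coalesce the freed block with its free buddy until it can no longer be merged or it
+  // spans the whole pool.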
+  while (tmp)
+    if (tmp->size == _size)
+      break;
+    else
+      tmp = mergeBlock(tmp);
+
+  _num_blocks--;
+  tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
+
+#endif
diff --git a/onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.h b/onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.h
new file mode 100644 (file)
index 0000000..ea56e97
--- /dev/null
@@ -0,0 +1,148 @@
+/* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if 0
+
+#include "MemoryManager.h"
+
+#ifndef LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+
+namespace luci_interpreter
+{
+
+class BuddyMemoryManager : public IMemoryManager
+{
+public:
+  BuddyMemoryManager(uint8_t *memory_start, int32_t memSize);
+
+  void allocate_memory(luci_interpreter::Tensor &tensor) final;
+  void release_memory(luci_interpreter::Tensor &tensor) final;
+
+private:
+  struct Block
+  {
+    Block *next_free;
+    bool is_free;
+    uint32_t size;
+    // debug field
+    Block *self;
+  };
+
+  Block *_start_block;
+  int32_t _num_blocks;
+  uint32_t _size;
+  Block *_free_blocks[32]{};
+
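+  // Returns floor(log2(val)) for val >= 1 (and 0 when val is 0).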
+  static int32_t lowerLog2(uint32_t val)
+  {
+    int32_t i = 0;
+    while (val >>= 1)
+      i++;
+
+    return i;
+  }
+
+  void addToBlocks(Block *block, int32_t l)
+  {
+    if (!block)
+      return;
+
+    block->next_free = _free_blocks[l];
+    _free_blocks[l] = block;
+  }
+
+  void removeFromBlocks(const Block *block, int32_t l)
+  {
+    if (!block)
+      return;
+
+    Block *tmp = _free_blocks[l];
+
+    if (block == tmp)
+    {
+      _free_blocks[l] = block->next_free;
+      return;
+    }
+
+    while (tmp)
+    {
+      if (tmp->next_free == block)
+      {
+        tmp->next_free = block->next_free;
+        return;
+      }
+
+      tmp = tmp->next_free;
+    }
+  }
+
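+  // Splits a block of order l in two; the second half (its buddy) goes on the free list
+  // at order l - 1 while the caller keeps the first half.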
+  void divideBlock(Block *block, int32_t l)
+  {
+    int32_t size = ((block->size + sizeof(Block)) / 2) - sizeof(Block);
+
+    removeFromBlocks(block, l);
+
+    // there is no need to add to the free_blocks list here
+    block->is_free = true;
+    block->size = size;
+    block->self = block;
+
+    Block *buddy;
+    buddy = (Block *)((uint8_t *)block + sizeof(Block) + size);
+    buddy->is_free = true;
+    buddy->size = size;
+    buddy->self = buddy;
+
+    addToBlocks(buddy, l - 1);
+  }
+
+  Block *mergeBlock(Block *block)
+  {
+    Block *buddy;
+
+    const int32_t l = lowerLog2(block->size + sizeof(Block));
+
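+    // The buddy of a block of size 2^l sits at the block's offset from the pool start with
+    // bit l toggled, so it can be located by XOR-ing that offset with (1 << l).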
+    const int64_t address = ((uint8_t *)block - (uint8_t *)_start_block);
+    buddy = (Block *)((address ^ (1 << l)) + (uint8_t *)_start_block);
+
+    if (!buddy->is_free || buddy->size != block->size)
+      return nullptr;
+
+    if (block > buddy)
+    {
+      Block *x = block;
+      block = buddy;
+      buddy = x;
+    }
+
+    removeFromBlocks(block, l);
+    removeFromBlocks(buddy, l);
+
+    block->size = block->size * 2 + sizeof(Block);
+    block->is_free = true;
+    block->self = block;
+
+    addToBlocks(block, l + 1);
+
+    return block;
+  }
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+
+#endif
diff --git a/onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.test.cpp b/onert-micro/luci-interpreter/src/memory_managers/BuddyMemoryManager.test.cpp
new file mode 100644 (file)
index 0000000..996b36d
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if 0
+
+#include "BuddyMemoryManager.h"
+#include <gtest/gtest.h>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(BuddyMemoryManager, basic)
+{
+  auto mem_pool = std::make_unique<uint8_t[]>(200);
+  auto buddy_memory_manager = std::make_unique<BuddyMemoryManager>(mem_pool.get(), 130);
+  Tensor first_tensor(DataType::U8, Shape({8}), AffineQuantization{}, "first_tensor");
+
+  buddy_memory_manager->allocate_memory(first_tensor);
+
+  uint8_t data_1[] = {1, 2, 3, 4, 5, 6, 7, 8};
+
+  first_tensor.writeData(data_1, 8);
+  uint8_t array_1[8];
+  first_tensor.readData(array_1, 8);
+  for (int i = 0; i < 8; i++)
+  {
+    EXPECT_EQ(data_1[i], array_1[i]);
+  }
+
+  Tensor second_tensor(DataType::U8, Shape({2, 5}), AffineQuantization{}, "second_tensor");
+  buddy_memory_manager->allocate_memory(second_tensor);
+
+  uint8_t data_2[2][5] = {{11, 22, 33, 44, 55}, {12, 23, 34, 45, 56}};
+  second_tensor.writeData(data_2, 10);
+
+  uint8_t array_2[2][5];
+  second_tensor.readData(array_2, 10);
+  for (int i = 0; i < 2; i++)
+  {
+    for (int j = 0; j < 5; j++)
+    {
+      EXPECT_EQ(data_2[i][j], array_2[i][j]);
+    }
+  }
+
+  buddy_memory_manager->release_memory(first_tensor);
+  EXPECT_EQ(first_tensor.data<void>(), nullptr);
+
+  buddy_memory_manager->release_memory(second_tensor);
+  EXPECT_EQ(second_tensor.data<void>(), nullptr);
+}
+
+} // namespace
+} // namespace luci_interpreter
+
+#endif
diff --git a/onert-micro/luci-interpreter/src/memory_managers/CMakeLists.txt b/onert-micro/luci-interpreter/src/memory_managers/CMakeLists.txt
new file mode 100644 (file)
index 0000000..e783d8d
--- /dev/null
@@ -0,0 +1,21 @@
+set(SOURCES
+        "SimpleMemoryManager.h" SimpleMemoryManager.cpp
+        "TestMemoryManager.h" TestMemoryManager.cpp
+        "BuddyMemoryManager.h" BuddyMemoryManager.cpp
+        "StaticMemoryManager.h" StaticMemoryManager.cpp)
+
+add_library(${LUCI_INTERPRETER_MEMORY_MANAGER} STATIC ${SOURCES})
+target_include_directories(${LUCI_INTERPRETER_MEMORY_MANAGER} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_MEMORY_MANAGER} PUBLIC "luci_micro_circle_reader${READER_SUFFIX}")
+target_link_libraries(${LUCI_INTERPRETER_MEMORY_MANAGER} PUBLIC luci_micro_circle_schema)
+
+if(NOT ENABLE_TEST)
+    return()
+endif(NOT ENABLE_TEST)
+
+set(TEST_SOURCES BuddyMemoryManager.test.cpp)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(buddy_manager_test_micro ${TEST_SOURCES})
+target_link_libraries(buddy_manager_test_micro ${LUCI_INTERPRETER_BINARY})
diff --git a/onert-micro/luci-interpreter/src/memory_managers/SimpleMemoryManager.cpp b/onert-micro/luci-interpreter/src/memory_managers/SimpleMemoryManager.cpp
new file mode 100644 (file)
index 0000000..e8caa7c
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef USE_STATIC_ALLOC
+
+#include "SimpleMemoryManager.h"
+
+namespace luci_interpreter
+{
+
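+// Allocates a fresh heap buffer large enough for the tensor's data; the caller owns the
+// buffer and is expected to hand it back to release_memory.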
+uint8_t *SimpleMemoryManager::allocate_memory(const circle::Tensor *tensor)
+{
+  const auto element_size = getDataTypeSize(Tensor::element_type(tensor));
+  const auto num_elements = Tensor::num_elements(tensor);
+
+  assert(element_size * num_elements > 0);
+
+  return new uint8_t[num_elements * element_size];
+}
+
+void SimpleMemoryManager::release_memory(uint8_t *data)
+{
+  if (data == nullptr)
+    return;
+
+  delete[] data;
+}
+
+} // namespace luci_interpreter
+
+#endif // USE_STATIC_ALLOC
diff --git a/onert-micro/luci-interpreter/src/memory_managers/SimpleMemoryManager.h b/onert-micro/luci-interpreter/src/memory_managers/SimpleMemoryManager.h
new file mode 100644 (file)
index 0000000..0817fb2
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef USE_STATIC_ALLOC
+#ifndef LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+
+#include "luci_interpreter/core/DataType.h"
+#include "luci_interpreter/core/Tensor.h"
+
+#include <cassert>
+
+namespace luci_interpreter
+{
+
+class SimpleMemoryManager
+{
+public:
+  uint8_t *allocate_memory(const circle::Tensor *tensor);
+  void release_memory(uint8_t *data);
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+#endif // USE_STATIC_ALLOC
diff --git a/onert-micro/luci-interpreter/src/memory_managers/StaticMemoryManager.cpp b/onert-micro/luci-interpreter/src/memory_managers/StaticMemoryManager.cpp
new file mode 100644 (file)
index 0000000..08d0850
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef USE_STATIC_ALLOC
+
+#include "StaticMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+uint8_t *StaticMemoryManager::allocate_memory(int32_t offset)
+{
+  assert(_buffer_ptr != nullptr);
+  return _buffer_ptr + offset;
+}
+
+uint8_t *StaticMemoryManager::allocate_memory_for_input(int32_t offset)
+{
+  assert(_input_buffer_ptr != nullptr);
+  return _input_buffer_ptr + offset;
+}
+
+uint8_t *StaticMemoryManager::allocate_memory_for_output(int32_t offset)
+{
+  assert(_output_buffer_ptr != nullptr);
+  return _output_buffer_ptr + offset;
+}
+
+void StaticMemoryManager::allocate_input_buf()
+{
+  assert(_input_req_size > 0);
+  if (_input_buffer_ptr == nullptr)
+    _input_buffer_ptr = new uint8_t[_input_req_size];
+}
+
+void StaticMemoryManager::allocate_output_buf()
+{
+  assert(_output_req_size > 0);
+  if (_output_buffer_ptr == nullptr)
+    _output_buffer_ptr = new uint8_t[_output_req_size];
+}
+
+void StaticMemoryManager::allocate_computing_buf()
+{
+  assert(_buffer_req_size > 0);
+  if (_buffer_ptr == nullptr)
+    _buffer_ptr = new uint8_t[_buffer_req_size];
+}
+
+void StaticMemoryManager::release_computing_buf()
+{
+  delete[] _buffer_ptr;
+  _buffer_ptr = nullptr;
+}
+
+void StaticMemoryManager::release_input_buf()
+{
+  delete[] _input_buffer_ptr;
+  _input_buffer_ptr = nullptr;
+}
+
+void StaticMemoryManager::release_output_buf()
+{
+  delete[] _output_buffer_ptr;
+  _output_buffer_ptr = nullptr;
+}
+
+} // namespace luci_interpreter
+
+#endif // USE_STATIC_ALLOC
diff --git a/onert-micro/luci-interpreter/src/memory_managers/StaticMemoryManager.h b/onert-micro/luci-interpreter/src/memory_managers/StaticMemoryManager.h
new file mode 100644 (file)
index 0000000..2971e38
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef USE_STATIC_ALLOC
+
+#ifndef LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+
+#include "luci_interpreter/core/DataType.h"
+#include "luci_interpreter/core/Tensor.h"
+
+#include <cassert>
+
+namespace luci_interpreter
+{
+
+// Used for allocations in a static buffer, using offsets defined in the luci model.
+class StaticMemoryManager
+{
+public:
+  StaticMemoryManager() = delete;
+
+  // Initializes the static memory manager with precalculated buffer sizes for the input,
+  // output, and intermediate computation buffers.
+  // The Static Memory Manager keeps a separate buffer each for input, output, and
+  // intermediate computations.
+  // TODO remove these *_req_size parameters and read the sizes from the circle file
+  explicit StaticMemoryManager(int32_t input_req_size, int32_t buffer_req_size,
+                               int32_t output_req_size)
+    : _input_buffer_ptr(nullptr), _buffer_ptr(nullptr), _output_buffer_ptr(nullptr),
+      _input_req_size(input_req_size), _buffer_req_size(buffer_req_size),
+      _output_req_size(output_req_size)
+  { /* Do nothing */
+  }
+
+  // Returns a pointer into _buffer_ptr at the given offset (used as a tensor's data pointer)
+  uint8_t *allocate_memory(int32_t offset);
+  // Returns a pointer into the input buffer at the given offset
+  uint8_t *allocate_memory_for_input(int32_t offset);
+  // Returns a pointer into the output buffer at the given offset
+  uint8_t *allocate_memory_for_output(int32_t offset);
+
+  // Methods to allocate the underlying data buffers
+  // Allocates the input buffer of _input_req_size bytes; the resulting pointer is
+  // _input_buffer_ptr
+  void allocate_input_buf();
+  // Allocates the output buffer of _output_req_size bytes; the resulting pointer is
+  // _output_buffer_ptr
+  void allocate_output_buf();
+  // Allocates the intermediate computing buffer of _buffer_req_size bytes; the resulting
+  // pointer is _buffer_ptr
+  void allocate_computing_buf();
+
+  // Releases the intermediate computing buffer
+  void release_computing_buf();
+  // Releases the input buffer
+  void release_input_buf();
+  // Releases the output buffer
+  void release_output_buf();
+
+private:
+  // Stores a pointer to the beginning of the allocated memory buffer.
+  uint8_t *_buffer_ptr;
+  uint8_t *_input_buffer_ptr;
+  uint8_t *_output_buffer_ptr;
+
+  // TODO remove these fields and read the sizes from the circle file
+  int32_t _input_req_size{};
+  int32_t _buffer_req_size{};
+  int32_t _output_req_size{};
+};
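+
+// A minimal usage sketch of the intended allocation flow (the sizes and offset below are
+// hypothetical; real values are expected to come from the memory planner / circle file):
+//
+//   StaticMemoryManager mm(/*input_req_size=*/1024, /*buffer_req_size=*/4096,
+//                          /*output_req_size=*/512);
+//   mm.allocate_input_buf();
+//   mm.allocate_computing_buf();
+//   mm.allocate_output_buf();
+//   uint8_t *tensor_data = mm.allocate_memory(/*offset=*/0); // points into the computing buffer
+//   // ... run inference ...
+//   mm.release_computing_buf();
+//   mm.release_input_buf();
+//   mm.release_output_buf();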
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+
+#endif // USE_STATIC_ALLOC
diff --git a/onert-micro/luci-interpreter/src/memory_managers/TestMemoryManager.cpp b/onert-micro/luci-interpreter/src/memory_managers/TestMemoryManager.cpp
new file mode 100644 (file)
index 0000000..803e038
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO Enable it
+
+#if 0
+
+#include "TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+void TestMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+  if (!tensor.is_allocatable())
+  {
+    return;
+  }
+  if (tensor.is_data_allocated())
+  {
+    release_memory(tensor);
+  }
+  const auto element_size = getDataTypeSize(tensor.element_type());
+  const auto num_elements = tensor.shape().num_elements();
+
+  auto *data = new uint8_t[num_elements * element_size];
+  allocations.push_back(data);
+  tensor.set_data_buffer(data);
+}
+
+void TestMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+  tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
+
+#endif
diff --git a/onert-micro/luci-interpreter/src/memory_managers/TestMemoryManager.h b/onert-micro/luci-interpreter/src/memory_managers/TestMemoryManager.h
new file mode 100644 (file)
index 0000000..25ee38d
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO Enable it
+
+#if 0
+
+#ifndef LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+
+#include "MemoryManager.h"
+
+namespace luci_interpreter
+{
+// Memory manager for use in kernel tests. It eliminates the need to manually delete
+// allocated memory in tests: the manager remembers all of its allocations and deletes
+// them in its destructor.
+class TestMemoryManager : public IMemoryManager
+{
+public:
+  void allocate_memory(luci_interpreter::Tensor &tensor) final;
+  void release_memory(luci_interpreter::Tensor &tensor) final;
+
+  ~TestMemoryManager() override
+  {
+    for (auto allocation : allocations)
+    {
+      delete[] allocation;
+    }
+  }
+
+private:
+  std::vector<uint8_t *> allocations;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+
+#endif
diff --git a/onert-micro/requires.cmake b/onert-micro/requires.cmake
new file mode 100644 (file)
index 0000000..8b13789
--- /dev/null
@@ -0,0 +1 @@
+
diff --git a/onert-micro/standalone/CMakeLists.txt b/onert-micro/standalone/CMakeLists.txt
new file mode 100644 (file)
index 0000000..2dd7ef1
--- /dev/null
@@ -0,0 +1,10 @@
+cmake_minimum_required(VERSION 3.15)
+project(luci_interpreter_micro_standalone)
+
+include(${NNAS_ROOT}/infra/onert-micro/utils.cmake)
+
+nnas_find_package(FlatBuffersSource EXACT 2.0 QUIET)
+include_directories(${FlatBuffersSource_DIR}/include)
+
+# TODO: fix luci/plan for new luci-micro without luci/IR
+add_subdirectory(${NNAS_PROJECT_SOURCE_DIR}/onert-micro/luci-interpreter ${CMAKE_CURRENT_BINARY_DIR}/luci-interpreter)
diff --git a/onert-micro/tests/mbed-os/CMakeLists.txt b/onert-micro/tests/mbed-os/CMakeLists.txt
new file mode 100644 (file)
index 0000000..8785ddf
--- /dev/null
@@ -0,0 +1,194 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 3.9)
+SET(CMAKE_SYSTEM_NAME Generic)
+SET(CMAKE_CROSSCOMPILING TRUE)
+
+# force compiler settings
+SET(CMAKE_C_COMPILER_WORKS TRUE)
+SET(CMAKE_CXX_COMPILER_WORKS TRUE)
+
+# force cmake compilers
+SET(CMAKE_ASM_COMPILER "arm-none-eabi-gcc")
+SET(CMAKE_C_COMPILER "arm-none-eabi-gcc")
+SET(CMAKE_CXX_COMPILER "arm-none-eabi-g++")
+SET(ELF2BIN "arm-none-eabi-objcopy")
+
+
+# if the environment does not specify build type, set to Debug
+IF (NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE "Debug"
+            CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel."
+            FORCE)
+ENDIF ()
+
+# here starts the project
+PROJECT(mbed-os-example-onert-micro C CXX ASM)
+
+# uncomment below to have a verbose build process
+#SET(CMAKE_VERBOSE_MAKEFILE ON)
+
+SET(LD_SYS_LIBS "-Wl,--start-group -lstdc++ -lsupc++ -lm -lc -lgcc -lnosys  -Wl,--end-group")
+
+
+SET(CMAKE_C_FLAGS "-g3 -std=gnu11 -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fmessage-length=0 -fexceptions -ffunction-sections -fdata-sections")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -funsigned-char -MMD -fomit-frame-pointer -Og -DMBED_DEBUG")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMBED_TRAP_ERRORS_ENABLED=1 -DMBED_MINIMAL_PRINTF -mcpu=cortex-m7 -mthumb")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=fpv5-d16 -mfloat-abi=softfp -DMBED_ROM_START=0x8000000")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMBED_ROM_SIZE=0x200000 -DMBED_RAM_START=0x20000000 -DMBED_RAM_SIZE=0x20000")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMBED_RAM1_START=0x24000000 -DMBED_RAM1_SIZE=0x80000")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -include ${CMAKE_CURRENT_SOURCE_DIR}/mbed_config.h")
+
+SET(CMAKE_CXX_FLAGS "-g3 -std=gnu++14 -frtti -Wvla -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmessage-length=0 -fexceptions -ffunction-sections -fdata-sections")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funsigned-char -MMD -fomit-frame-pointer -Og -DMBED_DEBUG")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMBED_TRAP_ERRORS_ENABLED=1 -DMBED_MINIMAL_PRINTF -mcpu=cortex-m7")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mthumb -mfpu=fpv5-d16 -mfloat-abi=softfp -DMBED_ROM_START=0x8000000")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMBED_ROM_SIZE=0x200000 -DMBED_RAM_START=0x20000000 -DMBED_RAM_SIZE=0x20000")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMBED_RAM1_START=0x24000000 -DMBED_RAM1_SIZE=0x80000")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include ${CMAKE_CURRENT_SOURCE_DIR}/mbed_config.h")
+
+SET(CMAKE_ASM_FLAGS "-g3 -x assembler-with-cpp -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
+SET(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -fmessage-length=0 -fexceptions -ffunction-sections -fdata-sections")
+SET(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -funsigned-char -MMD -fomit-frame-pointer -Og -DMBED_DEBUG")
+SET(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -DMBED_TRAP_ERRORS_ENABLED=1 -DMBED_MINIMAL_PRINTF -mcpu=cortex-m7")
+SET(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mthumb -mfpu=fpv5-d16 -mfloat-abi=softfp ")
+SET(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -include ${CMAKE_CURRENT_SOURCE_DIR}/mbed_config.h")
+
+SET(CMAKE_CXX_LINK_FLAGS "-Wl,--gc-sections -Wl,--wrap,main -Wl,--wrap,_malloc_r -Wl,--wrap,_free_r")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -Wl,--wrap,_realloc_r -Wl,--wrap,__memalign_r -Wl,--wrap,__calloc_r")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -Wl,--wrap,exit -Wl,--wrap,atexit -Wl,-n -Wl,--wrap,printf")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -Wl,--wrap,sprintf -Wl,--wrap,snprintf -Wl,--wrap,vprintf")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -Wl,--wrap,vsprintf -Wl,--wrap,vsnprintf -Wl,--wrap,fprintf")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -Wl,--wrap,vfprintf -mcpu=cortex-m7 -mthumb -mfpu=fpv5-d16")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -mfloat-abi=softfp -Wall -Wextra -Wno-unused-parameter")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -Wno-missing-field-initializers -fmessage-length=0 -fexceptions")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -ffunction-sections -fdata-sections -funsigned-char -MMD")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fomit-frame-pointer -Og -DMBED_DEBUG -DMBED_TRAP_ERRORS_ENABLED=1")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -DMBED_MINIMAL_PRINTF -mcpu=cortex-m7 -mthumb -mfpu=fpv5-d16")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -mfloat-abi=softfp -DMBED_ROM_START=0x8000000")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -DMBED_ROM_SIZE=0x200000 -DMBED_RAM_START=0x20000400")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -DMBED_RAM_SIZE=0x1FC00 -DMBED_RAM1_START=0x24000000")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -DMBED_RAM1_SIZE=0x80000 -DMBED_BOOT_STACK_SIZE=1024 -DXIP_ENABLE=0")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} ${LD_SYS_LIBS} -T ${CMAKE_BINARY_DIR}/build_test_pp.link_script.ld")
+
+ADD_DEFINITIONS(
+        -DARM_MATH_CM7
+        -DCOMPONENT_FLASHIAP=1
+        -DDEVICE_ANALOGIN=1
+        -DDEVICE_ANALOGOUT=1
+        -DDEVICE_CAN=1
+        -DDEVICE_CRC=1
+        -DDEVICE_EMAC=1
+        -DDEVICE_FLASH=1
+        -DDEVICE_I2C=1
+        -DDEVICE_I2CSLAVE=1
+        -DDEVICE_I2C_ASYNCH=1
+        -DDEVICE_INTERRUPTIN=1
+        -DDEVICE_LPTICKER=1
+        -DDEVICE_MPU=1
+        -DDEVICE_PORTIN=1
+        -DDEVICE_PORTINOUT=1
+        -DDEVICE_PORTOUT=1
+        -DDEVICE_PWMOUT=1
+        -DDEVICE_RESET_REASON=1
+        -DDEVICE_RTC=1
+        -DDEVICE_SERIAL=1
+        -DDEVICE_SERIAL_FC=1
+        -DDEVICE_SLEEP=1
+        -DDEVICE_SPI=1
+        -DDEVICE_SPISLAVE=1
+        -DDEVICE_SPI_ASYNCH=1
+        -DDEVICE_STDIO_MESSAGES=1
+        -DDEVICE_TRNG=1
+        -DDEVICE_USBDEVICE=1
+        -DDEVICE_USTICKER=1
+        -DDEVICE_WATCHDOG=1
+        -DEXTRA_IDLE_STACK_REQUIRED
+        -DMBED_BUILD_TIMESTAMP=1640167847.81
+        -DMBED_TICKLESS
+        -DSTM32H743xx
+        -DTARGET_CORTEX
+        -DTARGET_CORTEX_M
+        -DTARGET_FF_ARDUINO_UNO
+        -DTARGET_LIKE_CORTEX_M7
+        -DTARGET_LIKE_MBED
+        -DTARGET_M7
+        -DTARGET_MCU_STM32
+        -DTARGET_MCU_STM32H7
+        -DTARGET_MCU_STM32H743xI
+        -DTARGET_NAME=NUCLEO_H743ZI2
+        -DTARGET_NUCLEO_H743ZI2
+        -DTARGET_RELEASE
+        -DTARGET_RTOS_M4_M7
+        -DTARGET_STM
+        -DTARGET_STM32H7
+        -DTARGET_STM32H743xI
+        -DTOOLCHAIN_GCC
+        -DTOOLCHAIN_GCC_ARM
+        -DTRANSACTION_QUEUE_SIZE_SPI=2
+        -DUSE_FULL_LL_DRIVER
+        -DUSE_HAL_DRIVER
+        -D__CMSIS_RTOS
+        -D__CORTEX_M7
+        -D__FPU_PRESENT=1
+        -D__MBED_CMSIS_RTOS_CM
+        -D__MBED__=1
+        -DMBED_MEM_TRACING_ENABLED=0
+)
+
+include(mbed-sources.cmake)
+
+set_sources_mbed(${MbedOSSource_DIR})
+list(APPEND SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/startup_stm32h743xx.S")
+
+
+add_library(mbed_os STATIC ${SOURCES})
+
+target_include_directories_mbed(mbed_os ${MbedOSSource_DIR})
+
+
+SET_TARGET_PROPERTIES(mbed_os PROPERTIES ENABLE_EXPORTS 1)
+# add syslibs dependencies to create the correct linker order
+TARGET_LINK_LIBRARIES(mbed_os -lstdc++ -lsupc++ -lm -lc -lgcc -lnosys)
+
+add_executable(build_test main.cpp)
+
+target_link_libraries(build_test mbed_os)
+target_include_directories_mbed(build_test ${MbedOSSource_DIR})
+
+target_link_libraries(mbed_os "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/core/reader/libluci_micro_circle_reader.a")
+target_link_libraries(mbed_os "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/core/libluci_interpreter_core_micro.a")
+target_link_libraries(mbed_os "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/kernels/libluci_interpreter_kernels_micro.a")
+target_link_libraries(mbed_os "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/kernels/libluci_interpreter_mcu_pal.a")
+target_link_libraries(mbed_os "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/loader/libluci_interpreter_loader_micro.a")
+target_link_libraries(mbed_os "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/libluci_interpreter_micro.a")
+
+target_include_directories(build_test PRIVATE
+        ${ONERTMICRO_SRC_DIR}/luci-interpreter/include
+        ${CMAKE_CURRENT_SOURCE_DIR}
+        ${FlatBuffersSource_DIR}/include
+        )
+
+add_custom_command(TARGET build_test PRE_LINK
+        COMMAND "arm-none-eabi-cpp" -E -P -Wl,--gc-sections -Wl,--wrap,main -Wl,--wrap,_malloc_r
+        -Wl,--wrap,_free_r -Wl,--wrap,_realloc_r -Wl,--wrap,_memalign_r -Wl,--wrap,_calloc_r
+        -Wl,--wrap,exit -Wl,--wrap,atexit -Wl,-n -Wl,--wrap,printf -Wl,--wrap,sprintf
+        -Wl,--wrap,snprintf -Wl,--wrap,vprintf -Wl,--wrap,vsprintf -Wl,--wrap,vsnprintf
+        -Wl,--wrap,fprintf -Wl,--wrap,vfprintf -mcpu=cortex-m7 -mthumb -mfpu=fpv5-d16 -mfloat-abi=softfp
+        -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -fmessage-length=0
+        -fexceptions -ffunction-sections -fdata-sections -funsigned-char -MMD -fomit-frame-pointer
+        -Og -DMBED_DEBUG -DMBED_TRAP_ERRORS_ENABLED=1 -DMBED_MINIMAL_PRINTF -mcpu=cortex-m7 -mthumb
+        -mfpu=fpv5-d16 -mfloat-abi=softfp -DMBED_ROM_START=0x8000000 -DMBED_ROM_SIZE=0x200000
+        -DMBED_RAM_START=0x20000400 -DMBED_RAM_SIZE=0x1FC00 -DMBED_RAM1_START=0x24000000 -DMBED_RAM1_SIZE=0x80000
+        -DMBED_BOOT_STACK_SIZE=1024 -DXIP_ENABLE=0
+        ${MbedOSSource_DIR}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI/TOOLCHAIN_GCC_ARM/STM32H743xI.ld
+        -o ${CMAKE_CURRENT_BINARY_DIR}/build_test_pp.link_script.ld
+
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        BYPRODUCTS "${CMAKE_CURRENT_BINARY_DIR}/build_test_pp.link_script.ld"
+        )
+
+add_custom_command(TARGET build_test POST_BUILD
+        COMMAND ${ELF2BIN} -O binary $<TARGET_FILE:build_test> $<TARGET_FILE:build_test>.bin
+        COMMAND ${CMAKE_COMMAND} -E echo "-- built: $<TARGET_FILE:build_test>.bin"
+        )
diff --git a/onert-micro/tests/mbed-os/main.cpp b/onert-micro/tests/mbed-os/main.cpp
new file mode 100644 (file)
index 0000000..cdbe14b
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// base app for qemu
+static volatile unsigned int *const UART_DR = (unsigned int *)0x40011004;
+
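+// Prints a NUL-terminated string by writing it one byte at a time to the memory-mapped
+// UART data register.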
+void uart_print(const char *s)
+{
+  while (*s != '\0')
+  {
+    *UART_DR = *s;
+    s++;
+  }
+}
+
+int main() { uart_print("Hello, World!\n"); }
diff --git a/onert-micro/tests/mbed-os/mbed-sources.cmake b/onert-micro/tests/mbed-os/mbed-sources.cmake
new file mode 100644 (file)
index 0000000..25f9e31
--- /dev/null
@@ -0,0 +1,1589 @@
+macro(set_sources_mbed)
+    set(SOURCES
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/Include/cmsis_os2.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/Include/os_tick.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Config/RTX_Config.c
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Config/RTX_Config.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Include/rtx_def.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Include/rtx_evr.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Include/rtx_os.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Include1/cmsis_os.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Library/cmsis_os1.c
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/TOOLCHAIN_GCC/TARGET_RTOS_M4_M7/irq_cm4f.S
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_core_c.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_core_ca.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_core_cm.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_delay.c
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_evflags.c
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_evr.c
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_kernel.c
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_lib.c
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_lib.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_memory.c
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_mempool.c
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_msgqueue.c
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_mutex.c
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_semaphore.c
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_system.c
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_thread.c
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_timer.c
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/Source/os_systick.c
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/Source/os_tick_ptim.c
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cachel1_armv7.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_armcc.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_armclang.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_armclang_ltm.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_compiler.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_gcc.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_iccarm.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_version.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_armv81mml.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_armv8mbl.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_armv8mml.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm0.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm0plus.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm1.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm23.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm3.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm33.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm35p.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm4.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm55.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm7.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_sc000.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_sc300.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/mpu_armv7.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/mpu_armv8.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/pmu_armv8.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/tz_context.h
+            ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Source/mbed_tz_context.c
+            ${ARGV0}/cmsis/device/RTE/include/RTE_Components.h
+            ${ARGV0}/cmsis/device/mbed_cmsis_conf.h
+            ${ARGV0}/cmsis/device/rtos/TOOLCHAIN_GCC_ARM/mbed_boot_gcc_arm.c
+            ${ARGV0}/cmsis/device/rtos/include/mbed_boot.h
+            ${ARGV0}/cmsis/device/rtos/include/mbed_rtx_conf.h
+            ${ARGV0}/cmsis/device/rtos/include/mbed_rtx_storage.h
+            ${ARGV0}/cmsis/device/rtos/source/mbed_boot.c
+            ${ARGV0}/cmsis/device/rtos/source/mbed_rtos_rtx.c
+            ${ARGV0}/cmsis/device/rtos/source/mbed_rtx_handlers.c
+            ${ARGV0}/cmsis/device/rtos/source/mbed_rtx_idle.cpp
+            ${ARGV0}/connectivity/cellular/include/cellular/framework/API/ATHandler.h
+            ${ARGV0}/connectivity/cellular/include/cellular/framework/API/CellularContext.h
+            ${ARGV0}/connectivity/cellular/include/cellular/framework/API/CellularDevice.h
+            ${ARGV0}/connectivity/cellular/include/cellular/framework/API/CellularInformation.h
+            ${ARGV0}/connectivity/cellular/include/cellular/framework/API/CellularNetwork.h
+            ${ARGV0}/connectivity/cellular/include/cellular/framework/API/CellularSMS.h
+            ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_CellularContext.h
+            ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_CellularDevice.h
+            ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_CellularInformation.h
+            ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_CellularNetwork.h
+            ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_CellularSMS.h
+            ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_CellularStack.h
+            ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_ControlPlane_netif.h
+            ${ARGV0}/connectivity/cellular/include/cellular/framework/common/APN_db.h
+            ${ARGV0}/connectivity/cellular/include/cellular/framework/common/CellularCommon.h
+            ${ARGV0}/connectivity/cellular/include/cellular/framework/common/CellularList.h
+            ${ARGV0}/connectivity/cellular/include/cellular/framework/common/CellularLog.h
+            ${ARGV0}/connectivity/cellular/include/cellular/framework/common/CellularUtil.h
+            ${ARGV0}/connectivity/cellular/include/cellular/framework/device/CellularStateMachine.h
+            ${ARGV0}/connectivity/cellular/source/framework/AT/AT_CellularContext.cpp
+            ${ARGV0}/connectivity/cellular/source/framework/AT/AT_CellularDevice.cpp
+            ${ARGV0}/connectivity/cellular/source/framework/AT/AT_CellularInformation.cpp
+            ${ARGV0}/connectivity/cellular/source/framework/AT/AT_CellularNetwork.cpp
+            ${ARGV0}/connectivity/cellular/source/framework/AT/AT_CellularSMS.cpp
+            ${ARGV0}/connectivity/cellular/source/framework/AT/AT_CellularStack.cpp
+            ${ARGV0}/connectivity/cellular/source/framework/AT/AT_ControlPlane_netif.cpp
+            ${ARGV0}/connectivity/cellular/source/framework/common/APN_db.cpp
+            ${ARGV0}/connectivity/cellular/source/framework/common/CellularLog.cpp
+            ${ARGV0}/connectivity/cellular/source/framework/common/CellularUtil.cpp
+            ${ARGV0}/connectivity/cellular/source/framework/device/ATHandler.cpp
+            ${ARGV0}/connectivity/cellular/source/framework/device/CellularContext.cpp
+            ${ARGV0}/connectivity/cellular/source/framework/device/CellularDevice.cpp
+            ${ARGV0}/connectivity/cellular/source/framework/device/CellularStateMachine.cpp
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/atmel-rf-driver/NanostackRfPhyAtmel.h
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/AT86RF215Reg.h
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/AT86RFReg.h
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/NanostackRfPhyAT86RF215.cpp
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/NanostackRfPhyAtmel.cpp
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/at24mac.cpp
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/at24mac.h
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/rfbits.h
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/mcr20a-rf-driver/NanostackRfPhyMcr20a.h
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source/MCR20Drv.c
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source/MCR20Drv.h
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source/MCR20Overwrites.h
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source/MCR20Reg.h
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source/NanostackRfPhyMcr20a.cpp
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source/XcvrSpi.h
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source/NanostackRfPhys2lp.cpp
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source/at24mac_s2lp.cpp
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source/at24mac_s2lp.h
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source/rf_configuration.c
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source/rf_configuration.h
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source/s2lpReg.h
+            ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/stm-s2lp-rf-driver/NanostackRfPhys2lp.h
+            ${ARGV0}/connectivity/drivers/cellular/Altair/ALT1250/PPP/ALT1250_PPP.cpp
+            ${ARGV0}/connectivity/drivers/cellular/Altair/ALT1250/PPP/ALT1250_PPP.h
+            ${ARGV0}/connectivity/drivers/cellular/Altair/ALT1250/PPP/ALT1250_PPP_CellularContext.cpp
+            ${ARGV0}/connectivity/drivers/cellular/Altair/ALT1250/PPP/ALT1250_PPP_CellularContext.h
+            ${ARGV0}/connectivity/drivers/cellular/Altair/ALT1250/PPP/ALT1250_PPP_CellularNetwork.cpp
+            ${ARGV0}/connectivity/drivers/cellular/Altair/ALT1250/PPP/ALT1250_PPP_CellularNetwork.h
+            ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION.cpp
+            ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION.h
+            ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION_CellularContext.cpp
+            ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION_CellularContext.h
+            ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION_CellularInformation.cpp
+            ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION_CellularInformation.h
+            ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION_CellularStack.cpp
+            ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION_CellularStack.h
+            ${ARGV0}/connectivity/drivers/cellular/GENERIC/GENERIC_AT3GPP/GENERIC_AT3GPP.cpp
+            ${ARGV0}/connectivity/drivers/cellular/GENERIC/GENERIC_AT3GPP/GENERIC_AT3GPP.h
+            ${ARGV0}/connectivity/drivers/cellular/MultiTech/DragonflyNano/PPP/SARA4_PPP.cpp
+            ${ARGV0}/connectivity/drivers/cellular/MultiTech/DragonflyNano/PPP/SARA4_PPP.h
+            ${ARGV0}/connectivity/drivers/cellular/MultiTech/DragonflyNano/PPP/SARA4_PPP_CellularNetwork.cpp
+            ${ARGV0}/connectivity/drivers/cellular/MultiTech/DragonflyNano/PPP/SARA4_PPP_CellularNetwork.h
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95.cpp
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95.h
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularContext.cpp
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularContext.h
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularInformation.cpp
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularInformation.h
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularNetwork.cpp
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularNetwork.h
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularStack.cpp
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularStack.h
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96.cpp
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96.h
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularContext.cpp
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularContext.h
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularInformation.cpp
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularInformation.h
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularNetwork.cpp
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularNetwork.h
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularStack.cpp
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularStack.h
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_ControlPlane_netif.cpp
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_ControlPlane_netif.h
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/EC2X/QUECTEL_EC2X.cpp
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/EC2X/QUECTEL_EC2X.h
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26.cpp
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26.h
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26_CellularContext.cpp
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26_CellularContext.h
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26_CellularInformation.cpp
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26_CellularInformation.h
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26_CellularStack.cpp
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26_CellularStack.h
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/UG96/QUECTEL_UG96.cpp
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/UG96/QUECTEL_UG96.h
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/UG96/QUECTEL_UG96_CellularContext.cpp
+            ${ARGV0}/connectivity/drivers/cellular/QUECTEL/UG96/QUECTEL_UG96_CellularContext.h
+            ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT.cpp
+            ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT.h
+            ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT_CellularContext.cpp
+            ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT_CellularContext.h
+            ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT_CellularNetwork.cpp
+            ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT_CellularNetwork.h
+            ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT_CellularStack.cpp
+            ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT_CellularStack.h
+            ${ARGV0}/connectivity/drivers/cellular/TELIT/HE910/TELIT_HE910.cpp
+            ${ARGV0}/connectivity/drivers/cellular/TELIT/HE910/TELIT_HE910.h
+            ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310.cpp
+            ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310.h
+            ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310_CellularContext.cpp
+            ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310_CellularContext.h
+            ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310_CellularNetwork.cpp
+            ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310_CellularNetwork.h
+            ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310_CellularStack.cpp
+            ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310_CellularStack.h
+            ${ARGV0}/connectivity/drivers/cellular/TELIT/ME910/TELIT_ME910.cpp
+            ${ARGV0}/connectivity/drivers/cellular/TELIT/ME910/TELIT_ME910.h
+            ${ARGV0}/connectivity/drivers/cellular/TELIT/ME910/TELIT_ME910_CellularContext.cpp
+            ${ARGV0}/connectivity/drivers/cellular/TELIT/ME910/TELIT_ME910_CellularContext.h
+            ${ARGV0}/connectivity/drivers/cellular/TELIT/ME910/TELIT_ME910_CellularNetwork.cpp
+            ${ARGV0}/connectivity/drivers/cellular/TELIT/ME910/TELIT_ME910_CellularNetwork.h
+            ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT.cpp
+            ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT.h
+            ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT_CellularContext.cpp
+            ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT_CellularContext.h
+            ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT_CellularNetwork.cpp
+            ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT_CellularNetwork.h
+            ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT_CellularStack.cpp
+            ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT_CellularStack.h
+            ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX.cpp
+            ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX.h
+            ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularContext.cpp
+            ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularContext.h
+            ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularNetwork.cpp
+            ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularNetwork.h
+            ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularSMS.cpp
+            ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularSMS.h
+            ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularStack.cpp
+            ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularStack.h
+            ${ARGV0}/connectivity/drivers/cellular/UBLOX/PPP/UBLOX_PPP.cpp
+            ${ARGV0}/connectivity/drivers/cellular/UBLOX/PPP/UBLOX_PPP.h
+            ${ARGV0}/connectivity/drivers/emac/TARGET_STM/TARGET_STM32H7/TARGET_NUCLEO_H743ZI2/stm32h7_eth_init.c
+            ${ARGV0}/connectivity/drivers/emac/TARGET_STM/TARGET_STM32H7/lan8742/lan8742.c
+            ${ARGV0}/connectivity/drivers/emac/TARGET_STM/TARGET_STM32H7/lan8742/lan8742.h
+            ${ARGV0}/connectivity/drivers/emac/TARGET_STM/TARGET_STM32H7/stm32xx_emac_config.h
+            ${ARGV0}/connectivity/drivers/emac/TARGET_STM/stm32xx_emac.cpp
+            ${ARGV0}/connectivity/drivers/emac/TARGET_STM/stm32xx_emac.h
+            ${ARGV0}/connectivity/drivers/emac/TARGET_STM/stm32xx_eth_irq_callback.cpp
+            ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/aes_alt.cpp
+            ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/aes_alt.h
+            ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/aes_alt_stm32l4.c
+            ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/aes_alt_stm32l4.h
+            ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/ccm_alt.cpp
+            ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/ccm_alt.h
+            ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/cryp_stm32.c
+            ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/cryp_stm32.h
+            ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/gcm_alt.cpp
+            ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/gcm_alt.h
+            ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/hash_stm32.c
+            ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/hash_stm32.h
+            ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/md5_alt.cpp
+            ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/md5_alt.h
+            ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/sha1_alt.cpp
+            ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/sha1_alt.h
+            ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/sha256_alt.cpp
+            ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/sha256_alt.h
+            ${ARGV0}/connectivity/drivers/nfc/PN512/include/nfc/controllers/PN512Driver.h
+            ${ARGV0}/connectivity/drivers/nfc/PN512/include/nfc/controllers/PN512SPITransportDriver.h
+            ${ARGV0}/connectivity/drivers/nfc/PN512/include/nfc/controllers/PN512TransportDriver.h
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/PN512Driver.cpp
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/PN512SPITransportDriver.cpp
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/PN512TransportDriver.cpp
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512.c
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512.h
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_callback.h
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_cmd.c
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_cmd.h
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_hw.c
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_hw.h
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_internal.h
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_irq.c
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_irq.h
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_poll.c
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_poll.h
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_registers.c
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_registers.h
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_rf.c
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_rf.h
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_timer.c
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_timer.h
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_transceive.c
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_transceive.h
+            ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_types.h
+            ${ARGV0}/connectivity/drivers/wifi/esp8266-driver/ESP8266/ESP8266.cpp
+            ${ARGV0}/connectivity/drivers/wifi/esp8266-driver/ESP8266/ESP8266.h
+            ${ARGV0}/connectivity/drivers/wifi/esp8266-driver/ESP8266Interface.cpp
+            ${ARGV0}/connectivity/drivers/wifi/esp8266-driver/ESP8266Interface.h
+            ${ARGV0}/connectivity/libraries/mbed-coap/mbed-coap/sn_coap_header.h
+            ${ARGV0}/connectivity/libraries/mbed-coap/mbed-coap/sn_coap_protocol.h
+            ${ARGV0}/connectivity/libraries/mbed-coap/mbed-coap/sn_config.h
+            ${ARGV0}/connectivity/libraries/mbed-coap/source/include/sn_coap_header_internal.h
+            ${ARGV0}/connectivity/libraries/mbed-coap/source/include/sn_coap_protocol_internal.h
+            ${ARGV0}/connectivity/libraries/mbed-coap/source/sn_coap_builder.c
+            ${ARGV0}/connectivity/libraries/mbed-coap/source/sn_coap_header_check.c
+            ${ARGV0}/connectivity/libraries/mbed-coap/source/sn_coap_parser.c
+            ${ARGV0}/connectivity/libraries/mbed-coap/source/sn_coap_protocol.c
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/common_functions.h
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/ip4string.h
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/ip6string.h
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/ip_fsc.h
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/ns_list.h
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/ns_nvm_helper.h
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/ns_types.h
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/nsdynmemLIB.h
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/nsdynmem_tracker.h
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/nsdynmem_tracker_lib.h
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/platform/arm_hal_interrupt.h
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/platform/arm_hal_nvm.h
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/source/IPv6_fcf_lib/ip_fsc.c
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/source/libBits/common_functions.c
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/source/libList/ns_list.c
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/source/libip4string/ip4tos.c
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/source/libip4string/stoip4.c
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/source/libip6string/ip6tos.c
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/source/libip6string/stoip6.c
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/source/nsdynmemLIB/nsdynmemLIB.c
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/source/nsdynmemtracker/nsdynmem_tracker_lib.c
+            ${ARGV0}/connectivity/libraries/nanostack-libservice/source/nvmHelper/ns_nvm_helper.c
+            ${ARGV0}/connectivity/libraries/ppp/include/polarssl/arc4.h
+            ${ARGV0}/connectivity/libraries/ppp/include/polarssl/des.h
+            ${ARGV0}/connectivity/libraries/ppp/include/polarssl/md4.h
+            ${ARGV0}/connectivity/libraries/ppp/include/polarssl/md5.h
+            ${ARGV0}/connectivity/libraries/ppp/include/polarssl/sha1.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/ccp.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/chap-md5.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/chap-new.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/chap_ms.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/eap.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/ecp.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/eui64.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/fsm.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/ipcp.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/ipv6cp.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/lcp.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/magic.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/mppe.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/ppp.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/ppp_impl.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/ppp_opts.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/ppp_service.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/ppp_service_if.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/pppapi.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/pppcrypt.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/pppdebug.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/pppoe.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/pppol2tp.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/pppos.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/upap.h
+            ${ARGV0}/connectivity/libraries/ppp/include/ppp/vj.h
+            ${ARGV0}/connectivity/libraries/ppp/source/auth.c
+            ${ARGV0}/connectivity/libraries/ppp/source/ccp.c
+            ${ARGV0}/connectivity/libraries/ppp/source/chap-md5.c
+            ${ARGV0}/connectivity/libraries/ppp/source/chap-new.c
+            ${ARGV0}/connectivity/libraries/ppp/source/chap_ms.c
+            ${ARGV0}/connectivity/libraries/ppp/source/demand.c
+            ${ARGV0}/connectivity/libraries/ppp/source/eap.c
+            ${ARGV0}/connectivity/libraries/ppp/source/eui64.c
+            ${ARGV0}/connectivity/libraries/ppp/source/fsm.c
+            ${ARGV0}/connectivity/libraries/ppp/source/ipcp.c
+            ${ARGV0}/connectivity/libraries/ppp/source/ipv6cp.c
+            ${ARGV0}/connectivity/libraries/ppp/source/lcp.c
+            ${ARGV0}/connectivity/libraries/ppp/source/magic.c
+            ${ARGV0}/connectivity/libraries/ppp/source/mppe.c
+            ${ARGV0}/connectivity/libraries/ppp/source/multilink.c
+            ${ARGV0}/connectivity/libraries/ppp/source/polarssl/ppp_arc4.c
+            ${ARGV0}/connectivity/libraries/ppp/source/polarssl/ppp_des.c
+            ${ARGV0}/connectivity/libraries/ppp/source/polarssl/ppp_md4.c
+            ${ARGV0}/connectivity/libraries/ppp/source/polarssl/ppp_md5.c
+            ${ARGV0}/connectivity/libraries/ppp/source/polarssl/ppp_sha1.c
+            ${ARGV0}/connectivity/libraries/ppp/source/ppp.c
+            ${ARGV0}/connectivity/libraries/ppp/source/ppp_ecp.c
+            ${ARGV0}/connectivity/libraries/ppp/source/ppp_service.cpp
+            ${ARGV0}/connectivity/libraries/ppp/source/ppp_service_if.cpp
+            ${ARGV0}/connectivity/libraries/ppp/source/pppapi.c
+            ${ARGV0}/connectivity/libraries/ppp/source/pppcrypt.c
+            ${ARGV0}/connectivity/libraries/ppp/source/pppoe.c
+            ${ARGV0}/connectivity/libraries/ppp/source/pppol2tp.c
+            ${ARGV0}/connectivity/libraries/ppp/source/pppos.cpp
+            ${ARGV0}/connectivity/libraries/ppp/source/upap.c
+            ${ARGV0}/connectivity/libraries/ppp/source/utils.c
+            ${ARGV0}/connectivity/libraries/ppp/source/vj.c
+            ${ARGV0}/connectivity/lorawan/include/lorawan/LoRaRadio.h
+            ${ARGV0}/connectivity/lorawan/include/lorawan/LoRaWANBase.h
+            ${ARGV0}/connectivity/lorawan/include/lorawan/LoRaWANInterface.h
+            ${ARGV0}/connectivity/lorawan/include/lorawan/LoRaWANStack.h
+            ${ARGV0}/connectivity/lorawan/include/lorawan/lorawan_types.h
+            ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMac.cpp
+            ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMac.h
+            ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMacChannelPlan.cpp
+            ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMacChannelPlan.h
+            ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMacCommand.cpp
+            ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMacCommand.h
+            ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMacCrypto.cpp
+            ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMacCrypto.h
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHY.cpp
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHY.h
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYAS923.cpp
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYAS923.h
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYAU915.cpp
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYAU915.h
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYCN470.cpp
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYCN470.h
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYCN779.cpp
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYCN779.h
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYEU433.cpp
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYEU433.h
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYEU868.cpp
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYEU868.h
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYIN865.cpp
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYIN865.h
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYKR920.cpp
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYKR920.h
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYUS915.cpp
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYUS915.h
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/lora_phy_ds.h
+            ${ARGV0}/connectivity/lorawan/lorastack/phy/loraphy_target.h
+            ${ARGV0}/connectivity/lorawan/source/LoRaWANInterface.cpp
+            ${ARGV0}/connectivity/lorawan/source/LoRaWANStack.cpp
+            ${ARGV0}/connectivity/lorawan/system/LoRaWANTimer.cpp
+            ${ARGV0}/connectivity/lorawan/system/LoRaWANTimer.h
+            ${ARGV0}/connectivity/lorawan/system/lorawan_data_structures.h
+            ${ARGV0}/connectivity/nfc/include/nfc/NFC.h
+            ${ARGV0}/connectivity/nfc/include/nfc/NFCController.h
+            ${ARGV0}/connectivity/nfc/include/nfc/NFCControllerDriver.h
+            ${ARGV0}/connectivity/nfc/include/nfc/NFCDefinitions.h
+            ${ARGV0}/connectivity/nfc/include/nfc/NFCEEPROM.h
+            ${ARGV0}/connectivity/nfc/include/nfc/NFCEEPROMDriver.h
+            ${ARGV0}/connectivity/nfc/include/nfc/NFCNDEFCapable.h
+            ${ARGV0}/connectivity/nfc/include/nfc/NFCRemoteEndpoint.h
+            ${ARGV0}/connectivity/nfc/include/nfc/NFCRemoteInitiator.h
+            ${ARGV0}/connectivity/nfc/include/nfc/NFCTarget.h
+            ${ARGV0}/connectivity/nfc/include/nfc/Type4RemoteInitiator.h
+            ${ARGV0}/connectivity/nfc/include/nfc/ndef/MessageBuilder.h
+            ${ARGV0}/connectivity/nfc/include/nfc/ndef/MessageParser.h
+            ${ARGV0}/connectivity/nfc/include/nfc/ndef/Record.h
+            ${ARGV0}/connectivity/nfc/include/nfc/ndef/RecordParser.h
+            ${ARGV0}/connectivity/nfc/include/nfc/ndef/common/Mime.h
+            ${ARGV0}/connectivity/nfc/include/nfc/ndef/common/SimpleMessageParser.h
+            ${ARGV0}/connectivity/nfc/include/nfc/ndef/common/Text.h
+            ${ARGV0}/connectivity/nfc/include/nfc/ndef/common/URI.h
+            ${ARGV0}/connectivity/nfc/include/nfc/ndef/common/util.h
+            ${ARGV0}/connectivity/nfc/libraries/acore/acore/ac_buffer.h
+            ${ARGV0}/connectivity/nfc/libraries/acore/acore/ac_buffer_builder.h
+            ${ARGV0}/connectivity/nfc/libraries/acore/acore/ac_buffer_reader.h
+            ${ARGV0}/connectivity/nfc/libraries/acore/acore/ac_debug.h
+            ${ARGV0}/connectivity/nfc/libraries/acore/acore/ac_macros.h
+            ${ARGV0}/connectivity/nfc/libraries/acore/acore/ac_stream.h
+            ${ARGV0}/connectivity/nfc/libraries/acore/source/ac_buffer.c
+            ${ARGV0}/connectivity/nfc/libraries/acore/source/ac_buffer_builder.c
+            ${ARGV0}/connectivity/nfc/libraries/acore/source/ac_buffer_reader.c
+            ${ARGV0}/connectivity/nfc/libraries/acore/source/ac_stream.c
+            ${ARGV0}/connectivity/nfc/libraries/stack/ndef/ndef.c
+            ${ARGV0}/connectivity/nfc/libraries/stack/ndef/ndef.h
+            ${ARGV0}/connectivity/nfc/libraries/stack/nfc_common.h
+            ${ARGV0}/connectivity/nfc/libraries/stack/nfc_errors.h
+            ${ARGV0}/connectivity/nfc/libraries/stack/platform/nfc_debug.h
+            ${ARGV0}/connectivity/nfc/libraries/stack/platform/nfc_scheduler.c
+            ${ARGV0}/connectivity/nfc/libraries/stack/platform/nfc_scheduler.h
+            ${ARGV0}/connectivity/nfc/libraries/stack/platform/nfc_transport.c
+            ${ARGV0}/connectivity/nfc/libraries/stack/platform/nfc_transport.h
+            ${ARGV0}/connectivity/nfc/libraries/stack/tech/iso7816/iso7816.c
+            ${ARGV0}/connectivity/nfc/libraries/stack/tech/iso7816/iso7816.h
+            ${ARGV0}/connectivity/nfc/libraries/stack/tech/iso7816/iso7816_app.c
+            ${ARGV0}/connectivity/nfc/libraries/stack/tech/iso7816/iso7816_app.h
+            ${ARGV0}/connectivity/nfc/libraries/stack/tech/iso7816/iso7816_defs.h
+            ${ARGV0}/connectivity/nfc/libraries/stack/tech/isodep/isodep.h
+            ${ARGV0}/connectivity/nfc/libraries/stack/tech/isodep/isodep_target.c
+            ${ARGV0}/connectivity/nfc/libraries/stack/tech/isodep/isodep_target.h
+            ${ARGV0}/connectivity/nfc/libraries/stack/tech/type4/type4_target.c
+            ${ARGV0}/connectivity/nfc/libraries/stack/tech/type4/type4_target.h
+            ${ARGV0}/connectivity/nfc/libraries/stack/transceiver/protocols.h
+            ${ARGV0}/connectivity/nfc/libraries/stack/transceiver/transceiver.c
+            ${ARGV0}/connectivity/nfc/libraries/stack/transceiver/transceiver.h
+            ${ARGV0}/connectivity/nfc/libraries/stack/transceiver/transceiver_internal.h
+            ${ARGV0}/connectivity/nfc/source/NFCController.cpp
+            ${ARGV0}/connectivity/nfc/source/NFCControllerDriver.cpp
+            ${ARGV0}/connectivity/nfc/source/NFCEEPROM.cpp
+            ${ARGV0}/connectivity/nfc/source/NFCEEPROMDriver.cpp
+            ${ARGV0}/connectivity/nfc/source/NFCNDEFCapable.cpp
+            ${ARGV0}/connectivity/nfc/source/NFCRemoteEndpoint.cpp
+            ${ARGV0}/connectivity/nfc/source/NFCRemoteInitiator.cpp
+            ${ARGV0}/connectivity/nfc/source/NFCTarget.cpp
+            ${ARGV0}/connectivity/nfc/source/Type4RemoteInitiator.cpp
+            ${ARGV0}/connectivity/nfc/source/ndef/MessageBuilder.cpp
+            ${ARGV0}/connectivity/nfc/source/ndef/MessageParser.cpp
+            ${ARGV0}/connectivity/nfc/source/ndef/RecordParser.cpp
+            ${ARGV0}/connectivity/nfc/source/ndef/common/Mime.cpp
+            ${ARGV0}/connectivity/nfc/source/ndef/common/SimpleMessageParser.cpp
+            ${ARGV0}/connectivity/nfc/source/ndef/common/Text.cpp
+            ${ARGV0}/connectivity/nfc/source/ndef/common/URI.cpp
+            ${ARGV0}/connectivity/nfc/source/ndef/common/util.cpp
+            ${ARGV0}/drivers/device_key/include/device_key/DeviceKey.h
+            ${ARGV0}/drivers/device_key/source/DeviceKey.cpp
+            ${ARGV0}/drivers/include/drivers/AnalogIn.h
+            ${ARGV0}/drivers/include/drivers/AnalogOut.h
+            ${ARGV0}/drivers/include/drivers/BufferedSerial.h
+            ${ARGV0}/drivers/include/drivers/BusIn.h
+            ${ARGV0}/drivers/include/drivers/BusInOut.h
+            ${ARGV0}/drivers/include/drivers/BusOut.h
+            ${ARGV0}/drivers/include/drivers/CAN.h
+            ${ARGV0}/drivers/include/drivers/DigitalIn.h
+            ${ARGV0}/drivers/include/drivers/DigitalInOut.h
+            ${ARGV0}/drivers/include/drivers/DigitalOut.h
+            ${ARGV0}/drivers/include/drivers/FlashIAP.h
+            ${ARGV0}/drivers/include/drivers/HighResClock.h
+            ${ARGV0}/drivers/include/drivers/I2C.h
+            ${ARGV0}/drivers/include/drivers/I2CSlave.h
+            ${ARGV0}/drivers/include/drivers/InterruptIn.h
+            ${ARGV0}/drivers/include/drivers/LowPowerClock.h
+            ${ARGV0}/drivers/include/drivers/LowPowerTicker.h
+            ${ARGV0}/drivers/include/drivers/LowPowerTimeout.h
+            ${ARGV0}/drivers/include/drivers/LowPowerTimer.h
+            ${ARGV0}/drivers/include/drivers/MbedCRC.h
+            ${ARGV0}/drivers/include/drivers/OSPI.h
+            ${ARGV0}/drivers/include/drivers/PortIn.h
+            ${ARGV0}/drivers/include/drivers/PortInOut.h
+            ${ARGV0}/drivers/include/drivers/PortOut.h
+            ${ARGV0}/drivers/include/drivers/PwmOut.h
+            ${ARGV0}/drivers/include/drivers/QSPI.h
+            ${ARGV0}/drivers/include/drivers/RawCAN.h
+            ${ARGV0}/drivers/include/drivers/RealTimeClock.h
+            ${ARGV0}/drivers/include/drivers/ResetReason.h
+            ${ARGV0}/drivers/include/drivers/SPI.h
+            ${ARGV0}/drivers/include/drivers/SPISlave.h
+            ${ARGV0}/drivers/include/drivers/SerialBase.h
+            ${ARGV0}/drivers/include/drivers/SerialWireOutput.h
+            ${ARGV0}/drivers/include/drivers/Ticker.h
+            ${ARGV0}/drivers/include/drivers/TickerDataClock.h
+            ${ARGV0}/drivers/include/drivers/Timeout.h
+            ${ARGV0}/drivers/include/drivers/Timer.h
+            ${ARGV0}/drivers/include/drivers/TimerEvent.h
+            ${ARGV0}/drivers/include/drivers/UnbufferedSerial.h
+            ${ARGV0}/drivers/include/drivers/Watchdog.h
+            ${ARGV0}/drivers/include/drivers/interfaces/InterfaceCAN.h
+            ${ARGV0}/drivers/include/drivers/interfaces/InterfaceDigitalIn.h
+            ${ARGV0}/drivers/include/drivers/interfaces/InterfaceDigitalInOut.h
+            ${ARGV0}/drivers/include/drivers/interfaces/InterfaceDigitalOut.h
+            ${ARGV0}/drivers/source/AnalogIn.cpp
+            ${ARGV0}/drivers/source/AnalogOut.cpp
+            ${ARGV0}/drivers/source/BufferedSerial.cpp
+            ${ARGV0}/drivers/source/BusIn.cpp
+            ${ARGV0}/drivers/source/BusInOut.cpp
+            ${ARGV0}/drivers/source/BusOut.cpp
+            ${ARGV0}/drivers/source/CAN.cpp
+            ${ARGV0}/drivers/source/DigitalIn.cpp
+            ${ARGV0}/drivers/source/DigitalInOut.cpp
+            ${ARGV0}/drivers/source/DigitalOut.cpp
+            ${ARGV0}/drivers/source/FlashIAP.cpp
+            ${ARGV0}/drivers/source/I2C.cpp
+            ${ARGV0}/drivers/source/I2CSlave.cpp
+            ${ARGV0}/drivers/source/InterruptIn.cpp
+            ${ARGV0}/drivers/source/MbedCRC.cpp
+            ${ARGV0}/drivers/source/OSPI.cpp
+            ${ARGV0}/drivers/source/PortIn.cpp
+            ${ARGV0}/drivers/source/PortInOut.cpp
+            ${ARGV0}/drivers/source/PortOut.cpp
+            ${ARGV0}/drivers/source/PwmOut.cpp
+            ${ARGV0}/drivers/source/QSPI.cpp
+            ${ARGV0}/drivers/source/ResetReason.cpp
+            ${ARGV0}/drivers/source/SPI.cpp
+            ${ARGV0}/drivers/source/SPISlave.cpp
+            ${ARGV0}/drivers/source/SerialBase.cpp
+            ${ARGV0}/drivers/source/SerialWireOutput.cpp
+            ${ARGV0}/drivers/source/Ticker.cpp
+            ${ARGV0}/drivers/source/Timeout.cpp
+            ${ARGV0}/drivers/source/Timer.cpp
+            ${ARGV0}/drivers/source/TimerEvent.cpp
+            ${ARGV0}/drivers/source/UnbufferedSerial.cpp
+            ${ARGV0}/drivers/source/Watchdog.cpp
+            ${ARGV0}/drivers/usb/include/usb/USBAudio.h
+            ${ARGV0}/drivers/usb/include/usb/USBCDC.h
+            ${ARGV0}/drivers/usb/include/usb/USBCDC_ECM.h
+            ${ARGV0}/drivers/usb/include/usb/USBHID.h
+            ${ARGV0}/drivers/usb/include/usb/USBKeyboard.h
+            ${ARGV0}/drivers/usb/include/usb/USBMIDI.h
+            ${ARGV0}/drivers/usb/include/usb/USBMSD.h
+            ${ARGV0}/drivers/usb/include/usb/USBMouse.h
+            ${ARGV0}/drivers/usb/include/usb/USBMouseKeyboard.h
+            ${ARGV0}/drivers/usb/include/usb/USBSerial.h
+            ${ARGV0}/drivers/usb/include/usb/internal/AsyncOp.h
+            ${ARGV0}/drivers/usb/include/usb/internal/ByteBuffer.h
+            ${ARGV0}/drivers/usb/include/usb/internal/EndpointResolver.h
+            ${ARGV0}/drivers/usb/include/usb/internal/LinkEntry.h
+            ${ARGV0}/drivers/usb/include/usb/internal/LinkedList.h
+            ${ARGV0}/drivers/usb/include/usb/internal/LinkedListBase.h
+            ${ARGV0}/drivers/usb/include/usb/internal/MIDIMessage.h
+            ${ARGV0}/drivers/usb/include/usb/internal/OperationList.h
+            ${ARGV0}/drivers/usb/include/usb/internal/OperationListBase.h
+            ${ARGV0}/drivers/usb/include/usb/internal/PolledQueue.h
+            ${ARGV0}/drivers/usb/include/usb/internal/Task.h
+            ${ARGV0}/drivers/usb/include/usb/internal/TaskBase.h
+            ${ARGV0}/drivers/usb/include/usb/internal/TaskQueue.h
+            ${ARGV0}/drivers/usb/include/usb/internal/USBAudio_Types.h
+            ${ARGV0}/drivers/usb/include/usb/internal/USBDescriptor.h
+            ${ARGV0}/drivers/usb/include/usb/internal/USBDevice.h
+            ${ARGV0}/drivers/usb/include/usb/internal/USBDevice_Types.h
+            ${ARGV0}/drivers/usb/include/usb/internal/USBHID_Types.h
+            ${ARGV0}/drivers/usb/source/AsyncOp.cpp
+            ${ARGV0}/drivers/usb/source/ByteBuffer.cpp
+            ${ARGV0}/drivers/usb/source/EndpointResolver.cpp
+            ${ARGV0}/drivers/usb/source/LinkedListBase.cpp
+            ${ARGV0}/drivers/usb/source/OperationListBase.cpp
+            ${ARGV0}/drivers/usb/source/PolledQueue.cpp
+            ${ARGV0}/drivers/usb/source/TaskBase.cpp
+            ${ARGV0}/drivers/usb/source/USBAudio.cpp
+            ${ARGV0}/drivers/usb/source/USBCDC.cpp
+            ${ARGV0}/drivers/usb/source/USBCDC_ECM.cpp
+            ${ARGV0}/drivers/usb/source/USBDevice.cpp
+            ${ARGV0}/drivers/usb/source/USBHID.cpp
+            ${ARGV0}/drivers/usb/source/USBKeyboard.cpp
+            ${ARGV0}/drivers/usb/source/USBMIDI.cpp
+            ${ARGV0}/drivers/usb/source/USBMSD.cpp
+            ${ARGV0}/drivers/usb/source/USBMouse.cpp
+            ${ARGV0}/drivers/usb/source/USBMouseKeyboard.cpp
+            ${ARGV0}/drivers/usb/source/USBSerial.cpp
+            ${ARGV0}/events/include/events/Event.h
+            ${ARGV0}/events/include/events/EventQueue.h
+            ${ARGV0}/events/include/events/UserAllocatedEvent.h
+            ${ARGV0}/events/include/events/equeue.h
+            ${ARGV0}/events/include/events/internal/equeue_platform.h
+            ${ARGV0}/events/include/events/mbed_events.h
+            ${ARGV0}/events/include/events/mbed_shared_queues.h
+            ${ARGV0}/events/source/EventQueue.cpp
+            ${ARGV0}/events/source/equeue.c
+            ${ARGV0}/events/source/equeue_mbed.cpp
+            ${ARGV0}/events/source/equeue_posix.c
+            ${ARGV0}/events/source/mbed_shared_queues.cpp
+            ${ARGV0}/features/frameworks/greentea-client/greentea-client/greentea_metrics.h
+            ${ARGV0}/features/frameworks/greentea-client/greentea-client/test_env.h
+            ${ARGV0}/features/frameworks/greentea-client/source/greentea_metrics.cpp
+            ${ARGV0}/features/frameworks/greentea-client/source/greentea_test_env.cpp
+            ${ARGV0}/features/frameworks/mbed-client-cli/mbed-client-cli/ns_cmdline.h
+            ${ARGV0}/features/frameworks/mbed-client-cli/source/ns_cmdline.c
+            ${ARGV0}/features/frameworks/mbed-greentea-io/mbed_io.cpp
+            ${ARGV0}/features/frameworks/unity/source/unity.c
+            ${ARGV0}/features/frameworks/unity/unity/unity.h
+            ${ARGV0}/features/frameworks/unity/unity/unity_config.h
+            ${ARGV0}/features/frameworks/unity/unity/unity_internals.h
+            ${ARGV0}/features/frameworks/utest/mbed-utest-shim.cpp
+            ${ARGV0}/features/frameworks/utest/source/unity_handler.cpp
+            ${ARGV0}/features/frameworks/utest/source/utest_case.cpp
+            ${ARGV0}/features/frameworks/utest/source/utest_default_handlers.cpp
+            ${ARGV0}/features/frameworks/utest/source/utest_greentea_handlers.cpp
+            ${ARGV0}/features/frameworks/utest/source/utest_harness.cpp
+            ${ARGV0}/features/frameworks/utest/source/utest_print.cpp
+            ${ARGV0}/features/frameworks/utest/source/utest_shim.cpp
+            ${ARGV0}/features/frameworks/utest/source/utest_stack_trace.cpp
+            ${ARGV0}/features/frameworks/utest/source/utest_types.cpp
+            ${ARGV0}/features/frameworks/utest/utest/unity_handler.h
+            ${ARGV0}/features/frameworks/utest/utest/utest.h
+            ${ARGV0}/features/frameworks/utest/utest/utest_case.h
+            ${ARGV0}/features/frameworks/utest/utest/utest_default_handlers.h
+            ${ARGV0}/features/frameworks/utest/utest/utest_harness.h
+            ${ARGV0}/features/frameworks/utest/utest/utest_print.h
+            ${ARGV0}/features/frameworks/utest/utest/utest_scheduler.h
+            ${ARGV0}/features/frameworks/utest/utest/utest_shim.h
+            ${ARGV0}/features/frameworks/utest/utest/utest_specification.h
+            ${ARGV0}/features/frameworks/utest/utest/utest_stack_trace.h
+            ${ARGV0}/features/frameworks/utest/utest/utest_types.h
+            ${ARGV0}/hal/include/hal/LowPowerTickerWrapper.h
+            ${ARGV0}/hal/include/hal/PinNameAliases.h
+            ${ARGV0}/hal/include/hal/analogin_api.h
+            ${ARGV0}/hal/include/hal/analogout_api.h
+            ${ARGV0}/hal/include/hal/buffer.h
+            ${ARGV0}/hal/include/hal/can_api.h
+            ${ARGV0}/hal/include/hal/can_helper.h
+            ${ARGV0}/hal/include/hal/crc_api.h
+            ${ARGV0}/hal/include/hal/critical_section_api.h
+            ${ARGV0}/hal/include/hal/dma_api.h
+            ${ARGV0}/hal/include/hal/flash_api.h
+            ${ARGV0}/hal/include/hal/gpio_api.h
+            ${ARGV0}/hal/include/hal/gpio_irq_api.h
+            ${ARGV0}/hal/include/hal/i2c_api.h
+            ${ARGV0}/hal/include/hal/itm_api.h
+            ${ARGV0}/hal/include/hal/lp_ticker_api.h
+            ${ARGV0}/hal/include/hal/mbed_lp_ticker_wrapper.h
+            ${ARGV0}/hal/include/hal/mpu_api.h
+            ${ARGV0}/hal/include/hal/ospi_api.h
+            ${ARGV0}/hal/include/hal/pinmap.h
+            ${ARGV0}/hal/include/hal/port_api.h
+            ${ARGV0}/hal/include/hal/pwmout_api.h
+            ${ARGV0}/hal/include/hal/qspi_api.h
+            ${ARGV0}/hal/include/hal/reset_reason_api.h
+            ${ARGV0}/hal/include/hal/rtc_api.h
+            ${ARGV0}/hal/include/hal/serial_api.h
+            ${ARGV0}/hal/include/hal/sleep_api.h
+            ${ARGV0}/hal/include/hal/spi_api.h
+            ${ARGV0}/hal/include/hal/static_pinmap.h
+            ${ARGV0}/hal/include/hal/ticker_api.h
+            ${ARGV0}/hal/include/hal/trng_api.h
+            ${ARGV0}/hal/include/hal/us_ticker_api.h
+            ${ARGV0}/hal/include/hal/watchdog_api.h
+            ${ARGV0}/hal/source/LowPowerTickerWrapper.cpp
+            ${ARGV0}/hal/source/mbed_compat.c
+            ${ARGV0}/hal/source/mbed_critical_section_api.c
+            ${ARGV0}/hal/source/mbed_flash_api.c
+            ${ARGV0}/hal/source/mbed_gpio.c
+            ${ARGV0}/hal/source/mbed_gpio_irq.c
+            ${ARGV0}/hal/source/mbed_itm_api.c
+            ${ARGV0}/hal/source/mbed_lp_ticker_api.c
+            ${ARGV0}/hal/source/mbed_lp_ticker_wrapper.cpp
+            ${ARGV0}/hal/source/mbed_pinmap_common.c
+            ${ARGV0}/hal/source/mbed_pinmap_default.cpp
+            ${ARGV0}/hal/source/mbed_ticker_api.c
+            ${ARGV0}/hal/source/mbed_us_ticker_api.c
+            ${ARGV0}/hal/source/mpu/mbed_mpu_v7m.c
+            ${ARGV0}/hal/source/mpu/mbed_mpu_v8m.c
+            ${ARGV0}/hal/source/static_pinmap.cpp
+            ${ARGV0}/hal/usb/include/usb/USBPhy.h
+            ${ARGV0}/hal/usb/include/usb/USBPhyEvents.h
+            ${ARGV0}/hal/usb/include/usb/USBPhyTypes.h
+            ${ARGV0}/hal/usb/include/usb/usb_phy_api.h
+            ${ARGV0}/hal/usb/source/mbed_usb_phy.cpp
+            ${ARGV0}/mbed.h
+            ${ARGV0}/platform/cxxsupport/mstd_algorithm
+            ${ARGV0}/platform/cxxsupport/mstd_atomic
+            ${ARGV0}/platform/cxxsupport/mstd_cstddef
+            ${ARGV0}/platform/cxxsupport/mstd_functional
+            ${ARGV0}/platform/cxxsupport/mstd_iterator
+            ${ARGV0}/platform/cxxsupport/mstd_memory
+            ${ARGV0}/platform/cxxsupport/mstd_mutex
+            ${ARGV0}/platform/cxxsupport/mstd_mutex.cpp
+            ${ARGV0}/platform/cxxsupport/mstd_new
+            ${ARGV0}/platform/cxxsupport/mstd_span
+            ${ARGV0}/platform/cxxsupport/mstd_tuple
+            ${ARGV0}/platform/cxxsupport/mstd_type_traits
+            ${ARGV0}/platform/cxxsupport/mstd_utility
+            ${ARGV0}/platform/include/platform/ATCmdParser.h
+            ${ARGV0}/platform/include/platform/CThunk.h
+            ${ARGV0}/platform/include/platform/Callback.h
+            ${ARGV0}/platform/include/platform/CircularBuffer.h
+            ${ARGV0}/platform/include/platform/CriticalSectionLock.h
+            ${ARGV0}/platform/include/platform/DeepSleepLock.h
+            ${ARGV0}/platform/include/platform/DirHandle.h
+            ${ARGV0}/platform/include/platform/FileBase.h
+            ${ARGV0}/platform/include/platform/FileHandle.h
+            ${ARGV0}/platform/include/platform/FileLike.h
+            ${ARGV0}/platform/include/platform/FilePath.h
+            ${ARGV0}/platform/include/platform/FileSystemHandle.h
+            ${ARGV0}/platform/include/platform/FileSystemLike.h
+            ${ARGV0}/platform/include/platform/LocalFileSystem.h
+            ${ARGV0}/platform/include/platform/NonCopyable.h
+            ${ARGV0}/platform/include/platform/PlatformMutex.h
+            ${ARGV0}/platform/include/platform/ScopedLock.h
+            ${ARGV0}/platform/include/platform/ScopedRamExecutionLock.h
+            ${ARGV0}/platform/include/platform/ScopedRomWriteLock.h
+            ${ARGV0}/platform/include/platform/SharedPtr.h
+            ${ARGV0}/platform/include/platform/SingletonPtr.h
+            ${ARGV0}/platform/include/platform/Span.h
+            ${ARGV0}/platform/include/platform/Stream.h
+            ${ARGV0}/platform/include/platform/Transaction.h
+            ${ARGV0}/platform/include/platform/internal/CThunkBase.h
+            ${ARGV0}/platform/include/platform/internal/SysTimer.h
+            ${ARGV0}/platform/include/platform/internal/mbed_atomic_impl.h
+            ${ARGV0}/platform/include/platform/internal/mbed_error_hist.h
+            ${ARGV0}/platform/include/platform/internal/mbed_fault_handler.h
+            ${ARGV0}/platform/include/platform/internal/mbed_os_timer.h
+            ${ARGV0}/platform/include/platform/mbed_application.h
+            ${ARGV0}/platform/include/platform/mbed_assert.h
+            ${ARGV0}/platform/include/platform/mbed_atomic.h
+            ${ARGV0}/platform/include/platform/mbed_chrono.h
+            ${ARGV0}/platform/include/platform/mbed_critical.h
+            ${ARGV0}/platform/include/platform/mbed_debug.h
+            ${ARGV0}/platform/include/platform/mbed_enum_flags.h
+            ${ARGV0}/platform/include/platform/mbed_error.h
+            ${ARGV0}/platform/include/platform/mbed_interface.h
+            ${ARGV0}/platform/include/platform/mbed_mem_trace.h
+            ${ARGV0}/platform/include/platform/mbed_mktime.h
+            ${ARGV0}/platform/include/platform/mbed_mpu_mgmt.h
+            ${ARGV0}/platform/include/platform/mbed_poll.h
+            ${ARGV0}/platform/include/platform/mbed_power_mgmt.h
+            ${ARGV0}/platform/include/platform/mbed_preprocessor.h
+            ${ARGV0}/platform/include/platform/mbed_retarget.h
+            ${ARGV0}/platform/include/platform/mbed_rtc_time.h
+            ${ARGV0}/platform/include/platform/mbed_semihost_api.h
+            ${ARGV0}/platform/include/platform/mbed_stats.h
+            ${ARGV0}/platform/include/platform/mbed_thread.h
+            ${ARGV0}/platform/include/platform/mbed_toolchain.h
+            ${ARGV0}/platform/include/platform/mbed_version.h
+            ${ARGV0}/platform/include/platform/mbed_wait_api.h
+            ${ARGV0}/platform/include/platform/platform.h
+            ${ARGV0}/platform/mbed-trace/include/mbed-trace/mbed_trace.h
+            ${ARGV0}/platform/mbed-trace/include/mbed-trace/ns_trace.h
+            ${ARGV0}/platform/mbed-trace/source/mbed_trace.c
+            ${ARGV0}/platform/randlib/include/mbed-client-randlib/platform/arm_hal_random.h
+            ${ARGV0}/platform/randlib/include/mbed-client-randlib/randLIB.h
+            ${ARGV0}/platform/randlib/source/randLIB.c
+            ${ARGV0}/platform/source/ATCmdParser.cpp
+            ${ARGV0}/platform/source/CThunkBase.cpp
+            ${ARGV0}/platform/source/CriticalSectionLock.cpp
+            ${ARGV0}/platform/source/DeepSleepLock.cpp
+            ${ARGV0}/platform/source/FileBase.cpp
+            ${ARGV0}/platform/source/FileHandle.cpp
+            ${ARGV0}/platform/source/FilePath.cpp
+            ${ARGV0}/platform/source/FileSystemHandle.cpp
+            ${ARGV0}/platform/source/LocalFileSystem.cpp
+            ${ARGV0}/platform/source/Stream.cpp
+            ${ARGV0}/platform/source/SysTimer.cpp
+            ${ARGV0}/platform/source/TARGET_CORTEX_M/TOOLCHAIN_GCC/except.S
+            ${ARGV0}/platform/source/TARGET_CORTEX_M/mbed_fault_handler.c
+            ${ARGV0}/platform/source/mbed_alloc_wrappers.cpp
+            ${ARGV0}/platform/source/mbed_application.c
+            ${ARGV0}/platform/source/mbed_assert.c
+            ${ARGV0}/platform/source/mbed_atomic_impl.c
+            ${ARGV0}/platform/source/mbed_board.c
+            ${ARGV0}/platform/source/mbed_crash_data_offsets.h
+            ${ARGV0}/platform/source/mbed_critical.c
+            ${ARGV0}/platform/source/mbed_error.c
+            ${ARGV0}/platform/source/mbed_error_hist.c
+            ${ARGV0}/platform/source/mbed_interface.c
+            ${ARGV0}/platform/source/mbed_mem_trace.cpp
+            ${ARGV0}/platform/source/mbed_mktime.c
+            ${ARGV0}/platform/source/mbed_mpu_mgmt.c
+            ${ARGV0}/platform/source/mbed_os_timer.cpp
+            ${ARGV0}/platform/source/mbed_poll.cpp
+            ${ARGV0}/platform/source/mbed_power_mgmt.c
+            ${ARGV0}/platform/source/mbed_retarget.cpp
+            ${ARGV0}/platform/source/mbed_rtc_time.cpp
+            ${ARGV0}/platform/source/mbed_sdk_boot.c
+            ${ARGV0}/platform/source/mbed_semihost_api.c
+            ${ARGV0}/platform/source/mbed_stats.c
+            ${ARGV0}/platform/source/mbed_thread.cpp
+            ${ARGV0}/platform/source/mbed_wait_api_no_rtos.c
+            ${ARGV0}/platform/source/minimal-printf/mbed_printf_armlink_overrides.c
+            ${ARGV0}/platform/source/minimal-printf/mbed_printf_implementation.c
+            ${ARGV0}/platform/source/minimal-printf/mbed_printf_implementation.h
+            ${ARGV0}/platform/source/minimal-printf/mbed_printf_wrapper.c
+            ${ARGV0}/platform/source/newlib_nano_malloc_workaround.c
+            ${ARGV0}/rtos/include/rtos/ConditionVariable.h
+            ${ARGV0}/rtos/include/rtos/EventFlags.h
+            ${ARGV0}/rtos/include/rtos/Kernel.h
+            ${ARGV0}/rtos/include/rtos/Mail.h
+            ${ARGV0}/rtos/include/rtos/MemoryPool.h
+            ${ARGV0}/rtos/include/rtos/Mutex.h
+            ${ARGV0}/rtos/include/rtos/Queue.h
+            ${ARGV0}/rtos/include/rtos/Semaphore.h
+            ${ARGV0}/rtos/include/rtos/ThisThread.h
+            ${ARGV0}/rtos/include/rtos/Thread.h
+            ${ARGV0}/rtos/include/rtos/internal/mbed_rtos1_types.h
+            ${ARGV0}/rtos/include/rtos/internal/mbed_rtos_storage.h
+            ${ARGV0}/rtos/include/rtos/mbed_rtos_types.h
+            ${ARGV0}/rtos/include/rtos/rtos.h
+            ${ARGV0}/rtos/source/ConditionVariable.cpp
+            ${ARGV0}/rtos/source/EventFlags.cpp
+            ${ARGV0}/rtos/source/Kernel.cpp
+            ${ARGV0}/rtos/source/Mutex.cpp
+            ${ARGV0}/rtos/source/Semaphore.cpp
+            ${ARGV0}/rtos/source/ThisThread.cpp
+            ${ARGV0}/rtos/source/Thread.cpp
+            ${ARGV0}/rtos/source/rtos_handlers.h
+            ${ARGV0}/rtos/source/rtos_idle.h
+            ${ARGV0}/storage/blockdevice/COMPONENT_FLASHIAP/include/FlashIAP/FlashIAPBlockDevice.h
+            ${ARGV0}/storage/blockdevice/COMPONENT_FLASHIAP/source/FlashIAPBlockDevice.cpp
+            ${ARGV0}/storage/blockdevice/include/blockdevice/BlockDevice.h
+            ${ARGV0}/storage/blockdevice/include/blockdevice/BufferedBlockDevice.h
+            ${ARGV0}/storage/blockdevice/include/blockdevice/ChainingBlockDevice.h
+            ${ARGV0}/storage/blockdevice/include/blockdevice/ExhaustibleBlockDevice.h
+            ${ARGV0}/storage/blockdevice/include/blockdevice/FlashSimBlockDevice.h
+            ${ARGV0}/storage/blockdevice/include/blockdevice/HeapBlockDevice.h
+            ${ARGV0}/storage/blockdevice/include/blockdevice/MBRBlockDevice.h
+            ${ARGV0}/storage/blockdevice/include/blockdevice/ObservingBlockDevice.h
+            ${ARGV0}/storage/blockdevice/include/blockdevice/ProfilingBlockDevice.h
+            ${ARGV0}/storage/blockdevice/include/blockdevice/ReadOnlyBlockDevice.h
+            ${ARGV0}/storage/blockdevice/include/blockdevice/SlicingBlockDevice.h
+            ${ARGV0}/storage/blockdevice/include/blockdevice/internal/SFDP.h
+            ${ARGV0}/storage/blockdevice/source/BufferedBlockDevice.cpp
+            ${ARGV0}/storage/blockdevice/source/ChainingBlockDevice.cpp
+            ${ARGV0}/storage/blockdevice/source/ExhaustibleBlockDevice.cpp
+            ${ARGV0}/storage/blockdevice/source/FlashSimBlockDevice.cpp
+            ${ARGV0}/storage/blockdevice/source/HeapBlockDevice.cpp
+            ${ARGV0}/storage/blockdevice/source/MBRBlockDevice.cpp
+            ${ARGV0}/storage/blockdevice/source/ObservingBlockDevice.cpp
+            ${ARGV0}/storage/blockdevice/source/ProfilingBlockDevice.cpp
+            ${ARGV0}/storage/blockdevice/source/ReadOnlyBlockDevice.cpp
+            ${ARGV0}/storage/blockdevice/source/SFDP.cpp
+            ${ARGV0}/storage/blockdevice/source/SlicingBlockDevice.cpp
+            ${ARGV0}/storage/filesystem/fat/ChaN/diskio.h
+            ${ARGV0}/storage/filesystem/fat/ChaN/ff.cpp
+            ${ARGV0}/storage/filesystem/fat/ChaN/ff.h
+            ${ARGV0}/storage/filesystem/fat/ChaN/ffconf.h
+            ${ARGV0}/storage/filesystem/fat/ChaN/ffunicode.cpp
+            ${ARGV0}/storage/filesystem/fat/ChaN/integer.h
+            ${ARGV0}/storage/filesystem/fat/include/fat/FATFileSystem.h
+            ${ARGV0}/storage/filesystem/fat/source/FATFileSystem.cpp
+            ${ARGV0}/storage/filesystem/include/filesystem/Dir.h
+            ${ARGV0}/storage/filesystem/include/filesystem/File.h
+            ${ARGV0}/storage/filesystem/include/filesystem/FileSystem.h
+            ${ARGV0}/storage/filesystem/include/filesystem/mbed_filesystem.h
+            ${ARGV0}/storage/filesystem/littlefs/include/littlefs/LittleFileSystem.h
+            ${ARGV0}/storage/filesystem/littlefs/littlefs/lfs.c
+            ${ARGV0}/storage/filesystem/littlefs/littlefs/lfs.h
+            ${ARGV0}/storage/filesystem/littlefs/littlefs/lfs_util.c
+            ${ARGV0}/storage/filesystem/littlefs/littlefs/lfs_util.h
+            ${ARGV0}/storage/filesystem/littlefs/source/LittleFileSystem.cpp
+            ${ARGV0}/storage/filesystem/littlefsv2/include/littlefsv2/LittleFileSystem2.h
+            ${ARGV0}/storage/filesystem/littlefsv2/littlefs/lfs2.c
+            ${ARGV0}/storage/filesystem/littlefsv2/littlefs/lfs2.h
+            ${ARGV0}/storage/filesystem/littlefsv2/littlefs/lfs2_util.c
+            ${ARGV0}/storage/filesystem/littlefsv2/littlefs/lfs2_util.h
+            ${ARGV0}/storage/filesystem/littlefsv2/source/LittleFileSystem2.cpp
+            ${ARGV0}/storage/filesystem/source/Dir.cpp
+            ${ARGV0}/storage/filesystem/source/File.cpp
+            ${ARGV0}/storage/filesystem/source/FileSystem.cpp
+            ${ARGV0}/storage/kvstore/direct_access_devicekey/include/direct_access_devicekey/DirectAccessDevicekey.h
+            ${ARGV0}/storage/kvstore/direct_access_devicekey/source/DirectAccessDevicekey.cpp
+            ${ARGV0}/storage/kvstore/filesystemstore/include/filesystemstore/FileSystemStore.h
+            ${ARGV0}/storage/kvstore/filesystemstore/source/FileSystemStore.cpp
+            ${ARGV0}/storage/kvstore/include/kvstore/KVStore.h
+            ${ARGV0}/storage/kvstore/kv_config/include/kv_config/kv_config.h
+            ${ARGV0}/storage/kvstore/kv_config/source/kv_config.cpp
+            ${ARGV0}/storage/kvstore/kvstore_global_api/include/kvstore_global_api/KVMap.h
+            ${ARGV0}/storage/kvstore/kvstore_global_api/include/kvstore_global_api/kvstore_global_api.h
+            ${ARGV0}/storage/kvstore/kvstore_global_api/source/KVMap.cpp
+            ${ARGV0}/storage/kvstore/kvstore_global_api/source/kvstore_global_api.cpp
+            ${ARGV0}/storage/kvstore/securestore/include/securestore/SecureStore.h
+            ${ARGV0}/storage/kvstore/securestore/source/SecureStore.cpp
+            ${ARGV0}/storage/kvstore/tdbstore/include/tdbstore/TDBStore.h
+            ${ARGV0}/storage/kvstore/tdbstore/source/TDBStore.cpp
+            ${ARGV0}/storage/platform/source/PlatformStorage.cpp
+            ${ARGV0}/targets/TARGET_STM/PeripheralPins.h
+            ${ARGV0}/targets/TARGET_STM/PinNamesTypes.h
+            ${ARGV0}/targets/TARGET_STM/PortNames.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/PeripheralNames.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h723xx.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h725xx.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h730xx.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h730xxq.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h733xx.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h735xx.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h742xx.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h743xx.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h745xx.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h747xx.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h750xx.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h753xx.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h755xx.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h757xx.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7a3xx.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7a3xxq.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7b0xx.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7b0xxq.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7b3xx.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7b3xxq.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7xx.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/system_stm32h7xx.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/Legacy/stm32_hal_legacy.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_adc.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_adc.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_adc_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_adc_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cec.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cec.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_comp.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_comp.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cordic.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cordic.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cortex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cortex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_crc.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_crc.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_crc_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_crc_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cryp.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cryp.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cryp_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cryp_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dac.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dac.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dac_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dac_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dcmi.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dcmi.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_def.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dfsdm.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dfsdm.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dfsdm_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dfsdm_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma2d.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma2d.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dsi.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dsi.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dts.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dts.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_eth.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_eth.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_eth_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_eth_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_exti.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_exti.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_fdcan.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_fdcan.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_flash.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_flash.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_flash_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_flash_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_fmac.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_fmac.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_gfxmmu.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_gfxmmu.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_gpio.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_gpio.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_gpio_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hash.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hash.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hash_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hash_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hcd.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hcd.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hrtim.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hrtim.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hsem.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hsem.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2c.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2c.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2c_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2c_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2s.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2s.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2s_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2s_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_irda.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_irda.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_irda_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_iwdg.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_iwdg.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_jpeg.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_jpeg.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_lptim.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_lptim.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ltdc.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ltdc.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ltdc_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ltdc_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mdios.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mdios.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mdma.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mdma.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mmc.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mmc.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mmc_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mmc_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_nand.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_nand.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_nor.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_nor.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_opamp.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_opamp.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_opamp_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_opamp_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ospi.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ospi.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_otfdec.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_otfdec.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pcd.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pcd.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pcd_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pcd_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pssi.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pssi.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pwr.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pwr.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pwr_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pwr_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_qspi.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_qspi.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ramecc.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ramecc.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rcc.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rcc.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rcc_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rcc_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rng.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rng.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rng_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rng_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rtc.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rtc.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rtc_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rtc_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sai.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sai.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sai_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sai_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sd.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sd.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sd_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sd_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sdram.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sdram.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_smartcard.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_smartcard.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_smartcard_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_smartcard_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_smbus.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_smbus.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_spdifrx.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_spdifrx.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_spi.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_spi.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_spi_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_spi_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sram.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sram.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_swpmi.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_swpmi.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_tim.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_tim.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_tim_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_tim_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_uart.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_uart.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_uart_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_uart_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_usart.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_usart.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_usart_ex.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_usart_ex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_wwdg.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_wwdg.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_adc.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_adc.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_bdma.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_bdma.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_bus.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_comp.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_comp.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_cordic.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_cordic.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_cortex.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_crc.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_crc.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_crs.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_crs.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dac.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dac.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_delayblock.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_delayblock.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dma.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dma.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dma2d.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dma2d.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dmamux.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_exti.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_exti.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_fmac.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_fmac.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_fmc.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_fmc.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_gpio.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_gpio.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_hrtim.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_hrtim.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_hsem.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_i2c.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_i2c.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_iwdg.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_lptim.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_lptim.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_lpuart.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_lpuart.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_mdma.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_mdma.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_opamp.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_opamp.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_pwr.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_pwr.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_rcc.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_rcc.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_rng.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_rng.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_rtc.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_rtc.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_sdmmc.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_sdmmc.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_spi.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_spi.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_swpmi.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_swpmi.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_system.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_tim.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_tim.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_usart.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_usart.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_usb.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_usb.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_utils.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_utils.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_wwdg.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/stm32h7xx_hal_conf.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/system_stm32h7xx_dualcore_boot_cm4_cm7.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/system_stm32h7xx_singlecore.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI/TARGET_NUCLEO_H743ZI2/PeripheralPins.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI/TARGET_NUCLEO_H743ZI2/PinNames.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI/TARGET_NUCLEO_H743ZI2/system_clock.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI/cmsis_nvic.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/analogin_device.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/analogout_device.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/cmsis.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/flash_api.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/gpio_irq_device.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/gpio_irq_device.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/i2c_device.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/i2c_device.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/objects.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/pin_device.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/pwmout_device.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/pwmout_device.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/serial_device.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/spi_api.c
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/spi_device.h
+            ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/us_ticker_data.h
+            ${ARGV0}/targets/TARGET_STM/USBPhyHw.h
+            ${ARGV0}/targets/TARGET_STM/USBPhy_STM32.cpp
+            ${ARGV0}/targets/TARGET_STM/analogin_api.c
+            ${ARGV0}/targets/TARGET_STM/analogout_api.c
+            ${ARGV0}/targets/TARGET_STM/can_api.c
+            ${ARGV0}/targets/TARGET_STM/device.h
+            ${ARGV0}/targets/TARGET_STM/gpio_api.c
+            ${ARGV0}/targets/TARGET_STM/gpio_irq_api.c
+            ${ARGV0}/targets/TARGET_STM/gpio_object.h
+            ${ARGV0}/targets/TARGET_STM/hal_tick_overrides.c
+            ${ARGV0}/targets/TARGET_STM/i2c_api.c
+            ${ARGV0}/targets/TARGET_STM/lp_ticker.c
+            ${ARGV0}/targets/TARGET_STM/lp_ticker_defines.h
+            ${ARGV0}/targets/TARGET_STM/mbed_crc_api.c
+            ${ARGV0}/targets/TARGET_STM/mbed_overrides.c
+            ${ARGV0}/targets/TARGET_STM/mbed_rtx.h
+            ${ARGV0}/targets/TARGET_STM/nvic_addr.h
+            ${ARGV0}/targets/TARGET_STM/ospi_api.c
+            ${ARGV0}/targets/TARGET_STM/pinmap.c
+            ${ARGV0}/targets/TARGET_STM/port_api.c
+            ${ARGV0}/targets/TARGET_STM/pwmout_api.c
+            ${ARGV0}/targets/TARGET_STM/qspi_api.c
+            ${ARGV0}/targets/TARGET_STM/reset_reason.c
+            ${ARGV0}/targets/TARGET_STM/rtc_api.c
+            ${ARGV0}/targets/TARGET_STM/rtc_api_hal.h
+            ${ARGV0}/targets/TARGET_STM/serial_api.c
+            ${ARGV0}/targets/TARGET_STM/serial_api_hal.h
+            ${ARGV0}/targets/TARGET_STM/sleep.c
+            ${ARGV0}/targets/TARGET_STM/stm32_assert.h
+            ${ARGV0}/targets/TARGET_STM/stm_spi_api.c
+            ${ARGV0}/targets/TARGET_STM/trng_api.c
+            ${ARGV0}/targets/TARGET_STM/us_ticker.c
+            ${ARGV0}/targets/TARGET_STM/us_ticker_defines.h
+            ${ARGV0}/targets/TARGET_STM/watchdog_api.c
+            mbed_config.h
+            )
+endmacro()
+
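+# The macro below adds the mbed-os include directories needed for the
+# NUCLEO_H743ZI2 / STM32H7 test build to a CMake target: judging from its
+# body, ${ARGV0} is the consuming target and ${ARGV1} is the mbed-os
+# source root.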
+macro(target_include_directories_mbed)
+    target_include_directories(${ARGV0} PRIVATE
+            ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI/TARGET_NUCLEO_H743ZI2
+            ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI
+            ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/Legacy
+            ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver
+            ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS
+            ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW
+            ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7
+            ${ARGV1}/targets/TARGET_STM
+            ${ARGV1}/storage/kvstore/tdbstore/include/tdbstore
+            ${ARGV1}/storage/kvstore/tdbstore/include
+            ${ARGV1}/storage/kvstore/tdbstore
+            ${ARGV1}/storage/kvstore/securestore/include/securestore
+            ${ARGV1}/storage/kvstore/securestore/include
+            ${ARGV1}/storage/kvstore/securestore
+            ${ARGV1}/storage/kvstore/kvstore_global_api/include/kvstore_global_api
+            ${ARGV1}/storage/kvstore/kvstore_global_api/include
+            ${ARGV1}/storage/kvstore/kvstore_global_api
+            ${ARGV1}/storage/kvstore/kv_config/include/kv_config
+            ${ARGV1}/storage/kvstore/kv_config/include
+            ${ARGV1}/storage/kvstore/kv_config
+            ${ARGV1}/storage/kvstore/include/kvstore
+            ${ARGV1}/storage/kvstore/include
+            ${ARGV1}/storage/kvstore/filesystemstore/include/filesystemstore
+            ${ARGV1}/storage/kvstore/filesystemstore/include
+            ${ARGV1}/storage/kvstore/filesystemstore
+            ${ARGV1}/storage/kvstore/direct_access_devicekey/include/direct_access_devicekey
+            ${ARGV1}/storage/kvstore/direct_access_devicekey/include
+            ${ARGV1}/storage/kvstore/direct_access_devicekey
+            ${ARGV1}/storage/kvstore
+            ${ARGV1}/storage/filesystem/littlefsv2/littlefs
+            ${ARGV1}/storage/filesystem/littlefsv2/include/littlefsv2
+            ${ARGV1}/storage/filesystem/littlefsv2/include
+            ${ARGV1}/storage/filesystem/littlefsv2
+            ${ARGV1}/storage/filesystem/littlefs/littlefs
+            ${ARGV1}/storage/filesystem/littlefs/include/littlefs
+            ${ARGV1}/storage/filesystem/littlefs/include
+            ${ARGV1}/storage/filesystem/littlefs
+            ${ARGV1}/storage/filesystem/include/filesystem
+            ${ARGV1}/storage/filesystem/include
+            ${ARGV1}/storage/filesystem/fat/include/fat
+            ${ARGV1}/storage/filesystem/fat/include
+            ${ARGV1}/storage/filesystem/fat/ChaN
+            ${ARGV1}/storage/filesystem/fat
+            ${ARGV1}/storage/filesystem
+            ${ARGV1}/storage/blockdevice/include/blockdevice/internal
+            ${ARGV1}/storage/blockdevice/include/blockdevice
+            ${ARGV1}/storage/blockdevice/include
+            ${ARGV1}/storage/blockdevice/COMPONENT_FLASHIAP/include/FlashIAP
+            ${ARGV1}/storage/blockdevice/COMPONENT_FLASHIAP/include
+            ${ARGV1}/storage/blockdevice/COMPONENT_FLASHIAP
+            ${ARGV1}/storage/blockdevice
+            ${ARGV1}/storage
+            ${ARGV1}/rtos/source
+            ${ARGV1}/rtos/include/rtos/internal
+            ${ARGV1}/rtos/include/rtos
+            ${ARGV1}/rtos/include
+            ${ARGV1}/rtos
+            ${ARGV1}/platform/source/minimal-printf
+            ${ARGV1}/platform/source
+            ${ARGV1}/platform/randlib/include/mbed-client-randlib/platform
+            ${ARGV1}/platform/randlib/include/mbed-client-randlib
+            ${ARGV1}/platform/randlib/include
+            ${ARGV1}/platform/randlib
+            ${ARGV1}/platform/mbed-trace/include/mbed-trace
+            ${ARGV1}/platform/mbed-trace/include
+            ${ARGV1}/platform/mbed-trace
+            ${ARGV1}/platform/include/platform/internal
+            ${ARGV1}/platform/include/platform
+            ${ARGV1}/platform/include
+            ${ARGV1}/platform/cxxsupport
+            ${ARGV1}/platform
+            ${ARGV1}/hal/usb/include/usb
+            ${ARGV1}/hal/usb/include
+            ${ARGV1}/hal/usb
+            ${ARGV1}/hal/include/hal
+            ${ARGV1}/hal/include
+            ${ARGV1}/hal
+            ${ARGV1}/features/frameworks/utest/utest
+            ${ARGV1}/features/frameworks/utest
+            ${ARGV1}/features/frameworks/unity/unity
+            ${ARGV1}/features/frameworks/unity
+            ${ARGV1}/features/frameworks/mbed-client-cli/mbed-client-cli
+            ${ARGV1}/features/frameworks/mbed-client-cli
+            ${ARGV1}/features/frameworks/greentea-client/greentea-client
+            ${ARGV1}/features/frameworks/greentea-client
+            ${ARGV1}/features/frameworks
+            ${ARGV1}/features
+            ${ARGV1}/events/include/events/internal
+            ${ARGV1}/events/include/events
+            ${ARGV1}/events/include
+            ${ARGV1}/events
+            ${ARGV1}/drivers/usb/include/usb/internal
+            ${ARGV1}/drivers/usb/include/usb
+            ${ARGV1}/drivers/usb/include
+            ${ARGV1}/drivers/usb
+            ${ARGV1}/drivers/include/drivers/interfaces
+            ${ARGV1}/drivers/include/drivers
+            ${ARGV1}/drivers/include
+            ${ARGV1}/drivers/device_key/include/device_key
+            ${ARGV1}/drivers/device_key/include
+            ${ARGV1}/drivers/device_key
+            ${ARGV1}/drivers
+            ${ARGV1}/connectivity/nfc/libraries/stack/transceiver
+            ${ARGV1}/connectivity/nfc/libraries/stack/tech/type4
+            ${ARGV1}/connectivity/nfc/libraries/stack/tech/isodep
+            ${ARGV1}/connectivity/nfc/libraries/stack/tech/iso7816
+            ${ARGV1}/connectivity/nfc/libraries/stack/tech
+            ${ARGV1}/connectivity/nfc/libraries/stack/platform
+            ${ARGV1}/connectivity/nfc/libraries/stack/ndef
+            ${ARGV1}/connectivity/nfc/libraries/stack
+            ${ARGV1}/connectivity/nfc/libraries/acore/acore
+            ${ARGV1}/connectivity/nfc/libraries/acore
+            ${ARGV1}/connectivity/nfc/libraries
+            ${ARGV1}/connectivity/nfc/include/nfc/ndef/common
+            ${ARGV1}/connectivity/nfc/include/nfc/ndef
+            ${ARGV1}/connectivity/nfc/include/nfc
+            ${ARGV1}/connectivity/nfc/include
+            ${ARGV1}/connectivity/nfc
+            ${ARGV1}/connectivity/netsocket/include/netsocket
+            ${ARGV1}/connectivity/netsocket/include
+            ${ARGV1}/connectivity/netsocket
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/libNET/src
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/libNET
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/libDHCPv6
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/ipv6_stack
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/configs/base
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/configs
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/whiteboard
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/utils
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/random_early_detection
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/pan_blacklist
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/nist_aes_kw
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/nd_proxy
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mle_service
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/stack
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/services/serial
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/services/poll
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/services/mdns
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/services/dns
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/services
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/port/cpu
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/port/compiler
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/port
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mac_neighbor_table
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/load_balance
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/ieee_802_11
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/hmac
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/fnv_hash
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/fhss
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/etx
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/blacklist
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/Trickle
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/Neighbor_cache
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/tls_sec_prot
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/radius_sec_prot
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/msg_sec_prot
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/key_sec_prot
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/gkh_sec_prot
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/fwh_sec_prot
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/eap_tls_sec_prot
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/kmp
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/eapol
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/TLS
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/PANA
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/Common
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/RPL
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/NWK_INTERFACE/Include
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/NWK_INTERFACE
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/MPL
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/MLE
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/MAC/virtual_rf
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/MAC/IEEE802_15_4
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/MAC
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/DHCPv6_client
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/DHCPv6_Server
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Core/include
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Core
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Common_Protocols
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/BorderRouter
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/ws
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/Thread
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/NVM
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/ND
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/Mesh
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/MAC
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/IPHC_Decode
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/Fragmentation
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/Bootstraps
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/nanostack/platform
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/nanostack
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack-eventloop/source
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack-eventloop/nanostack-event-loop/platform
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack-eventloop/nanostack-event-loop
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack-eventloop
+            ${ARGV1}/connectivity/nanostack/sal-stack-nanostack
+            ${ARGV1}/connectivity/nanostack/nanostack-hal-mbed-cmsis-rtos
+            ${ARGV1}/connectivity/nanostack/mbed-mesh-api/source/include
+            ${ARGV1}/connectivity/nanostack/mbed-mesh-api/source
+            ${ARGV1}/connectivity/nanostack/mbed-mesh-api/mbed-mesh-api
+            ${ARGV1}/connectivity/nanostack/mbed-mesh-api
+            ${ARGV1}/connectivity/nanostack/include/nanostack-interface
+            ${ARGV1}/connectivity/nanostack/include
+            ${ARGV1}/connectivity/nanostack/coap-service/source/include
+            ${ARGV1}/connectivity/nanostack/coap-service/source
+            ${ARGV1}/connectivity/nanostack/coap-service/coap-service
+            ${ARGV1}/connectivity/nanostack/coap-service
+            ${ARGV1}/connectivity/nanostack
+            ${ARGV1}/connectivity/mbedtls/source
+            ${ARGV1}/connectivity/mbedtls/platform/inc
+            ${ARGV1}/connectivity/mbedtls/platform
+            ${ARGV1}/connectivity/mbedtls/include/mbedtls
+            ${ARGV1}/connectivity/mbedtls/include
+            ${ARGV1}/connectivity/mbedtls
+            ${ARGV1}/connectivity/lwipstack/lwip/src/include/netif
+            ${ARGV1}/connectivity/lwipstack/lwip/src/include/lwip/prot
+            ${ARGV1}/connectivity/lwipstack/lwip/src/include/lwip/priv
+            ${ARGV1}/connectivity/lwipstack/lwip/src/include/lwip
+            ${ARGV1}/connectivity/lwipstack/lwip/src/include/compat/posix/sys
+            ${ARGV1}/connectivity/lwipstack/lwip/src/include/compat/posix/net
+            ${ARGV1}/connectivity/lwipstack/lwip/src/include/compat/posix/arpa
+            ${ARGV1}/connectivity/lwipstack/lwip/src/include/compat/posix
+            ${ARGV1}/connectivity/lwipstack/lwip/src/include/compat
+            ${ARGV1}/connectivity/lwipstack/lwip/src/include
+            ${ARGV1}/connectivity/lwipstack/lwip/src
+            ${ARGV1}/connectivity/lwipstack/lwip-sys/arch
+            ${ARGV1}/connectivity/lwipstack/lwip-sys
+            ${ARGV1}/connectivity/lwipstack/lwip
+            ${ARGV1}/connectivity/lwipstack/include/lwipstack
+            ${ARGV1}/connectivity/lwipstack/include
+            ${ARGV1}/connectivity/lwipstack
+            ${ARGV1}/connectivity/lorawan/system
+            ${ARGV1}/connectivity/lorawan/lorastack/phy
+            ${ARGV1}/connectivity/lorawan/lorastack/mac
+            ${ARGV1}/connectivity/lorawan/lorastack
+            ${ARGV1}/connectivity/lorawan/include/lorawan
+            ${ARGV1}/connectivity/lorawan/include
+            ${ARGV1}/connectivity/lorawan
+            ${ARGV1}/connectivity/libraries/ppp/include/ppp
+            ${ARGV1}/connectivity/libraries/ppp/include/polarssl
+            ${ARGV1}/connectivity/libraries/ppp/include
+            ${ARGV1}/connectivity/libraries/ppp
+            ${ARGV1}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/platform
+            ${ARGV1}/connectivity/libraries/nanostack-libservice/mbed-client-libservice
+            ${ARGV1}/connectivity/libraries/nanostack-libservice
+            ${ARGV1}/connectivity/libraries/mbed-coap/source/include
+            ${ARGV1}/connectivity/libraries/mbed-coap/source
+            ${ARGV1}/connectivity/libraries/mbed-coap/mbed-coap
+            ${ARGV1}/connectivity/libraries/mbed-coap
+            ${ARGV1}/connectivity/libraries
+            ${ARGV1}/connectivity/drivers/wifi/esp8266-driver/ESP8266
+            ${ARGV1}/connectivity/drivers/wifi/esp8266-driver
+            ${ARGV1}/connectivity/drivers/wifi
+            ${ARGV1}/connectivity/drivers/nfc/PN512/source/transceiver
+            ${ARGV1}/connectivity/drivers/nfc/PN512/source
+            ${ARGV1}/connectivity/drivers/nfc/PN512/include/nfc/controllers
+            ${ARGV1}/connectivity/drivers/nfc/PN512/include/nfc
+            ${ARGV1}/connectivity/drivers/nfc/PN512/include
+            ${ARGV1}/connectivity/drivers/nfc/PN512
+            ${ARGV1}/connectivity/drivers/nfc
+            ${ARGV1}/connectivity/drivers/mbedtls/TARGET_STM
+            ${ARGV1}/connectivity/drivers/emac/TARGET_STM/TARGET_STM32H7/lan8742
+            ${ARGV1}/connectivity/drivers/emac/TARGET_STM/TARGET_STM32H7
+            ${ARGV1}/connectivity/drivers/emac/TARGET_STM
+            ${ARGV1}/connectivity/drivers/cellular/UBLOX/PPP
+            ${ARGV1}/connectivity/drivers/cellular/UBLOX/N2XX
+            ${ARGV1}/connectivity/drivers/cellular/UBLOX/AT
+            ${ARGV1}/connectivity/drivers/cellular/UBLOX
+            ${ARGV1}/connectivity/drivers/cellular/TELIT/ME910
+            ${ARGV1}/connectivity/drivers/cellular/TELIT/ME310
+            ${ARGV1}/connectivity/drivers/cellular/TELIT/HE910
+            ${ARGV1}/connectivity/drivers/cellular/TELIT
+            ${ARGV1}/connectivity/drivers/cellular/RiotMicro/AT
+            ${ARGV1}/connectivity/drivers/cellular/RiotMicro
+            ${ARGV1}/connectivity/drivers/cellular/QUECTEL/UG96
+            ${ARGV1}/connectivity/drivers/cellular/QUECTEL/M26
+            ${ARGV1}/connectivity/drivers/cellular/QUECTEL/EC2X
+            ${ARGV1}/connectivity/drivers/cellular/QUECTEL/BG96
+            ${ARGV1}/connectivity/drivers/cellular/QUECTEL/BC95
+            ${ARGV1}/connectivity/drivers/cellular/QUECTEL
+            ${ARGV1}/connectivity/drivers/cellular/MultiTech/DragonflyNano/PPP
+            ${ARGV1}/connectivity/drivers/cellular/MultiTech/DragonflyNano
+            ${ARGV1}/connectivity/drivers/cellular/MultiTech
+            ${ARGV1}/connectivity/drivers/cellular/GENERIC/GENERIC_AT3GPP
+            ${ARGV1}/connectivity/drivers/cellular/GENERIC
+            ${ARGV1}/connectivity/drivers/cellular/GEMALTO/CINTERION
+            ${ARGV1}/connectivity/drivers/cellular/GEMALTO
+            ${ARGV1}/connectivity/drivers/cellular/Altair/ALT1250/PPP
+            ${ARGV1}/connectivity/drivers/cellular/Altair/ALT1250
+            ${ARGV1}/connectivity/drivers/cellular/Altair
+            ${ARGV1}/connectivity/drivers/cellular
+            ${ARGV1}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/stm-s2lp-rf-driver
+            ${ARGV1}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source
+            ${ARGV1}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver
+            ${ARGV1}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source
+            ${ARGV1}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/mcr20a-rf-driver
+            ${ARGV1}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver
+            ${ARGV1}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source
+            ${ARGV1}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/atmel-rf-driver
+            ${ARGV1}/connectivity/drivers/802.15.4_RF/atmel-rf-driver
+            ${ARGV1}/connectivity/drivers/802.15.4_RF
+            ${ARGV1}/connectivity/drivers
+            ${ARGV1}/connectivity/cellular/include/cellular/framework/device
+            ${ARGV1}/connectivity/cellular/include/cellular/framework/common
+            ${ARGV1}/connectivity/cellular/include/cellular/framework/AT
+            ${ARGV1}/connectivity/cellular/include/cellular/framework/API
+            ${ARGV1}/connectivity/cellular/include/cellular/framework
+            ${ARGV1}/connectivity/cellular/include/cellular
+            ${ARGV1}/connectivity/cellular/include
+            ${ARGV1}/connectivity/cellular
+            ${ARGV1}/connectivity
+            ${ARGV1}/cmsis/device/rtos/include
+            ${ARGV1}/cmsis/device/rtos
+            ${ARGV1}/cmsis/device/RTE/include
+            ${ARGV1}/cmsis/device/RTE
+            ${ARGV1}/cmsis/device
+            ${ARGV1}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include
+            ${ARGV1}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M
+            ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source
+            ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Include1
+            ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Include
+            ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Config
+            ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX
+            ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2/Include
+            ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2
+            ${ARGV1}/cmsis/CMSIS_5/CMSIS
+            ${ARGV1}/cmsis/CMSIS_5
+            ${ARGV1}/cmsis
+            ${ARGV1}
+            )
+
+endmacro()
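As a rough sketch (the actual call site is not part of this excerpt), the include-directory macro would be invoked with the consuming target followed by the mbed-os checkout path. The target name `onert_micro_mbed_test`, the `main.cpp` source, and the `MBED_OS_PATH` variable below are placeholders, not names taken from the repository; the companion source-list macro above appears to take the mbed-os root as its first argument, and the plain `mbed_config.h` entry in that list suggests the generated header added in the next file is expected relative to the call-site directory.

    # hypothetical usage -- names are illustrative only
    set(MBED_OS_PATH ${CMAKE_CURRENT_SOURCE_DIR}/mbed-os)
    add_executable(onert_micro_mbed_test main.cpp)
    target_include_directories_mbed(onert_micro_mbed_test ${MBED_OS_PATH})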
diff --git a/onert-micro/tests/mbed-os/mbed_config.h b/onert-micro/tests/mbed-os/mbed_config.h
new file mode 100644 (file)
index 0000000..5649f46
--- /dev/null
@@ -0,0 +1,488 @@
+/*
+ * mbed SDK
+ * Copyright (c) 2017 ARM Limited
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Automatically generated configuration file.
+// DO NOT EDIT, content will be overwritten.
+
+#ifndef __MBED_CONFIG_DATA__
+#define __MBED_CONFIG_DATA__
+
+// Configuration parameters
+#define CLOCK_SOURCE USE_PLL_HSE_EXTC | USE_PLL_HSI         // set by target:MCU_STM32H7
+#define HSE_VALUE 8000000                                   // set by target:NUCLEO_H743ZI2
+#define LPTICKER_DELAY_TICKS 0                              // set by target:MCU_STM32H7
+#define MBED_CONF_ALT1250_PPP_BAUDRATE 115200               // set by library:ALT1250_PPP
+#define MBED_CONF_ALT1250_PPP_PROVIDE_DEFAULT 0             // set by library:ALT1250_PPP
+#define MBED_CONF_ATMEL_RF_ASSUME_SPACED_SPI 1              // set by library:atmel-rf[STM]
+#define MBED_CONF_ATMEL_RF_FULL_SPI_SPEED 7500000           // set by library:atmel-rf
+#define MBED_CONF_ATMEL_RF_FULL_SPI_SPEED_BYTE_SPACING 250  // set by library:atmel-rf
+#define MBED_CONF_ATMEL_RF_IRQ_THREAD_STACK_SIZE 1024       // set by library:atmel-rf
+#define MBED_CONF_ATMEL_RF_LOW_SPI_SPEED 3750000            // set by library:atmel-rf
+#define MBED_CONF_ATMEL_RF_PROVIDE_DEFAULT 0                // set by library:atmel-rf
+#define MBED_CONF_ATMEL_RF_USE_SPI_SPACING_API 0            // set by library:atmel-rf
+#define MBED_CONF_CELLULAR_AT_HANDLER_BUFFER_SIZE 32        // set by library:cellular
+#define MBED_CONF_CELLULAR_CONTROL_PLANE_OPT 0              // set by library:cellular
+#define MBED_CONF_CELLULAR_DEBUG_AT 0                       // set by library:cellular
+#define MBED_CONF_CELLULAR_MAX_CP_DATA_RECV_LEN 1358        // set by library:cellular
+#define MBED_CONF_CELLULAR_PRESENT 1                        // set by library:cellular
+#define MBED_CONF_CELLULAR_RANDOM_MAX_START_DELAY 0         // set by library:cellular
+#define MBED_CONF_CELLULAR_USE_APN_LOOKUP 0                 // set by library:cellular
+#define MBED_CONF_CELLULAR_USE_SMS 0                        // set by library:cellular
+#define MBED_CONF_DRIVERS_OSPI_CSN OSPI_FLASH1_CSN          // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_DQS OSPI_FLASH1_DQS          // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO0 OSPI_FLASH1_IO0          // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO1 OSPI_FLASH1_IO1          // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO2 OSPI_FLASH1_IO2          // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO3 OSPI_FLASH1_IO3          // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO4 OSPI_FLASH1_IO4          // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO5 OSPI_FLASH1_IO5          // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO6 OSPI_FLASH1_IO6          // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO7 OSPI_FLASH1_IO7          // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_SCK OSPI_FLASH1_SCK          // set by library:drivers
+#define MBED_CONF_DRIVERS_QSPI_CSN QSPI_FLASH1_CSN          // set by library:drivers
+#define MBED_CONF_DRIVERS_QSPI_IO0 QSPI_FLASH1_IO0          // set by library:drivers
+#define MBED_CONF_DRIVERS_QSPI_IO1 QSPI_FLASH1_IO1          // set by library:drivers
+#define MBED_CONF_DRIVERS_QSPI_IO2 QSPI_FLASH1_IO2          // set by library:drivers
+#define MBED_CONF_DRIVERS_QSPI_IO3 QSPI_FLASH1_IO3          // set by library:drivers
+#define MBED_CONF_DRIVERS_QSPI_SCK QSPI_FLASH1_SCK          // set by library:drivers
+#define MBED_CONF_DRIVERS_UART_SERIAL_RXBUF_SIZE 256        // set by library:drivers
+#define MBED_CONF_DRIVERS_UART_SERIAL_TXBUF_SIZE 256        // set by library:drivers
+#define MBED_CONF_ESP8266_BUILT_IN_DNS 0                    // set by library:esp8266
+#define MBED_CONF_ESP8266_DEBUG 0                           // set by library:esp8266
+#define MBED_CONF_ESP8266_POWER_OFF_TIME_MS 3               // set by library:esp8266
+#define MBED_CONF_ESP8266_POWER_ON_POLARITY 0               // set by library:esp8266
+#define MBED_CONF_ESP8266_POWER_ON_TIME_MS 3                // set by library:esp8266
+#define MBED_CONF_ESP8266_PROVIDE_DEFAULT 0                 // set by library:esp8266
+#define MBED_CONF_ESP8266_SERIAL_BAUDRATE 115200            // set by library:esp8266
+#define MBED_CONF_ESP8266_SNTP_ENABLE 0                     // set by library:esp8266
+#define MBED_CONF_ESP8266_SNTP_SERVER0 ""                   // set by library:esp8266
+#define MBED_CONF_ESP8266_SNTP_SERVER1 ""                   // set by library:esp8266
+#define MBED_CONF_ESP8266_SNTP_SERVER2 ""                   // set by library:esp8266
+#define MBED_CONF_ESP8266_SNTP_TIMEZONE 0                   // set by library:esp8266
+#define MBED_CONF_ESP8266_SOCKET_BUFSIZE 8192               // set by library:esp8266
+#define MBED_CONF_EVENTS_PRESENT 1                          // set by library:events
+#define MBED_CONF_EVENTS_SHARED_DISPATCH_FROM_APPLICATION 0 // set by library:events
+#define MBED_CONF_EVENTS_SHARED_EVENTSIZE 768               // set by library:events
+#define MBED_CONF_EVENTS_SHARED_HIGHPRIO_EVENTSIZE 256      // set by library:events
+#define MBED_CONF_EVENTS_SHARED_HIGHPRIO_STACKSIZE 1024     // set by library:events
+#define MBED_CONF_EVENTS_SHARED_STACKSIZE 2048              // set by library:events
+#define MBED_CONF_EVENTS_USE_LOWPOWER_TIMER_TICKER 0        // set by library:events
+#define MBED_CONF_FAT_CHAN_FFS_DBG 0                        // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_CODE_PAGE 437                 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_EXFAT 0                    // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_HEAPBUF 1                  // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_LOCK 0                     // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_MINIMIZE 0                 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_NOFSINFO 0                 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_NORTC 0                    // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_READONLY 0                 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_REENTRANT 0                // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_RPATH 1                    // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_TIMEOUT 1000               // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_TINY 1                     // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_LFN_BUF 255                   // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_LFN_UNICODE 0                 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_MAX_LFN 255                   // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_MAX_SS 4096                   // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_MIN_SS 512                    // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_MULTI_PARTITION 0             // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_NORTC_MDAY 1                  // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_NORTC_MON 1                   // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_NORTC_YEAR 2017               // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_PRINT_FLOAT 0                 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_PRINT_LLI 0                   // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_SFN_BUF 12                    // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_STRF_ENCODE 3                 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_STR_VOLUME_ID 0               // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_SYNC_T HANDLE                 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_CHMOD 0                   // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_EXPAND 0                  // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_FASTSEEK 0                // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_FIND 0                    // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_FORWARD 0                 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_LABEL 0                   // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_LFN 3                     // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_MKFS 1                    // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_STRFUNC 0                 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_TRIM 1                    // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_VOLUMES 4                     // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_VOLUME_STRS \
+  "RAM", "NAND", "CF", "SD", "SD2", "USB", "USB2", "USB3" // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FLUSH_ON_NEW_CLUSTER 0         // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FLUSH_ON_NEW_SECTOR 1          // set by library:fat_chan
+#define MBED_CONF_FILESYSTEM_PRESENT 1                    // set by library:filesystem
+#define MBED_CONF_FLASHIAP_BLOCK_DEVICE_BASE_ADDRESS \
+  0xFFFFFFFF                                          // set by library:flashiap-block-device
+#define MBED_CONF_FLASHIAP_BLOCK_DEVICE_SIZE 0        // set by library:flashiap-block-device
+#define MBED_CONF_GEMALTO_CINTERION_BAUDRATE 115200   // set by library:GEMALTO_CINTERION
+#define MBED_CONF_GEMALTO_CINTERION_PROVIDE_DEFAULT 0 // set by library:GEMALTO_CINTERION
+#define MBED_CONF_GENERIC_AT3GPP_BAUDRATE 115200      // set by library:GENERIC_AT3GPP
+#define MBED_CONF_GENERIC_AT3GPP_PROVIDE_DEFAULT 0    // set by library:GENERIC_AT3GPP
+#define MBED_CONF_LORA_ADR_ON 1                       // set by library:lora
+#define MBED_CONF_LORA_APPLICATION_EUI             \
+  {                                                \
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 \
+  } // set by library:lora
+#define MBED_CONF_LORA_APPLICATION_KEY                                                             \
+  {                                                                                                \
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 \
+  } // set by library:lora
+#define MBED_CONF_LORA_APPSKEY                                                                     \
+  {                                                                                                \
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 \
+  }                                               // set by library:lora
+#define MBED_CONF_LORA_APP_PORT 15                // set by library:lora
+#define MBED_CONF_LORA_AUTOMATIC_UPLINK_MESSAGE 1 // set by library:lora
+#define MBED_CONF_LORA_DEVICE_ADDRESS 0x00000000  // set by library:lora
+#define MBED_CONF_LORA_DEVICE_EUI                  \
+  {                                                \
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 \
+  }                                               // set by library:lora
+#define MBED_CONF_LORA_DOWNLINK_PREAMBLE_LENGTH 5 // set by library:lora
+#define MBED_CONF_LORA_DUTY_CYCLE_ON 1            // set by library:lora
+#define MBED_CONF_LORA_DUTY_CYCLE_ON_JOIN 1       // set by library:lora
+#define MBED_CONF_LORA_FSB_MASK            \
+  {                                        \
+    0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x00FF \
+  } // set by library:lora
+#define MBED_CONF_LORA_FSB_MASK_CHINA              \
+  {                                                \
+    0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF \
+  }                                       // set by library:lora
+#define MBED_CONF_LORA_LBT_ON 0           // set by library:lora
+#define MBED_CONF_LORA_MAX_SYS_RX_ERROR 5 // set by library:lora
+#define MBED_CONF_LORA_NB_TRIALS 12       // set by library:lora
+#define MBED_CONF_LORA_NWKSKEY                                                                     \
+  {                                                                                                \
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 \
+  }                                                               // set by library:lora
+#define MBED_CONF_LORA_OVER_THE_AIR_ACTIVATION 1                  // set by library:lora
+#define MBED_CONF_LORA_PHY EU868                                  // set by library:lora
+#define MBED_CONF_LORA_PUBLIC_NETWORK 1                           // set by library:lora
+#define MBED_CONF_LORA_TX_MAX_SIZE 64                             // set by library:lora
+#define MBED_CONF_LORA_UPLINK_PREAMBLE_LENGTH 8                   // set by library:lora
+#define MBED_CONF_LORA_WAKEUP_TIME 5                              // set by library:lora
+#define MBED_CONF_LWIP_ADDR_TIMEOUT 5                             // set by library:lwip
+#define MBED_CONF_LWIP_ADDR_TIMEOUT_MODE 1                        // set by library:lwip
+#define MBED_CONF_LWIP_DEBUG_ENABLED 0                            // set by library:lwip
+#define MBED_CONF_LWIP_DEFAULT_THREAD_STACKSIZE 512               // set by library:lwip
+#define MBED_CONF_LWIP_DHCP_TIMEOUT 60                            // set by library:lwip
+#define MBED_CONF_LWIP_ENABLE_PPP_TRACE 0                         // set by library:lwip
+#define MBED_CONF_LWIP_ETHERNET_ENABLED 1                         // set by library:lwip
+#define MBED_CONF_LWIP_IPV4_ENABLED 1                             // set by library:lwip
+#define MBED_CONF_LWIP_IPV6_ENABLED 0                             // set by library:lwip
+#define MBED_CONF_LWIP_IP_VER_PREF 4                              // set by library:lwip
+#define MBED_CONF_LWIP_L3IP_ENABLED 0                             // set by library:lwip
+#define MBED_CONF_LWIP_MBOX_SIZE 8                                // set by library:lwip
+#define MBED_CONF_LWIP_MEMP_NUM_TCPIP_MSG_INPKT 8                 // set by library:lwip
+#define MBED_CONF_LWIP_MEMP_NUM_TCP_SEG 16                        // set by library:lwip
+#define MBED_CONF_LWIP_MEM_SIZE 2310                              // set by library:lwip[STM]
+#define MBED_CONF_LWIP_ND6_QUEUEING 0                             // set by library:lwip
+#define MBED_CONF_LWIP_ND6_RDNSS_MAX_DNS_SERVERS 0                // set by library:lwip
+#define MBED_CONF_LWIP_NUM_NETBUF 8                               // set by library:lwip
+#define MBED_CONF_LWIP_NUM_PBUF 8                                 // set by library:lwip
+#define MBED_CONF_LWIP_PBUF_POOL_SIZE 5                           // set by library:lwip
+#define MBED_CONF_LWIP_PPP_ENABLED 0                              // set by library:lwip
+#define MBED_CONF_LWIP_PPP_IPV4_ENABLED 0                         // set by library:lwip
+#define MBED_CONF_LWIP_PPP_IPV6_ENABLED 0                         // set by library:lwip
+#define MBED_CONF_LWIP_PPP_THREAD_STACKSIZE 768                   // set by library:lwip
+#define MBED_CONF_LWIP_PRESENT 1                                  // set by library:lwip
+#define MBED_CONF_LWIP_RAW_SOCKET_ENABLED 0                       // set by library:lwip
+#define MBED_CONF_LWIP_SOCKET_MAX 4                               // set by library:lwip
+#define MBED_CONF_LWIP_TCPIP_THREAD_PRIORITY osPriorityNormal     // set by library:lwip
+#define MBED_CONF_LWIP_TCPIP_THREAD_STACKSIZE 1200                // set by library:lwip
+#define MBED_CONF_LWIP_TCP_CLOSE_TIMEOUT 1000                     // set by library:lwip
+#define MBED_CONF_LWIP_TCP_ENABLED 1                              // set by library:lwip
+#define MBED_CONF_LWIP_TCP_MAXRTX 6                               // set by library:lwip
+#define MBED_CONF_LWIP_TCP_MSS 536                                // set by library:lwip
+#define MBED_CONF_LWIP_TCP_SERVER_MAX 4                           // set by library:lwip
+#define MBED_CONF_LWIP_TCP_SND_BUF (2 * TCP_MSS)                  // set by library:lwip
+#define MBED_CONF_LWIP_TCP_SOCKET_MAX 4                           // set by library:lwip
+#define MBED_CONF_LWIP_TCP_SYNMAXRTX 6                            // set by library:lwip
+#define MBED_CONF_LWIP_TCP_WND (4 * TCP_MSS)                      // set by library:lwip
+#define MBED_CONF_LWIP_UDP_SOCKET_MAX 4                           // set by library:lwip
+#define MBED_CONF_LWIP_USE_MBED_TRACE 0                           // set by library:lwip
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_CHANNEL 0              // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_CHANNEL_MASK 0x7fff800 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_CHANNEL_PAGE 0         // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_DEVICE_TYPE \
+  NET_6LOWPAN_ROUTER                                           // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_PANID_FILTER 0xffff // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_PSK_KEY                                                 \
+  {                                                                                                \
+    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf \
+  }                                                                  // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_PSK_KEY_ID 1              // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_SECURITY_MODE NONE        // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_SEC_LEVEL 5               // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_HEAP_SIZE 32500                      // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_HEAP_STAT_INFO NULL                  // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_MAC_NEIGH_TABLE_SIZE 32              // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_RADIUS_RETRY_COUNT 3                 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_RADIUS_RETRY_IMAX 30                 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_RADIUS_RETRY_IMIN 20                 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_SYSTEM_TIME_UPDATE_FROM_NANOSTACK 1  // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_CHANNEL 22             // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_CHANNEL_MASK 0x7fff800 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_CHANNEL_PAGE 0         // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_COMMISSIONING_DATASET_TIMESTAMP \
+  0x10000 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_EXTENDED_PANID \
+  {                                                          \
+    0xf1, 0xb5, 0xa1, 0xb2, 0xc4, 0xd5, 0xa1, 0xbd           \
+  } // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_ML_PREFIX \
+  {                                                     \
+    0xfd, 0x0, 0x0d, 0xb8, 0x0, 0x0, 0x0, 0x0           \
+  } // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_NETWORK_NAME \
+  "Thread Network"                                         // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_PANID 0x0700 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_PSKC                                                 \
+  {                                                                                                \
+    0xc8, 0xa6, 0x2e, 0xae, 0xf3, 0x68, 0xf3, 0x46, 0xa9, 0x9e, 0x57, 0x85, 0x98, 0x9d, 0x1c, 0xd0 \
+  } // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_DEVICE_TYPE \
+  MESH_DEVICE_TYPE_THREAD_ROUTER // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_MASTER_KEY                                                  \
+  {                                                                                                \
+    0x10, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff \
+  }                                                             // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_PSKD "ABCDEFGH"          // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_SECURITY_POLICY 255      // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_USE_STATIC_LINK_CONFIG 1 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_USE_MALLOC_FOR_HEAP 0           // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_BC_CHANNEL_FUNCTION 255   // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_BC_DWELL_INTERVAL 0       // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_BC_FIXED_CHANNEL 65535    // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_BC_INTERVAL 0             // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_CHANNEL_PLAN_ID 255       // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_DEVICE_TYPE \
+  MESH_DEVICE_TYPE_WISUN_ROUTER                                     // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_NETWORK_NAME "Wi-SUN Network" // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_OPERATING_CLASS 255           // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_OPERATING_MODE 255            // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_PHY_MODE_ID 255               // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_REGULATORY_DOMAIN 3           // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_UC_CHANNEL_FUNCTION 255       // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_UC_DWELL_INTERVAL 255         // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_UC_FIXED_CHANNEL 65535        // set by library:mbed-mesh-api
+#define MBED_CONF_MCR20A_PROVIDE_DEFAULT 0                          // set by library:mcr20a
+#define MBED_CONF_NANOSTACK_CONFIGURATION nanostack_full            // set by library:nanostack
+#define MBED_CONF_NANOSTACK_HAL_CRITICAL_SECTION_USABLE_FROM_INTERRUPT \
+  0 // set by library:nanostack-hal
+#define MBED_CONF_NANOSTACK_HAL_EVENT_LOOP_DISPATCH_FROM_APPLICATION \
+  0                                                               // set by library:nanostack-hal
+#define MBED_CONF_NANOSTACK_HAL_EVENT_LOOP_THREAD_STACK_SIZE 6144 // set by library:nanostack-hal
+#define MBED_CONF_NANOSTACK_HAL_EVENT_LOOP_USE_MBED_EVENTS 0      // set by library:nanostack-hal
+#define MBED_CONF_NANOSTACK_HAL_KVSTORE_PATH "/kv/"               // set by library:nanostack-hal
+#define MBED_CONF_NANOSTACK_HAL_USE_KVSTORE 0                     // set by library:nanostack-hal
+#define MBED_CONF_NANOSTACK_LIBSERVICE_NSDYNMEM_TRACKER_ENABLED \
+  0                                                          // set by library:nanostack-libservice
+#define MBED_CONF_NANOSTACK_LIBSERVICE_PRESENT 1             // set by library:nanostack-libservice
+#define MBED_CONF_NSAPI_ADD_EVENT_LISTENER_RETURN_CHANGE 0   // set by library:nsapi
+#define MBED_CONF_NSAPI_DEFAULT_MESH_TYPE THREAD             // set by library:nsapi
+#define MBED_CONF_NSAPI_DEFAULT_STACK LWIP                   // set by library:nsapi
+#define MBED_CONF_NSAPI_DEFAULT_WIFI_SECURITY NONE           // set by library:nsapi
+#define MBED_CONF_NSAPI_DNS_ADDRESSES_LIMIT 10               // set by library:nsapi
+#define MBED_CONF_NSAPI_DNS_CACHE_SIZE 3                     // set by library:nsapi
+#define MBED_CONF_NSAPI_DNS_RESPONSE_WAIT_TIME 10000         // set by library:nsapi
+#define MBED_CONF_NSAPI_DNS_RETRIES 1                        // set by library:nsapi
+#define MBED_CONF_NSAPI_DNS_TOTAL_ATTEMPTS 10                // set by library:nsapi
+#define MBED_CONF_NSAPI_PRESENT 1                            // set by library:nsapi
+#define MBED_CONF_NSAPI_SOCKET_STATS_ENABLED 0               // set by library:nsapi
+#define MBED_CONF_NSAPI_SOCKET_STATS_MAX_COUNT 10            // set by library:nsapi
+#define MBED_CONF_PLATFORM_CALLBACK_COMPARABLE 1             // set by library:platform
+#define MBED_CONF_PLATFORM_CALLBACK_NONTRIVIAL 0             // set by library:platform
+#define MBED_CONF_PLATFORM_CRASH_CAPTURE_ENABLED 0           // set by library:platform
+#define MBED_CONF_PLATFORM_CTHUNK_COUNT_MAX 8                // set by library:platform
+#define MBED_CONF_PLATFORM_DEEPSLEEP_STATS_VERBOSE 0         // set by library:platform[STM]
+#define MBED_CONF_PLATFORM_DEFAULT_SERIAL_BAUD_RATE 9600     // set by library:platform
+#define MBED_CONF_PLATFORM_ERROR_ALL_THREADS_INFO 0          // set by library:platform
+#define MBED_CONF_PLATFORM_ERROR_FILENAME_CAPTURE_ENABLED 0  // set by library:platform
+#define MBED_CONF_PLATFORM_ERROR_HIST_ENABLED 0              // set by library:platform
+#define MBED_CONF_PLATFORM_ERROR_HIST_SIZE 4                 // set by library:platform
+#define MBED_CONF_PLATFORM_ERROR_REBOOT_MAX 1                // set by library:platform
+#define MBED_CONF_PLATFORM_FATAL_ERROR_AUTO_REBOOT_ENABLED 0 // set by library:platform
+#define MBED_CONF_PLATFORM_MAX_ERROR_FILENAME_LEN 16         // set by library:platform
+#define MBED_CONF_PLATFORM_MINIMAL_PRINTF_ENABLE_64_BIT 1    // set by library:platform
+#define MBED_CONF_PLATFORM_MINIMAL_PRINTF_ENABLE_FLOATING_POINT 0 // set by library:platform
+#define MBED_CONF_PLATFORM_MINIMAL_PRINTF_SET_FLOATING_POINT_MAX_DECIMALS \
+  6                                                              // set by library:platform
+#define MBED_CONF_PLATFORM_POLL_USE_LOWPOWER_TIMER 0             // set by library:platform
+#define MBED_CONF_PLATFORM_STDIO_BAUD_RATE 9600                  // set by library:platform
+#define MBED_CONF_PLATFORM_STDIO_BUFFERED_SERIAL 0               // set by library:platform
+#define MBED_CONF_PLATFORM_STDIO_CONVERT_NEWLINES 1              // set by library:platform
+#define MBED_CONF_PLATFORM_STDIO_CONVERT_TTY_NEWLINES 1          // set by library:platform
+#define MBED_CONF_PLATFORM_STDIO_FLUSH_AT_EXIT 1                 // set by library:platform
+#define MBED_CONF_PLATFORM_STDIO_MINIMAL_CONSOLE_ONLY 0          // set by library:platform
+#define MBED_CONF_PLATFORM_USE_MPU 1                             // set by library:platform
+#define MBED_CONF_PPP_ENABLED 0                                  // set by library:ppp
+#define MBED_CONF_PPP_ENABLE_TRACE 0                             // set by library:ppp
+#define MBED_CONF_PPP_IPV4_ENABLED 1                             // set by library:ppp
+#define MBED_CONF_PPP_IPV6_ENABLED 0                             // set by library:ppp
+#define MBED_CONF_PPP_MBED_EVENT_QUEUE 0                         // set by library:ppp
+#define MBED_CONF_PPP_THREAD_STACKSIZE 816                       // set by library:ppp
+#define MBED_CONF_QUECTEL_BC95_BAUDRATE 9600                     // set by library:QUECTEL_BC95
+#define MBED_CONF_QUECTEL_BC95_PROVIDE_DEFAULT 0                 // set by library:QUECTEL_BC95
+#define MBED_CONF_QUECTEL_BG96_BAUDRATE 115200                   // set by library:QUECTEL_BG96
+#define MBED_CONF_QUECTEL_BG96_PROVIDE_DEFAULT 0                 // set by library:QUECTEL_BG96
+#define MBED_CONF_QUECTEL_EC2X_BAUDRATE 115200                   // set by library:QUECTEL_EC2X
+#define MBED_CONF_QUECTEL_EC2X_PROVIDE_DEFAULT 0                 // set by library:QUECTEL_EC2X
+#define MBED_CONF_QUECTEL_EC2X_START_TIMEOUT 15000               // set by library:QUECTEL_EC2X
+#define MBED_CONF_QUECTEL_M26_BAUDRATE 115200                    // set by library:QUECTEL_M26
+#define MBED_CONF_QUECTEL_M26_PROVIDE_DEFAULT 0                  // set by library:QUECTEL_M26
+#define MBED_CONF_QUECTEL_UG96_BAUDRATE 115200                   // set by library:QUECTEL_UG96
+#define MBED_CONF_QUECTEL_UG96_PROVIDE_DEFAULT 0                 // set by library:QUECTEL_UG96
+#define MBED_CONF_RM1000_AT_BAUDRATE 230400                      // set by library:RM1000_AT
+#define MBED_CONF_RM1000_AT_PROVIDE_DEFAULT 0                    // set by library:RM1000_AT
+#define MBED_CONF_RTOS_API_PRESENT 1                             // set by library:rtos-api
+#define MBED_CONF_RTOS_ENABLE_ALL_RTX_EVENTS 0                   // set by library:rtos
+#define MBED_CONF_RTOS_EVFLAGS_NUM 0                             // set by library:rtos
+#define MBED_CONF_RTOS_IDLE_THREAD_STACK_SIZE 512                // set by library:rtos
+#define MBED_CONF_RTOS_IDLE_THREAD_STACK_SIZE_DEBUG_EXTRA 128    // set by library:rtos[STM]
+#define MBED_CONF_RTOS_IDLE_THREAD_STACK_SIZE_TICKLESS_EXTRA 256 // set by library:rtos
+#define MBED_CONF_RTOS_MAIN_THREAD_STACK_SIZE 4096               // set by library:rtos
+#define MBED_CONF_RTOS_MSGQUEUE_DATA_SIZE 0                      // set by library:rtos
+#define MBED_CONF_RTOS_MSGQUEUE_NUM 0                            // set by library:rtos
+#define MBED_CONF_RTOS_MUTEX_NUM 0                               // set by library:rtos
+#define MBED_CONF_RTOS_PRESENT 1                                 // set by library:rtos
+#define MBED_CONF_RTOS_SEMAPHORE_NUM 0                           // set by library:rtos
+#define MBED_CONF_RTOS_THREAD_NUM 0                              // set by library:rtos
+#define MBED_CONF_RTOS_THREAD_STACK_SIZE 4096                    // set by library:rtos
+#define MBED_CONF_RTOS_THREAD_USER_STACK_SIZE 0                  // set by library:rtos
+#define MBED_CONF_RTOS_TIMER_NUM 0                               // set by library:rtos
+#define MBED_CONF_RTOS_TIMER_THREAD_STACK_SIZE 768               // set by library:rtos
+#define MBED_CONF_S2LP_PROVIDE_DEFAULT 0                         // set by library:s2lp
+#define MBED_CONF_SARA4_PPP_BAUDRATE 115200                      // set by library:SARA4_PPP
+#define MBED_CONF_SARA4_PPP_PROVIDE_DEFAULT 0                    // set by library:SARA4_PPP
+#define MBED_CONF_STM32_EMAC_ETH_PHY_ADDRESS 0                   // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_AUTONEGOTIATION \
+  ETH_AUTONEGOTIATION_ENABLE                                        // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_DUPLEXMODE ETH_MODE_FULLDUPLEX // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_DUPLEX_STATUS 0x0010           // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_MEDIA_INTERFACE \
+  ETH_MEDIA_INTERFACE_RMII                                   // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_RESET_DELAY 500         // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_SPEED ETH_SPEED_100M    // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_SPEED_STATUS 0x0004     // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_STATUS_REGISTER 31      // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_RXBUFNB 4                   // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_TXBUFNB 10                  // set by library:stm32-emac[STM32H7]
+#define MBED_CONF_STM32_EMAC_THREAD_STACKSIZE 1024           // set by library:stm32-emac
+#define MBED_CONF_STORAGE_DEFAULT_KV kv                      // set by library:storage
+#define MBED_CONF_STORAGE_FILESYSTEM_BLOCKDEVICE default     // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_EXTERNAL_BASE_ADDRESS 0 // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_EXTERNAL_SIZE 0         // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_FILESYSTEM default      // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_FOLDER_PATH kvstore     // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_INTERNAL_BASE_ADDRESS 0 // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_MOUNT_POINT kv          // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_NO_RBP_BLOCKDEVICE \
+  default // set by library:storage_filesystem_no_rbp
+#define MBED_CONF_STORAGE_FILESYSTEM_NO_RBP_EXTERNAL_BASE_ADDRESS \
+  0 // set by library:storage_filesystem_no_rbp
+#define MBED_CONF_STORAGE_FILESYSTEM_NO_RBP_EXTERNAL_SIZE \
+  0 // set by library:storage_filesystem_no_rbp
+#define MBED_CONF_STORAGE_FILESYSTEM_NO_RBP_FILESYSTEM \
+  default // set by library:storage_filesystem_no_rbp
+#define MBED_CONF_STORAGE_FILESYSTEM_NO_RBP_FOLDER_PATH \
+  kvstore // set by library:storage_filesystem_no_rbp
+#define MBED_CONF_STORAGE_FILESYSTEM_NO_RBP_MOUNT_POINT \
+  kv                                                     // set by library:storage_filesystem_no_rbp
+#define MBED_CONF_STORAGE_FILESYSTEM_RBP_INTERNAL_SIZE 0 // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_STORAGE_TYPE TDB_INTERNAL      // set by library:storage[NUCLEO_H743ZI2]
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_BLOCKDEVICE default // set by library:storage_tdb_external
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_EXTERNAL_BASE_ADDRESS \
+  0                                                    // set by library:storage_tdb_external
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_EXTERNAL_SIZE 0 // set by library:storage_tdb_external
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_INTERNAL_BASE_ADDRESS \
+  0 // set by library:storage_tdb_external
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_NO_RBP_BLOCKDEVICE \
+  default // set by library:storage_tdb_external_no_rbp
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_NO_RBP_EXTERNAL_BASE_ADDRESS \
+  0 // set by library:storage_tdb_external_no_rbp
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_NO_RBP_EXTERNAL_SIZE \
+  0 // set by library:storage_tdb_external_no_rbp
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_RBP_INTERNAL_SIZE 0 // set by library:storage_tdb_external
+#define MBED_CONF_STORAGE_TDB_INTERNAL_INTERNAL_BASE_ADDRESS \
+  0                                                       // set by library:storage_tdb_internal
+#define MBED_CONF_STORAGE_TDB_INTERNAL_INTERNAL_SIZE 0    // set by library:storage_tdb_internal
+#define MBED_CONF_TARGET_BOOT_STACK_SIZE 0x400            // set by library:rtos[*]
+#define MBED_CONF_TARGET_CONSOLE_UART 1                   // set by target:Target
+#define MBED_CONF_TARGET_CUSTOM_TICKERS 1                 // set by target:Target
+#define MBED_CONF_TARGET_DEEP_SLEEP_LATENCY 4             // set by target:MCU_STM32
+#define MBED_CONF_TARGET_DEFAULT_ADC_VREF NAN             // set by target:Target
+#define MBED_CONF_TARGET_GPIO_RESET_AT_INIT 0             // set by target:MCU_STM32
+#define MBED_CONF_TARGET_I2C_TIMING_VALUE_ALGO 0          // set by target:MCU_STM32H7
+#define MBED_CONF_TARGET_INIT_US_TICKER_AT_BOOT 1         // set by target:MCU_STM32
+#define MBED_CONF_TARGET_INTERNAL_FLASH_UNIFORM_SECTORS 1 // set by target:Target
+#define MBED_CONF_TARGET_LPTICKER_LPTIM 1                 // set by target:MCU_STM32H7
+#define MBED_CONF_TARGET_LPTICKER_LPTIM_CLOCK 1           // set by target:MCU_STM32
+#define MBED_CONF_TARGET_LPUART_CLOCK_SOURCE \
+  USE_LPUART_CLK_LSE | USE_LPUART_CLK_PCLK1 | USE_LPUART_CLK_PCLK3 // set by target:MCU_STM32
+#define MBED_CONF_TARGET_LSE_AVAILABLE 1                           // set by target:MCU_STM32
+#define MBED_CONF_TARGET_LSE_DRIVE_LOAD_LEVEL RCC_LSEDRIVE_LOW     // set by target:MCU_STM32H7
+#define MBED_CONF_TARGET_MPU_ROM_END 0x0fffffff                    // set by target:Target
+#define MBED_CONF_TARGET_NETWORK_DEFAULT_INTERFACE_TYPE ETHERNET   // set by target:NUCLEO_H743ZI2
+#define MBED_CONF_TARGET_RTC_CLOCK_SOURCE USE_RTC_CLK_LSE_OR_LSI   // set by target:MCU_STM32
+#define MBED_CONF_TARGET_SYSTEM_POWER_SUPPLY PWR_LDO_SUPPLY        // set by target:MCU_STM32H743xI
+#define MBED_CONF_TARGET_TICKLESS_FROM_US_TICKER 0                 // set by target:Target
+#define MBED_CONF_TARGET_XIP_ENABLE 0                              // set by target:Target
+#define MBED_CONF_TELIT_HE910_BAUDRATE 115200                      // set by library:TELIT_HE910
+#define MBED_CONF_TELIT_HE910_PROVIDE_DEFAULT 0                    // set by library:TELIT_HE910
+#define MBED_CONF_TELIT_ME310_BAUDRATE 115200                      // set by library:TELIT_ME310
+#define MBED_CONF_TELIT_ME310_PROVIDE_DEFAULT 0                    // set by library:TELIT_ME310
+#define MBED_CONF_TELIT_ME910_BAUDRATE 115200                      // set by library:TELIT_ME910
+#define MBED_CONF_TELIT_ME910_PROVIDE_DEFAULT 0                    // set by library:TELIT_ME910
+#define MBED_CONF_UBLOX_AT_BAUDRATE 115200                         // set by library:UBLOX_AT
+#define MBED_CONF_UBLOX_AT_PROVIDE_DEFAULT 0                       // set by library:UBLOX_AT
+#define MBED_CONF_UBLOX_N2XX_BAUDRATE 9600                         // set by library:UBLOX_N2XX
+#define MBED_CONF_UBLOX_N2XX_PROVIDE_DEFAULT 0                     // set by library:UBLOX_N2XX
+#define MBED_CONF_UBLOX_PPP_BAUDRATE 115200                        // set by library:UBLOX_PPP
+#define MBED_CONF_UBLOX_PPP_PROVIDE_DEFAULT 0                      // set by library:UBLOX_PPP
+#define MBED_CRC_TABLE_SIZE 16                                     // set by library:drivers
+#define MBED_LFS2_BLOCK_CYCLES 1024                                // set by library:littlefs2
+#define MBED_LFS2_BLOCK_SIZE 512                                   // set by library:littlefs2
+#define MBED_LFS2_CACHE_SIZE 64                                    // set by library:littlefs2
+#define MBED_LFS2_ENABLE_INFO 0                                    // set by library:littlefs2
+#define MBED_LFS2_INTRINSICS 1                                     // set by library:littlefs2
+#define MBED_LFS2_LOOKAHEAD_SIZE 64                                // set by library:littlefs2
+#define MBED_LFS_BLOCK_SIZE 512                                    // set by library:littlefs
+#define MBED_LFS_ENABLE_INFO 0                                     // set by library:littlefs
+#define MBED_LFS_INTRINSICS 1                                      // set by library:littlefs
+#define MBED_LFS_LOOKAHEAD 512                                     // set by library:littlefs
+#define MBED_LFS_PROG_SIZE 64                                      // set by library:littlefs
+#define MBED_LFS_READ_SIZE 64                                      // set by library:littlefs
+#define MBED_STACK_DUMP_ENABLED 0                                  // set by library:platform
+#define MBED_TRACE_COLOR_THEME 0                                   // set by library:mbed-trace
+#define MEM_ALLOC malloc                                           // set by library:mbed-trace
+#define MEM_FREE free                                              // set by library:mbed-trace
+#define PPP_DEBUG 0                                                // set by library:ppp
+#define STM32_D11_SPI_ETHERNET_PIN PB_5                            // set by target:NUCLEO_H743ZI2
+// Macros
+#define MBEDTLS_CIPHER_MODE_CTR // defined by library:SecureStore
+#define NSAPI_PPP_AVAILABLE \
+  (MBED_CONF_PPP_ENABLED || MBED_CONF_LWIP_PPP_ENABLED) // defined by library:ppp
+#define NSDYNMEM_TRACKER_ENABLED \
+  MBED_CONF_NANOSTACK_LIBSERVICE_NSDYNMEM_TRACKER_ENABLED // defined by library:nanostack-libservice
+#define NS_USE_EXTERNAL_MBED_TLS                          // defined by library:nanostack
+#define UNITY_INCLUDE_CONFIG_H                            // defined by library:utest
+#define _RTE_                                             // defined by library:rtos
+
+#endif
diff --git a/onert-micro/tests/mbed-os/startup_stm32h743xx.S b/onert-micro/tests/mbed-os/startup_stm32h743xx.S
new file mode 100644 (file)
index 0000000..b978ae1
+--- /dev/null
++++ b/onert-micro/tests/mbed-os/startup_stm32h743xx.S
@@ -0,0 +1,675 @@
+.syntax unified
+.cpu cortex-m7
+.fpu softvfp
+.thumb
+
+.global  g_pfnVectors
+.global  Default_Handler
+
+.word  _sidata
+.word  _sdata
+.word  _edata
+.word  _sbss
+.word  _ebss
+.section  .text.Reset_Handler
+.weak  Reset_Handler
+.type  Reset_Handler, %function
+
+Reset_Handler:
+  ldr   sp, =_estack      /* set stack pointer */
+
+/* Copy the .data segment initializers from flash to SRAM */
+  movs  r1, #0
+  b  LoopCopyDataInit
+
+CopyDataInit:
+  ldr  r3, =_sidata
+  ldr  r3, [r3, r1]
+  str  r3, [r0, r1]
+  adds  r1, r1, #4
+
+LoopCopyDataInit:
+  ldr  r0, =_sdata
+  ldr  r3, =_edata
+  adds  r2, r0, r1
+  cmp  r2, r3
+  bcc  CopyDataInit
+  ldr  r2, =_sbss
+  b  LoopFillZerobss
+
+FillZerobss:
+  movs  r3, #0
+  str  r3, [r2], #4
+
+LoopFillZerobss:
+  ldr  r3, = _ebss
+  cmp  r2, r3
+  bcc  FillZerobss
+
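+/* RAM initialized; hand over to the C runtime entry point, which completes
+   startup before the application runs. */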
+  bl _start
+  bx lr
+
+.size  Reset_Handler, .-Reset_Handler
+
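+/* Default_Handler: entered on any unexpected interrupt; it parks the core in
+   an infinite loop so the system state is preserved for a debugger. */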
+.section  .text.Default_Handler,"ax",%progbits
+
+Default_Handler:
+Infinite_Loop:
+  b  Infinite_Loop
+  .size  Default_Handler, .-Default_Handler
+   .section  .isr_vector,"a",%progbits
+  .type  g_pfnVectors, %object
+  .size  g_pfnVectors, .-g_pfnVectors
+
+
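+/* Minimal Cortex-M vector table: initial stack pointer, Reset_Handler, the
+   core exception handlers, then the STM32H743 peripheral interrupt vectors. */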
+g_pfnVectors:
+  .word  _estack
+  .word  Reset_Handler
+
+  .word  NMI_Handler
+  .word  HardFault_Handler
+  .word  MemManage_Handler
+  .word  BusFault_Handler
+  .word  UsageFault_Handler
+  .word  0
+  .word  0
+  .word  0
+  .word  0
+  .word  SVC_Handler
+  .word  DebugMon_Handler
+  .word  0
+  .word  PendSV_Handler
+  .word  SysTick_Handler
+
+  /* External Interrupts */
+  .word     WWDG_IRQHandler                   /* Window WatchDog              */
+  .word     PVD_AVD_IRQHandler                /* PVD/AVD through EXTI Line detection */
+  .word     TAMP_STAMP_IRQHandler             /* Tamper and TimeStamps through the EXTI line */
+  .word     RTC_WKUP_IRQHandler               /* RTC Wakeup through the EXTI line */
+  .word     FLASH_IRQHandler                  /* FLASH                        */
+  .word     RCC_IRQHandler                    /* RCC                          */
+  .word     EXTI0_IRQHandler                  /* EXTI Line0                   */
+  .word     EXTI1_IRQHandler                  /* EXTI Line1                   */
+  .word     EXTI2_IRQHandler                  /* EXTI Line2                   */
+  .word     EXTI3_IRQHandler                  /* EXTI Line3                   */
+  .word     EXTI4_IRQHandler                  /* EXTI Line4                   */
+  .word     DMA1_Stream0_IRQHandler           /* DMA1 Stream 0                */
+  .word     DMA1_Stream1_IRQHandler           /* DMA1 Stream 1                */
+  .word     DMA1_Stream2_IRQHandler           /* DMA1 Stream 2                */
+  .word     DMA1_Stream3_IRQHandler           /* DMA1 Stream 3                */
+  .word     DMA1_Stream4_IRQHandler           /* DMA1 Stream 4                */
+  .word     DMA1_Stream5_IRQHandler           /* DMA1 Stream 5                */
+  .word     DMA1_Stream6_IRQHandler           /* DMA1 Stream 6                */
+  .word     ADC_IRQHandler                    /* ADC1, ADC2 and ADC3s         */
+  .word     FDCAN1_IT0_IRQHandler             /* FDCAN1 interrupt line 0      */
+  .word     FDCAN2_IT0_IRQHandler             /* FDCAN2 interrupt line 0      */
+  .word     FDCAN1_IT1_IRQHandler             /* FDCAN1 interrupt line 1      */
+  .word     FDCAN2_IT1_IRQHandler             /* FDCAN2 interrupt line 1      */
+  .word     EXTI9_5_IRQHandler                /* External Line[9:5]s          */
+  .word     TIM1_BRK_IRQHandler               /* TIM1 Break interrupt         */
+  .word     TIM1_UP_IRQHandler                /* TIM1 Update interrupt        */
+  .word     TIM1_TRG_COM_IRQHandler           /* TIM1 Trigger and Commutation interrupt */
+  .word     TIM1_CC_IRQHandler                /* TIM1 Capture Compare         */
+  .word     TIM2_IRQHandler                   /* TIM2                         */
+  .word     TIM3_IRQHandler                   /* TIM3                         */
+  .word     TIM4_IRQHandler                   /* TIM4                         */
+  .word     I2C1_EV_IRQHandler                /* I2C1 Event                   */
+  .word     I2C1_ER_IRQHandler                /* I2C1 Error                   */
+  .word     I2C2_EV_IRQHandler                /* I2C2 Event                   */
+  .word     I2C2_ER_IRQHandler                /* I2C2 Error                   */
+  .word     SPI1_IRQHandler                   /* SPI1                         */
+  .word     SPI2_IRQHandler                   /* SPI2                         */
+  .word     USART1_IRQHandler                 /* USART1                       */
+  .word     USART2_IRQHandler                 /* USART2                       */
+  .word     USART3_IRQHandler                 /* USART3                       */
+  .word     EXTI15_10_IRQHandler              /* External Line[15:10]s        */
+  .word     RTC_Alarm_IRQHandler              /* RTC Alarm (A and B) through EXTI Line */
+  .word     0                                 /* Reserved                     */
+  .word     TIM8_BRK_TIM12_IRQHandler         /* TIM8 Break and TIM12         */
+  .word     TIM8_UP_TIM13_IRQHandler          /* TIM8 Update and TIM13        */
+  .word     TIM8_TRG_COM_TIM14_IRQHandler     /* TIM8 Trigger and Commutation and TIM14 */
+  .word     TIM8_CC_IRQHandler                /* TIM8 Capture Compare         */
+  .word     DMA1_Stream7_IRQHandler           /* DMA1 Stream7                 */
+  .word     FMC_IRQHandler                    /* FMC                          */
+  .word     SDMMC1_IRQHandler                 /* SDMMC1                       */
+  .word     TIM5_IRQHandler                   /* TIM5                         */
+  .word     SPI3_IRQHandler                   /* SPI3                         */
+  .word     UART4_IRQHandler                  /* UART4                        */
+  .word     UART5_IRQHandler                  /* UART5                        */
+  .word     TIM6_DAC_IRQHandler               /* TIM6 and DAC1&2 underrun errors */
+  .word     TIM7_IRQHandler                   /* TIM7                         */
+  .word     DMA2_Stream0_IRQHandler           /* DMA2 Stream 0                */
+  .word     DMA2_Stream1_IRQHandler           /* DMA2 Stream 1                */
+  .word     DMA2_Stream2_IRQHandler           /* DMA2 Stream 2                */
+  .word     DMA2_Stream3_IRQHandler           /* DMA2 Stream 3                */
+  .word     DMA2_Stream4_IRQHandler           /* DMA2 Stream 4                */
+  .word     ETH_IRQHandler                    /* Ethernet                     */
+  .word     ETH_WKUP_IRQHandler               /* Ethernet Wakeup through EXTI line */
+  .word     FDCAN_CAL_IRQHandler              /* FDCAN calibration unit interrupt*/
+  .word     0                                 /* Reserved                     */
+  .word     0                                 /* Reserved                     */
+  .word     0                                 /* Reserved                     */
+  .word     0                                 /* Reserved                     */
+  .word     DMA2_Stream5_IRQHandler           /* DMA2 Stream 5                */
+  .word     DMA2_Stream6_IRQHandler           /* DMA2 Stream 6                */
+  .word     DMA2_Stream7_IRQHandler           /* DMA2 Stream 7                */
+  .word     USART6_IRQHandler                 /* USART6                       */
+  .word     I2C3_EV_IRQHandler                /* I2C3 event                   */
+  .word     I2C3_ER_IRQHandler                /* I2C3 error                   */
+  .word     OTG_HS_EP1_OUT_IRQHandler         /* USB OTG HS End Point 1 Out   */
+  .word     OTG_HS_EP1_IN_IRQHandler          /* USB OTG HS End Point 1 In    */
+  .word     OTG_HS_WKUP_IRQHandler            /* USB OTG HS Wakeup through EXTI */
+  .word     OTG_HS_IRQHandler                 /* USB OTG HS                   */
+  .word     DCMI_IRQHandler                   /* DCMI                         */
+  .word     0                                 /* Reserved                     */
+  .word     RNG_IRQHandler                    /* Rng                          */
+  .word     FPU_IRQHandler                    /* FPU                          */
+  .word     UART7_IRQHandler                  /* UART7                        */
+  .word     UART8_IRQHandler                  /* UART8                        */
+  .word     SPI4_IRQHandler                   /* SPI4                         */
+  .word     SPI5_IRQHandler                   /* SPI5                         */
+  .word     SPI6_IRQHandler                   /* SPI6                         */
+  .word     SAI1_IRQHandler                   /* SAI1                         */
+  .word     LTDC_IRQHandler                   /* LTDC                         */
+  .word     LTDC_ER_IRQHandler                /* LTDC error                   */
+  .word     DMA2D_IRQHandler                  /* DMA2D                        */
+  .word     SAI2_IRQHandler                   /* SAI2                         */
+  .word     QUADSPI_IRQHandler                /* QUADSPI                      */
+  .word     LPTIM1_IRQHandler                 /* LPTIM1                       */
+  .word     CEC_IRQHandler                    /* HDMI_CEC                     */
+  .word     I2C4_EV_IRQHandler                /* I2C4 Event                   */
+  .word     I2C4_ER_IRQHandler                /* I2C4 Error                   */
+  .word     SPDIF_RX_IRQHandler               /* SPDIF_RX                     */
+  .word     OTG_FS_EP1_OUT_IRQHandler         /* USB OTG FS End Point 1 Out   */
+  .word     OTG_FS_EP1_IN_IRQHandler          /* USB OTG FS End Point 1 In    */
+  .word     OTG_FS_WKUP_IRQHandler            /* USB OTG FS Wakeup through EXTI */
+  .word     OTG_FS_IRQHandler                 /* USB OTG FS                   */
+  .word     DMAMUX1_OVR_IRQHandler            /* DMAMUX1 Overrun interrupt    */
+  .word     HRTIM1_Master_IRQHandler          /* HRTIM Master Timer global Interrupt */
+  .word     HRTIM1_TIMA_IRQHandler            /* HRTIM Timer A global Interrupt */
+  .word     HRTIM1_TIMB_IRQHandler            /* HRTIM Timer B global Interrupt */
+  .word     HRTIM1_TIMC_IRQHandler            /* HRTIM Timer C global Interrupt */
+  .word     HRTIM1_TIMD_IRQHandler            /* HRTIM Timer D global Interrupt */
+  .word     HRTIM1_TIME_IRQHandler            /* HRTIM Timer E global Interrupt */
+  .word     HRTIM1_FLT_IRQHandler             /* HRTIM Fault global Interrupt   */
+  .word     DFSDM1_FLT0_IRQHandler            /* DFSDM Filter0 Interrupt        */
+  .word     DFSDM1_FLT1_IRQHandler            /* DFSDM Filter1 Interrupt        */
+  .word     DFSDM1_FLT2_IRQHandler            /* DFSDM Filter2 Interrupt        */
+  .word     DFSDM1_FLT3_IRQHandler            /* DFSDM Filter3 Interrupt        */
+  .word     SAI3_IRQHandler                   /* SAI3 global Interrupt          */
+  .word     SWPMI1_IRQHandler                 /* Serial Wire Interface 1 global interrupt */
+  .word     TIM15_IRQHandler                  /* TIM15 global Interrupt      */
+  .word     TIM16_IRQHandler                  /* TIM16 global Interrupt      */
+  .word     TIM17_IRQHandler                  /* TIM17 global Interrupt      */
+  .word     MDIOS_WKUP_IRQHandler             /* MDIOS Wakeup  Interrupt     */
+  .word     MDIOS_IRQHandler                  /* MDIOS global Interrupt      */
+  .word     JPEG_IRQHandler                   /* JPEG global Interrupt       */
+  .word     MDMA_IRQHandler                   /* MDMA global Interrupt       */
+  .word     0                                 /* Reserved                    */
+  .word     SDMMC2_IRQHandler                 /* SDMMC2 global Interrupt     */
+  .word     HSEM1_IRQHandler                  /* HSEM1 global Interrupt      */
+  .word     0                                 /* Reserved                    */
+  .word     ADC3_IRQHandler                   /* ADC3 global Interrupt       */
+  .word     DMAMUX2_OVR_IRQHandler            /* DMAMUX Overrun interrupt    */
+  .word     BDMA_Channel0_IRQHandler          /* BDMA Channel 0 global Interrupt */
+  .word     BDMA_Channel1_IRQHandler          /* BDMA Channel 1 global Interrupt */
+  .word     BDMA_Channel2_IRQHandler          /* BDMA Channel 2 global Interrupt */
+  .word     BDMA_Channel3_IRQHandler          /* BDMA Channel 3 global Interrupt */
+  .word     BDMA_Channel4_IRQHandler          /* BDMA Channel 4 global Interrupt */
+  .word     BDMA_Channel5_IRQHandler          /* BDMA Channel 5 global Interrupt */
+  .word     BDMA_Channel6_IRQHandler          /* BDMA Channel 6 global Interrupt */
+  .word     BDMA_Channel7_IRQHandler          /* BDMA Channel 7 global Interrupt */
+  .word     COMP1_IRQHandler                  /* COMP1 global Interrupt     */
+  .word     LPTIM2_IRQHandler                 /* LP TIM2 global interrupt   */
+  .word     LPTIM3_IRQHandler                 /* LP TIM3 global interrupt   */
+  .word     LPTIM4_IRQHandler                 /* LP TIM4 global interrupt   */
+  .word     LPTIM5_IRQHandler                 /* LP TIM5 global interrupt   */
+  .word     LPUART1_IRQHandler                /* LP UART1 interrupt         */
+  .word     0                                 /* Reserved                   */
+  .word     CRS_IRQHandler                    /* Clock Recovery Global Interrupt */
+  .word     ECC_IRQHandler                    /* ECC diagnostic Global Interrupt */
+  .word     SAI4_IRQHandler                   /* SAI4 global interrupt      */
+  .word     0                                 /* Reserved                   */
+  .word     0                                 /* Reserved                   */
+  .word     WAKEUP_PIN_IRQHandler             /* Interrupt for all 6 wake-up pins */
+
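+/* Provide weak aliases from every exception/IRQ handler to Default_Handler.
+   Because the aliases are weak, any handler defined elsewhere with the same
+   name overrides the default. */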
+   .weak      NMI_Handler
+   .thumb_set NMI_Handler,Default_Handler
+
+   .weak      HardFault_Handler
+   .thumb_set HardFault_Handler,Default_Handler
+
+   .weak      MemManage_Handler
+   .thumb_set MemManage_Handler,Default_Handler
+
+   .weak      BusFault_Handler
+   .thumb_set BusFault_Handler,Default_Handler
+
+   .weak      UsageFault_Handler
+   .thumb_set UsageFault_Handler,Default_Handler
+
+   .weak      SVC_Handler
+   .thumb_set SVC_Handler,Default_Handler
+
+   .weak      DebugMon_Handler
+   .thumb_set DebugMon_Handler,Default_Handler
+
+   .weak      PendSV_Handler
+   .thumb_set PendSV_Handler,Default_Handler
+
+   .weak      SysTick_Handler
+   .thumb_set SysTick_Handler,Default_Handler
+
+   .weak      WWDG_IRQHandler
+   .thumb_set WWDG_IRQHandler,Default_Handler
+
+   .weak      PVD_AVD_IRQHandler
+   .thumb_set PVD_AVD_IRQHandler,Default_Handler
+
+   .weak      TAMP_STAMP_IRQHandler
+   .thumb_set TAMP_STAMP_IRQHandler,Default_Handler
+
+   .weak      RTC_WKUP_IRQHandler
+   .thumb_set RTC_WKUP_IRQHandler,Default_Handler
+
+   .weak      FLASH_IRQHandler
+   .thumb_set FLASH_IRQHandler,Default_Handler
+
+   .weak      RCC_IRQHandler
+   .thumb_set RCC_IRQHandler,Default_Handler
+
+   .weak      EXTI0_IRQHandler
+   .thumb_set EXTI0_IRQHandler,Default_Handler
+
+   .weak      EXTI1_IRQHandler
+   .thumb_set EXTI1_IRQHandler,Default_Handler
+
+   .weak      EXTI2_IRQHandler
+   .thumb_set EXTI2_IRQHandler,Default_Handler
+
+   .weak      EXTI3_IRQHandler
+   .thumb_set EXTI3_IRQHandler,Default_Handler
+
+   .weak      EXTI4_IRQHandler
+   .thumb_set EXTI4_IRQHandler,Default_Handler
+
+   .weak      DMA1_Stream0_IRQHandler
+   .thumb_set DMA1_Stream0_IRQHandler,Default_Handler
+
+   .weak      DMA1_Stream1_IRQHandler
+   .thumb_set DMA1_Stream1_IRQHandler,Default_Handler
+
+   .weak      DMA1_Stream2_IRQHandler
+   .thumb_set DMA1_Stream2_IRQHandler,Default_Handler
+
+   .weak      DMA1_Stream3_IRQHandler
+   .thumb_set DMA1_Stream3_IRQHandler,Default_Handler
+
+   .weak      DMA1_Stream4_IRQHandler
+   .thumb_set DMA1_Stream4_IRQHandler,Default_Handler
+
+   .weak      DMA1_Stream5_IRQHandler
+   .thumb_set DMA1_Stream5_IRQHandler,Default_Handler
+
+   .weak      DMA1_Stream6_IRQHandler
+   .thumb_set DMA1_Stream6_IRQHandler,Default_Handler
+
+   .weak      ADC_IRQHandler
+   .thumb_set ADC_IRQHandler,Default_Handler
+
+   .weak      FDCAN1_IT0_IRQHandler
+   .thumb_set FDCAN1_IT0_IRQHandler,Default_Handler
+
+   .weak      FDCAN2_IT0_IRQHandler
+   .thumb_set FDCAN2_IT0_IRQHandler,Default_Handler
+
+   .weak      FDCAN1_IT1_IRQHandler
+   .thumb_set FDCAN1_IT1_IRQHandler,Default_Handler
+
+   .weak      FDCAN2_IT1_IRQHandler
+   .thumb_set FDCAN2_IT1_IRQHandler,Default_Handler
+
+   .weak      EXTI9_5_IRQHandler
+   .thumb_set EXTI9_5_IRQHandler,Default_Handler
+
+   .weak      TIM1_BRK_IRQHandler
+   .thumb_set TIM1_BRK_IRQHandler,Default_Handler
+
+   .weak      TIM1_UP_IRQHandler
+   .thumb_set TIM1_UP_IRQHandler,Default_Handler
+
+   .weak      TIM1_TRG_COM_IRQHandler
+   .thumb_set TIM1_TRG_COM_IRQHandler,Default_Handler
+
+   .weak      TIM1_CC_IRQHandler
+   .thumb_set TIM1_CC_IRQHandler,Default_Handler
+
+   .weak      TIM2_IRQHandler
+   .thumb_set TIM2_IRQHandler,Default_Handler
+
+   .weak      TIM3_IRQHandler
+   .thumb_set TIM3_IRQHandler,Default_Handler
+
+   .weak      TIM4_IRQHandler
+   .thumb_set TIM4_IRQHandler,Default_Handler
+
+   .weak      I2C1_EV_IRQHandler
+   .thumb_set I2C1_EV_IRQHandler,Default_Handler
+
+   .weak      I2C1_ER_IRQHandler
+   .thumb_set I2C1_ER_IRQHandler,Default_Handler
+
+   .weak      I2C2_EV_IRQHandler
+   .thumb_set I2C2_EV_IRQHandler,Default_Handler
+
+   .weak      I2C2_ER_IRQHandler
+   .thumb_set I2C2_ER_IRQHandler,Default_Handler
+
+   .weak      SPI1_IRQHandler
+   .thumb_set SPI1_IRQHandler,Default_Handler
+
+   .weak      SPI2_IRQHandler
+   .thumb_set SPI2_IRQHandler,Default_Handler
+
+   .weak      USART1_IRQHandler
+   .thumb_set USART1_IRQHandler,Default_Handler
+
+   .weak      USART2_IRQHandler
+   .thumb_set USART2_IRQHandler,Default_Handler
+
+   .weak      USART3_IRQHandler
+   .thumb_set USART3_IRQHandler,Default_Handler
+
+   .weak      EXTI15_10_IRQHandler
+   .thumb_set EXTI15_10_IRQHandler,Default_Handler
+
+   .weak      RTC_Alarm_IRQHandler
+   .thumb_set RTC_Alarm_IRQHandler,Default_Handler
+
+   .weak      TIM8_BRK_TIM12_IRQHandler
+   .thumb_set TIM8_BRK_TIM12_IRQHandler,Default_Handler
+
+   .weak      TIM8_UP_TIM13_IRQHandler
+   .thumb_set TIM8_UP_TIM13_IRQHandler,Default_Handler
+
+   .weak      TIM8_TRG_COM_TIM14_IRQHandler
+   .thumb_set TIM8_TRG_COM_TIM14_IRQHandler,Default_Handler
+
+   .weak      TIM8_CC_IRQHandler
+   .thumb_set TIM8_CC_IRQHandler,Default_Handler
+
+   .weak      DMA1_Stream7_IRQHandler
+   .thumb_set DMA1_Stream7_IRQHandler,Default_Handler
+
+   .weak      FMC_IRQHandler
+   .thumb_set FMC_IRQHandler,Default_Handler
+
+   .weak      SDMMC1_IRQHandler
+   .thumb_set SDMMC1_IRQHandler,Default_Handler
+
+   .weak      TIM5_IRQHandler
+   .thumb_set TIM5_IRQHandler,Default_Handler
+
+   .weak      SPI3_IRQHandler
+   .thumb_set SPI3_IRQHandler,Default_Handler
+
+   .weak      UART4_IRQHandler
+   .thumb_set UART4_IRQHandler,Default_Handler
+
+   .weak      UART5_IRQHandler
+   .thumb_set UART5_IRQHandler,Default_Handler
+
+   .weak      TIM6_DAC_IRQHandler
+   .thumb_set TIM6_DAC_IRQHandler,Default_Handler
+
+   .weak      TIM7_IRQHandler
+   .thumb_set TIM7_IRQHandler,Default_Handler
+
+   .weak      DMA2_Stream0_IRQHandler
+   .thumb_set DMA2_Stream0_IRQHandler,Default_Handler
+
+   .weak      DMA2_Stream1_IRQHandler
+   .thumb_set DMA2_Stream1_IRQHandler,Default_Handler
+
+   .weak      DMA2_Stream2_IRQHandler
+   .thumb_set DMA2_Stream2_IRQHandler,Default_Handler
+
+   .weak      DMA2_Stream3_IRQHandler
+   .thumb_set DMA2_Stream3_IRQHandler,Default_Handler
+
+   .weak      DMA2_Stream4_IRQHandler
+   .thumb_set DMA2_Stream4_IRQHandler,Default_Handler
+
+   .weak      ETH_IRQHandler
+   .thumb_set ETH_IRQHandler,Default_Handler
+
+   .weak      ETH_WKUP_IRQHandler
+   .thumb_set ETH_WKUP_IRQHandler,Default_Handler
+
+   .weak      FDCAN_CAL_IRQHandler
+   .thumb_set FDCAN_CAL_IRQHandler,Default_Handler
+
+   .weak      DMA2_Stream5_IRQHandler
+   .thumb_set DMA2_Stream5_IRQHandler,Default_Handler
+
+   .weak      DMA2_Stream6_IRQHandler
+   .thumb_set DMA2_Stream6_IRQHandler,Default_Handler
+
+   .weak      DMA2_Stream7_IRQHandler
+   .thumb_set DMA2_Stream7_IRQHandler,Default_Handler
+
+   .weak      USART6_IRQHandler
+   .thumb_set USART6_IRQHandler,Default_Handler
+
+   .weak      I2C3_EV_IRQHandler
+   .thumb_set I2C3_EV_IRQHandler,Default_Handler
+
+   .weak      I2C3_ER_IRQHandler
+   .thumb_set I2C3_ER_IRQHandler,Default_Handler
+
+   .weak      OTG_HS_EP1_OUT_IRQHandler
+   .thumb_set OTG_HS_EP1_OUT_IRQHandler,Default_Handler
+
+   .weak      OTG_HS_EP1_IN_IRQHandler
+   .thumb_set OTG_HS_EP1_IN_IRQHandler,Default_Handler
+
+   .weak      OTG_HS_WKUP_IRQHandler
+   .thumb_set OTG_HS_WKUP_IRQHandler,Default_Handler
+
+   .weak      OTG_HS_IRQHandler
+   .thumb_set OTG_HS_IRQHandler,Default_Handler
+
+   .weak      DCMI_IRQHandler
+   .thumb_set DCMI_IRQHandler,Default_Handler
+
+   .weak      RNG_IRQHandler
+   .thumb_set RNG_IRQHandler,Default_Handler
+
+   .weak      FPU_IRQHandler
+   .thumb_set FPU_IRQHandler,Default_Handler
+
+   .weak      UART7_IRQHandler
+   .thumb_set UART7_IRQHandler,Default_Handler
+
+   .weak      UART8_IRQHandler
+   .thumb_set UART8_IRQHandler,Default_Handler
+
+   .weak      SPI4_IRQHandler
+   .thumb_set SPI4_IRQHandler,Default_Handler
+
+   .weak      SPI5_IRQHandler
+   .thumb_set SPI5_IRQHandler,Default_Handler
+
+   .weak      SPI6_IRQHandler
+   .thumb_set SPI6_IRQHandler,Default_Handler
+
+   .weak      SAI1_IRQHandler
+   .thumb_set SAI1_IRQHandler,Default_Handler
+
+   .weak      LTDC_IRQHandler
+   .thumb_set LTDC_IRQHandler,Default_Handler
+
+   .weak      LTDC_ER_IRQHandler
+   .thumb_set LTDC_ER_IRQHandler,Default_Handler
+
+   .weak      DMA2D_IRQHandler
+   .thumb_set DMA2D_IRQHandler,Default_Handler
+
+   .weak      SAI2_IRQHandler
+   .thumb_set SAI2_IRQHandler,Default_Handler
+
+   .weak      QUADSPI_IRQHandler
+   .thumb_set QUADSPI_IRQHandler,Default_Handler
+
+   .weak      LPTIM1_IRQHandler
+   .thumb_set LPTIM1_IRQHandler,Default_Handler
+
+   .weak      CEC_IRQHandler
+   .thumb_set CEC_IRQHandler,Default_Handler
+
+   .weak      I2C4_EV_IRQHandler
+   .thumb_set I2C4_EV_IRQHandler,Default_Handler
+
+   .weak      I2C4_ER_IRQHandler
+   .thumb_set I2C4_ER_IRQHandler,Default_Handler
+
+   .weak      SPDIF_RX_IRQHandler
+   .thumb_set SPDIF_RX_IRQHandler,Default_Handler
+
+   .weak      OTG_FS_EP1_OUT_IRQHandler
+   .thumb_set OTG_FS_EP1_OUT_IRQHandler,Default_Handler
+
+   .weak      OTG_FS_EP1_IN_IRQHandler
+   .thumb_set OTG_FS_EP1_IN_IRQHandler,Default_Handler
+
+   .weak      OTG_FS_WKUP_IRQHandler
+   .thumb_set OTG_FS_WKUP_IRQHandler,Default_Handler
+
+   .weak      OTG_FS_IRQHandler
+   .thumb_set OTG_FS_IRQHandler,Default_Handler
+
+   .weak      DMAMUX1_OVR_IRQHandler
+   .thumb_set DMAMUX1_OVR_IRQHandler,Default_Handler
+
+   .weak      HRTIM1_Master_IRQHandler
+   .thumb_set HRTIM1_Master_IRQHandler,Default_Handler
+
+   .weak      HRTIM1_TIMA_IRQHandler
+   .thumb_set HRTIM1_TIMA_IRQHandler,Default_Handler
+
+   .weak      HRTIM1_TIMB_IRQHandler
+   .thumb_set HRTIM1_TIMB_IRQHandler,Default_Handler
+
+   .weak      HRTIM1_TIMC_IRQHandler
+   .thumb_set HRTIM1_TIMC_IRQHandler,Default_Handler
+
+   .weak      HRTIM1_TIMD_IRQHandler
+   .thumb_set HRTIM1_TIMD_IRQHandler,Default_Handler
+
+   .weak      HRTIM1_TIME_IRQHandler
+   .thumb_set HRTIM1_TIME_IRQHandler,Default_Handler
+
+   .weak      HRTIM1_FLT_IRQHandler
+   .thumb_set HRTIM1_FLT_IRQHandler,Default_Handler
+
+   .weak      DFSDM1_FLT0_IRQHandler
+   .thumb_set DFSDM1_FLT0_IRQHandler,Default_Handler
+
+   .weak      DFSDM1_FLT1_IRQHandler
+   .thumb_set DFSDM1_FLT1_IRQHandler,Default_Handler
+
+   .weak      DFSDM1_FLT2_IRQHandler
+   .thumb_set DFSDM1_FLT2_IRQHandler,Default_Handler
+
+   .weak      DFSDM1_FLT3_IRQHandler
+   .thumb_set DFSDM1_FLT3_IRQHandler,Default_Handler
+
+   .weak      SAI3_IRQHandler
+   .thumb_set SAI3_IRQHandler,Default_Handler
+
+   .weak      SWPMI1_IRQHandler
+   .thumb_set SWPMI1_IRQHandler,Default_Handler
+
+   .weak      TIM15_IRQHandler
+   .thumb_set TIM15_IRQHandler,Default_Handler
+
+   .weak      TIM16_IRQHandler
+   .thumb_set TIM16_IRQHandler,Default_Handler
+
+   .weak      TIM17_IRQHandler
+   .thumb_set TIM17_IRQHandler,Default_Handler
+
+   .weak      MDIOS_WKUP_IRQHandler
+   .thumb_set MDIOS_WKUP_IRQHandler,Default_Handler
+
+   .weak      MDIOS_IRQHandler
+   .thumb_set MDIOS_IRQHandler,Default_Handler
+
+   .weak      JPEG_IRQHandler
+   .thumb_set JPEG_IRQHandler,Default_Handler
+
+   .weak      MDMA_IRQHandler
+   .thumb_set MDMA_IRQHandler,Default_Handler
+
+   .weak      SDMMC2_IRQHandler
+   .thumb_set SDMMC2_IRQHandler,Default_Handler
+
+   .weak      HSEM1_IRQHandler
+   .thumb_set HSEM1_IRQHandler,Default_Handler
+
+   .weak      ADC3_IRQHandler
+   .thumb_set ADC3_IRQHandler,Default_Handler
+
+   .weak      DMAMUX2_OVR_IRQHandler
+   .thumb_set DMAMUX2_OVR_IRQHandler,Default_Handler
+
+   .weak      BDMA_Channel0_IRQHandler
+   .thumb_set BDMA_Channel0_IRQHandler,Default_Handler
+
+   .weak      BDMA_Channel1_IRQHandler
+   .thumb_set BDMA_Channel1_IRQHandler,Default_Handler
+
+   .weak      BDMA_Channel2_IRQHandler
+   .thumb_set BDMA_Channel2_IRQHandler,Default_Handler
+
+   .weak      BDMA_Channel3_IRQHandler
+   .thumb_set BDMA_Channel3_IRQHandler,Default_Handler
+
+   .weak      BDMA_Channel4_IRQHandler
+   .thumb_set BDMA_Channel4_IRQHandler,Default_Handler
+
+   .weak      BDMA_Channel5_IRQHandler
+   .thumb_set BDMA_Channel5_IRQHandler,Default_Handler
+
+   .weak      BDMA_Channel6_IRQHandler
+   .thumb_set BDMA_Channel6_IRQHandler,Default_Handler
+
+   .weak      BDMA_Channel7_IRQHandler
+   .thumb_set BDMA_Channel7_IRQHandler,Default_Handler
+
+   .weak      COMP1_IRQHandler
+   .thumb_set COMP1_IRQHandler,Default_Handler
+
+   .weak      LPTIM2_IRQHandler
+   .thumb_set LPTIM2_IRQHandler,Default_Handler
+
+   .weak      LPTIM3_IRQHandler
+   .thumb_set LPTIM3_IRQHandler,Default_Handler
+
+   .weak      LPTIM4_IRQHandler
+   .thumb_set LPTIM4_IRQHandler,Default_Handler
+
+   .weak      LPTIM5_IRQHandler
+   .thumb_set LPTIM5_IRQHandler,Default_Handler
+
+   .weak      LPUART1_IRQHandler
+   .thumb_set LPUART1_IRQHandler,Default_Handler
+
+   .weak      CRS_IRQHandler
+   .thumb_set CRS_IRQHandler,Default_Handler
+
+   .weak      ECC_IRQHandler
+   .thumb_set ECC_IRQHandler,Default_Handler
+
+   .weak      SAI4_IRQHandler
+   .thumb_set SAI4_IRQHandler,Default_Handler
+
+   .weak      WAKEUP_PIN_IRQHandler
+   .thumb_set WAKEUP_PIN_IRQHandler,Default_Handler
diff --git a/packaging/EGL_HEADERS.tar.gz b/packaging/EGL_HEADERS.tar.gz
new file mode 100644 (file)
index 0000000..8022205
Binary files /dev/null and b/packaging/EGL_HEADERS.tar.gz differ
diff --git a/packaging/FLATBUFFERS-1.12.tar.gz b/packaging/FLATBUFFERS-1.12.tar.gz
deleted file mode 100644 (file)
index 166983b..0000000
Binary files a/packaging/FLATBUFFERS-1.12.tar.gz and /dev/null differ
diff --git a/packaging/FLATBUFFERS-2.0.tar.gz b/packaging/FLATBUFFERS-2.0.tar.gz
new file mode 100644 (file)
index 0000000..5e8a336
Binary files /dev/null and b/packaging/FLATBUFFERS-2.0.tar.gz differ
diff --git a/packaging/OPENGL_HEADERS.tar.gz b/packaging/OPENGL_HEADERS.tar.gz
new file mode 100644 (file)
index 0000000..53a395a
Binary files /dev/null and b/packaging/OPENGL_HEADERS.tar.gz differ
diff --git a/packaging/TENSORFLOW-2.3.0-EIGEN.tar.gz b/packaging/TENSORFLOW-2.3.0-EIGEN.tar.gz
deleted file mode 100644 (file)
index 396d12f..0000000
Binary files a/packaging/TENSORFLOW-2.3.0-EIGEN.tar.gz and /dev/null differ
diff --git a/packaging/TENSORFLOW-2.8.0-EIGEN.tar.gz b/packaging/TENSORFLOW-2.8.0-EIGEN.tar.gz
new file mode 100644 (file)
index 0000000..94a307f
Binary files /dev/null and b/packaging/TENSORFLOW-2.8.0-EIGEN.tar.gz differ
diff --git a/packaging/TENSORFLOW-2.8.0-GEMMLOWP.tar.gz b/packaging/TENSORFLOW-2.8.0-GEMMLOWP.tar.gz
new file mode 100644 (file)
index 0000000..c76e088
Binary files /dev/null and b/packaging/TENSORFLOW-2.8.0-GEMMLOWP.tar.gz differ
diff --git a/packaging/TENSORFLOW-2.8.0.tar.gz b/packaging/TENSORFLOW-2.8.0.tar.gz
new file mode 100644 (file)
index 0000000..f0f7425
Binary files /dev/null and b/packaging/TENSORFLOW-2.8.0.tar.gz differ
diff --git a/packaging/TENSORFLOW_GPU.tar.gz b/packaging/TENSORFLOW_GPU.tar.gz
deleted file mode 100644 (file)
index 5133fee..0000000
Binary files a/packaging/TENSORFLOW_GPU.tar.gz and /dev/null differ
diff --git a/packaging/VULKAN.tar.gz b/packaging/VULKAN.tar.gz
new file mode 100644 (file)
index 0000000..64ae0bd
Binary files /dev/null and b/packaging/VULKAN.tar.gz differ
diff --git a/packaging/XNNPACK.tar.gz b/packaging/XNNPACK.tar.gz
index d770c2c..2a1ce8e 100644 (file)
Binary files a/packaging/XNNPACK.tar.gz and b/packaging/XNNPACK.tar.gz differ
diff --git a/packaging/nnfw.spec b/packaging/nnfw.spec
index 0518541..7cd0470 100644 (file)
@@ -1,31 +1,34 @@
 Name:    nnfw
 Summary: nnfw
-Version: 1.21.0
+Version: 1.22.0
 Release: 1
 Group:   Development
 License: Apache-2.0 and MIT and BSD-2-Clause and MPL-2.0
 
 Source0: %{name}-%{version}.tar.gz
 Source1: %{name}.manifest
-# TODO Update source number
 Source1001: nnapi_test_generated.tar.gz
-#Source1002: GTEST.tar.gz
-Source1003: TENSORFLOW-2.3.0-EIGEN.tar.gz
-Source1004: GEMMLOWP.tar.gz
-Source1005: TENSORFLOW-2.8.0-RUY.tar.gz
-Source1006: CPUINFO.tar.gz
-Source1007: XNNPACK.tar.gz
-Source1008: FXDIV.tar.gz
-Source1009: PTHREADPOOL.tar.gz
-Source1010: PSIMD.tar.gz
-Source1011: FP16.tar.gz
-Source1012: OPENCL_HEADERS.tar.gz
-Source1013: FARMHASH.tar.gz
-Source1014: ABSEIL.tar.gz
-Source1015: OOURAFFT.tar.gz
-Source1016: TENSORFLOW_GPU.tar.gz
 Source2001: nnfw.pc.in
 Source2002: nnfw-plugin.pc.in
+Source3001: ABSEIL.tar.gz
+Source3002: CPUINFO.tar.gz
+Source3003: EGL_HEADERS.tar.gz
+Source3004: FARMHASH.tar.gz
+Source3005: FP16.tar.gz
+Source3006: FXDIV.tar.gz
+Source3007: GEMMLOWP.tar.gz
+Source3008: OOURAFFT.tar.gz
+Source3009: OPENCL_HEADERS.tar.gz
+Source3010: OPENGL_HEADERS.tar.gz
+Source3011: PSIMD.tar.gz
+Source3012: PTHREADPOOL.tar.gz
+Source3013: TENSORFLOW-2.8.0-EIGEN.tar.gz
+Source3014: TENSORFLOW-2.8.0-GEMMLOWP.tar.gz
+Source3015: TENSORFLOW-2.8.0-RUY.tar.gz
+Source3016: TENSORFLOW-2.8.0.tar.gz
+Source3017: VULKAN.tar.gz
+Source3018: XNNPACK.tar.gz
+Source3019: FLATBUFFERS-2.0.tar.gz
 
 %{!?build_type:     %define build_type      Release}
 %{!?npud_build:     %define npud_build      1}
@@ -42,21 +45,13 @@ Source2002: nnfw-plugin.pc.in
 %endif
 
 BuildRequires:  cmake
-# Require flatbuffers-devel for onert frontend (model loading)
-BuildRequires:  pkgconfig(flatbuffers)
-
-%ifarch %{arm} aarch64
-# Require python for acl-ex library build pre-process
-BuildRequires:  python3
-BuildRequires:  libarmcl-devel >= v21.02
-%endif
 
 Requires(post): /sbin/ldconfig
 Requires(postun): /sbin/ldconfig
 
 %if %{test_build} == 1
 BuildRequires:  pkgconfig(boost)
-BuildRequires:  pkgconfig(tensorflow-lite)
+BuildRequires:  pkgconfig(tensorflow2-lite)
 BuildRequires:  hdf5-devel
 BuildRequires:  libaec-devel
 BuildRequires:  pkgconfig(zlib)
@@ -172,21 +167,25 @@ NPU daemon for optimal management of NPU hardware
 cp %{SOURCE1} .
 mkdir ./externals
 tar -xf %{SOURCE1001} -C ./tests/nnapi/src/
-#tar -xf %{SOURCE1002} -C ./externals
-tar -xf %{SOURCE1003} -C ./externals
-tar -xf %{SOURCE1004} -C ./externals
-tar -xf %{SOURCE1005} -C ./externals
-tar -xf %{SOURCE1006} -C ./externals
-tar -xf %{SOURCE1007} -C ./externals
-tar -xf %{SOURCE1008} -C ./externals
-tar -xf %{SOURCE1009} -C ./externals
-tar -xf %{SOURCE1010} -C ./externals
-tar -xf %{SOURCE1011} -C ./externals
-tar -xf %{SOURCE1012} -C ./externals
-tar -xf %{SOURCE1013} -C ./externals
-tar -xf %{SOURCE1014} -C ./externals
-tar -xf %{SOURCE1015} -C ./externals
-tar -xf %{SOURCE1016} -C ./externals
+tar -xf %{SOURCE3001} -C ./externals
+tar -xf %{SOURCE3002} -C ./externals
+tar -xf %{SOURCE3003} -C ./externals
+tar -xf %{SOURCE3004} -C ./externals
+tar -xf %{SOURCE3005} -C ./externals
+tar -xf %{SOURCE3006} -C ./externals
+tar -xf %{SOURCE3007} -C ./externals
+tar -xf %{SOURCE3008} -C ./externals
+tar -xf %{SOURCE3009} -C ./externals
+tar -xf %{SOURCE3010} -C ./externals
+tar -xf %{SOURCE3011} -C ./externals
+tar -xf %{SOURCE3012} -C ./externals
+tar -xf %{SOURCE3013} -C ./externals
+tar -xf %{SOURCE3014} -C ./externals
+tar -xf %{SOURCE3015} -C ./externals
+tar -xf %{SOURCE3016} -C ./externals
+tar -xf %{SOURCE3017} -C ./externals
+tar -xf %{SOURCE3018} -C ./externals
+tar -xf %{SOURCE3019} -C ./externals
 
 %build
 %ifarch arm armv7l armv7hl aarch64 x86_64 %ix86
@@ -230,23 +229,22 @@ install -m 0644 ./nnfw-plugin.pc.in %{buildroot}%{_libdir}/pkgconfig/nnfw-plugin
 
 %if %{test_build} == 1
 mkdir -p %{test_install_path}/bin
+mkdir -p %{test_install_path}/nnapi-gtest
 mkdir -p %{test_install_path}/unittest
-mkdir -p %{test_install_path}/unittest_standalone
 mkdir -p %{test_install_path}/test
 
-install -m 755 build/out/bin/nnapi_test %{test_install_path}/bin
-install -m 755 build/out/bin/nnpackage_run %{test_install_path}/bin
+install -m 755 build/out/bin/onert_run %{test_install_path}/bin
 install -m 755 build/out/bin/tflite_comparator %{test_install_path}/bin
 install -m 755 build/out/bin/tflite_run %{test_install_path}/bin
-install -m 755 build/out/unittest/* %{test_install_path}/unittest
-install -m 755 build/out/unittest_standalone/*_test %{test_install_path}/unittest_standalone
-install -m 755 build/out/unittest_standalone/test_* %{test_install_path}/unittest_standalone
+install -m 755 build/out/nnapi-gtest/* %{test_install_path}/nnapi-gtest
+install -m 755 build/out/unittest/*_test %{test_install_path}/unittest
+install -m 755 build/out/unittest/test_* %{test_install_path}/unittest
 cp -r build/out/test/* %{test_install_path}/test
-cp -r build/out/unittest_standalone/nnfw_api_gtest_models %{test_install_path}/unittest_standalone
+cp -r build/out/unittest/nnfw_api_gtest_models %{test_install_path}/unittest
 
 # Share test script with ubuntu (ignore error if there is no list for target)
-cp tests/nnapi/nnapi_gtest.skip.%{target_arch}-* %{test_install_path}/unittest/.
-cp %{test_install_path}/unittest/nnapi_gtest.skip.%{target_arch}-linux.cpu %{test_install_path}/unittest/nnapi_gtest.skip
+cp tests/nnapi/nnapi_gtest.skip.%{target_arch}-* %{test_install_path}/nnapi-gtest/.
+cp %{test_install_path}/nnapi-gtest/nnapi_gtest.skip.%{target_arch}-linux.cpu %{test_install_path}/nnapi-gtest/nnapi_gtest.skip
 tar -zxf test-suite.tar.gz -C %{buildroot}%{test_install_home}
 
 %if %{coverage_build} == 1
@@ -258,7 +256,13 @@ install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/t
 
 %if %{npud_build} == 1
 install -m 755 build/out/bin/npud %{buildroot}%{_bindir}
-%endif
+
+%if %{test_build} == 1
+mkdir -p %{test_install_path}/npud-gtest
+install -m 755 build/out/npud-gtest/* %{test_install_path}/npud-gtest
+%endif # test_build
+
+%endif # npud_build
 
 %endif
 
diff --git a/res/PyTorchExamples/examples/PReLUwConv1d/__init__.py b/res/PyTorchExamples/examples/PReLUwConv1d/__init__.py
new file mode 100644 (file)
index 0000000..b2aed98
--- /dev/null
@@ -0,0 +1,19 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_Conv1dPReLU(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.op1 = nn.Conv1d(1, 1, 1)
+        self.op2 = nn.PReLU()
+
+    def forward(self, input):
+        return self.op2(self.op1(input))
+
+
+_model_ = net_Conv1dPReLU()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 1, 5)
diff --git a/res/PyTorchExamples/examples/PReLUwConv2d/__init__.py b/res/PyTorchExamples/examples/PReLUwConv2d/__init__.py
new file mode 100644 (file)
index 0000000..c538071
--- /dev/null
@@ -0,0 +1,19 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_Conv2dPReLU(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.op1 = nn.Conv2d(1, 1, 1)
+        self.op2 = nn.PReLU()
+
+    def forward(self, input):
+        return self.op2(self.op1(input))
+
+
+_model_ = net_Conv2dPReLU()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 1, 5, 5)
index b6fb438..95d1928 100755 (executable)
@@ -52,18 +52,24 @@ for example in args.examples:
     if hasattr(module._model_, 'onnx_opset_version'):
         opset_version = module._model_.onnx_opset_version()
 
+    onnx_model_path = output_folder + example + ".onnx"
+
     torch.onnx.export(
         module._model_,
         module._dummy_,
-        output_folder + example + ".onnx",
+        onnx_model_path,
         verbose=True,
         opset_version=opset_version)
     print("Generate '" + example + ".onnx' - Done")
 
-    onnx_model = onnx.load(output_folder + example + ".onnx")
+    onnx_model = onnx.load(onnx_model_path)
     onnx.checker.check_model(onnx_model)
 
-    tf_prep = onnx_tf.backend.prepare(onnx_model)
+    inferred_model = onnx.shape_inference.infer_shapes(onnx_model)
+    onnx.checker.check_model(inferred_model)
+    onnx.save(inferred_model, onnx_model_path)
+
+    tf_prep = onnx_tf.backend.prepare(inferred_model)
     tf_prep.export_graph(path=output_folder + example + ".TF")
     print("Generate '" + example + " TF' - Done")
 
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_008/test.recipe b/res/TensorFlowLiteRecipes/FullyConnected_008/test.recipe
new file mode 100644 (file)
index 0000000..89b5c99
--- /dev/null
@@ -0,0 +1,29 @@
+operand {
+  name: "x"
+  type: FLOAT32
+  shape { dim: 1 dim: 2 dim: 4 }
+}
+operand {
+  name: "y"
+  type: FLOAT32
+  shape { dim: 2 dim: 4 }
+}
+operand {
+  name: "out"
+  type: FLOAT32
+  shape { dim: 2 dim: 2 }
+}
+operation {
+  type: "FullyConnected"
+  fullyconnected_options {
+    activation: NONE
+    keep_num_dims: false
+  }
+  input: "x"
+  input: "y"
+  input: ""
+  output: "out"
+}
+input: "x"
+input: "y"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_008/test.reverse b/res/TensorFlowLiteRecipes/FullyConnected_008/test.reverse
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_008/test.rule b/res/TensorFlowLiteRecipes/FullyConnected_008/test.rule
new file mode 100644 (file)
index 0000000..3cf4c26
--- /dev/null
@@ -0,0 +1,8 @@
+# To check if FullyConnected with a non-const weight is replaced by BatchMatMul
+# by the replace_non_const_fc_with_batch_matmul pass
+
+RULE    "VERIFY_FILE_FORMAT"      $(verify_file_format) '=' 1
+
+RULE    "BATCH_MATMUL_EXIST"      $(op_count BATCH_MATMUL) '=' 1
+RULE    "RESHAPE_EXIST"           $(op_count RESHAPE) '=' 1
+RULE    "NO_FULLY_CONNECTED"      $(op_count FULLY_CONNECTED) '=' 0
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_009/test.recipe b/res/TensorFlowLiteRecipes/FullyConnected_009/test.recipe
new file mode 100644 (file)
index 0000000..4c02ffc
--- /dev/null
@@ -0,0 +1,43 @@
+operand {
+  name: "in"
+  type: FLOAT32
+  shape { dim: 1 dim: 2 dim: 3 }
+}
+operand {
+  name: "weight"
+  type: FLOAT32
+  shape { dim: 4 dim: 3 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "bias"
+  type: FLOAT32
+  shape { dim: 4 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "out"
+  type: FLOAT32
+  shape { dim: 1 dim: 2 dim: 4 }
+}
+operation {
+  type: "FullyConnected"
+  fullyconnected_options {
+    activation: NONE
+    keep_num_dims: true
+  }
+  input: "in"
+  input: "weight"
+  input: "bias"
+  output: "out"
+}
+input: "in"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_009/test.reverse b/res/TensorFlowLiteRecipes/FullyConnected_009/test.reverse
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_PReluGraph_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Conv_PReluGraph_000/test.recipe
new file mode 100644 (file)
index 0000000..19559c0
--- /dev/null
@@ -0,0 +1,145 @@
+#
+# generated with tflchef-reverse from PReLUwConv2d ONNX model
+#
+operand {
+  name: "input"
+  type: FLOAT32
+  shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+  name: "mul_1/y"
+  type: FLOAT32
+  shape {
+  }
+  filler {
+    tag: "explicit"
+    arg: "0.5"
+  }
+}
+operand {
+  name: "Const_2"
+  type: FLOAT32
+  shape { dim: 1 dim: 1 dim: 1 }
+  filler {
+    tag: "explicit"
+    arg: "0.25"
+  }
+}
+operand {
+  name: "ConvWeight"
+  type: FLOAT32
+  shape { dim: 1 dim: 1 dim: 1 dim: 1 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "0.1"
+  }
+}
+operand {
+  name: "ConvBias"
+  type: FLOAT32
+  shape { dim: 1 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "0.1"
+  }
+}
+operand {
+  name: "ConvOut"
+  type: FLOAT32
+  shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+  name: "Abs"
+  type: FLOAT32
+  shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+  name: "Relu1"
+  type: FLOAT32
+  shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+  name: "sub"
+  type: FLOAT32
+  shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+  name: "mul"
+  type: FLOAT32
+  shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+  name: "mul_1"
+  type: FLOAT32
+  shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+  name: "output"
+  type: FLOAT32
+  shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operation {
+  type: "Conv2D"
+  input: "input"
+  input: "ConvWeight"
+  input: "ConvBias"
+  output: "ConvOut"
+  conv2d_options {
+    padding: VALID
+    stride_w: 1
+    stride_h: 1
+    activation: NONE
+    dilation_w_factor: 1
+    dilation_h_factor: 1
+  }
+}
+operation {
+  type: "Abs"
+  input: "ConvOut"
+  output: "Abs"
+}
+operation {
+  type: "ReLU"
+  input: "ConvOut"
+  output: "Relu1"
+}
+operation {
+  type: "Sub"
+  input: "ConvOut"
+  input: "Abs"
+  output: "sub"
+  sub_options {
+    activation: NONE
+  }
+}
+operation {
+  type: "Mul"
+  input: "sub"
+  input: "Const_2"
+  output: "mul"
+  mul_options {
+    activation: NONE
+  }
+}
+operation {
+  type: "Mul"
+  input: "mul"
+  input: "mul_1/y"
+  output: "mul_1"
+  mul_options {
+    activation: NONE
+  }
+}
+operation {
+  type: "Add"
+  input: "Relu1"
+  input: "mul_1"
+  output: "output"
+  add_options {
+    activation: NONE
+  }
+}
+input: "input"
+output: "output"
diff --git a/res/TensorFlowLiteRecipes/Net_Conv_PReluGraph_000/test.rule b/res/TensorFlowLiteRecipes/Net_Conv_PReluGraph_000/test.rule
new file mode 100644 (file)
index 0000000..f70180a
--- /dev/null
@@ -0,0 +1,10 @@
+# To check if the sub-graph can be converted to PReLU
+
+RULE    "VERIFY_FILE_FORMAT"      $(verify_file_format) '=' 1
+
+RULE    "ABS_NOT_EXIST"           $(op_count ABS) '=' 0
+RULE    "ADD_NOT_EXIST"           $(op_count ADD) '=' 0
+RULE    "MUL_NOT_EXIST"           $(op_count MUL) '=' 0
+RULE    "RELU_NOT_EXIST"          $(op_count RELU) '=' 0
+RULE    "SUB_NOT_EXIST"           $(op_count SUB) '=' 0
+RULE    "PRELU_EXIST"             $(op_count PRELU) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Net_Duplicate_Weights_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Duplicate_Weights_000/test.recipe
new file mode 100644 (file)
index 0000000..c9c66d2
--- /dev/null
@@ -0,0 +1,77 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 1 dim: 32 dim: 32 dim: 1 }
+}
+operand {
+  name: "filter"
+  type: FLOAT32
+  shape { dim: 1 dim: 2 dim: 2 dim: 1 }
+  filler {
+    tag: "constant"
+    arg: "1"
+  }
+}
+operand {
+  name: "filter_1"
+  type: FLOAT32
+  shape { dim: 1 dim: 2 dim: 2 dim: 1 }
+  filler {
+    tag: "constant"
+    arg: "1"
+  }
+}
+operand {
+  name: "bias"
+  type: FLOAT32
+  shape { dim: 1 }
+  filler {
+    tag: "constant"
+    arg: "1"
+  }
+}
+operand {
+  name: "bias_1"
+  type: FLOAT32
+  shape { dim: 1 }
+  filler {
+    tag: "constant"
+    arg: "1"
+  }
+}
+operand {
+  name: "ofm"
+  type: FLOAT32
+  shape { dim: 1 dim: 31 dim: 31 dim: 1 }
+}
+operation {
+  type: "Conv2D"
+  conv2d_options {
+    padding: VALID
+    stride_w: 1
+    stride_h: 1
+  }
+  input: "ifm"
+  input: "filter"
+  input: "bias"
+  output: "ofm"
+}
+operand {
+  name: "output"
+  type: FLOAT32
+  shape { dim: 1 dim: 30 dim: 30 dim: 1 }
+}
+operation {
+  type: "Conv2D"
+  conv2d_options {
+    padding: VALID
+    stride_w: 1
+    stride_h: 1
+  }
+  input: "ofm"
+  input: "filter_1"
+  input: "bias_1"
+  output: "output"
+}
+input: "ifm"
+output: "output"
diff --git a/res/TensorFlowLiteRecipes/Net_Duplicate_Weights_000/test.rule b/res/TensorFlowLiteRecipes/Net_Duplicate_Weights_000/test.rule
new file mode 100644 (file)
index 0000000..4469dab
--- /dev/null
@@ -0,0 +1,8 @@
+# To check if RemoveDuplicateConstPass removes all duplicate consts
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "FILTER_COUNT" $(const_count filter) '=' 1
+RULE "DUPLICATE_FILTER_COUNT" $(const_count filter_1) '=' 0
+RULE "BIAS_COUNT" $(const_count bias) '=' 1
+RULE "DUPLICATE_BIAS_COUNT" $(const_count bias_1) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_FullyConnected_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Net_FullyConnected_Add_000/test.recipe
new file mode 100644 (file)
index 0000000..a4d9f16
--- /dev/null
@@ -0,0 +1,66 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 1 dim: 8 }
+}
+operand {
+  name: "fc_ifm"
+  type: FLOAT32
+  shape { dim: 1 dim: 16 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "fc_wgt"
+  type: FLOAT32
+  shape { dim: 8 dim: 16 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "fc_bias"
+  type: FLOAT32
+  shape { dim: 8 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "fc"
+  type: FLOAT32
+  shape { dim: 1 dim: 8 }
+}
+operand {
+  name: "ofm"
+  type: FLOAT32
+  shape { dim: 1 dim: 8 }
+}
+operation {
+  type: "FullyConnected"
+  fullyconnected_options {
+    activation: NONE
+  }
+  input: "fc_ifm"
+  input: "fc_wgt"
+  input: "fc_bias"
+  output: "fc"
+}
+operation {
+  type: "Add"
+  input: "ifm"
+  input: "fc"
+  output: "ofm"
+  add_options {
+    activation: NONE
+  }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_FullyConnected_Add_000/test.rule b/res/TensorFlowLiteRecipes/Net_FullyConnected_Add_000/test.rule
new file mode 100644 (file)
index 0000000..a70aa92
--- /dev/null
@@ -0,0 +1,6 @@
+# To check if FullyConnected is folded
+
+RULE    "VERIFY_FILE_FORMAT"      $(verify_file_format) '=' 1
+
+RULE    "ADD_EXIST"               $(op_count ADD) '=' 1
+RULE    "NO_FC"                   $(op_count FULLY_CONNECTED) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_005/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_005/test.recipe
new file mode 100644 (file)
index 0000000..2160791
--- /dev/null
@@ -0,0 +1,126 @@
+# Tconv with asymmetric filter + BN + ReLU
+operand {
+  name: "Hole"
+  type: FLOAT32
+  shape {
+    dim: 1
+    dim: 1
+    dim: 1
+    dim: 2
+  }
+}
+operand {
+  name: "conv2d_transpose/input_sizes"
+  type: INT32
+  shape {
+    dim: 4
+  }
+  filler {
+    tag: "explicit"
+    arg: "1"
+    arg: "5"
+    arg: "1"
+    arg: "2"
+  }
+}
+operand {
+  name: "FusedBatchNormV3"
+  type: FLOAT32
+  shape {
+    dim: 2
+  }
+  filler {
+    tag: "explicit"
+    arg: "-2.04724"
+    arg: "-7.80109"
+  }
+}
+operand {
+  name: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes"
+  type: FLOAT32
+  shape {
+    dim: 2
+    dim: 5
+    dim: 1
+    dim: 2
+  }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "0.1"
+  }
+}
+operand {
+  name: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2"
+  type: FLOAT32
+  shape {
+    dim: 1
+    dim: 5
+    dim: 1
+    dim: 2
+  }
+}
+operand {
+  name: "FusedBatchNormV3_mul_0"
+  type: FLOAT32
+  shape {
+    dim: 1
+    dim: 5
+    dim: 1
+    dim: 2
+  }
+}
+operand {
+  name: "FusedBatchNormV3_mul_0_param"
+  type: FLOAT32
+  shape {
+    dim: 2
+  }
+  filler {
+    tag: "explicit"
+    arg: "2.00834"
+    arg: "1.00344"
+  }
+}
+operand {
+  name: "Relu"
+  type: FLOAT32
+  shape {
+    dim: 1
+    dim: 5
+    dim: 1
+    dim: 2
+  }
+}
+operation {
+  type: "TransposeConv"
+  input: "conv2d_transpose/input_sizes"
+  input: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes"
+  input: "Hole"
+  output: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2"
+  transpose_conv_options {
+    padding: VALID
+    stride_w: 1
+    stride_h: 1
+  }
+}
+operation {
+  type: "Mul"
+  input: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2"
+  input: "FusedBatchNormV3_mul_0_param"
+  output: "FusedBatchNormV3_mul_0"
+  mul_options {
+    activation: NONE
+  }
+}
+operation {
+  type: "Add"
+  input: "FusedBatchNormV3_mul_0"
+  input: "FusedBatchNormV3"
+  output: "Relu"
+  add_options {
+    activation: RELU
+  }
+}
+input: "Hole"
+output: "Relu"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_005/test.rule b/res/TensorFlowLiteRecipes/Net_TConv_BN_005/test.rule
new file mode 100644 (file)
index 0000000..241bda9
--- /dev/null
@@ -0,0 +1,8 @@
+# To check if the BatchNorm op (Mul + Add) is fused into the TransposeConv op
+
+RULE    "VERIFY_FILE_FORMAT"      $(verify_file_format) '=' 1
+
+RULE    "TCONV_EXIST"             $(op_count TRANSPOSE_CONV) '=' 1
+RULE    "RELU_EXIST"              $(op_count RELU) '=' 1
+RULE    "NO_MUL"                  $(op_count MUL) '=' 0
+RULE    "NO_ADD"                  $(op_count ADD) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_Transpose_Abs_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Transpose_Abs_000/test.recipe
new file mode 100644 (file)
index 0000000..1ce7c0d
--- /dev/null
@@ -0,0 +1,34 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 1 dim: 6 dim: 5 dim: 1 }
+}
+operand {
+  name: "perm"
+  type: INT32
+  shape { dim: 4 }
+  filler { tag: "explicit" arg: "0" arg: "3" arg: "2" arg: "1"}
+}
+operand {
+  name: "transpose"
+  type: FLOAT32
+  shape { dim: 1 dim: 1 dim: 5 dim: 6 }
+}
+operand {
+  name: "ofm"
+  type: FLOAT32
+  shape { dim: 1 dim: 1 dim: 5 dim: 6 }
+}
+operation {
+  type: "Transpose"
+  input: "ifm"
+  input: "perm"
+  output: "transpose"
+}
+operation {
+  type: "Abs"
+  input: "transpose"
+  output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Transpose_Abs_000/test.reverse b/res/TensorFlowLiteRecipes/Net_Transpose_Abs_000/test.reverse
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/res/TensorFlowLiteRecipes/Net_Transpose_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Transpose_Add_000/test.recipe
new file mode 100644 (file)
index 0000000..daf14d5
--- /dev/null
@@ -0,0 +1,48 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 1 dim: 6 dim: 5 dim: 1 }
+}
+operand {
+  name: "perm"
+  type: INT32
+  shape { dim: 4 }
+  filler { tag: "explicit" arg: "0" arg: "3" arg: "2" arg: "1"}
+}
+operand {
+  name: "transpose"
+  type: FLOAT32
+  shape { dim: 1 dim: 1 dim: 5 dim: 6 }
+}
+operand {
+  name: "add_const"
+  type: FLOAT32
+  shape { dim: 1 dim: 1 dim: 1 dim: 6 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "ofm"
+  type: FLOAT32
+  shape { dim: 1 dim: 1 dim: 5 dim: 6 }
+}
+operation {
+  type: "Transpose"
+  input: "ifm"
+  input: "perm"
+  output: "transpose"
+}
+operation {
+  type: "Add"
+  input: "transpose"
+  input: "add_const"
+  output: "ofm"
+  add_options {
+    activation: NONE
+  }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Transpose_Add_000/test.reverse b/res/TensorFlowLiteRecipes/Net_Transpose_Add_000/test.reverse
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_005/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_005/test.recipe
new file mode 100644 (file)
index 0000000..8a9328b
--- /dev/null
@@ -0,0 +1,44 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+  name: "filter"
+  type: FLOAT32
+  shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "bias"
+  type: FLOAT32
+  shape { dim: 64 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "ofm"
+  type: FLOAT32
+  shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+  type: "Conv2D"
+  conv2d_options {
+    padding: VALID
+    stride_w: 2
+    stride_h: 2
+  }
+  input: "ifm"
+  input: "filter"
+  input: "bias"
+  output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_005/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_005/test.rule
new file mode 100644 (file)
index 0000000..09931d4
--- /dev/null
@@ -0,0 +1,8 @@
+# To check that the model can be quantized without QuantizeDequantizeWeights.
+
+RULE    "VERIFY_FILE_FORMAT"      $(verify_file_format) '=' 1
+
+RULE    "INPUT_UINT8"             $(tensor_dtype ifm) '=' UINT8
+RULE    "CONV_UINT8"              $(tensor_dtype ofm) '=' UINT8
+RULE    "WEIGHTS_UINT8"           $(tensor_dtype filter) '=' UINT8
+RULE    "BIAS_INT32"              $(tensor_dtype bias) '=' INT32
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_006/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_006/test.recipe
new file mode 100644 (file)
index 0000000..8a9328b
--- /dev/null
@@ -0,0 +1,44 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+  name: "filter"
+  type: FLOAT32
+  shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "bias"
+  type: FLOAT32
+  shape { dim: 64 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "ofm"
+  type: FLOAT32
+  shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+  type: "Conv2D"
+  conv2d_options {
+    padding: VALID
+    stride_w: 2
+    stride_h: 2
+  }
+  input: "ifm"
+  input: "filter"
+  input: "bias"
+  output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_006/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_006/test.rule
new file mode 100644 (file)
index 0000000..81b13a6
--- /dev/null
@@ -0,0 +1,8 @@
+# To check that the model can be quantized without QuantizeDequantizeWeights.
+
+RULE    "VERIFY_FILE_FORMAT"      $(verify_file_format) '=' 1
+
+RULE    "INPUT_INT16"             $(tensor_dtype ifm) '=' INT16
+RULE    "CONV_INT16"              $(tensor_dtype ofm) '=' INT16
+RULE    "WEIGHTS_INT16"           $(tensor_dtype filter) '=' INT16
+RULE    "BIAS_INT64"              $(tensor_dtype bias) '=' INT64
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthToSpace_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_DepthToSpace_000/test.recipe
new file mode 100644 (file)
index 0000000..c0d47bf
--- /dev/null
@@ -0,0 +1,22 @@
+operand {
+  name: "ifm"
+  type: UINT8
+  shape { dim: 1 dim: 2 dim: 2 dim: 4 }
+  quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+  name: "ofm"
+  type: UINT8
+  shape { dim: 1 dim: 4 dim: 4 dim: 1 }
+  quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+  type: "DepthToSpace"
+  depth_to_space_options {
+    block_size: 2
+  }
+  input: "ifm"
+  output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthToSpace_000/test.rule b/res/TensorFlowLiteRecipes/Quant_DepthToSpace_000/test.rule
new file mode 100644 (file)
index 0000000..d2052e7
--- /dev/null
@@ -0,0 +1,12 @@
+# To check fake quantization of DepthToSpace (D2S).
+# 1. ifm is float32.
+# 2. D2S is float32.
+# 3. Q/DQ is inserted at the beginning of the model (from ifm).
+# 4. Q/DQ is not inserted after D2S, because D2S does not change the values of its input.
+
+RULE    "VERIFY_FILE_FORMAT"      $(verify_file_format) '=' 1
+
+RULE    "IFM_FP32"              $(tensor_dtype ifm) '=' FLOAT32
+RULE    "D2S_FP32"              $(tensor_dtype ofm) '=' FLOAT32
+RULE    "QUANTIZE_OP"           $(op_count QUANTIZE) '=' 1
+RULE    "DEQUANTIZE_OP"         $(op_count DEQUANTIZE) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Quant_SpaceToDepth_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_SpaceToDepth_000/test.recipe
new file mode 100644 (file)
index 0000000..ec403dd
--- /dev/null
@@ -0,0 +1,22 @@
+operand {
+  name: "ifm"
+  type: UINT8
+  shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+  quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+  name: "ofm"
+  type: UINT8
+  shape { dim: 1 dim: 2 dim: 2 dim: 12 }
+  quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+  type: "SpaceToDepth"
+  space_to_depth_options {
+    block_size: 2
+  }
+  input: "ifm"
+  output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_SpaceToDepth_000/test.rule b/res/TensorFlowLiteRecipes/Quant_SpaceToDepth_000/test.rule
new file mode 100644 (file)
index 0000000..bb0c8cf
--- /dev/null
@@ -0,0 +1,12 @@
+# To check fake quantization of SpaceToDepth (S2D).
+# 1. ifm is float32.
+# 2. S2D is float32.
+# 3. Q/DQ is inserted at the beginning of the model (from ifm).
+# 4. Q/DQ is not inserted after S2D, because S2D does not change the values of its input.
+
+RULE    "VERIFY_FILE_FORMAT"      $(verify_file_format) '=' 1
+
+RULE    "IFM_FP32"              $(tensor_dtype ifm) '=' FLOAT32
+RULE    "S2D_FP32"              $(tensor_dtype ofm) '=' FLOAT32
+RULE    "QUANTIZE_OP"           $(op_count QUANTIZE) '=' 1
+RULE    "DEQUANTIZE_OP"         $(op_count DEQUANTIZE) '=' 1
diff --git a/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_002/test.recipe b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_002/test.recipe
new file mode 100644 (file)
index 0000000..687f4f5
--- /dev/null
@@ -0,0 +1,236 @@
+#
+# NOTE generated by tflchef-reverse with res/TensorFlowPythonExamples/examples/LSTM_retseq
+#
+operand {
+  name: "serving_default_input_1:0"
+  type: FLOAT32
+  shape {
+    dim: 1
+    dim: 4
+    dim: 4
+  }
+}
+operand {
+  name: "sequential/lstm/zeros"
+  type: FLOAT32
+  shape {
+    dim: 1
+    dim: 2
+  }
+  is_variable: true
+}
+operand {
+  name: "arith.constant"
+  type: FLOAT32
+  shape {
+    dim: 2
+    dim: 2
+  }
+  filler {
+    tag: "explicit"
+    arg: "-0.630624"
+    arg: "0.0173528"
+    arg: "0.386502"
+    arg: "0.274398"
+  }
+}
+operand {
+  name: "arith.constant1"
+  type: FLOAT32
+  shape {
+    dim: 2
+    dim: 2
+  }
+  filler {
+    tag: "explicit"
+    arg: "-0.215122"
+    arg: "0.0211586"
+    arg: "0.374135"
+    arg: "0.123864"
+  }
+}
+operand {
+  name: "arith.constant2"
+  type: FLOAT32
+  shape {
+    dim: 2
+    dim: 2
+  }
+  filler {
+    tag: "explicit"
+    arg: "0.395891"
+    arg: "-0.516027"
+    arg: "0.311454"
+    arg: "0.423152"
+  }
+}
+operand {
+  name: "arith.constant3"
+  type: FLOAT32
+  shape {
+    dim: 2
+    dim: 2
+  }
+  filler {
+    tag: "explicit"
+    arg: "-0.107339"
+    arg: "0.408966"
+    arg: "0.0376898"
+    arg: "-0.544077"
+  }
+}
+operand {
+  name: "arith.constant4"
+  type: FLOAT32
+  shape {
+    dim: 2
+  }
+  filler {
+    tag: "explicit"
+    arg: "0"
+    arg: "0"
+  }
+}
+operand {
+  name: "arith.constant5"
+  type: FLOAT32
+  shape {
+    dim: 2
+  }
+  filler {
+    tag: "explicit"
+    arg: "1"
+    arg: "1"
+  }
+}
+operand {
+  name: "arith.constant6"
+  type: FLOAT32
+  shape {
+    dim: 2
+    dim: 4
+  }
+  filler {
+    tag: "explicit"
+    arg: "0.229282"
+    arg: "-0.0678827"
+    arg: "0.449137"
+    arg: "0.470665"
+    arg: "-0.563606"
+    arg: "-0.290711"
+    arg: "0.343602"
+    arg: "-0.427935"
+  }
+}
+operand {
+  name: "arith.constant7"
+  type: FLOAT32
+  shape {
+    dim: 2
+    dim: 4
+  }
+  filler {
+    tag: "explicit"
+    arg: "-0.443107"
+    arg: "-0.504989"
+    arg: "-0.0738791"
+    arg: "-0.538787"
+    arg: "0.440037"
+    arg: "0.268466"
+    arg: "0.0149825"
+    arg: "-0.42883"
+  }
+}
+operand {
+  name: "arith.constant8"
+  type: FLOAT32
+  shape {
+    dim: 2
+    dim: 4
+  }
+  filler {
+    tag: "explicit"
+    arg: "0.523419"
+    arg: "-0.131416"
+    arg: "-0.328037"
+    arg: "-0.636753"
+    arg: "-0.0726868"
+    arg: "-0.347395"
+    arg: "0.390772"
+    arg: "0.467617"
+  }
+}
+operand {
+  name: "arith.constant9"
+  type: FLOAT32
+  shape {
+    dim: 2
+    dim: 4
+  }
+  filler {
+    tag: "explicit"
+    arg: "-0.184187"
+    arg: "-0.636662"
+    arg: "0.363794"
+    arg: "0.428437"
+    arg: "-0.431681"
+    arg: "-0.617431"
+    arg: "0.53586"
+    arg: "0.686365"
+  }
+}
+operand {
+  name: "sequential/lstm/zeros1"
+  type: FLOAT32
+  shape {
+    dim: 1
+    dim: 2
+  }
+  is_variable: true
+}
+operand {
+  name: "StatefulPartitionedCall:0"
+  type: FLOAT32
+  shape {
+    dim: 1
+    dim: 4
+    dim: 2
+  }
+}
+operation {
+  type: "UnidirectionalSequenceLSTM"
+  input: "serving_default_input_1:0"
+  input: "arith.constant9"
+  input: "arith.constant8"
+  input: "arith.constant7"
+  input: "arith.constant6"
+  input: "arith.constant3"
+  input: "arith.constant2"
+  input: "arith.constant1"
+  input: "arith.constant"
+  input: ""
+  input: ""
+  input: ""
+  input: "arith.constant4"
+  input: "arith.constant5"
+  input: "arith.constant4"
+  input: "arith.constant4"
+  input: ""
+  input: ""
+  input: "sequential/lstm/zeros"
+  input: "sequential/lstm/zeros1"
+  input: ""
+  input: ""
+  input: ""
+  input: ""
+  output: "StatefulPartitionedCall:0"
+  unidirectional_sequence_lstm_options {
+    activation: TANH
+    cell_clip: 10
+    proj_clip: 0
+    time_major: false
+    asymmetric_quantize_inputs: false
+  }
+}
+input: "serving_default_input_1:0"
+output: "StatefulPartitionedCall:0"
diff --git a/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_002/test.reverse b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_002/test.reverse
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_003/test.recipe b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_003/test.recipe
new file mode 100644 (file)
index 0000000..3e4f6f1
--- /dev/null
@@ -0,0 +1,193 @@
+operand {
+  name: "serving_default_input_16:0"
+  type: FLOAT32
+  shape {
+    dim: 1
+    dim: 1
+    dim: 1
+  }
+  is_variable: false
+}
+operand {
+  name: "sequential_15/lstm_15/zeros"
+  type: FLOAT32
+  shape {
+    dim: 1
+    dim: 1
+  }
+  is_variable: true
+}
+operand {
+  name: "arith.constant"
+  type: FLOAT32
+  shape {
+    dim: 1
+    dim: 1
+  }
+  filler {
+    tag: "explicit"
+    arg: "-0.960517"
+  }
+  is_variable: false
+}
+operand {
+  name: "arith.constant1"
+  type: FLOAT32
+  shape {
+    dim: 1
+    dim: 1
+  }
+  filler {
+    tag: "explicit"
+    arg: "-0.182756"
+  }
+}
+operand {
+  name: "arith.constant2"
+  type: FLOAT32
+  shape {
+    dim: 1
+    dim: 1
+  }
+  filler {
+    tag: "explicit"
+    arg: "0.028718"
+  }
+}
+operand {
+  name: "arith.constant3"
+  type: FLOAT32
+  shape {
+    dim: 1
+    dim: 1
+  }
+  filler {
+    tag: "explicit"
+    arg: "0.207806"
+  }
+}
+operand {
+  name: "arith.constant4"
+  type: FLOAT32
+  shape {
+    dim: 1
+  }
+  filler {
+    tag: "explicit"
+    arg: "0"
+  }
+}
+operand {
+  name: "arith.constant5"
+  type: FLOAT32
+  shape {
+    dim: 1
+  }
+  filler {
+    tag: "explicit"
+    arg: "1"
+  }
+}
+operand {
+  name: "arith.constant6"
+  type: FLOAT32
+  shape {
+    dim: 1
+    dim: 1
+  }
+  filler {
+    tag: "explicit"
+    arg: "-0.476033"
+  }
+}
+operand {
+  name: "arith.constant7"
+  type: FLOAT32
+  shape {
+    dim: 1
+    dim: 1
+  }
+  filler {
+    tag: "explicit"
+    arg: "0.152916"
+  }
+}
+operand {
+  name: "arith.constant8"
+  type: FLOAT32
+  shape {
+    dim: 1
+    dim: 1
+  }
+  filler {
+    tag: "explicit"
+    arg: "0.308059"
+  }
+}
+operand {
+  name: "arith.constant9"
+  type: FLOAT32
+  shape {
+    dim: 1
+    dim: 1
+  }
+  filler {
+    tag: "explicit"
+    arg: "0.329067"
+  }
+}
+operand {
+  name: "sequential_15/lstm_15/zeros1"
+  type: FLOAT32
+  shape {
+    dim: 1
+    dim: 1
+  }
+  is_variable: true
+}
+operand {
+  name: "StatefulPartitionedCall:0"
+  type: FLOAT32
+  shape {
+    dim: 1
+    dim: 1
+    dim: 1
+  }
+}
+operation {
+  type: "UnidirectionalSequenceLSTM"
+  input: "serving_default_input_16:0"
+  input: "arith.constant9"
+  input: "arith.constant8"
+  input: "arith.constant7"
+  input: "arith.constant6"
+  input: "arith.constant3"
+  input: "arith.constant2"
+  input: "arith.constant1"
+  input: "arith.constant"
+  input: ""
+  input: ""
+  input: ""
+  input: "arith.constant4"
+  input: "arith.constant5"
+  input: "arith.constant4"
+  input: "arith.constant4"
+  input: ""
+  input: ""
+  input: "sequential_15/lstm_15/zeros"
+  input: "sequential_15/lstm_15/zeros1"
+  input: ""
+  input: ""
+  input: ""
+  input: ""
+  output: "StatefulPartitionedCall:0"
+  unidirectional_sequence_lstm_options {
+    activation: TANH
+    cell_clip: 10
+    proj_clip: 0
+    time_major: false
+    asymmetric_quantize_inputs: false
+  }
+}
+input: "serving_default_input_16:0"
+output: "StatefulPartitionedCall:0"
diff --git a/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_003/test.rule b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_003/test.rule
new file mode 100644 (file)
index 0000000..2bda75c
--- /dev/null
@@ -0,0 +1,7 @@
+# To check if Unroll of UnidirectionalSequenceLSTM works
+
+RULE    "VERIFY_FILE_FORMAT"      $(verify_file_format) '=' 1
+
+RULE    "NO_UNIDIRSEQLSTM"        $(op_count UNIDIRECTIONAL_SEQUENCE_LSTM) '=' 0
+RULE    "YES_LOGISTICS"           $(op_count LOGISTICS) '=' 3
+RULE    "YES_MUL"                 $(op_count MUL) '=' 3
diff --git a/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_004/test.recipe b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_004/test.recipe
new file mode 100644 (file)
index 0000000..47e6437
--- /dev/null
@@ -0,0 +1,425 @@
+operand {
+  name: "serving_default_input_48:0"
+  type: FLOAT32
+  shape {
+    dim: 2
+    dim: 3
+    dim: 4
+  }
+  quant {
+    quantized_dimension: 0
+  }
+  is_variable: false
+}
+operand {
+  name: "sequential_47/lstm_46/zeros"
+  type: FLOAT32
+  shape {
+    dim: 2
+    dim: 5
+  }
+  quant {
+    quantized_dimension: 0
+  }
+  is_variable: true
+}
+operand {
+  name: "arith.constant"
+  type: FLOAT32
+  shape {
+    dim: 5
+    dim: 5
+  }
+  filler {
+    tag: "explicit"
+    arg: "-0.182069"
+    arg: "0.158518"
+    arg: "-0.249876"
+    arg: "-0.223681"
+    arg: "-0.0251322"
+    arg: "-0.234799"
+    arg: "0.0315703"
+    arg: "0.0713779"
+    arg: "-0.398819"
+    arg: "-0.331811"
+    arg: "-0.24586"
+    arg: "-0.034448"
+    arg: "-0.187116"
+    arg: "-0.224618"
+    arg: "0.280953"
+    arg: "-0.0503904"
+    arg: "0.0335912"
+    arg: "0.34419"
+    arg: "0.0784627"
+    arg: "0.246556"
+    arg: "-0.446514"
+    arg: "0.175145"
+    arg: "0.494241"
+    arg: "0.120458"
+    arg: "0.106793"
+  }
+  quant {
+    quantized_dimension: 0
+  }
+  is_variable: false
+}
+operand {
+  name: "arith.constant1"
+  type: FLOAT32
+  shape {
+    dim: 5
+    dim: 5
+  }
+  filler {
+    tag: "explicit"
+    arg: "0.409107"
+    arg: "-0.406787"
+    arg: "0.111563"
+    arg: "-0.194133"
+    arg: "-0.229023"
+    arg: "0.287904"
+    arg: "-0.344601"
+    arg: "0.0946776"
+    arg: "-0.198879"
+    arg: "0.532953"
+    arg: "0.105883"
+    arg: "0.113309"
+    arg: "-0.100015"
+    arg: "0.262142"
+    arg: "-0.223262"
+    arg: "-0.00894637"
+    arg: "-0.0819539"
+    arg: "0.195495"
+    arg: "-0.291116"
+    arg: "-0.0707405"
+    arg: "0.274591"
+    arg: "0.313034"
+    arg: "0.396099"
+    arg: "-0.186455"
+    arg: "0.0721643"
+  }
+  quant {
+    quantized_dimension: 0
+  }
+  is_variable: false
+}
+operand {
+  name: "arith.constant2"
+  type: FLOAT32
+  shape {
+    dim: 5
+    dim: 5
+  }
+  filler {
+    tag: "explicit"
+    arg: "0.0879868"
+    arg: "0.20888"
+    arg: "0.0121427"
+    arg: "-0.537515"
+    arg: "-0.20519"
+    arg: "-0.0189587"
+    arg: "0.269877"
+    arg: "-0.182624"
+    arg: "-0.0591339"
+    arg: "0.0318922"
+    arg: "-0.227111"
+    arg: "-0.149458"
+    arg: "-0.172937"
+    arg: "0.0187907"
+    arg: "0.0670664"
+    arg: "-0.121135"
+    arg: "-0.058337"
+    arg: "-0.0598793"
+    arg: "-0.362267"
+    arg: "0.0774832"
+    arg: "0.199173"
+    arg: "-0.0380472"
+    arg: "0.107854"
+    arg: "0.0658764"
+    arg: "0.0537086"
+  }
+  quant {
+    quantized_dimension: 0
+  }
+  is_variable: false
+}
+operand {
+  name: "arith.constant3"
+  type: FLOAT32
+  shape {
+    dim: 5
+    dim: 5
+  }
+  filler {
+    tag: "explicit"
+    arg: "-0.017441"
+    arg: "0.272052"
+    arg: "-0.00516871"
+    arg: "-0.0291451"
+    arg: "0.0884765"
+    arg: "0.0531231"
+    arg: "0.0352237"
+    arg: "-0.00947183"
+    arg: "0.00681541"
+    arg: "-0.000782808"
+    arg: "0.201295"
+    arg: "0.26533"
+    arg: "-0.436603"
+    arg: "-0.0725246"
+    arg: "0.390646"
+    arg: "-0.393321"
+    arg: "-0.447548"
+    arg: "-0.021616"
+    arg: "-0.0852413"
+    arg: "0.143229"
+    arg: "0.0062271"
+    arg: "0.222503"
+    arg: "0.195852"
+    arg: "-0.112013"
+    arg: "0.322707"
+  }
+  quant {
+    quantized_dimension: 0
+  }
+  is_variable: false
+}
+operand {
+  name: "arith.constant4"
+  type: FLOAT32
+  shape {
+    dim: 5
+  }
+  filler {
+    tag: "explicit"
+    arg: "0"
+    arg: "0"
+    arg: "0"
+    arg: "0"
+    arg: "0"
+  }
+  quant {
+    quantized_dimension: 0
+  }
+  is_variable: false
+}
+operand {
+  name: "arith.constant5"
+  type: FLOAT32
+  shape {
+    dim: 5
+  }
+  filler {
+    tag: "explicit"
+    arg: "1"
+    arg: "1"
+    arg: "1"
+    arg: "1"
+    arg: "1"
+  }
+  quant {
+    quantized_dimension: 0
+  }
+  is_variable: false
+}
+operand {
+  name: "arith.constant6"
+  type: FLOAT32
+  shape {
+    dim: 5
+    dim: 4
+  }
+  filler {
+    tag: "explicit"
+    arg: "0.110842"
+    arg: "0.362487"
+    arg: "-0.193648"
+    arg: "0.0795254"
+    arg: "-0.154508"
+    arg: "0.0420029"
+    arg: "-0.320009"
+    arg: "-0.299519"
+    arg: "0.0381875"
+    arg: "-0.439949"
+    arg: "-0.290634"
+    arg: "0.0254151"
+    arg: "-0.138734"
+    arg: "0.328987"
+    arg: "0.449845"
+    arg: "0.0656276"
+    arg: "0.0410624"
+    arg: "-0.35757"
+    arg: "0.234629"
+    arg: "-0.310387"
+  }
+  quant {
+    quantized_dimension: 0
+  }
+  is_variable: false
+}
+operand {
+  name: "arith.constant7"
+  type: FLOAT32
+  shape {
+    dim: 5
+    dim: 4
+  }
+  filler {
+    tag: "explicit"
+    arg: "0.418478"
+    arg: "-0.197303"
+    arg: "-0.0769891"
+    arg: "-0.352671"
+    arg: "-0.27675"
+    arg: "-0.221081"
+    arg: "-0.238606"
+    arg: "-0.0518556"
+    arg: "-0.470707"
+    arg: "0.162187"
+    arg: "-0.0575043"
+    arg: "-0.194339"
+    arg: "0.0110147"
+    arg: "-0.0778302"
+    arg: "0.0032438"
+    arg: "0.305049"
+    arg: "0.353269"
+    arg: "-0.257547"
+    arg: "-0.472484"
+    arg: "-0.0296589"
+  }
+  quant {
+    quantized_dimension: 0
+  }
+  is_variable: false
+}
+operand {
+  name: "arith.constant8"
+  type: FLOAT32
+  shape {
+    dim: 5
+    dim: 4
+  }
+  filler {
+    tag: "explicit"
+    arg: "0.233865"
+    arg: "0.260391"
+    arg: "0.343597"
+    arg: "0.403272"
+    arg: "-0.0299743"
+    arg: "-0.137641"
+    arg: "0.13583"
+    arg: "0.212403"
+    arg: "0.0147645"
+    arg: "-0.382367"
+    arg: "-0.368439"
+    arg: "0.260765"
+    arg: "-0.0455869"
+    arg: "0.329342"
+    arg: "-0.216915"
+    arg: "-0.441979"
+    arg: "0.147086"
+    arg: "0.131922"
+    arg: "-0.44475"
+    arg: "0.0715657"
+  }
+  quant {
+    quantized_dimension: 0
+  }
+  is_variable: false
+}
+operand {
+  name: "arith.constant9"
+  type: FLOAT32
+  shape {
+    dim: 5
+    dim: 4
+  }
+  filler {
+    tag: "explicit"
+    arg: "-0.0868829"
+    arg: "0.127576"
+    arg: "-0.48598"
+    arg: "0.32627"
+    arg: "0.360762"
+    arg: "-0.235853"
+    arg: "-0.223454"
+    arg: "0.265532"
+    arg: "-0.163921"
+    arg: "0.130234"
+    arg: "0.411861"
+    arg: "-0.0193611"
+    arg: "0.165723"
+    arg: "0.326238"
+    arg: "0.119351"
+    arg: "-0.0257632"
+    arg: "0.455063"
+    arg: "-0.0131663"
+    arg: "-0.157016"
+    arg: "0.482517"
+  }
+  quant {
+    quantized_dimension: 0
+  }
+  is_variable: false
+}
+operand {
+  name: "sequential_47/lstm_46/zeros1"
+  type: FLOAT32
+  shape {
+    dim: 2
+    dim: 5
+  }
+  quant {
+    quantized_dimension: 0
+  }
+  is_variable: true
+}
+operand {
+  name: "StatefulPartitionedCall:0"
+  type: FLOAT32
+  shape {
+    dim: 2
+    dim: 3
+    dim: 5
+  }
+  quant {
+    quantized_dimension: 0
+  }
+  is_variable: false
+}
+operation {
+  type: "UnidirectionalSequenceLSTM"
+  input: "serving_default_input_48:0"
+  input: "arith.constant9"
+  input: "arith.constant8"
+  input: "arith.constant7"
+  input: "arith.constant6"
+  input: "arith.constant3"
+  input: "arith.constant2"
+  input: "arith.constant1"
+  input: "arith.constant"
+  input: ""
+  input: ""
+  input: ""
+  input: "arith.constant4"
+  input: "arith.constant5"
+  input: "arith.constant4"
+  input: "arith.constant4"
+  input: ""
+  input: ""
+  input: "sequential_47/lstm_46/zeros"
+  input: "sequential_47/lstm_46/zeros1"
+  input: ""
+  input: ""
+  input: ""
+  input: ""
+  output: "StatefulPartitionedCall:0"
+  unidirectional_sequence_lstm_options {
+    activation: TANH
+    cell_clip: 10
+    proj_clip: 0
+    time_major: false
+    asymmetric_quantize_inputs: false
+  }
+}
+input: "serving_default_input_48:0"
+output: "StatefulPartitionedCall:0"
diff --git a/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_004/test.rule b/res/TensorFlowLiteRecipes/UnidirectionalSequenceLSTM_004/test.rule
new file mode 100644 (file)
index 0000000..ccc7feb
--- /dev/null
@@ -0,0 +1,6 @@
+# To check if Unroll of UnidirectionalSequenceLSTM works
+
+RULE    "VERIFY_FILE_FORMAT"      $(verify_file_format) '=' 1
+
+RULE    "NO_UNIDIRSEQLSTM"        $(op_count UNIDIRECTIONAL_SEQUENCE_LSTM) '=' 0
+RULE    "YES_FC"                  $(op_count FULLY_CONNECTED) '=' 5
index faa0d53..eb0395d 100644 (file)
@@ -2,8 +2,9 @@
 
 ## Prerequisite
 
-- Python 3.X
-- TensorFlow 1.15
+- Python 3.8
+- TensorFlow 2.8.0
+- NOTE: some examples may use older versions
 
 ## Directory Layout
 
@@ -20,7 +21,14 @@ Follow python API name
 
 ## HOWTO: Create a Python environment
 
-TBA
+Install the release Debian packages from https://github.com/Samsung/ONE/releases
+and enter the virtual environment.
+```
+source /usr/share/one/bin/venv/bin/activate
+```
+You may have to prepare the environment the first time. Read
+[how-to-prepare-virtualenv.txt](https://github.com/Samsung/ONE/blob/master/compiler/one-cmds/how-to-prepare-virtualenv.txt)
+for more information.
 
 ## HOWTO: Generate a pbtxt from examples
 
diff --git a/res/TensorFlowPythonExamples/examples/GRU_unroll/__init__.py b/res/TensorFlowPythonExamples/examples/GRU_unroll/__init__.py
new file mode 100644 (file)
index 0000000..51cb893
--- /dev/null
@@ -0,0 +1,8 @@
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+model.add(keras.layers.GRU(2, input_shape=shape, unroll=True))
diff --git a/res/TensorFlowPythonExamples/examples/LSTM_batsize/__init__.py b/res/TensorFlowPythonExamples/examples/LSTM_batsize/__init__.py
new file mode 100644 (file)
index 0000000..dfcd469
--- /dev/null
@@ -0,0 +1,10 @@
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+model.add(keras.layers.LSTM(2, input_shape=shape))
+
+# NOTE refer https://github.com/Samsung/ONE/issues/9895#issuecomment-1289766546
diff --git a/res/TensorFlowPythonExamples/examples/LSTM_retseq/__init__.py b/res/TensorFlowPythonExamples/examples/LSTM_retseq/__init__.py
new file mode 100644 (file)
index 0000000..2748c80
--- /dev/null
@@ -0,0 +1,10 @@
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+model.add(keras.layers.LSTM(2, input_shape=shape, return_sequences=True))
+
+# NOTE refer https://github.com/Samsung/ONE/issues/9895#issuecomment-1289768739
diff --git a/res/TensorFlowPythonExamples/examples/LSTM_unroll/__init__.py b/res/TensorFlowPythonExamples/examples/LSTM_unroll/__init__.py
new file mode 100644 (file)
index 0000000..d21c2b5
--- /dev/null
@@ -0,0 +1,10 @@
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+model.add(keras.layers.LSTM(2, input_shape=shape, unroll=True))
+
+# NOTE refer https://github.com/Samsung/ONE/issues/9895#issuecomment-1288436802
diff --git a/res/TensorFlowPythonExamples/examples/RNN_GRUCell_unroll/__init__.py b/res/TensorFlowPythonExamples/examples/RNN_GRUCell_unroll/__init__.py
new file mode 100644 (file)
index 0000000..f2d5036
--- /dev/null
@@ -0,0 +1,9 @@
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+grucell = keras.layers.GRUCell(2)
+model.add(keras.layers.RNN(grucell, input_shape=shape, unroll=True))
diff --git a/res/TensorFlowPythonExamples/examples/RNN_LSTMCell_unroll/__init__.py b/res/TensorFlowPythonExamples/examples/RNN_LSTMCell_unroll/__init__.py
new file mode 100644 (file)
index 0000000..927f6b6
--- /dev/null
@@ -0,0 +1,11 @@
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+lstmcell = keras.layers.LSTMCell(2)
+model.add(keras.layers.RNN(lstmcell, input_shape=shape, unroll=True))
+
+# NOTE refer https://github.com/Samsung/ONE/issues/9895#issuecomment-1289820894
diff --git a/res/TensorFlowPythonExamples/examples/SimpleRNN_unroll/__init__.py b/res/TensorFlowPythonExamples/examples/SimpleRNN_unroll/__init__.py
new file mode 100644 (file)
index 0000000..8219e6b
--- /dev/null
@@ -0,0 +1,10 @@
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+model.add(keras.layers.SimpleRNN(2, input_shape=shape, unroll=True))
+
+# NOTE refer https://github.com/Samsung/ONE/issues/9895#issuecomment-1289811569
index dfcc20b..e8279f9 100755 (executable)
@@ -6,22 +6,45 @@ import tensorflow as tf
 import importlib
 import argparse
 
+from pathlib import Path
+from tensorflow import keras
+
 parser = argparse.ArgumentParser(description='Process TensorFlow Python Examples')
 
-parser.add_argument('--mode', metavar='MODE', choices=['pbtxt'], default='pbtxt')
 parser.add_argument('examples', metavar='EXAMPLES', nargs='+')
 
 args = parser.parse_args()
 
-if args.mode == 'pbtxt':
-    for example in args.examples:
-        print("Generate '" + example + ".pbtxt'")
+output_folder = "./output/"
+
+Path(output_folder).mkdir(parents=True, exist_ok=True)
+
+for example in args.examples:
+    print("Generate '" + example + ".pbtxt'")
+
+    tf.compat.v1.reset_default_graph()
+    # https://stackoverflow.com/questions/37808866/proper-way-to-dynamically-import-a-module-with-relative-imports
+    m = importlib.import_module("examples." + example)
+
+    with open(output_folder + example + ".pbtxt", "w") as f:
+        f.write(str(tf.compat.v1.get_default_graph().as_graph_def(add_shapes=True)))
+
+    print("Generate '" + example + ".pbtxt' - Done")
 
-        tf.compat.v1.reset_default_graph()
-        # https://stackoverflow.com/questions/37808866/proper-way-to-dynamically-import-a-module-with-relative-imports
-        importlib.import_module("examples." + example)
+    # keras sequential?
+    if hasattr(m, 'model') and isinstance(m.model, keras.Sequential):
+        print("Generate '" + example + ".h5'")
+        m.model.save(output_folder + example + ".h5")
+        print("Generate '" + example + ".h5' - Done")
 
-        with open(example + ".pbtxt", "w") as f:
-            f.write(str(tf.compat.v1.get_default_graph().as_graph_def(add_shapes=True)))
+        # tflite export for experiments
+        converter = tf.lite.TFLiteConverter.from_keras_model(m.model)
+        converter.allow_custom_ops = True
+        converter.experimental_new_converter = True
+        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
+        converter._experimental_lower_tensor_list_ops = False
 
-        print("Generate '" + example + ".pbtxt' - Done")
+        tflite_model = converter.convert()
+        with open(output_folder + example + ".tflite", "wb") as f:
+            f.write(tflite_model)
+        print("Generate '" + example + ".tflite' - Done")
index bc39a09..07c3982 100644 (file)
@@ -8,7 +8,7 @@ android {
         minSdkVersion 26
         targetSdkVersion 29
         versionCode 1
-        versionName "1.21.0"
+        versionName "1.22.0"
 
         externalNativeBuild {
             ndkBuild {
diff --git a/runtime/contrib/android_tflite/CMakeLists.txt b/runtime/contrib/android_tflite/CMakeLists.txt
deleted file mode 100644 (file)
index c035ced..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-if(NOT BUILD_ANDROID_TFLITE)
-  return()
-endif(NOT BUILD_ANDROID_TFLITE)
-
-nnfw_find_package(TensorFlowLite EXACT 1.13.1 REQUIRED)
-
-if(NOT DEFINED NDK_DIR)
-  message(FATAL_ERROR "NDK_DIR should be specified via environment variable")
-endif()
-message(STATUS "Found NDK: ${NDK_DIR}")
-
-#
-# Tensorflow Lite JNI library
-#
-set(TENSORFLOW_LITE_BASE "${TensorFlowSource_DIR}/tensorflow/lite")
-set(TFLITE_JNI_BASE ${TENSORFLOW_LITE_BASE}/java/src/main/native)
-set(TFLITE_JNI_SRCS ${TFLITE_JNI_BASE}/exception_jni.cc
-                    ${TFLITE_JNI_BASE}/nativeinterpreterwrapper_jni.cc
-                    ${TFLITE_JNI_BASE}/tensor_jni.cc
-                    ${TFLITE_JNI_BASE}/tensorflow_lite_jni.cc
-                    ${CMAKE_CURRENT_SOURCE_DIR}/builtin_ops_jni.cc # Use nnfw's OpResolver
-                    )
-set(TFLITE_JNI_INCLUDES ${TENSORFLOW_LITE_BASE}/java/src/native)
-
-# TODO use tensorflow-lite static library instead of compiling all the sources again
-add_library(tensorflowlite_jni SHARED ${TFLITE_JNI_SRCS} ${TFLITE_SRCS})
-target_include_directories(tensorflowlite_jni PUBLIC ${TFLITE_JNI_INCLUDES} ${TFLITE_INCLUDES})
-target_link_libraries(tensorflowlite_jni eigen ${LIB_PTHREAD} dl)
-target_link_libraries(tensorflowlite_jni log)
-target_link_libraries(tensorflowlite_jni nnfw_lib_tflite)
-install(TARGETS tensorflowlite_jni DESTINATION lib)
diff --git a/runtime/contrib/android_tflite/builtin_ops_jni.cc b/runtime/contrib/android_tflite/builtin_ops_jni.cc
deleted file mode 100644 (file)
index 597f11a..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the License);
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tensorflow/lite/kernels/register.h"
-#include "tflite/ext/kernels/register.h"
-
-namespace tflite
-{
-
-std::unique_ptr<OpResolver> CreateOpResolver()
-{
-  return std::unique_ptr<::nnfw::tflite::BuiltinOpResolver>(
-    new ::nnfw::tflite::BuiltinOpResolver());
-}
-
-} // namespace tflite
index 6838183..8c1eb25 100644 (file)
@@ -126,7 +126,7 @@ public:
     {
       cl_int buildErr = CL_SUCCESS;
       auto buildInfo = program_.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&buildErr);
-      for (auto &pair : buildInfo)
+      for (const auto &pair : buildInfo)
       {
         std::cerr << pair.second << std::endl << std::endl;
       }
index 7604aa9..4046d7c 100644 (file)
@@ -25,7 +25,7 @@
 namespace benchmark
 {
 
-// Data class between runner(nnpackage_run and tflite_run) and libbenchmark
+// Data class between runner(onert_run and tflite_run) and libbenchmark
 class Result
 {
 public:
index 2f3c855..6233930 100644 (file)
@@ -121,7 +121,7 @@ void MemoryPoller::process()
     }
     uint32_t cur_pss = getPssSum();
 
-    for (auto &phase : _phases)
+    for (const auto &phase : _phases)
     {
       auto &rss = _rss_map.at(phase);
       if (rss < cur_rss)
index 76993f2..d8d9c3c 100644 (file)
 #include <cassert>
 #include <chrono>
 #include <iostream>
-#include <sys/time.h>
+#include <time.h>
 
 namespace
 {
 
 uint64_t nowMicros()
 {
-  struct timeval tv;
-  gettimeofday(&tv, nullptr);
-  return static_cast<uint64_t>(tv.tv_sec) * 1e6 + tv.tv_usec;
+  struct timespec ts;
+  clock_gettime(CLOCK_MONOTONIC, &ts);
+  return static_cast<uint64_t>(ts.tv_nsec) / 1e3 + static_cast<uint64_t>(ts.tv_sec) * 1e6;
 }
 
 void SleepForMicros(uint64_t micros)
index 69d6a92..3e02adb 100644 (file)
@@ -19,4 +19,4 @@ target_link_libraries(nnfw_lib_misc_test PRIVATE nnfw_coverage)
 target_link_libraries(nnfw_lib_misc_test PUBLIC gtest gtest_main ${LIB_PTHREAD})
 
 add_test(nnfw_lib_misc_test nnfw_lib_misc_test)
-install(TARGETS nnfw_lib_misc_test DESTINATION unittest_standalone)
+install(TARGETS nnfw_lib_misc_test DESTINATION unittest)
index cf8c520..8d0ba04 100644 (file)
@@ -23,6 +23,6 @@ target_link_libraries(ndarray_test PRIVATE nnfw_coverage)
 target_link_libraries(ndarray_test PUBLIC gtest gtest_main ${LIB_PTHREAD})
 
 add_test(ndarray_test ndarray_test)
-install(TARGETS ndarray_test DESTINATION unittest_standalone)
+install(TARGETS ndarray_test DESTINATION unittest)
 
 add_subdirectory(example)
index 4e04555..5f6f665 100644 (file)
@@ -25,7 +25,7 @@
 #if defined(_MSC_VER)
 #include <chrono>  // NOLINT(build/c++11)
 #else
-#include <sys/time.h>
+#include <time.h>
 #endif
 
 namespace tflite {
@@ -43,9 +43,9 @@ uint64_t NowMicros() {
 #else
 
 uint64_t NowMicros() {
-  struct timeval tv;
-  gettimeofday(&tv, nullptr);
-  return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+  struct timespec ts;
+  clock_gettime(CLOCK_MONOTONIC, &ts);
+  return static_cast<uint64_t>(ts.tv_nsec) / 1e3 + static_cast<uint64_t>(ts.tv_sec) * 1e6;
 }
 
 #endif  // defined(_MSC_VER)
index d2694b8..3c57790 100644 (file)
@@ -1,4 +1,4 @@
-nnfw_find_package(TensorFlowLite EXACT 1.13.1 QUIET)
+nnfw_find_package(TensorFlowLite EXACT 2.8.0 QUIET)
 if(NOT TensorFlowLite_FOUND)
   message(STATUS "Check tensorflow lite library extension build: need tensorflow lite library")
   return()
@@ -11,7 +11,7 @@ list(REMOVE_ITEM SOURCES ${TESTS})
 add_library(nnfw_lib_tflite STATIC ${SOURCES})
 set_target_properties(nnfw_lib_tflite PROPERTIES POSITION_INDEPENDENT_CODE ON)
 target_include_directories(nnfw_lib_tflite PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_link_libraries(nnfw_lib_tflite PUBLIC tensorflow-lite)
+target_link_libraries(nnfw_lib_tflite PUBLIC tensorflow-lite-2.8.0)
 target_link_libraries(nnfw_lib_tflite PUBLIC nnfw_lib_misc)
 target_link_libraries(nnfw_lib_tflite PRIVATE ${LIB_PTHREAD} dl)
 target_link_libraries(nnfw_lib_tflite PRIVATE nnfw_common)
diff --git a/runtime/libs/tflite/include/tflite/CopyInputInitializer.h b/runtime/libs/tflite/include/tflite/CopyInputInitializer.h
deleted file mode 100644 (file)
index 866af05..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_TFLITE_COPY_INPUT_INITIALIZER_H__
-#define __NNFW_TFLITE_COPY_INPUT_INITIALIZER_H__
-
-#include <tensorflow/lite/interpreter.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-class CopyInputInitializer
-{
-public:
-  CopyInputInitializer(::tflite::Interpreter &from) : _from{from}
-  {
-    // DO NOTHING
-  }
-
-  void run(::tflite::Interpreter &interp);
-
-private:
-  template <typename T> void setValue(::tflite::Interpreter &interp, int tensor_idx);
-
-private:
-  ::tflite::Interpreter &_from;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_COPY_INPUT_INITIALIZER_H__
index 1c35b34..2d30d41 100644 (file)
@@ -23,7 +23,7 @@
 #ifndef __NNFW_TFLITE_DIFF_H__
 #define __NNFW_TFLITE_DIFF_H__
 
-#include "tensorflow/lite/interpreter.h"
+#include "tflite/TensorView.h"
 
 #include "misc/RandomGenerator.h"
 #include "misc/tensor/Index.h"
@@ -31,7 +31,7 @@
 #include "misc/tensor/Shape.h"
 #include "misc/tensor/Comparator.h"
 
-#include "tflite/TensorView.h"
+#include <tensorflow/lite/c/c_api.h>
 
 #include <functional>
 #include <vector>
@@ -65,11 +65,11 @@ private:
 public:
   /**
   * @brief Run two interpreters and return whether their outputs match
-   * @param[in] pure   Interpreter object of expected(with TfLite)
-   * @param[in] nnapi  Interpreter object of obtained(through NNAPI)
+   * @param[in] expected  Interpreter that produces the expected (reference) outputs
+   * @param[in] obtained  Interpreter that produces the outputs under comparison
    * @return  @c true if two Interpreter results are same, otherwise @c false
    */
-  bool run(::tflite::Interpreter &pure, ::tflite::Interpreter &nnapi) const;
+  bool run(TfLiteInterpreter &expected, TfLiteInterpreter &obtained) const;
   /**
    * @brief Compare two TensorView values and return the match result
    * @param[in] expected  TensorView object to read expected values
diff --git a/runtime/libs/tflite/include/tflite/FeatureView.h b/runtime/libs/tflite/include/tflite/FeatureView.h
deleted file mode 100644 (file)
index a8f069c..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file     FeatureView.h
- * @brief    This file contains FeatureView class
- * @ingroup  COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_FEATURE_VIEW_H__
-#define __NNFW_TFLITE_FEATURE_VIEW_H__
-
-#include "tensorflow/lite/interpreter.h"
-
-#include "tflite/InputIndex.h"
-#include "tflite/OutputIndex.h"
-
-#include "misc/feature/Shape.h"
-#include "misc/feature/Reader.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-template <typename T> class FeatureView;
-
-/**
- * @brief Class to support reading element of float type feature
- */
-template <> class FeatureView<float> : public nnfw::misc::feature::Reader<float>
-{
-public:
-  /**
-   * @brief     Construct a new FeatureView object
-   * @param[in] interp  Interpreter to read from
-   * @param[in] index   InputIndex index of input
-   */
-  FeatureView(::tflite::Interpreter &interp, const InputIndex &index);
-  /**
-   * @brief     Construct a new FeatureView object
-   * @param[in] interp  Interpreter to read from
-   * @param[in] index   OutputIndex index of output
-   */
-  FeatureView(::tflite::Interpreter &interp, const OutputIndex &index);
-
-public:
-  /**
-   * @brief     Get value of element using channel, row and column index
-   * @param[in] ch    Channel index
-   * @param[in] row   Row index
-   * @param[in] col   Column index
-   * @return    Value of element
-   */
-  float at(uint32_t ch, uint32_t row, uint32_t col) const;
-  /**
-   * @brief     Get reference of element using channel, row and column index
-   * @param[in] ch  Channel index
-   * @param[in] row Row index
-   * @param[in] col Column index
-   * @return    Reference of element
-   */
-  float &at(uint32_t ch, uint32_t row, uint32_t col);
-
-  float at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const = 0;
-
-private:
-  /**
-   * @brief     Get offset of element from channel, row and column index
-   * @param[in] ch  Channel index
-   * @param[in] row Row index
-   * @param[in] col Column index
-   * @return    Offset of element
-   */
-  uint32_t getElementOffset(uint32_t ch, uint32_t row, uint32_t col) const
-  {
-    uint32_t res = 0;
-
-    // TensorFlow Lite assumes that NHWC ordering for tessor
-    res += row * _shape.W * _shape.C;
-    res += col * _shape.C;
-    res += ch;
-
-    return res;
-  }
-
-private:
-  nnfw::misc::feature::Shape _shape;
-  float *_base;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_FEATURE_VIEW_H__
index deaf05a..8fc1949 100644 (file)
@@ -40,7 +40,7 @@ public:
    * @brief Construct a InterpreterSession object with interpreter of TfLite
    * @param[in] interp The TfLite interpreter pointer
    */
-  InterpreterSession(::tflite::Interpreter *interp) : _interp{interp}
+  InterpreterSession(TfLiteInterpreter *interp) : _interp{interp}
   {
     // DO NOTHING
   }
@@ -50,7 +50,7 @@ public:
    * @brief Get TfLite interpreter pointer
    * @return The TfLite interpreter
    */
-  ::tflite::Interpreter *interp(void) override { return _interp; }
+  TfLiteInterpreter *interp(void) override { return _interp; }
 
 public:
   /**
@@ -59,9 +59,7 @@ public:
    */
   bool prepare(void) override
   {
-    _interp->UseNNAPI(false);
-
-    if (kTfLiteOk != _interp->AllocateTensors())
+    if (kTfLiteOk != TfLiteInterpreterAllocateTensors(_interp))
     {
       return false;
     }
@@ -76,7 +74,7 @@ public:
   bool run(void) override
   {
     // Return true if Invoke returns kTfLiteOk
-    return kTfLiteOk == _interp->Invoke();
+    return kTfLiteOk == TfLiteInterpreterInvoke(_interp);
   }
 
   /**
@@ -90,7 +88,7 @@ public:
   }
 
 private:
-  ::tflite::Interpreter *const _interp;
+  TfLiteInterpreter *const _interp;
 };
 
 } // namespace tflite
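Editor's note: the InterpreterSession changes above move the session off the C++ ::tflite::Interpreter class onto the TensorFlow Lite C API (TfLiteInterpreter), matching the switch to TensorFlow Lite 2.8.0 earlier in this patch; the UseNNAPI(false) call disappears because the C API exposes no such knob. A rough standalone sketch of the C API lifecycle the session now wraps (the model path, options handling, and return codes are illustrative, not part of the patch):

#include <tensorflow/lite/c/c_api.h>

int run_once_sketch()
{
  // "model.tflite" is a placeholder path, not a file shipped with this patch.
  TfLiteModel *model = TfLiteModelCreateFromFile("model.tflite");
  if (model == nullptr)
    return 1;

  TfLiteInterpreterOptions *options = TfLiteInterpreterOptionsCreate();
  TfLiteInterpreter *interp = TfLiteInterpreterCreate(model, options);

  int rc = 1;
  if (interp != nullptr && TfLiteInterpreterAllocateTensors(interp) == kTfLiteOk &&
      TfLiteInterpreterInvoke(interp) == kTfLiteOk)
  {
    rc = 0; // prepared and ran successfully, as prepare()/run() do above
  }

  // The C API delete functions accept null pointers, so cleanup is unconditional.
  TfLiteInterpreterDelete(interp);
  TfLiteInterpreterOptionsDelete(options);
  TfLiteModelDelete(model);
  return rc;
}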
diff --git a/runtime/libs/tflite/include/tflite/NNAPISession.h b/runtime/libs/tflite/include/tflite/NNAPISession.h
deleted file mode 100644 (file)
index 681dda8..0000000
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file     NNAPISession.h
- * @brief    This file contains NNAPISession class
- * @ingroup  COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_NNAPI_SESSION_H__
-#define __NNFW_TFLITE_NNAPI_SESSION_H__
-
-#include "Session.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to define NNAPI interpreter session which is inherited from Session class
- */
-class NNAPISession final : public Session
-{
-public:
-  /**
-   * @brief Construct a NNAPISession object with interpreter of TfLite
-   * @param[in] interp The TfLite interpreter pointer
-   * @note Invoke BuildGraph() of NNAPI delegate from Interpreter
-   */
-  NNAPISession(::tflite::Interpreter *interp) : _interp{interp}
-  {
-    // DO NOTHING
-  }
-
-public:
-  /**
-   * @brief Get TfLite interpreter pointer
-   * @return The TfLite interpreter
-   */
-  ::tflite::Interpreter *interp(void) override { return _interp; }
-
-public:
-  /**
-   * @brief Prepare the TfLite interpreter session
-   * @return @c true if tensor preparation is successful, otherwise @c false
-   */
-  bool prepare(void) override
-  {
-    // Explicitly turn off T/F lite internal NNAPI delegation in order to use locally defined
-    // NNAPI delegation.
-    _interp->UseNNAPI(true);
-
-    if (kTfLiteOk != _interp->AllocateTensors())
-    {
-      return false;
-    }
-
-    return true;
-  }
-
-  /**
-   * @brief Run the Invoke function of NNAPI delegate
-   * @return @c true if Invoke() is successful, otherwise @c false
-   */
-  bool run(void) override { return kTfLiteOk == _interp->Invoke(); }
-
-  /**
-   * @brief Tear down TfLite interpreter session
-   * @return @c true always
-   */
-  bool teardown(void) override
-  {
-    // DO NOTHING
-    return true;
-  }
-
-private:
-  ::tflite::Interpreter *const _interp;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_NNAPI_SESSION_H__
diff --git a/runtime/libs/tflite/include/tflite/OutputIndex.h b/runtime/libs/tflite/include/tflite/OutputIndex.h
deleted file mode 100644 (file)
index dd1ca8d..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file     OutputIndex.h
- * @brief    This file contains OutputIndex class
- * @ingroup  COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_OUTPUT_INDEX_H__
-#define __NNFW_TFLITE_OUTPUT_INDEX_H__
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to define OutputIndex
- */
-class OutputIndex
-{
-public:
-  /**
-   * @brief Construct a OutputIndex object with index value
-   * @param[in] index The value of index
-   */
-  OutputIndex(int index) : _index(index)
-  {
-    // DO NOTHING
-  }
-
-public:
-  /**
-   * @brief Get index value as int
-   * @return Index valuel as int
-   */
-  int asInt(void) const { return _index; }
-
-private:
-  int _index;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_OUTPUT_INDEX_H__
diff --git a/runtime/libs/tflite/include/tflite/OutputResetter.h b/runtime/libs/tflite/include/tflite/OutputResetter.h
deleted file mode 100644 (file)
index 424068d..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_TFLITE_OUTPUT_RESETTER_H__
-#define __NNFW_TFLITE_OUTPUT_RESETTER_H__
-
-#include <tensorflow/lite/interpreter.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-class OutputResetter
-{
-public:
-  OutputResetter()
-  {
-    // DO NOTHING
-  }
-
-  void run(::tflite::Interpreter &interp);
-
-private:
-  template <typename T> void resetValue(::tflite::Interpreter &interp, int tensor_idx);
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_OUTPUT_RESETTER_H__
diff --git a/runtime/libs/tflite/include/tflite/Quantization.h b/runtime/libs/tflite/include/tflite/Quantization.h
deleted file mode 100644 (file)
index 8272bcd..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file     Quantization.h
- * @brief    This file contains BitwiseIntToFloat union and quantization related
- * @ingroup  COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_QUANTIZATION_H__
-#define __NNFW_TFLITE_QUANTIZATION_H__
-
-/**
- * @brief Union to provide bitwise conversion of integer and float
- */
-union BitwiseIntToFloat {
-  int i;
-  float f;
-};
-
-static const float FLOAT_NEAREST_TO_1 = BitwiseIntToFloat{0x3f7fffff}.f;
-
-#include "tensorflow/lite/context.h"
-
-/**
- * @brief   Get TfLiteQuantizationParams object with default values
- * @return  TfLiteQuantizationParams object
- */
-TfLiteQuantizationParams make_default_quantization(void);
-
-#endif // __NNFW_TFLITE_QUANTIZATION_H__
index 3c241a8..7dac3a8 100644 (file)
@@ -19,7 +19,7 @@
 
 #include <misc/RandomGenerator.h>
 
-#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/c/c_api.h>
 
 namespace nnfw
 {
@@ -34,10 +34,7 @@ public:
     // DO NOTHING
   }
 
-  void run(::tflite::Interpreter &interp);
-
-private:
-  template <typename T> void setValue(::tflite::Interpreter &interp, int tensor_idx);
+  void run(TfLiteInterpreter &interp);
 
 private:
   nnfw::misc::RandomGenerator &_randgen;
diff --git a/runtime/libs/tflite/include/tflite/RandomTestRunner.h b/runtime/libs/tflite/include/tflite/RandomTestRunner.h
deleted file mode 100644 (file)
index abbf3b2..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file  RandomTestRunner.h
- * @brief This file contains class for random input testing
- */
-
-#ifndef __NNFW_TFLITE_RANDOM_TEST_RUNNER_H__
-#define __NNFW_TFLITE_RANDOM_TEST_RUNNER_H__
-
-#include "tflite/interp/Builder.h"
-
-#include <misc/RandomGenerator.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Structure for NNAPI correctness test
- */
-struct RandomTestParam
-{
-  int verbose;               //!< Verbosity of debug information
-  int tolerance;             //!< Torlerance of value difference
-  int tensor_logging = 0;    //!< Save logging to a file if not 0
-  std::string log_path = ""; //!< Path of log file, meaningful only when tensor_logging is 1
-};
-
-/**
- * @brief Class to define Random test runner
- */
-class RandomTestRunner
-{
-public:
-  /**
-   * @brief     Construct a new RandomTestRunner object
-   * @param[in] seed          Random seed value
-   * @param[in] param         RandomTestParam object for test runner
-   * @param[in] quantization  TfLiteQuantizationParams type to represent quantization value
-   */
-  RandomTestRunner(uint32_t seed, const RandomTestParam &param)
-    : _randgen{seed, 0.0f, 2.0f}, _param{param}
-  {
-    // DO NOTHING
-  }
-
-public:
-  /**
-   * @brief     Run the random test runner
-   * @param[in] running_count  Count to run tflite interpreter with NNAPI
-   * @return    0 if test succeeds, otherwise failure
-   */
-  int run(size_t running_count);
-
-public:
-  /**
-   * @brief  Get RandomGenerator reference
-   * @return RandomGenerator reference
-   */
-  nnfw::misc::RandomGenerator &generator() { return _randgen; };
-
-public:
-  /**
-   * @brief     Compile the random test runner
-   * @param[in] builder  Interpreter Builder used to run
-   */
-  void compile(const nnfw::tflite::Builder &builder);
-
-private:
-  nnfw::misc::RandomGenerator _randgen;
-  const RandomTestParam _param;
-  std::unique_ptr<::tflite::Interpreter> _tfl_interp;
-  std::unique_ptr<::tflite::Interpreter> _nnapi;
-
-public:
-  /**
-   * @brief     Create a RandomTestRunner object
-   * @param[in] seed  Random seed value
-   * @return    RandomGenerator object
-   */
-  static RandomTestRunner make(uint32_t seed);
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_RANDOM_TEST_RUNNER_H__
index b653acf..0aa2ce7 100644 (file)
@@ -23,7 +23,7 @@
 #ifndef __NNFW_TFLITE_SESSION_H__
 #define __NNFW_TFLITE_SESSION_H__
 
-#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/c/c_api.h>
 
 namespace nnfw
 {
@@ -44,7 +44,7 @@ struct Session
    * @brief   Get the Interpreter object pointer
    * @return  The Interpreter object pointer
    */
-  virtual ::tflite::Interpreter *interp(void) = 0;
+  virtual TfLiteInterpreter *interp(void) = 0;
 
   /**
    * @brief   Prepare the session
diff --git a/runtime/libs/tflite/include/tflite/TensorLogger.h b/runtime/libs/tflite/include/tflite/TensorLogger.h
deleted file mode 100644 (file)
index 0837dfc..0000000
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file     TensorLogger.h
- * @brief    This file contains TensorLogger class
- * @ingroup  COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_TENSOR_LOGGER_H__
-#define __NNFW_TFLITE_TENSOR_LOGGER_H__
-
-#include "misc/tensor/IndexIterator.h"
-#include "tflite/TensorView.h"
-
-#include <tensorflow/lite/interpreter.h>
-#include <tensorflow/lite/context.h>
-#include <fstream>
-#include <iomanip>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to write input and output value / shape into a file in python form
- * @note This is a utility to write input and output value / shape into a file in python form.\n
- *       any python app can load this value by running the python code below:\n
- *       exec(open(filename).read())\n
- *       generated python code looks like the following: \n
- *       tensor_shape_gen = []\n
- *       tensor_value_gen = []\n\n
- *       tensor_shape_gen.append("{2, 1, 2}")\n
- *       tensor_value_gen.append([1, 2, 3, 4])\n\n
- *       tensor_shape_gen.append("{2}")\n
- *       tensor_value_gen.append([1, 2])\n\n
- *       tensor_shape_gen.append("{2, 1, 2}")\n
- *       tensor_value_gen.append([1, 4, 3, 8])\n
- */
-class TensorLogger
-{
-private:
-  std::ofstream _outfile;
-
-public:
-  /**
-   * @brief Get TensorLogger instance
-   * @return The TensorLogger instance
-   */
-  static TensorLogger &get()
-  {
-    static TensorLogger instance;
-    return instance;
-  }
-
-  /**
-   * @brief Save the tensor details to file from interpreter
-   * @param[in] path The file path to save
-   * @param[in] interp The TfLite interpreter
-   */
-  void save(const std::string &path, ::tflite::Interpreter &interp)
-  {
-    open(path);
-
-    int log_index = 0;
-    for (const auto id : interp.inputs())
-    {
-      _outfile << "# input tensors" << std::endl;
-      printTensor(interp, id, log_index++);
-    }
-    for (const auto id : interp.outputs())
-    {
-      _outfile << "# output tensors" << std::endl;
-      printTensor(interp, id, log_index++);
-    }
-    close();
-  }
-
-private:
-  void open(const std::string &path)
-  {
-    if (!_outfile.is_open())
-      _outfile.open(path, std::ios_base::out);
-
-    _outfile << "# ------ file: " << path << " ------" << std::endl
-             << "tensor_shape_gen = []" << std::endl
-             << "tensor_value_gen = []" << std::endl
-             << std::endl;
-  }
-
-  void printTensor(::tflite::Interpreter &interp, const int id, const int log_index)
-  {
-    const TfLiteTensor *tensor = interp.tensor(id);
-
-    _outfile << "# tensor name: " << tensor->name << std::endl;
-    _outfile << "# tflite::interpreter.tensor(" << id << ") -> tensor_value_gen[" << log_index
-             << "]" << std::endl;
-
-    if (tensor->type == kTfLiteInt32)
-    {
-      printTensorShape(tensor);
-      printTensorValue<int32_t>(tensor, tensor->data.i32);
-    }
-    else if (interp.tensor(id)->type == kTfLiteUInt8)
-    {
-      printTensorShape(tensor);
-      printTensorValue<uint8_t>(tensor, tensor->data.uint8);
-    }
-    else if (tensor->type == kTfLiteFloat32)
-    {
-      printTensorShape(tensor);
-      printTensorValue<float>(tensor, tensor->data.f);
-    }
-  }
-
-  void printTensorShape(const TfLiteTensor *tensor)
-  {
-    _outfile << "tensor_shape_gen.append('{";
-
-    int r = 0;
-    for (; r < tensor->dims->size - 1; r++)
-    {
-      _outfile << tensor->dims->data[r] << ", ";
-    }
-    _outfile << tensor->dims->data[r];
-
-    _outfile << "}')" << std::endl;
-  }
-
-  template <typename T> void printTensorValue(const TfLiteTensor *tensor, T *tensor_data_ptr)
-  {
-    _outfile << "tensor_value_gen.append([";
-
-    _outfile << std::fixed << std::setprecision(10);
-
-    const T *end = reinterpret_cast<const T *>(tensor->data.raw_const + tensor->bytes);
-    for (T *ptr = tensor_data_ptr; ptr < end; ptr++)
-      _outfile << *ptr << ", ";
-
-    _outfile << "])" << std::endl << std::endl;
-  }
-
-  void close()
-  {
-    _outfile << "# --------- tensor shape and value defined above ---------" << std::endl;
-    _outfile.close();
-  }
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_TENSOR_LOGGER_H__
diff --git a/runtime/libs/tflite/include/tflite/TensorUtils.h b/runtime/libs/tflite/include/tflite/TensorUtils.h
deleted file mode 100644 (file)
index 08af146..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file    TensorUtils.h
- * @brief   This file contains utilities function
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_TENSOR_UTILS_H__
-#define __NNFW_TFLITE_TENSOR_UTILS_H__
-
-#include <tensorflow/lite/context.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Get @c true if tensor type is kTfLiteFloat32, otherwise @c false
- * @param[in] tensor The tensor object to be compared
- * @return @c true if tensor type is kTfLiteFloat32, otherwise @c false
- */
-inline bool isFloatTensor(const TfLiteTensor *tensor) { return tensor->type == kTfLiteFloat32; }
-
-/**
- * @brief Get @c true if tensor is 4-D tensor and the first dimension length is 1,
- *        otherwise @c false
- * @param[in] tensor The tensor object to be compared
- * @return @c true if tensor is 4-D tensor and the first dimension length is 1, otherwise @c false
- */
-inline bool isFeatureTensor(const TfLiteTensor *tensor)
-{
-  return (tensor->dims->size == 4) && (tensor->dims->data[0] == 1);
-}
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_TENSOR_UTILS_H__
index ce791a7..956fce4 100644 (file)
 #ifndef __NNFW_TFLITE_TENSOR_VIEW_H__
 #define __NNFW_TFLITE_TENSOR_VIEW_H__
 
-#include "tensorflow/lite/interpreter.h"
-
 #include "misc/tensor/Shape.h"
 #include "misc/tensor/Index.h"
 #include "misc/tensor/Reader.h"
 #include "misc/tensor/NonIncreasingStride.h"
 
+#include <tensorflow/lite/c/c_api.h>
+
 namespace nnfw
 {
 namespace tflite
@@ -98,19 +98,17 @@ public:
    * @param[in] tensor The TfLite tensor to wrap (obtained through the C API)
    * @return The new TensorView<T> object
    */
-  static TensorView<T> make(::tflite::Interpreter &interp, int tensor_index)
+  static TensorView<T> make(const TfLiteTensor *tensor)
   {
-    auto tensor_ptr = interp.tensor(tensor_index);
-
     // Set 'shape'
-    nnfw::misc::tensor::Shape shape(tensor_ptr->dims->size);
+    nnfw::misc::tensor::Shape shape(TfLiteTensorNumDims(tensor));
 
     for (uint32_t axis = 0; axis < shape.rank(); ++axis)
     {
-      shape.dim(axis) = tensor_ptr->dims->data[axis];
+      shape.dim(axis) = TfLiteTensorDim(tensor, axis);
     }
 
-    return TensorView<T>(shape, interp.typed_tensor<T>(tensor_index));
+    return TensorView<T>(shape, reinterpret_cast<T *>(TfLiteTensorData(tensor)));
   }
 };
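Editor's note: TensorView<T>::make above now takes a TfLiteTensor pointer directly instead of an interpreter plus tensor index, and reads shape and data through the C API accessors. A small sketch of those accessors in isolation (the helper names are illustrative):

#include <tensorflow/lite/c/c_api.h>
#include <cstddef>
#include <cstdint>

// Total element count from the tensor's dimensions, as make() walks them above.
std::size_t element_count_sketch(const TfLiteTensor *tensor)
{
  std::size_t count = 1;
  for (int32_t axis = 0; axis < TfLiteTensorNumDims(tensor); ++axis)
    count *= static_cast<std::size_t>(TfLiteTensorDim(tensor, axis));
  return count;
}

float *float_data_sketch(const TfLiteTensor *tensor)
{
  // TfLiteTensorData returns void*; the caller must know the element type,
  // e.g. by checking TfLiteTensorType(tensor) == kTfLiteFloat32 first.
  return reinterpret_cast<float *>(TfLiteTensorData(tensor));
}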
 
diff --git a/runtime/libs/tflite/include/tflite/interp/Builder.h b/runtime/libs/tflite/include/tflite/interp/Builder.h
deleted file mode 100644 (file)
index 0f54e17..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file     Builder.h
- * @brief    This file contains Builder structure
- * @ingroup  COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_INTERP_BUILDER_H__
-#define __NNFW_TFLITE_INTERP_BUILDER_H__
-
-#include <tensorflow/lite/interpreter.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Structure to Builder
- */
-struct Builder
-{
-  /**
-   * @brief Destroy the Builder object
-   */
-  virtual ~Builder() = default;
-
-  /**
-   * @brief Build a FlatBuffer model
-   * @return The TfLite interpreter object
-   */
-  virtual std::unique_ptr<::tflite::Interpreter> build(void) const = 0;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_INTERP_BUILDER_H__
diff --git a/runtime/libs/tflite/include/tflite/interp/FlatBufferBuilder.h b/runtime/libs/tflite/include/tflite/interp/FlatBufferBuilder.h
deleted file mode 100644 (file)
index 2d96af5..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file     FlatBufferBuilder.h
- * @brief    This file contains FlatBufferBuilder class
- * @ingroup  COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_INTERP_FLAT_BUFFER_BUILDER_H__
-#define __NNFW_TFLITE_INTERP_FLAT_BUFFER_BUILDER_H__
-
-#include <tensorflow/lite/model.h>
-
-#include "tflite/interp/Builder.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to define FlatBufferBuilder which is inherited from Builder
- */
-class FlatBufferBuilder final : public Builder
-{
-public:
-  /**
-   * @brief Construct a FlatBufferBuilder object with FlatBufferModel of TfLite
-   * @param[in] model The TfLite Flatbuffer model
-   */
-  FlatBufferBuilder(const ::tflite::FlatBufferModel &model) : _model{model}
-  {
-    // DO NOTHING
-  }
-
-public:
-  /**
-   * @brief Build a FlatBuffer model
-   * @return The TfLite interpreter pointer address
-   */
-  std::unique_ptr<::tflite::Interpreter> build(void) const override;
-
-private:
-  const ::tflite::FlatBufferModel &_model;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_INTERP_FLAT_BUFFER_BUILDER_H__
diff --git a/runtime/libs/tflite/include/tflite/interp/FunctionBuilder.h b/runtime/libs/tflite/include/tflite/interp/FunctionBuilder.h
deleted file mode 100644 (file)
index 7bfb8db..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file     FunctionBuilder.h
- * @brief    This file contains FunctionBuilder class
- * @ingroup  COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_INTERP_FUNCTION_BUILDER_H__
-#define __NNFW_TFLITE_INTERP_FUNCTION_BUILDER_H__
-
-#include <tensorflow/lite/model.h>
-
-#include "tflite/interp/Builder.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to define FunctionBuilder which is inherited from Builder
- */
-class FunctionBuilder final : public Builder
-{
-public:
-  using SetupFunc = std::function<void(::tflite::Interpreter &)>;
-
-public:
-  /**
-   * @brief Construct a FunctionBuilder object with SetupFunction
-   * @param[in] fn The SetupFunc object
-   */
-  FunctionBuilder(const SetupFunc &fn) : _fn{fn}
-  {
-    // DO NOTHING
-  }
-
-public:
-  /**
-   * @brief Build a SetupFunc
-   * @return The TfLite interpreter pointer address
-   */
-  std::unique_ptr<::tflite::Interpreter> build(void) const override;
-
-private:
-  SetupFunc _fn;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_INTERP_FUNCTION_BUILDER_H__
diff --git a/runtime/libs/tflite/src/CopyInputInitializer.cpp b/runtime/libs/tflite/src/CopyInputInitializer.cpp
deleted file mode 100644 (file)
index 1950dad..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/CopyInputInitializer.h"
-#include "tflite/TensorView.h"
-
-#include <misc/tensor/IndexIterator.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-void CopyInputInitializer::run(::tflite::Interpreter &interp)
-{
-  for (const auto &tensor_idx : interp.inputs())
-  {
-    TfLiteTensor *tensor = interp.tensor(tensor_idx);
-    switch (tensor->type)
-    {
-      case kTfLiteInt32:
-        setValue<int32_t>(interp, tensor_idx);
-        break;
-      case kTfLiteUInt8:
-        setValue<uint8_t>(interp, tensor_idx);
-        break;
-      case kTfLiteInt8:
-        setValue<int8_t>(interp, tensor_idx);
-        break;
-      case kTfLiteBool:
-        setValue<bool>(interp, tensor_idx);
-        break;
-      case kTfLiteFloat32:
-        setValue<float>(interp, tensor_idx);
-        break;
-      default:
-        throw std::runtime_error{"Not supported input type"};
-    }
-  }
-}
-
-template <typename T>
-void CopyInputInitializer::setValue(::tflite::Interpreter &interp, int tensor_idx)
-{
-  auto tensor_from_view = nnfw::tflite::TensorView<T>::make(_from, tensor_idx);
-  auto tensor_to_view = nnfw::tflite::TensorView<T>::make(interp, tensor_idx);
-
-  nnfw::misc::tensor::iterate(tensor_from_view.shape())
-    << [&](const nnfw::misc::tensor::Index &ind) {
-         tensor_to_view.at(ind) = tensor_from_view.at(ind);
-       };
-}
-
-} // namespace tflite
-} // namespace nnfw
index 2d2b66e..8165798 100644 (file)
@@ -22,6 +22,8 @@
 #include "misc/tensor/Zipper.h"
 #include "misc/tensor/Comparator.h"
 
+#include <tensorflow/lite/c/c_api.h>
+
 #include <iostream>
 #include <cassert>
 
@@ -190,53 +192,57 @@ bool TfLiteInterpMatchApp::compareSingleTensorView<float>(
 
 #include <map>
 
-bool TfLiteInterpMatchApp::run(::tflite::Interpreter &interp, ::tflite::Interpreter &nnapi) const
+bool TfLiteInterpMatchApp::run(TfLiteInterpreter &expected, TfLiteInterpreter &obtained) const
 {
-  assert(interp.outputs() == nnapi.outputs());
+  auto output_count = TfLiteInterpreterGetOutputTensorCount(&expected);
+  assert(output_count == TfLiteInterpreterGetOutputTensorCount(&obtained));
 
   bool all_matched = true;
 
-  using Comparator = std::function<bool(int id, ::tflite::Interpreter &, ::tflite::Interpreter &)>;
+  using Comparator = std::function<bool(int32_t, const TfLiteTensor *, const TfLiteTensor *)>;
 
   std::map<TfLiteType, Comparator> comparators;
 
-  comparators[kTfLiteUInt8] = [this](int id, ::tflite::Interpreter &interp,
-                                     ::tflite::Interpreter &nnapi) {
-    const auto expected = nnfw::tflite::TensorView<uint8_t>::make(interp, id);
-    const auto obtained = nnfw::tflite::TensorView<uint8_t>::make(nnapi, id);
+  comparators[kTfLiteUInt8] = [this](int32_t id, const TfLiteTensor *expected_tensor,
+                                     const TfLiteTensor *obtained_tensor) {
+    const auto expected_view = nnfw::tflite::TensorView<uint8_t>::make(expected_tensor);
+    const auto obtained_view = nnfw::tflite::TensorView<uint8_t>::make(obtained_tensor);
 
-    return compareSingleTensorView(expected, obtained, id);
+    return compareSingleTensorView(expected_view, obtained_view, id);
   };
 
-  comparators[kTfLiteInt32] = [this](int id, ::tflite::Interpreter &interp,
-                                     ::tflite::Interpreter &nnapi) {
-    const auto expected = nnfw::tflite::TensorView<int32_t>::make(interp, id);
-    const auto obtained = nnfw::tflite::TensorView<int32_t>::make(nnapi, id);
+  comparators[kTfLiteInt32] = [this](int32_t id, const TfLiteTensor *expected_tensor,
+                                     const TfLiteTensor *obtained_tensor) {
+    const auto expected_view = nnfw::tflite::TensorView<int32_t>::make(expected_tensor);
+    const auto obtained_view = nnfw::tflite::TensorView<int32_t>::make(obtained_tensor);
 
-    return compareSingleTensorView(expected, obtained, id);
+    return compareSingleTensorView(expected_view, obtained_view, id);
   };
 
-  comparators[kTfLiteFloat32] = [this](int id, ::tflite::Interpreter &interp,
-                                       ::tflite::Interpreter &nnapi) {
-    const auto expected = nnfw::tflite::TensorView<float>::make(interp, id);
-    const auto obtained = nnfw::tflite::TensorView<float>::make(nnapi, id);
+  comparators[kTfLiteFloat32] = [this](int32_t id, const TfLiteTensor *expected_tensor,
+                                       const TfLiteTensor *obtained_tensor) {
+    const auto expected_view = nnfw::tflite::TensorView<float>::make(expected_tensor);
+    const auto obtained_view = nnfw::tflite::TensorView<float>::make(obtained_tensor);
 
-    return compareSingleTensorView(expected, obtained, id);
+    return compareSingleTensorView(expected_view, obtained_view, id);
   };
 
-  comparators[kTfLiteBool] = [this](int id, ::tflite::Interpreter &interp,
-                                    ::tflite::Interpreter &nnapi) {
-    const auto expected = nnfw::tflite::TensorView<bool>::make(interp, id);
-    const auto obtained = nnfw::tflite::TensorView<bool>::make(nnapi, id);
+  comparators[kTfLiteBool] = [this](int32_t id, const TfLiteTensor *expected_tensor,
+                                    const TfLiteTensor *obtained_tensor) {
+    const auto expected_view = nnfw::tflite::TensorView<bool>::make(expected_tensor);
+    const auto obtained_view = nnfw::tflite::TensorView<bool>::make(obtained_tensor);
 
-    return compareSingleTensorView(expected, obtained, id);
+    return compareSingleTensorView(expected_view, obtained_view, id);
   };
 
-  for (const auto &id : interp.outputs())
+  for (int32_t idx = 0; idx < output_count; idx++)
   {
-    assert(interp.tensor(id)->type == nnapi.tensor(id)->type);
+    auto const expected_tensor = TfLiteInterpreterGetOutputTensor(&expected, idx);
+    auto const obtained_tensor = TfLiteInterpreterGetOutputTensor(&obtained, idx);
+    auto const tensor_type = TfLiteTensorType(expected_tensor);
+    assert(tensor_type == TfLiteTensorType(obtained_tensor));
 
-    auto it = comparators.find(interp.tensor(id)->type);
+    auto it = comparators.find(tensor_type);
 
     if (it == comparators.end())
     {
@@ -245,7 +251,7 @@ bool TfLiteInterpMatchApp::run(::tflite::Interpreter &interp, ::tflite::Interpre
 
     const auto &comparator = it->second;
 
-    if (!comparator(id, interp, nnapi))
+    if (!comparator(idx, expected_tensor, obtained_tensor))
     {
       all_matched = false;
     }
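Editor's note: the rewritten TfLiteInterpMatchApp::run above enumerates output tensors by index and dispatches on TfLiteTensorType instead of walking ::tflite::Interpreter::outputs(). A stripped-down sketch of that enumeration pattern (the value comparison is omitted; only the type check is shown):

#include <tensorflow/lite/c/c_api.h>

bool outputs_same_type_sketch(TfLiteInterpreter *expected, TfLiteInterpreter *obtained)
{
  const int32_t count = TfLiteInterpreterGetOutputTensorCount(expected);
  if (count != TfLiteInterpreterGetOutputTensorCount(obtained))
    return false;

  for (int32_t idx = 0; idx < count; ++idx)
  {
    const TfLiteTensor *lhs = TfLiteInterpreterGetOutputTensor(expected, idx);
    const TfLiteTensor *rhs = TfLiteInterpreterGetOutputTensor(obtained, idx);
    if (TfLiteTensorType(lhs) != TfLiteTensorType(rhs))
      return false;
  }
  return true;
}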
diff --git a/runtime/libs/tflite/src/FeatureView.cpp b/runtime/libs/tflite/src/FeatureView.cpp
deleted file mode 100644 (file)
index fdf5a4b..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/FeatureView.h"
-#include "tflite/TensorUtils.h"
-
-#include <cassert>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-nnfw::misc::feature::Shape getFeatureShape(const TfLiteTensor *tensor)
-{
-  nnfw::misc::feature::Shape shape{tensor->dims->data[3], tensor->dims->data[1],
-                                   tensor->dims->data[2]};
-
-  return shape;
-}
-
-FeatureView<float>::FeatureView(::tflite::Interpreter &interp, const InputIndex &index)
-{
-  const auto tensor_index = interp.inputs().at(index.asInt());
-  auto tensor_ptr = interp.tensor(tensor_index);
-
-  assert(isFloatTensor(tensor_ptr));
-  assert(isFeatureTensor(tensor_ptr));
-
-  _shape = getFeatureShape(tensor_ptr);
-  _base = interp.typed_tensor<float>(tensor_index);
-}
-
-FeatureView<float>::FeatureView(::tflite::Interpreter &interp, const OutputIndex &index)
-{
-  const auto tensor_index = interp.outputs().at(index.asInt());
-  auto tensor_ptr = interp.tensor(tensor_index);
-
-  assert(isFloatTensor(tensor_ptr));
-  assert(isFeatureTensor(tensor_ptr));
-
-  _shape = getFeatureShape(tensor_ptr);
-  _base = interp.typed_tensor<float>(tensor_index);
-}
-
-float FeatureView<float>::at(uint32_t ch, uint32_t row, uint32_t col) const
-{
-  return *(_base + getElementOffset(ch, row, col));
-}
-
-float &FeatureView<float>::at(uint32_t ch, uint32_t row, uint32_t col)
-{
-  return *(_base + getElementOffset(ch, row, col));
-}
-
-} // namespace tflite
-} // namespace nnfw
diff --git a/runtime/libs/tflite/src/OutputResetter.cpp b/runtime/libs/tflite/src/OutputResetter.cpp
deleted file mode 100644 (file)
index 486bb40..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/OutputResetter.h"
-#include "tflite/TensorView.h"
-
-#include <misc/tensor/IndexIterator.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-void OutputResetter::run(::tflite::Interpreter &interp)
-{
-  for (const auto &tensor_idx : interp.outputs())
-  {
-    TfLiteTensor *tensor = interp.tensor(tensor_idx);
-    switch (tensor->type)
-    {
-      case kTfLiteInt32:
-        resetValue<int32_t>(interp, tensor_idx);
-        break;
-      case kTfLiteUInt8:
-        resetValue<uint8_t>(interp, tensor_idx);
-        break;
-      case kTfLiteInt8:
-        resetValue<int8_t>(interp, tensor_idx);
-        break;
-      case kTfLiteBool:
-        resetValue<bool>(interp, tensor_idx);
-        break;
-      case kTfLiteFloat32:
-        resetValue<float>(interp, tensor_idx);
-        break;
-      default:
-        throw std::runtime_error{"Not supported output type"};
-    }
-  }
-}
-
-template <typename T> void OutputResetter::resetValue(::tflite::Interpreter &interp, int tensor_idx)
-{
-  auto tensor_view = nnfw::tflite::TensorView<T>::make(interp, tensor_idx);
-
-  nnfw::misc::tensor::iterate(tensor_view.shape())
-    << [&](const nnfw::misc::tensor::Index &ind) { tensor_view.at(ind) = 0; };
-}
-
-} // namespace tflite
-} // namespace nnfw
diff --git a/runtime/libs/tflite/src/Quantization.cpp b/runtime/libs/tflite/src/Quantization.cpp
deleted file mode 100644 (file)
index 9c162c3..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/Quantization.h"
-
-TfLiteQuantizationParams make_default_quantization(void)
-{
-  return TfLiteQuantizationParams{0.0f, 0};
-}
index 57dd7f6..9ed90f3 100644 (file)
@@ -23,28 +23,43 @@ namespace nnfw
 {
 namespace tflite
 {
+namespace
+{
+
+template <typename T>
+void setValue(nnfw::misc::RandomGenerator &randgen, const TfLiteTensor *tensor)
+{
+  auto tensor_view = nnfw::tflite::TensorView<T>::make(tensor);
+
+  nnfw::misc::tensor::iterate(tensor_view.shape())
+    << [&](const nnfw::misc::tensor::Index &ind) { tensor_view.at(ind) = randgen.generate<T>(); };
+}
 
-void RandomInputInitializer::run(::tflite::Interpreter &interp)
+} // namespace
+
+void RandomInputInitializer::run(TfLiteInterpreter &interp)
 {
-  for (const auto &tensor_idx : interp.inputs())
+  const auto input_count = TfLiteInterpreterGetInputTensorCount(&interp);
+  for (int32_t idx = 0; idx < input_count; idx++)
   {
-    TfLiteTensor *tensor = interp.tensor(tensor_idx);
-    switch (tensor->type)
+    auto tensor = TfLiteInterpreterGetInputTensor(&interp, idx);
+    auto const tensor_type = TfLiteTensorType(tensor);
+    switch (tensor_type)
     {
       case kTfLiteFloat32:
-        setValue<float>(interp, tensor_idx);
+        setValue<float>(_randgen, tensor);
         break;
       case kTfLiteInt32:
-        setValue<int32_t>(interp, tensor_idx);
+        setValue<int32_t>(_randgen, tensor);
         break;
       case kTfLiteUInt8:
-        setValue<uint8_t>(interp, tensor_idx);
+        setValue<uint8_t>(_randgen, tensor);
         break;
       case kTfLiteBool:
-        setValue<bool>(interp, tensor_idx);
+        setValue<bool>(_randgen, tensor);
         break;
       case kTfLiteInt8:
-        setValue<int8_t>(interp, tensor_idx);
+        setValue<int8_t>(_randgen, tensor);
         break;
       default:
         throw std::runtime_error{"Not supported input type"};
@@ -52,14 +67,5 @@ void RandomInputInitializer::run(::tflite::Interpreter &interp)
   }
 }
 
-template <typename T>
-void RandomInputInitializer::setValue(::tflite::Interpreter &interp, int tensor_idx)
-{
-  auto tensor_view = nnfw::tflite::TensorView<T>::make(interp, tensor_idx);
-
-  nnfw::misc::tensor::iterate(tensor_view.shape())
-    << [&](const nnfw::misc::tensor::Index &ind) { tensor_view.at(ind) = _randgen.generate<T>(); };
-}
-
 } // namespace tflite
 } // namespace nnfw
diff --git a/runtime/libs/tflite/src/RandomTestRunner.cpp b/runtime/libs/tflite/src/RandomTestRunner.cpp
deleted file mode 100644 (file)
index 48e7f11..0000000
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/CopyInputInitializer.h"
-#include "tflite/OutputResetter.h"
-#include "tflite/RandomInputInitializer.h"
-#include "tflite/RandomTestRunner.h"
-#include "tflite/Diff.h"
-#include "tflite/TensorLogger.h"
-
-#include <misc/tensor/IndexIterator.h>
-#include <misc/tensor/Object.h>
-#include <misc/EnvVar.h>
-#include <misc/fp32.h>
-
-#include <cassert>
-#include <map>
-#include <functional>
-#include <iostream>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-using namespace std::placeholders;
-
-void RandomTestRunner::compile(const nnfw::tflite::Builder &builder)
-{
-  _tfl_interp = builder.build();
-  _nnapi = builder.build();
-
-  _tfl_interp->UseNNAPI(false);
-  _nnapi->UseNNAPI(true);
-
-  // Allocate Tensors
-  _tfl_interp->AllocateTensors();
-  _nnapi->AllocateTensors();
-}
-
-int RandomTestRunner::run(size_t running_count)
-{
-  assert(_tfl_interp->inputs() == _nnapi->inputs());
-  assert(_tfl_interp->outputs() == _nnapi->outputs());
-
-  nnfw::tflite::OutputResetter resetter;
-  resetter.run(*(_tfl_interp.get()));
-
-  RandomInputInitializer initializer{_randgen};
-  initializer.run(*(_tfl_interp.get()));
-
-  std::cout << "[NNAPI TEST] Run T/F Lite Interpreter without NNAPI" << std::endl;
-  _tfl_interp->Invoke();
-
-  for (size_t i = 1; i <= running_count; ++i)
-  {
-    resetter.run(*(_nnapi.get()));
-
-    CopyInputInitializer copy_initializer{*(_tfl_interp.get())};
-    copy_initializer.run(*(_nnapi.get()));
-
-    std::cout << "[NNAPI TEST #" << i << "] Run T/F Lite Interpreter with NNAPI" << std::endl;
-
-    if (_nnapi->Invoke() != kTfLiteOk)
-    {
-      throw std::runtime_error{"Failed to Run T/F Lite Interpreter with NNAPI"};
-    }
-
-    // Compare OFM
-    std::cout << "[NNAPI TEST #" << i << "] Compare the result" << std::endl;
-
-    const auto tolerance = _param.tolerance;
-
-    auto equals = [tolerance](float lhs, float rhs) {
-      // NOTE Hybrid approach
-      // TODO Allow users to set tolerance for absolute_epsilon_equal
-      if (nnfw::misc::fp32::absolute_epsilon_equal(lhs, rhs))
-      {
-        return true;
-      }
-
-      return nnfw::misc::fp32::epsilon_equal(lhs, rhs, tolerance);
-    };
-
-    nnfw::misc::tensor::Comparator comparator(equals);
-    TfLiteInterpMatchApp app(comparator);
-
-    app.verbose() = _param.verbose;
-
-    bool res = app.run(*_tfl_interp, *_nnapi);
-
-    if (!res)
-    {
-      return 255;
-    }
-
-    std::cout << "[NNAPI TEST #" << i << "] PASSED" << std::endl << std::endl;
-
-    if (_param.tensor_logging)
-      nnfw::tflite::TensorLogger::get().save(_param.log_path, *_tfl_interp);
-  }
-
-  return 0;
-}
-
-RandomTestRunner RandomTestRunner::make(uint32_t seed)
-{
-  RandomTestParam param;
-
-  param.verbose = nnfw::misc::EnvVar("VERBOSE").asInt(0);
-  param.tolerance = nnfw::misc::EnvVar("TOLERANCE").asInt(1);
-  param.tensor_logging = nnfw::misc::EnvVar("TENSOR_LOGGING").asBool(false);
-  param.log_path = nnfw::misc::EnvVar("TENSOR_LOGGING").asString("tensor_log.txt");
-
-  return RandomTestRunner{seed, param};
-}
-
-} // namespace tflite
-} // namespace nnfw
diff --git a/runtime/libs/tflite/src/interp/FlatBufferBuilder.cpp b/runtime/libs/tflite/src/interp/FlatBufferBuilder.cpp
deleted file mode 100644 (file)
index 70dbb27..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/interp/FlatBufferBuilder.h"
-
-#include <tensorflow/lite/kernels/register.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-std::unique_ptr<::tflite::Interpreter> FlatBufferBuilder::build(void) const
-{
-  std::unique_ptr<::tflite::Interpreter> interpreter;
-
-  ::tflite::ops::builtin::BuiltinOpResolver resolver;
-
-  ::tflite::InterpreterBuilder builder(_model, resolver);
-
-  builder(&interpreter);
-
-  return interpreter;
-}
-
-} // namespace tflite
-} // namespace nnfw
diff --git a/runtime/libs/tflite/src/interp/FunctionBuilder.cpp b/runtime/libs/tflite/src/interp/FunctionBuilder.cpp
deleted file mode 100644 (file)
index 599a4f3..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/interp/FunctionBuilder.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-std::unique_ptr<::tflite::Interpreter> FunctionBuilder::build(void) const
-{
-  auto res = std::unique_ptr<::tflite::Interpreter>{new ::tflite::Interpreter};
-
-  _fn(*res);
-
-  return res;
-}
-
-} // namespace tflite
-} // namespace nnfw
index 2fbb96f..be30ee2 100644 (file)
@@ -21,6 +21,6 @@
  * NNFW_VERSION is a uint32 value representing nnfw runtime version
  * in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
  */
-#define NNFW_VERSION 0x01001500
+#define NNFW_VERSION 0x01001600
 
 #endif // __NNFW_VERSION_H__
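Editor's note: the new value fits the 0xMMmmmmPP layout described in the comment above: 0x01001600 decodes to major 1, minor 22, patch 0, i.e. the 1.22.0 release this import corresponds to. A tiny worked decode, for illustration only:

#include <cstdint>
#include <cstdio>

int main()
{
  const uint32_t version = 0x01001600; // NNFW_VERSION after this change
  const uint32_t major = (version >> 24) & 0xFF;
  const uint32_t minor = (version >> 8) & 0xFFFF;
  const uint32_t patch = version & 0xFF;
  std::printf("%u.%u.%u\n", major, minor, patch); // prints 1.22.0
  return 0;
}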
index 9b43dd3..8eedb53 100644 (file)
@@ -16,7 +16,7 @@
 
 #include "nnfw_api_internal.h"
 #include "CustomKernelRegistry.h"
-#include "compiler/Compiler.h"
+#include "compiler/CompilerFactory.h"
 #include "util/ConfigSource.h"
 #include "util/Exceptions.h"
 #include "util/logging.h"
@@ -208,29 +208,24 @@ NNFW_STATUS nnfw_session::create(nnfw_session **session)
 {
   if (session == nullptr)
     return NNFW_STATUS_UNEXPECTED_NULL;
-
-  // Create session
-  *session = new (std::nothrow) nnfw_session();
-  if (*session == nullptr)
+  try
   {
-    std::cerr << "Error during session creation" << std::endl;
-    return NNFW_STATUS_OUT_OF_MEMORY;
+    auto new_session = std::unique_ptr<nnfw_session>(new nnfw_session());
+    new_session->_kernel_registry = std::make_shared<onert::api::CustomKernelRegistry>();
+    *session = new_session.release();
   }
-
-  // Initialize fields
-  try
+  catch (const std::bad_alloc &e)
   {
-    (*session)->_kernel_registry = std::make_shared<onert::api::CustomKernelRegistry>();
+    std::cerr << "Error during session creation" << std::endl;
+    *session = nullptr; // Set nullptr on error to keep the old behavior
+    return NNFW_STATUS_OUT_OF_MEMORY;
   }
   catch (const std::exception &e)
   {
     std::cerr << "Error during session initialization : " << e.what() << std::endl;
-    delete *session;
-    *session = nullptr;
-
+    *session = nullptr; // Set nullptr on error to keep the old behavior
     return NNFW_STATUS_ERROR;
   }
-
   return NNFW_STATUS_NO_ERROR;
 }
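The rewritten nnfw_session::create() above keeps the original caller contract: on failure the out-pointer is reset to nullptr and either NNFW_STATUS_OUT_OF_MEMORY or NNFW_STATUS_ERROR is returned, while success hands ownership of the new session to the caller. A minimal caller sketch (illustrative only; it assumes the declarations visible in this hunk and a hypothetical helper name):

#include "nnfw_api_internal.h" // declares nnfw_session and NNFW_STATUS, as used in this hunk

NNFW_STATUS make_session(nnfw_session **out)
{
  NNFW_STATUS status = nnfw_session::create(out);
  if (status != NNFW_STATUS_NO_ERROR)
  {
    // *out is nullptr here, for both the bad_alloc and generic-exception
    // paths, matching the pre-refactor behavior.
    return status;
  }
  // The caller now owns *out and releases it through the usual close path.
  return NNFW_STATUS_NO_ERROR;
}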
 
@@ -331,7 +326,6 @@ NNFW_STATUS nnfw_session::load_model_from_nnpackage(const char *package_dir)
     std::string manifest_file_name = package_path + "/metadata/MANIFEST";
     std::ifstream mfs(manifest_file_name);
 
-    _package_file_path = package_path;
     // extract the filename of the first(index 0) model
     // e.g. In MANIFEST file, { "models" : [ "firstmodel.tflite", "2nd.tflite" ] }
     Json::Value root;
@@ -351,7 +345,14 @@ NNFW_STATUS nnfw_session::load_model_from_nnpackage(const char *package_dir)
       }
     }
     _nnpkg = std::make_shared<onert::ir::NNPkg>();
-    for (uint32_t i = 0; i < models.size(); ++i)
+    auto num_models = models.size();
+    if (num_models == 0 || (num_models - 1) > onert::ir::ModelIndex::max())
+    {
+      std::cerr << "Invalid model size - " << std::to_string(num_models) << std::endl;
+      return NNFW_STATUS_ERROR;
+    }
+
+    for (uint16_t i = 0; i < num_models; ++i)
     {
       auto model_file_path = package_path + std::string("/") + models[i].asString();
       auto model_type = model_types[i].asString();
@@ -390,6 +391,8 @@ NNFW_STATUS nnfw_session::load_model_from_nnpackage(const char *package_dir)
       for (uint32_t j = 0; j < tos.size(); ++j)
         _nnpkg->addEdge(toIODesc(fromtos[i]["from"].asString()), toIODesc(tos[j].asString()));
     }
+
+    _nnpkg->verify();
     _state = State::MODEL_LOADED;
   }
   catch (const std::exception &e)
@@ -420,14 +423,7 @@ NNFW_STATUS nnfw_session::prepare()
 
   try
   {
-    // TODO: Compile all models in case of multiple models
-    if (_nnpkg->model_count() > 2)
-    {
-      std::cerr << "Error during model prepare : more than 3 multiple models are not supported yet."
-                << std::endl;
-      return NNFW_STATUS_ERROR;
-    }
-    auto compiler = std::make_unique<onert::compiler::Compiler>(_nnpkg, _coptions);
+    auto compiler = onert::compiler::CompilerFactory::get().create(_nnpkg, _coptions);
     _nnpkg.reset();
     _compiler_artifact = compiler->compile();
     _execution = std::make_unique<onert::exec::Execution>(_compiler_artifact->_executors);
@@ -442,50 +438,10 @@ NNFW_STATUS nnfw_session::prepare()
   return NNFW_STATUS_NO_ERROR;
 }
 
-NNFW_STATUS nnfw_session::prepare_pipeline(const char *map_file_path)
+NNFW_STATUS nnfw_session::prepare_pipeline(const char *)
 {
-  // NOTE. If users want to run prepare_pipeline() more than one time, this could be removed.
-  if (!isStateModelLoaded())
-  {
-    std::cerr << "Error during model prepare pipeline : ";
-    if (isStateInitialized())
-    {
-      std::cerr << "prepare_pipeline should be run once";
-    }
-    else
-    {
-      std::cerr << "invalid state";
-    }
-    std::cerr << std::endl;
-    return NNFW_STATUS_INVALID_STATE;
-  }
-
-  try
-  {
-    auto model = _nnpkg->primary_model();
-    auto compiler = std::make_unique<onert::compiler::Compiler>(model, *_coptions[0]);
-    _nnpkg.reset();
-    auto artifacts = compiler->compile(_package_file_path.c_str(), map_file_path);
-
-    for (auto it = artifacts.begin(); it != artifacts.end(); ++it)
-    {
-      _executions.push_back(std::make_shared<onert::exec::Execution>(it->get()->_executors));
-    }
-    make_dependency();
-    _threads.resize(_executions.size());
-    for (uint32_t i = 0; i < _threads.size(); i++)
-    {
-      _threads[i] = std::thread(&onert::exec::Execution::runInference, _executions[i].get());
-    }
-  }
-  catch (const std::exception &e)
-  {
-    std::cerr << "Error during model prepare : " << e.what() << std::endl;
-    return NNFW_STATUS_ERROR;
-  }
-
-  _state = State::PREPARED;
-  return NNFW_STATUS_NO_ERROR;
+  std::cerr << "Pipeline prepare_pipeline: deprecated feature " << std::endl;
+  return NNFW_STATUS_ERROR;
 }
 
 NNFW_STATUS nnfw_session::run()
@@ -497,12 +453,6 @@ NNFW_STATUS nnfw_session::run()
     return NNFW_STATUS_INVALID_STATE;
   }
 
-  if (!_executions.empty())
-  {
-    std::cerr << "Error during nnfw_session::run : not supported for pipeline run" << std::endl;
-    return NNFW_STATUS_ERROR;
-  }
-
   try
   {
     _execution->execute();
@@ -532,13 +482,6 @@ NNFW_STATUS nnfw_session::run_async()
     return NNFW_STATUS_INVALID_STATE;
   }
 
-  if (!_executions.empty())
-  {
-    std::cerr << "Error during nnfw_session::run_async : not supported for pipeline run"
-              << std::endl;
-    return NNFW_STATUS_ERROR;
-  }
-
   _execution->startExecute();
 
   _state = State::RUNNING;
@@ -554,12 +497,6 @@ NNFW_STATUS nnfw_session::await()
     return NNFW_STATUS_ERROR;
   }
 
-  if (!_executions.empty())
-  {
-    std::cerr << "Error during nnfw_session::await : not supported for pipeline run" << std::endl;
-    return NNFW_STATUS_ERROR;
-  }
-
   _execution->waitFinish();
 
   _state = State::FINISHED_RUN;
@@ -583,13 +520,6 @@ NNFW_STATUS nnfw_session::set_input(uint32_t index, NNFW_TYPE /*type*/, const vo
     return NNFW_STATUS_ERROR;
   }
 
-  if (!_executions.empty())
-  {
-    std::cerr << "Error during nnfw_session::set_input : not supported for pipeline run"
-              << std::endl;
-    return NNFW_STATUS_ERROR;
-  }
-
   try
   {
     _execution->setInput(onert::ir::IOIndex(index), buffer, length);
@@ -619,13 +549,6 @@ NNFW_STATUS nnfw_session::set_output(uint32_t index, NNFW_TYPE /*type*/, void *b
     return NNFW_STATUS_ERROR;
   }
 
-  if (!_executions.empty())
-  {
-    std::cerr << "Error during nnfw_session::set_output : not supported for pipeline run"
-              << std::endl;
-    return NNFW_STATUS_ERROR;
-  }
-
   try
   {
     _execution->setOutput(onert::ir::IOIndex(index), buffer, length);
@@ -650,7 +573,7 @@ NNFW_STATUS nnfw_session::input_size(uint32_t *number)
       std::cerr << "Error during nnfw_session::input_size, number is null pointer." << std::endl;
       return NNFW_STATUS_UNEXPECTED_NULL;
     }
-    *number = primary_subgraph()->getInputs().size();
+    *number = getInputSize();
   }
   catch (const std::exception &e)
   {
@@ -672,7 +595,7 @@ NNFW_STATUS nnfw_session::output_size(uint32_t *number)
       std::cerr << "Error during nnfw_session::output_size, number is null pointer." << std::endl;
       return NNFW_STATUS_UNEXPECTED_NULL;
     }
-    *number = primary_subgraph()->getOutputs().size();
+    *number = getOutputSize();
   }
   catch (const std::exception &e)
   {
@@ -684,6 +607,13 @@ NNFW_STATUS nnfw_session::output_size(uint32_t *number)
 
 NNFW_STATUS nnfw_session::set_input_layout(uint32_t index, NNFW_LAYOUT layout)
 {
+  if (!isStatePreparedOrFinishedRun())
+  {
+    std::cerr << "Error during nnfw_session::set_input_layout : "
+              << "set_input_layout should be called after prepare" << std::endl;
+    return NNFW_STATUS_INVALID_STATE;
+  }
+
   try
   {
     if (layout != NNFW_LAYOUT_NONE && layout != NNFW_LAYOUT_CHANNELS_FIRST &&
@@ -692,14 +622,8 @@ NNFW_STATUS nnfw_session::set_input_layout(uint32_t index, NNFW_LAYOUT layout)
       std::cerr << "Error during nnfw_session::set_input_layout, not supported layout" << std::endl;
       return NNFW_STATUS_ERROR;
     }
-    if (_execution)
-    {
-      _execution->setInputLayout(onert::ir::IOIndex(index), convertLayout(layout));
-    }
-    else
-    {
-      _executions.at(0)->setInputLayout(onert::ir::IOIndex(index), convertLayout(layout));
-    }
+
+    _execution->setInputLayout(onert::ir::IOIndex(index), convertLayout(layout));
   }
   catch (const std::exception &e)
   {
@@ -711,6 +635,13 @@ NNFW_STATUS nnfw_session::set_input_layout(uint32_t index, NNFW_LAYOUT layout)
 
 NNFW_STATUS nnfw_session::set_output_layout(uint32_t index, NNFW_LAYOUT layout)
 {
+  if (!isStatePreparedOrFinishedRun())
+  {
+    std::cerr << "Error during nnfw_session::set_output_layout : "
+              << "set_output_layout should be called after prepare" << std::endl;
+    return NNFW_STATUS_INVALID_STATE;
+  }
+
   try
   {
     if (layout != NNFW_LAYOUT_NONE && layout != NNFW_LAYOUT_CHANNELS_FIRST &&
@@ -720,15 +651,8 @@ NNFW_STATUS nnfw_session::set_output_layout(uint32_t index, NNFW_LAYOUT layout)
                 << std::endl;
       return NNFW_STATUS_ERROR;
     }
-    if (_execution)
-    {
-      _execution->setOutputLayout(onert::ir::IOIndex(index), convertLayout(layout));
-    }
-    else
-    {
-      _executions.at(_executions.size() - 1)
-        ->setOutputLayout(onert::ir::IOIndex(index), convertLayout(layout));
-    }
+
+    _execution->setOutputLayout(onert::ir::IOIndex(index), convertLayout(layout));
   }
   catch (const std::exception &e)
   {
@@ -771,27 +695,13 @@ NNFW_STATUS nnfw_session::apply_tensorinfo(uint32_t index, nnfw_tensorinfo ti)
 
   if (!isStatePreparedOrFinishedRun())
   {
-    // In this case, if we apply input shape in primary_subgraph, it will propagate after
-    // compilation and excution
-    auto model = _nnpkg->primary_model();
-    auto primary_subgraph = model->primary_subgraph();
-    auto ind = primary_subgraph->getInputs().at(index);
-    auto &input = primary_subgraph->operands().at(ind);
 
-    // overwrite input shape with the shape from ti
-    input.info().shape(new_shape);
+    // In this case, if we apply input shape, it will propagate after compilation and execution
+    auto &info = _nnpkg->inputInfo(index);
+    info.shape(new_shape);
   }
   else // when called after nnfw_session::prepare()
-  {
-    if (_execution)
-    {
-      _execution->changeInputShape(onert::ir::IOIndex(index), new_shape);
-    }
-    else
-    {
-      _executions.at(0)->changeInputShape(onert::ir::IOIndex(index), new_shape);
-    }
-  }
+    _execution->changeInputShape(onert::ir::IOIndex(index), new_shape);
 
   return NNFW_STATUS_NO_ERROR;
 }
@@ -815,21 +725,26 @@ NNFW_STATUS nnfw_session::input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
                 << std::endl;
       return NNFW_STATUS_UNEXPECTED_NULL;
     }
-    if (index >= primary_subgraph()->getInputs().size())
+
+    if (index >= getInputSize())
     {
       std::cerr << "Error during nnfw_session::input_tensorinfo, index is out of range."
                 << std::endl;
       return NNFW_STATUS_ERROR;
     }
-    auto opidx = primary_subgraph()->getInputs().at(index);
-    auto shape = primary_subgraph()->operands().at(opidx).shape();
-    if (isStatePreparedOrFinishedRun())
+
+    if (isStateModelLoaded())
+    {
+      auto info = _nnpkg->inputInfo(index);
+      fillTensorInfo(ti, info.shape(), info.typeInfo().type());
+    }
+    else
     {
-      shape = _execution ? _execution->getInputShape(onert::ir::IOIndex{index})
-                         : _executions.at(0)->getInputShape(onert::ir::IOIndex{index});
+      auto io_index = onert::ir::IOIndex{index};
+      auto shape = _execution->getInputShape(io_index);
+      auto dtype = _compiler_artifact->_executors->inputInfo(io_index).typeInfo().type();
+      fillTensorInfo(ti, shape, dtype);
     }
-    auto dtype = primary_subgraph()->operands().at(opidx).typeInfo().type();
-    fillTensorInfo(ti, shape, dtype);
   }
   catch (const std::exception &e)
   {
@@ -851,26 +766,27 @@ NNFW_STATUS nnfw_session::output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
     return NNFW_STATUS_UNEXPECTED_NULL;
   }
 
-  if (index >= primary_subgraph()->getOutputs().size())
-  {
-    std::cerr << "Error during nnfw_session::output_tensorinfo, index is out of range."
-              << std::endl;
-    return NNFW_STATUS_ERROR;
-  }
-
   try
   {
-    auto opidx = primary_subgraph()->getOutputs().at(index);
-    auto shape = primary_subgraph()->operands().at(opidx).shape();
-    // If it is called after `nnfw_run` then get the shape from Execution, not from the graph
-    if (isStateFinishedRun())
+    if (index >= getOutputSize())
+    {
+      std::cerr << "Error during nnfw_session::output_tensorinfo, index is out of range."
+                << std::endl;
+      return NNFW_STATUS_ERROR;
+    }
+
+    if (isStateModelLoaded())
     {
-      shape = _execution
-                ? _execution->getOutputShape(onert::ir::IOIndex{index})
-                : _executions.at(_executions.size() - 1)->getOutputShape(onert::ir::IOIndex{index});
+      auto info = _nnpkg->outputInfo(index);
+      fillTensorInfo(ti, info.shape(), info.typeInfo().type());
+    }
+    else
+    {
+      auto io_index = onert::ir::IOIndex{index};
+      auto shape = _execution->getOutputShape(io_index);
+      auto dtype = _compiler_artifact->_executors->outputInfo(io_index).typeInfo().type();
+      fillTensorInfo(ti, shape, dtype);
     }
-    auto dtype = primary_subgraph()->operands().at(opidx).typeInfo().type();
-    fillTensorInfo(ti, shape, dtype);
   }
   catch (const std::exception &e)
   {
@@ -881,86 +797,16 @@ NNFW_STATUS nnfw_session::output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
   return NNFW_STATUS_NO_ERROR;
 }
 
-void nnfw_session::make_dependency()
+NNFW_STATUS nnfw_session::push_pipeline_input(std::vector<void *> *, std::vector<uint32_t> *)
 {
-  for (uint32_t out_exe = 0; out_exe < _executions.size(); out_exe++)
-  {
-    auto &out_graph = _executions[out_exe]->primary_subgraph();
-    for (uint32_t in_exe = 0; in_exe < _executions.size(); in_exe++)
-    {
-      if (out_exe == in_exe)
-        continue;
-      auto &in_graph = _executions[in_exe]->primary_subgraph();
-      for (auto out = out_graph._name_to_output_begin(); out != out_graph._name_to_output_end();
-           out++)
-      {
-        auto out_opidx = out_graph.getOutputs().at(out->second);
-        auto out_shape = out_graph.operands().at(out_opidx).shape();
-        for (auto in = in_graph._name_to_input_begin(); in != in_graph._name_to_input_end(); in++)
-        {
-          if (out->first != in->first)
-            continue;
-
-          auto in_opidx = in_graph.getInputs().at(in->second);
-          auto in_shape = in_graph.operands().at(in_opidx).shape();
-          if (out_shape.rank() != in_shape.rank())
-            continue;
-
-          bool is_same = true;
-          for (int32_t i = 0; i < out_shape.rank(); i++)
-          {
-            if (out_shape.dim(i) != in_shape.dim(i))
-            {
-              is_same = false;
-              break;
-            }
-          }
-
-          if (is_same)
-            _executions[out_exe]->pushNextExe(_executions[in_exe], out->second, in->second);
-        }
-      }
-    }
-  }
-}
-
-NNFW_STATUS nnfw_session::push_pipeline_input(std::vector<void *> *inputs,
-                                              std::vector<uint32_t> *lengths)
-{
-  static uint32_t count = 0;
-  if (inputs->empty())
-  {
-    _executions[0]->setFinish();
-    for (uint32_t i = 0; i < _threads.size(); i++)
-    {
-      _threads[i].join();
-    }
-    return NNFW_STATUS_NO_ERROR;
-  }
-  _executions[0]->asyncIoDescSemWait();
-  _executions[0]->createNewAsyncDesc(count++);
-  for (uint32_t i = 0; i < inputs->size(); i++)
-  {
-    _executions[0]->executeAsyncInput(onert::ir::IOIndex(i), inputs->at(i), lengths->at(i));
-  }
-  _executions[0]->asyncIoDescSemPost();
-  return NNFW_STATUS_NO_ERROR;
+  std::cerr << "Pipeline push_pipeline_input: deprecated feature " << std::endl;
+  return NNFW_STATUS_ERROR;
 }
 
-NNFW_STATUS nnfw_session::pop_pipeline_output(std::vector<void *> *outputs)
+NNFW_STATUS nnfw_session::pop_pipeline_output(std::vector<void *> *)
 {
-  auto results = _executions[_executions.size() - 1]->getAsyncResults();
-  while (results->empty())
-  {
-    if (_executions[_executions.size() - 1]->stopWait())
-      return NNFW_STATUS_ERROR;
-  }
-
-  auto result = results->front();
-  results->pop_front();
-  for (uint32_t i = 0; i < result.size(); i++)
-    outputs->push_back(result[i]);
-  return NNFW_STATUS_NO_ERROR;
+  std::cerr << "Pipeline pop_pipeline_output: deprecated feature " << std::endl;
+  return NNFW_STATUS_ERROR;
 }
 
 NNFW_STATUS nnfw_session::register_custom_operation(const std::string &id,
@@ -1088,10 +934,6 @@ NNFW_STATUS nnfw_session::set_config(const char *key, const char *value)
   {
     options.he_profiling_mode = toBool(value);
   }
-  else if (skey == config::DISABLE_COMPILE)
-  {
-    options.disable_compile = toBool(value);
-  }
   else
   {
     return NNFW_STATUS_ERROR;
@@ -1103,23 +945,41 @@ const onert::ir::Graph *nnfw_session::primary_subgraph()
 {
   if (_nnpkg != nullptr)
   {
-    assert(_execution == nullptr && _executions.empty());
+    assert(_execution == nullptr);
     return _nnpkg->primary_model()->primary_subgraph().get();
   }
   else
   {
-    assert(_execution != nullptr || !_executions.empty());
-    // TODO Remove const_cast
+    assert(_execution != nullptr);
     // We assumed the graph will not change after compilation, but shape could change
-    if (!_executions.empty())
-    {
-      return &_executions[0]->primary_parentgraph();
-    }
-
     return &_execution->primary_subgraph();
   }
 }
 
+uint32_t nnfw_session::getInputSize()
+{
+  if (isStateInitialized())
+    throw std::runtime_error{"Model is not loaded yet"};
+
+  if (isStateModelLoaded())
+    return _nnpkg->inputSize();
+
+  // Session is prepared (general inference)
+  return _compiler_artifact->_executors->inputSize();
+}
+
+uint32_t nnfw_session::getOutputSize()
+{
+  if (isStateInitialized())
+    throw std::runtime_error{"Model is not loaded yet"};
+
+  if (isStateModelLoaded())
+    return _nnpkg->outputSize();
+
+  // Session is prepared (general inference)
+  return _compiler_artifact->_executors->outputSize();
+}
+
 NNFW_STATUS nnfw_session::get_config(const char *key, char *value, size_t value_size)
 {
   if (!isStateModelLoaded())
@@ -1174,7 +1034,7 @@ bool nnfw_session::isStateInitialized()
   {
     assert(_nnpkg == nullptr);
     assert(_coptions.empty());
-    assert(_execution == nullptr && _executions.empty());
+    assert(_execution == nullptr);
     return true;
   }
   else
@@ -1189,7 +1049,7 @@ bool nnfw_session::isStateModelLoaded()
   {
     assert(_nnpkg != nullptr);
     assert(!_coptions.empty());
-    assert(_execution == nullptr && _executions.empty());
+    assert(_execution == nullptr);
     return true;
   }
   else
@@ -1204,7 +1064,7 @@ bool nnfw_session::isStatePrepared()
   {
     assert(_nnpkg == nullptr);
     assert(!_coptions.empty());
-    assert(_execution != nullptr || !_executions.empty());
+    assert(_execution != nullptr);
     return true;
   }
   else
@@ -1219,7 +1079,7 @@ bool nnfw_session::isStateRunning()
   {
     assert(_nnpkg == nullptr);
     assert(!_coptions.empty());
-    assert(_execution != nullptr || !_executions.empty());
+    assert(_execution != nullptr);
     return true;
   }
   return false;
@@ -1231,7 +1091,7 @@ bool nnfw_session::isStateFinishedRun()
   {
     assert(_nnpkg == nullptr);
     assert(!_coptions.empty());
-    assert(_execution != nullptr || !_executions.empty());
+    assert(_execution != nullptr);
     return true;
   }
   else
index 9b729fd..8e2c2fb 100644 (file)
@@ -136,9 +136,6 @@ public:
   NNFW_STATUS set_available_backends(const char *backends);
   NNFW_STATUS set_op_backend(const char *op, const char *backend);
 
-  // accessor
-  std::vector<std::shared_ptr<onert::exec::Execution>> *get_executions() { return &_executions; }
-
   //
   // Internal-only API
   //
@@ -151,7 +148,6 @@ public:
   //
   // Experimental API
   //
-  void make_dependency();
   NNFW_STATUS push_pipeline_input(std::vector<void *> *inputs, std::vector<uint32_t> *lengths);
   NNFW_STATUS pop_pipeline_output(std::vector<void *> *outputs);
 
@@ -166,6 +162,9 @@ public:
 
 private:
   const onert::ir::Graph *primary_subgraph();
+  uint32_t getInputSize();
+  uint32_t getOutputSize();
+
   bool isStateInitialized();
   bool isStateModelLoaded();
   bool isStatePrepared();
@@ -181,8 +180,6 @@ private:
   std::unique_ptr<onert::exec::Execution> _execution;
   std::shared_ptr<onert::api::CustomKernelRegistry> _kernel_registry;
   std::vector<std::thread> _threads;
-  std::vector<std::shared_ptr<onert::exec::Execution>> _executions;
-  std::string _package_file_path;
 };
 
 #endif // __API_NNFW_API_INTERNAL_H__
index e709286..5b0ec92 100644 (file)
@@ -256,7 +256,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
 
   auto output_tensor = _tensor_reg->getAclTensor(ofm_index);
   std::vector<const ::arm_compute::ICLTensor *> input_tensors;
-  for (auto &ifm_ind : input_indexes)
+  for (const auto &ifm_ind : input_indexes)
     input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
 
   std::unique_ptr<::arm_compute::IFunction> fn;
index e008fd6..b0b5ca6 100644 (file)
@@ -162,7 +162,7 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::registerTensorInfo(
     auto &offset = parent_info.coordinates;
     auto frontend_layout = parent_info.frontend_layout;
 
-    assert(obj.shape().rank() <= ir::Shape::MAX_RANK);
+    assert(obj.shape().rank() <= ir::Shape::kMaxRank);
     auto shape = obj.shape();
     if (_operands.at(parent_index).shape().rank() >= 4 && frontend_layout == ir::Layout::NHWC &&
         backend_layout == ir::Layout::NCHW)
@@ -218,11 +218,11 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::allocate(void)
 {
   auto lifetime_map = cl_common::createLifetimeMap(_lifetime_seq, _parent_map);
 
-  for (auto &entry : lifetime_map)
+  for (const auto &entry : lifetime_map)
   {
-    auto &use = entry.second;
-    auto use_type = use.first;
-    auto use_index = use.second;
+    const auto &use = entry.second;
+    const auto &use_type = use.first;
+    const auto &use_index = use.second;
     assert(use_index.valid());
     if (use_type == UsesType::FIRST)
       _tensor_mgr->startLifetime(use_index);
@@ -255,9 +255,9 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::buildTensors(void)
   assert(_tensor_mgr->nonconstTensors().size() == 0);
 
   // Normal tensors
-  for (auto &entry : _tensor_info_map)
+  for (const auto &entry : _tensor_info_map)
   {
-    auto ind = entry.first;
+    const auto &ind = entry.first;
     if (_parent_map.count(ind) > 0)
       continue;
 
@@ -273,9 +273,9 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::buildTensors(void)
   assert(_tensor_mgr->nonconstSubtensors().size() == 0);
   // TODO Iterate `_parent_map` instead, once the optimizer bug is fixed
   //      `Optimizer` iterates the entire Operations, so there is a bug if iterating _parent_map
-  for (auto &entry : _tensor_info_map)
+  for (const auto &entry : _tensor_info_map)
   {
-    auto ind = entry.first;
+    const auto &ind = entry.first;
     if (_parent_map.count(ind) == 0)
       continue;
 
@@ -343,7 +343,7 @@ template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
 bool AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::areSubTensorsOf(
   const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq)
 {
-  for (auto &cand : seq)
+  for (const auto &cand : seq)
   {
     if (!isSubTensorOf(parent, cand))
     {
index 7bb72d7..5536d27 100644 (file)
@@ -65,7 +65,7 @@ public:
       .operands()
       .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
 
-    for (auto &it : ret)
+    for (auto &&it : ret)
     {
       auto &fn_seq = it.second;
       fn_seq->iterate([&](exec::IFunction &ifunc) {
index e6f7b84..da48a78 100644 (file)
@@ -50,7 +50,7 @@ FunctionMap BackendContext::genKernels()
     .operands()
     .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
 
-  for (auto &it : ret)
+  for (auto &&it : ret)
   {
     auto &fn_seq = it.second;
     fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
index 762ee73..896883b 100644 (file)
@@ -279,7 +279,7 @@ void KernelGenerator::visit(const ir::operation::AddN &node)
   const auto output_index{node.getOutputs().at(0)};
 
   std::vector<const IPortableTensor *> input_tensors;
-  for (auto &input_idx : node.getInputs())
+  for (const auto &input_idx : node.getInputs())
     input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));
 
   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
@@ -386,7 +386,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
 
   std::vector<const IPortableTensor *> input_tensors;
-  for (auto &ifm_idx : node.getInputs())
+  for (const auto &ifm_idx : node.getInputs())
     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
 
   auto fn = std::make_unique<ops::ConcatLayer>();
@@ -626,7 +626,7 @@ void KernelGenerator::visit(const ir::operation::Einsum &node)
 
   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
   std::vector<const IPortableTensor *> input_tensors;
-  for (auto &ifm_idx : node.getInputs())
+  for (const auto &ifm_idx : node.getInputs())
     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
 
   const auto equation = node.param().equation;
@@ -643,7 +643,7 @@ void KernelGenerator::visit(const ir::operation::Custom &node)
   auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
                           std::vector<custom::TypeInfo> &types,
                           std::vector<IPortableTensor *> &tensors) {
-    for (auto &idx : opSeq)
+    for (const auto &idx : opSeq)
     {
       const auto &operand = _ctx.at(idx);
       // TODO make sure using `_current_layout` is correct for custom operations
@@ -750,7 +750,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
 
   std::vector<const IPortableTensor *> input_tensors;
-  for (auto &ifm_idx : node.getInputs())
+  for (const auto &ifm_idx : node.getInputs())
     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
 
   auto fn = std::make_unique<ops::PackLayer>();
@@ -772,7 +772,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
 
   std::vector<IPortableTensor *> output_tensors;
-  for (auto &output_idx : node.getOutputs())
+  for (const auto &output_idx : node.getOutputs())
     output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
 
   auto fn = std::make_unique<ops::UnpackLayer>();
@@ -934,7 +934,7 @@ void KernelGenerator::visit(const ir::operation::Split &node)
   auto axis_tensor = _tensor_reg->getPortableTensor(axis_idx);
 
   std::vector<IPortableTensor *> out_tensors;
-  for (auto &output_idx : node.getOutputs())
+  for (const auto &output_idx : node.getOutputs())
     out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
 
   auto fn = std::make_unique<ops::SplitLayer>();
@@ -1261,7 +1261,7 @@ void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
 
   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
   std::vector<const IPortableTensor *> input_tensors;
-  for (auto &ifm_idx : node.getInputs())
+  for (const auto &ifm_idx : node.getInputs())
     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
 
   const auto epsilon = node.param().epsilon;
@@ -1372,7 +1372,7 @@ void KernelGenerator::visit(const ir::operation::SplitV &node)
   auto in_split_dim = _tensor_reg->getPortableTensor(split_dim);
 
   std::vector<IPortableTensor *> out_tensors;
-  for (auto &output_idx : node.getOutputs())
+  for (const auto &output_idx : node.getOutputs())
     out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
 
   auto fn = std::make_unique<ops::SplitVLayer>();
index d67ba16..cdf9655 100644 (file)
@@ -28,6 +28,7 @@
 #include "TensorBuilder.h"
 
 #include "tensorflow/lite/delegates/gpu/cl/environment.h"
+#include "tensorflow/lite/delegates/gpu/common/precision.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 
 namespace onert
@@ -55,15 +56,16 @@ public:
     {
       return nullptr;
     }
-    auto tm = createTensorManager(&environment->context());
 
-    auto tr = std::make_shared<TensorRegistry>(tm);
-
-    tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info;
-    create_info.precision = tflite::gpu::cl::CalculationsPrecision::F32;
+    tflite::gpu::CreateGpuModelInfo create_info;
+    create_info.precision = tflite::gpu::CalculationsPrecision::F32;
     create_info.storage_type =
       tflite::gpu::cl::GetStorageTypeWithMinimalMemoryConsumption(environment->device().GetInfo());
-    create_info.hints.Add(tflite::gpu::cl::ModelHints::kFastestInference);
+    create_info.hints.Add(tflite::gpu::ModelHints::kFastestInference);
+
+    auto tm = createTensorManager(&environment->context(), create_info, environment);
+
+    auto tr = std::make_shared<TensorRegistry>(tm);
 
     auto cc = std::make_shared<tflite::gpu::cl::CreationContext>();
     cc->device = environment->GetDevicePtr();
@@ -71,7 +73,7 @@ public:
     cc->queue = environment->queue();
     cc->cache = environment->program_cache();
 
-    auto tb = std::make_shared<TensorBuilder>(operands, tm, create_info, environment);
+    auto tb = std::make_shared<TensorBuilder>(operands, tm);
     context->tensor_registry = tr;
     context->tensor_builder = tb;
 
index ec94421..b09319d 100644 (file)
@@ -86,6 +86,32 @@ ITensorRegistry *BackendContext::genTensors()
   return tensor_registry.get();
 }
 
+FunctionMap BackendContext::genKernels()
+{
+  FunctionMap fn_map;
+
+  for (auto op_ind : _data.op_order)
+  {
+    auto fn_seq = kernel_gen->generate(op_ind);
+    fn_map.emplace_back(op_ind, std::move(fn_seq));
+  }
+
+  kernel_gen->get_operation(fn_map);
+  tensor_builder->allocate();
+  // NOTE For memory optimization, we want to free some operand data
+  const_cast<ir::Graph &>(*_data.graph)
+    .operands()
+    .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
+
+  for (auto &&it : fn_map)
+  {
+    auto &fn_seq = it.second;
+    fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+  }
+
+  return fn_map;
+}
+
 } // namespace gpu_cl
 } // namespace backend
 } // namespace onert
index 7412d2b..da5daae 100644 (file)
@@ -25,6 +25,7 @@
 #include "ConstantInitializer.h"
 #include "KernelGenerator.h"
 #include "TensorBuilder.h"
+
 #include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
 
 namespace onert
@@ -52,6 +53,7 @@ public:
   }
 
   ITensorRegistry *genTensors() override;
+  FunctionMap genKernels() override;
 
 protected:
   void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
index eb19642..d62dbd8 100644 (file)
@@ -24,7 +24,26 @@ if(NOT Fp16_FOUND)
   return()
 endif(NOT Fp16_FOUND)
 
-nnas_find_package(TensorFlowGpu QUIET)
+nnas_find_package(VulkanSource QUIET)
+if(NOT VulkanSource_FOUND)
+  return()
+endif(NOT VulkanSource_FOUND)
+
+nnas_find_package(Opengl_HeadersSource QUIET)
+if(NOT Opengl_HeadersSource_FOUND)
+  return()
+endif(NOT Opengl_HeadersSource_FOUND)
+
+nnas_find_package(Egl_HeadersSource QUIET)
+if(NOT Egl_HeadersSource_FOUND)
+  return()
+endif(NOT Egl_HeadersSource_FOUND)
+
+if (NOT ${TARGET_OS} MATCHES "tizen")
+  nnas_find_package(FlatBuffers REQUIRED)
+endif ()
+
+nnfw_find_package(TensorFlowGpu QUIET)
 if(NOT TensorFlowGpu_FOUND)
   message(FATAL_ERROR 'TensorFlowGpu lib not found')
   return()
@@ -35,18 +54,32 @@ file(GLOB_RECURSE SOURCES "*.cc")
 add_library(${LIB_ONERT_BACKEND_GPU_CL} SHARED ${SOURCES})
 
 target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
-target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${TENSORFLOWGPU_SOURCE_DIR})
+target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${TensorFlowSource_DIR})
+target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${VulkanSource_DIR}/include)
+target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${Opengl_HeadersSource_DIR}/api)
+target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${Egl_HeadersSource_DIR}/api)
+
+if (${TARGET_OS} MATCHES "tizen")
+    target_compile_options(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE "-Wno-error=deprecated-copy")
+endif ()
+
+target_compile_options(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE "-DCL_TARGET_OPENCL_VERSION=220" "-DEGL_NO_X11")
 
 target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE abseil)
 target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE dl)
 target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE farmhash)
-target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} INTERFACE Open_CL_Headers)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE OpenCL_Headers)
 target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE fp16)
 target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE TensorFlowGpu)
 target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE onert_core)
 target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${LIB_ONERT_BACKEND_CL_COMMON})
 target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE nnfw_common)
 target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE nnfw_coverage)
+if (${TARGET_OS} MATCHES "tizen")
+  target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE flatbuffers)
+else()
+  target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE flatbuffers::flatbuffers)
+endif ()
 
 set_target_properties(${LIB_ONERT_BACKEND_GPU_CL} PROPERTIES OUTPUT_NAME backend_gpu_cl)
 
@@ -55,4 +88,8 @@ if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
                      COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_GPU_CL}>)
 endif()
 
+add_library(tflite_ignore_warnings INTERFACE)
+target_compile_options(tflite_ignore_warnings INTERFACE -Wno-unused-parameter -Wno-sign-compare)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE tflite_ignore_warnings)
+
 install(TARGETS ${LIB_ONERT_BACKEND_GPU_CL} DESTINATION lib)
index 5e8a11a..6afbd49 100644 (file)
@@ -22,9 +22,9 @@
 #include <vector>
 #include <memory>
 
-#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
-#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_operation.h"
 
 namespace onert
 {
@@ -35,53 +35,51 @@ namespace gpu_cl
 class ClFunction : public ::onert::exec::IFunction
 {
 public:
-  ClFunction() : _gpu_operations(), _creation_context() {}
-
-public:
-  void configure(std::shared_ptr<tflite::gpu::cl::CreationContext> creation_context)
+  ClFunction(std::shared_ptr<tflite::gpu::cl::CreationContext> creation_context)
+    : _creation_context(creation_context), _gpu_operations()
   {
-    _creation_context = creation_context;
   }
 
-  void add_operation(std::unique_ptr<tflite::gpu::cl::GPUOperation> gpu_operation)
+public:
+  void add_operation(tflite::gpu::cl::ClOperation *gpu_operation)
   {
-    _gpu_operations.push_back(std::move(gpu_operation));
+    _gpu_operations.push_back(gpu_operation);
   }
 
   void run() override
   {
-    for (const auto &gpu_operation : _gpu_operations)
+    for (const auto gpu_operation : _gpu_operations)
     {
       if (!gpu_operation->AddToQueue(_creation_context->queue).ok())
       {
         throw std::runtime_error("Failed to AddToQueue.");
       }
-      if (!_creation_context->queue->WaitForCompletion().ok())
-      {
-        throw std::runtime_error("Failed to WaitForCompletion.");
-      }
     }
   }
 
   void prepare() override
   {
-    for (const auto &gpu_operation : _gpu_operations)
+    for (const auto gpu_operation : _gpu_operations)
     {
+      if (!gpu_operation->GetGpuOperation().AssembleCode(_creation_context->GetGpuInfo()).ok())
+      {
+        throw std::runtime_error("Failed to AssembleCode.");
+      }
       if (!gpu_operation->Compile(*_creation_context).ok())
       {
         throw std::runtime_error("Failed to Compile.");
       }
-
       if (!gpu_operation->UpdateParams().ok())
       {
         throw std::runtime_error("Failed to UpdateParams.");
       }
+      gpu_operation->GetGpuOperation().args_.ReleaseCPURepresentation();
     }
   }
 
 private:
-  std::vector<std::unique_ptr<tflite::gpu::cl::GPUOperation>> _gpu_operations;
   std::shared_ptr<tflite::gpu::cl::CreationContext> _creation_context;
+  std::vector<tflite::gpu::cl::ClOperation *> _gpu_operations;
 };
 
 } // namespace gpu_cl
index 6a455bb..f8f94aa 100644 (file)
@@ -41,9 +41,6 @@ public:
   bool supportDynamicTensor() override { return false; }
   bool supportFP16() override { return true; }
   std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
-
-private:
-  void *_handle = nullptr;
 };
 
 } // namespace gpu_cl
index 04edc39..a24c4f5 100644 (file)
 #include "TensorManager.h"
 
 #include "tensorflow/lite/delegates/gpu/common/shape.h"
-#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
-#include "tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.h"
-#include "tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.h"
-#include "tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h"
+#include "tensorflow/lite/delegates/gpu/common/tensor.h"
+#include "tensorflow/lite/delegates/gpu/common/tasks/elementwise.h"
+#include "tensorflow/lite/delegates/gpu/common/selectors/convolution_selector.h"
+#include "tensorflow/lite/delegates/gpu/common/selectors/dw_convolution_selector.h"
+#include "tensorflow/lite/delegates/gpu/common/selectors/simple_selectors.h"
 
 #include "ir/Operations.h"
 #include "ir/Operations.Include.h"
@@ -38,9 +39,6 @@
 #include "util/logging.h"
 #include "util/Utils.h"
 
-using namespace tflite::gpu;
-using namespace tflite::gpu::cl;
-
 namespace onert
 {
 namespace backend
@@ -48,39 +46,170 @@ namespace backend
 namespace gpu_cl
 {
 
-HW ToHW(int32_t h, int32_t w) { return HW(h > 0 ? h : 1, w > 0 ? w : 1); }
+void KernelGenerator::addClNode(const std::vector<ir::OperandIndex> &inputs,
+                                const std::vector<ir::OperandIndex> &outputs,
+                                std::unique_ptr<tflite::gpu::GPUOperation> gpu_op)
+{
+  tflite::gpu::cl::CLNode cl_node;
+  cl_node.cl_operation.Init(std::move(gpu_op));
+  cl_node.inputs.resize(inputs.size());
+  for (size_t i = 0; i < inputs.size(); ++i)
+  {
+    cl_node.inputs[i] = inputs[i].value();
+  }
+  cl_node.outputs.resize(outputs.size());
+  for (size_t i = 0; i < outputs.size(); ++i)
+  {
+    cl_node.outputs[i] = outputs[i].value();
+  }
+  _nodes.push_back(std::move(cl_node));
+  _operation_indexes.push_back(_operation_index);
+  return;
+}
+
+void KernelGenerator::get_operation(FunctionMap &Functions)
+{
+  size_t size = _nodes.size();
+  size_t i = 0;
+  for (auto &&it : Functions)
+  {
+    auto index = it.first;
+    auto node_index = _operation_indexes[i];
+    while (index == node_index)
+    {
+      auto &fn_seq = it.second;
+      auto &node = _nodes[i++];
+      for (size_t j = 0; j < node.inputs.size(); ++j)
+      {
+        uint32_t idx = node.inputs[j];
+        node.cl_operation.GetGpuOperation().SetSrc(
+          _tensor_reg->getClTensor(ir::OperandIndex{idx})->handle(), j);
+      }
+      for (size_t j = 0; j < node.outputs.size(); ++j)
+      {
+        uint32_t idx = node.outputs[j];
+        node.cl_operation.GetGpuOperation().SetDst(
+          _tensor_reg->getClTensor(ir::OperandIndex{idx})->handle(), j);
+      }
+      fn_seq->iterate([&](exec::IFunction &ifunc) {
+        static_cast<ClFunction &>(ifunc).add_operation(&node.cl_operation);
+      });
+      if (i == size)
+      {
+        break;
+      }
+      node_index = _operation_indexes[i];
+    }
+    if (i == size)
+    {
+      break;
+    }
+  }
+}
 
-template <typename AttrT>
-void UpdatePadding(const ir::PaddingType type, const BHWC &input_shape, AttrT *attr)
+absl::Status KernelGenerator::readConstTensor(const ir::OperandIndex &index,
+                                              tflite::gpu::TensorOrScalar *param)
 {
-  if (type == ir::PaddingType::SAME)
+  const auto shape = _ctx.at(index).shape();
+  if (shape.rank() == 0 && shape.num_elements() == 1)
   {
-    attr->padding = CalculateSamePadding(input_shape, *attr);
+    tflite::gpu::Tensor<tflite::gpu::Scalar, tflite::gpu::DataType::FLOAT32> tensor;
+    tensor.shape.v = 1;
+    tensor.data.resize(1);
+    std::memcpy(&tensor.data[0], _ctx.at(index).data()->base(), _ctx.at(index).operandSize());
+    *param = tensor.data[0];
   }
   else
   {
-    attr->padding.prepended = HW(0, 0);
-    attr->padding.appended = HW(0, 0);
+    if (CheckIfLinearConvertible(&shape))
+    {
+      tflite::gpu::Tensor<tflite::gpu::Linear, tflite::gpu::DataType::FLOAT32> tensor;
+      tensor.shape.v = shape.dim(shape.rank() - 1);
+      tensor.data.resize(shape.num_elements());
+      std::memcpy(&tensor.data[0], _ctx.at(index).data()->base(), _ctx.at(index).operandSize());
+      *param = std::move(tensor);
+    }
+    else
+    {
+      tflite::gpu::Tensor<tflite::gpu::HWC, tflite::gpu::DataType::FLOAT32> tensor;
+      if (shape.rank() == 3)
+      {
+        tensor.shape.h = shape.dim(0);
+        tensor.shape.w = shape.dim(1);
+        tensor.shape.c = shape.dim(2);
+      }
+      else if (shape.rank() == 4)
+      {
+        if (shape.dim(0) != 1)
+        {
+          return absl::UnimplementedError("Batch size is not equal to 1.");
+        }
+        tensor.shape.h = shape.dim(1);
+        tensor.shape.w = shape.dim(2);
+        tensor.shape.c = shape.dim(3);
+      }
+      else
+      {
+        return absl::InvalidArgumentError(
+          "Expected a 3D tensor of shape HxWxC or a 4D tensor of shape 1xHxWxC.");
+      }
+      tensor.data.resize(shape.num_elements());
+      std::memcpy(&tensor.data[0], _ctx.at(index).data()->base(), _ctx.at(index).operandSize());
+      *param = std::move(tensor);
+    }
   }
+  return absl::OkStatus();
 }
 
-PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
+absl::Status KernelGenerator::readConstTensor(
+  const ir::OperandIndex &index,
+  absl::variant<tflite::gpu::Tensor<tflite::gpu::Linear, tflite::gpu::DataType::FLOAT32>,
+                tflite::gpu::Tensor<tflite::gpu::HWC, tflite::gpu::DataType::FLOAT32>> *alpha)
 {
-  switch (type_ir)
+  const auto shape = _ctx.at(index).shape();
+  if (CheckIfLinearConvertible(&shape))
   {
-    case ir::operation::Pool2D::PoolType::AVG:
-      return PoolingType::AVERAGE;
-    case ir::operation::Pool2D::PoolType::MAX:
-      return PoolingType::MAX;
-    default:
-      throw std::runtime_error("gpu_Cl KernelGenerator : Not supported operation yet");
+    tflite::gpu::Tensor<tflite::gpu::Linear, tflite::gpu::DataType::FLOAT32> tensor;
+    tensor.shape.v = shape.dim(shape.rank() - 1);
+    tensor.data.resize(shape.num_elements());
+    std::memcpy(&tensor.data[0], _ctx.at(index).data()->base(), _ctx.at(index).operandSize());
+    *alpha = std::move(tensor);
   }
+  else
+  {
+    tflite::gpu::Tensor<tflite::gpu::HWC, tflite::gpu::DataType::FLOAT32> tensor;
+    if (shape.rank() == 3)
+    {
+      tensor.shape.h = shape.dim(0);
+      tensor.shape.w = shape.dim(1);
+      tensor.shape.c = shape.dim(2);
+    }
+    else if (shape.rank() == 4)
+    {
+      if (shape.dim(0) != 1)
+      {
+        return absl::UnimplementedError("Batch size is not equal to 1.");
+      }
+      tensor.shape.h = shape.dim(1);
+      tensor.shape.w = shape.dim(2);
+      tensor.shape.c = shape.dim(3);
+    }
+    else
+    {
+      return absl::InvalidArgumentError(
+        "Expected a 3D tensor of shape HxWxC or a 4D tensor of shape 1xHxWxC.");
+    }
+    tensor.data.resize(shape.num_elements());
+    std::memcpy(&tensor.data[0], _ctx.at(index).data()->base(), _ctx.at(index).operandSize());
+    *alpha = std::move(tensor);
+  }
+  return absl::OkStatus();
 }
 
-KernelGenerator::KernelGenerator(const ir::Graph &graph,
-                                 const std::shared_ptr<TensorBuilder> &tensor_builder,
-                                 const std::shared_ptr<TensorRegistry> &tensor_reg,
-                                 const std::shared_ptr<CreationContext> &creation_context)
+KernelGenerator::KernelGenerator(
+  const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+  const std::shared_ptr<TensorRegistry> &tensor_reg,
+  const std::shared_ptr<tflite::gpu::cl::CreationContext> &creation_context)
   : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()),
     _operations_ctx(graph.operations()), _current_layout{graph.layout()},
     _tensor_builder(tensor_builder), _tensor_reg(tensor_reg), _creation_context(creation_context)
@@ -89,13 +218,13 @@ KernelGenerator::KernelGenerator(const ir::Graph &graph,
 
 std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
 {
-  auto ret = std::make_unique<exec::FunctionSequence>();
-  ret->enableDynamicShapeInferer(false);
-
+  auto fn_seq = std::make_unique<exec::FunctionSequence>();
+  fn_seq->enableDynamicShapeInferer(false);
+  _operation_index = ind;
   const auto &op = _graph.operations().at(ind);
   op.accept(*this);
-  ret->append(releaseFunction());
-  return ret;
+  fn_seq->append(releaseFunction());
+  return fn_seq;
 }
 
 void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
@@ -104,63 +233,66 @@ void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
   const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
   const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
 
-  // const auto activation = node.param().activation;
+  tflite::gpu::OperationDef op_def;
+  op_def.precision = tflite::gpu::CalculationsPrecision::F32;
+
+  const bool lhs_const = _ctx.at(lhs_index).isConstant();
+  const bool rhs_const = _ctx.at(rhs_index).isConstant();
+
+  if (lhs_const && rhs_const)
+  {
+    throw std::runtime_error("No runtime input tensors for " + node.name());
+  }
+
+  auto fn = std::make_unique<ClFunction>(_creation_context);
+  std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
 
-  OperationDef op_def;
-  op_def.precision = CalculationsPrecision::F32;
+  tflite::gpu::OperationType op_type = convertArithmeticType(node.param().arithmetic_type);
 
-  op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(lhs_index)->descriptor);
-  auto lhs_shape = _tensor_reg->getClTensorReserver(lhs_index)->shape;
+  if (!lhs_const && !rhs_const)
+  {
+    auto lhs_shape = _tensor_reg->getClTensor(lhs_index)->get_info()._shape;
+    auto rhs_shape = _tensor_reg->getClTensor(rhs_index)->get_info()._shape;
+
+    bool swap =
+      (op_type == tflite::gpu::OperationType::MUL) &&
+      (lhs_shape.h <= rhs_shape.h && lhs_shape.w <= rhs_shape.w && lhs_shape.c <= rhs_shape.c);
 
-  op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(rhs_index)->descriptor);
-  auto rhs_shape = _tensor_reg->getClTensorReserver(rhs_index)->shape;
+    auto first_index = swap ? rhs_index : lhs_index;
+    auto second_index = swap ? lhs_index : rhs_index;
 
-  op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(ofm_index)->descriptor);
-  auto out_shape = _tensor_reg->getClTensorReserver(ofm_index)->shape;
+    op_def.src_tensors.push_back(_tensor_reg->getClTensor(first_index)->get_info()._desc);
+    op_def.src_tensors.push_back(_tensor_reg->getClTensor(second_index)->get_info()._desc);
+    op_def.dst_tensors.push_back(_tensor_reg->getClTensor(ofm_index)->get_info()._desc);
 
-  auto fn = std::make_unique<ClFunction>();
+    auto second_shape = _tensor_reg->getClTensor(second_index)->get_info()._shape;
 
-  std::unique_ptr<GPUOperation> gpu_op;
-  switch (node.param().arithmetic_type)
+    tflite::gpu::GPUOperation operation = CreateElementwiseTwoInput(op_def, op_type, second_shape);
+    gpu_op = std::make_unique<tflite::gpu::GPUOperation>(std::move(operation));
+
+    addClNode({first_index, second_index}, {ofm_index}, std::move(gpu_op));
+  }
+  else
   {
-    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
-    {
-      std::vector<int> channels(2);
-      channels[0] = lhs_shape.c;
-      channels[1] = rhs_shape.c;
-      SelectAdd(op_def, channels, out_shape.c, &gpu_op);
-
-      auto ofm_tensor = _tensor_reg->getClTensor(ofm_index);
-      auto lhs_tensor = _tensor_reg->getClTensor(lhs_index);
-      auto rhs_tensor = _tensor_reg->getClTensor(rhs_index);
-      gpu_op->SetSrc(lhs_tensor->handle(), ir::operation::BinaryArithmetic::Input::LHS);
-      gpu_op->SetSrc(rhs_tensor->handle(), ir::operation::BinaryArithmetic::Input::RHS);
-      gpu_op->SetDst(ofm_tensor->handle(), 0);
-
-      fn->configure(_creation_context);
-      fn->add_operation(std::move(gpu_op));
-      break;
-    }
-    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
-    {
-      // NYI
-      break;
-    }
-    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
-    {
-      // NYI
-      break;
-    }
-    case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+    auto non_const_index = rhs_const ? lhs_index : rhs_index;
+    auto const_index = rhs_const ? rhs_index : lhs_index;
+
+    op_def.dst_tensors.push_back(_tensor_reg->getClTensor(ofm_index)->get_info()._desc);
+    op_def.src_tensors.push_back(_tensor_reg->getClTensor(non_const_index)->get_info()._desc);
+
+    tflite::gpu::ElementwiseAttributes attr;
+
+    if (!readConstTensor(const_index, &attr.param).ok())
     {
-      // NYI
-      break;
+      throw std::runtime_error("BinaryArithmetic unsupported constant tensor");
     }
-    default:
-      assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations");
-      break;
-  }
 
+    tflite::gpu::GPUOperation operation =
+      CreateElementwise(_creation_context->GetGpuInfo(), op_def, op_type, attr);
+    gpu_op = absl::make_unique<tflite::gpu::GPUOperation>(std::move(operation));
+
+    addClNode({non_const_index}, {ofm_index}, std::move(gpu_op));
+  }
   _return_fn = std::move(fn);
 }
 
@@ -174,30 +306,30 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
 
   const auto param = node.param();
 
-  OperationDef op_def;
-  op_def.precision = CalculationsPrecision::F32;
+  tflite::gpu::OperationDef op_def;
+  op_def.precision = tflite::gpu::CalculationsPrecision::F32;
 
-  op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(input)->descriptor);
+  op_def.src_tensors.push_back(_tensor_reg->getClTensor(input)->get_info()._desc);
 
-  auto input_shape = _tensor_reg->getClTensorReserver(input)->shape;
-  auto kernel_shape = _tensor_reg->getClTensorReserver(kernel)->shape;
-  auto output_shape = _tensor_reg->getClTensorReserver(output)->shape;
-  auto bias_shape = _tensor_reg->getClTensorReserver(bias)->shape;
+  auto input_shape = _tensor_reg->getClTensor(input)->get_info()._shape;
+  auto kernel_shape = _tensor_reg->getClTensor(kernel)->get_info()._shape;
+  auto output_shape = _tensor_reg->getClTensor(output)->get_info()._shape;
+  auto bias_shape = _tensor_reg->getClTensor(bias)->get_info()._shape;
 
-  op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(output)->descriptor);
+  op_def.dst_tensors.push_back(_tensor_reg->getClTensor(output)->get_info()._desc);
 
-  ModelHints hints;
-  std::unique_ptr<GPUOperation> gpu_op; // = InitSingleOpSubgraph(inputs, outputs, gpu_subgraph);
+  tflite::gpu::ModelHints hints;
+  std::unique_ptr<tflite::gpu::GPUOperation>
+    gpu_op; // = InitSingleOpSubgraph(inputs, outputs, gpu_subgraph);
 
-  auto input_tensor = _tensor_reg->getClTensor(input);
   auto kernel_tensor = _tensor_reg->getClTensor(kernel);
   auto bias_tensor = _tensor_reg->getClTensor(bias);
-  auto output_tensor = _tensor_reg->getClTensor(output);
 
-  Convolution2DAttributes attr;
+  tflite::gpu::Convolution2DAttributes attr;
   attr.strides = ToHW(param.stride.vertical, param.stride.horizontal);
-  attr.dilations = HW(std::max(static_cast<u_int32_t>(1), param.dilation.height_factor),
-                      std::max(static_cast<u_int32_t>(1), param.dilation.width_factor));
+  attr.dilations =
+    tflite::gpu::HW(std::max(static_cast<u_int32_t>(1), param.dilation.height_factor),
+                    std::max(static_cast<u_int32_t>(1), param.dilation.width_factor));
 
   bool is_weight = (_ctx.at(kernel).isConstant() ? true : false);
 
@@ -220,12 +352,14 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
 
   UpdatePadding(param.padding.type, input_shape, &attr);
 
-  gpu_op = SelectConvolution(attr, output_shape, _creation_context->GetDeviceInfo(), op_def, hints);
-  gpu_op->SetSrc(input_tensor->handle(), ir::operation::Conv2D::INPUT);
+  gpu_op = SelectConvolution(attr, output_shape, _creation_context->GetGpuInfo(), op_def, hints);
 
-  auto fn = std::make_unique<ClFunction>();
+  tflite::gpu::cl::CLNode cl_node;
+  cl_node.inputs.resize(1);
+  cl_node.inputs[0] = input.value();
+  cl_node.outputs.resize(1);
 
-  fn->configure(_creation_context);
+  auto fn = std::make_unique<ClFunction>(_creation_context);
 
   const auto activation = node.param().activation;
 
@@ -233,47 +367,43 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
   {
     case ir::Activation::NONE:
     {
-      gpu_op->SetDst(output_tensor->handle(), 0);
-      fn->add_operation(std::move(gpu_op));
+      addClNode({input}, {output}, std::move(gpu_op));
       break;
     }
+    case ir::Activation::RELU:
     case ir::Activation::RELU6:
     {
-      std::unique_ptr<GPUOperation> gpu_op_1;
-      OperationDef op_def_1;
-      std::shared_ptr<cl::Tensor> new_tensor = std::make_shared<cl::Tensor>();
-
-      _new_tensors[output] = new_tensor;
-      if (!CreateTensor(*_creation_context->context, output_shape,
-                        _tensor_reg->getClTensorReserver(output)->descriptor, new_tensor.get())
-             .ok())
-      {
-        throw std::runtime_error("Error CreateTensor.");
-      }
+      std::unique_ptr<tflite::gpu::GPUOperation> gpu_op_1;
+      tflite::gpu::OperationDef op_def_1;
+      const auto shape = _ctx.at(output).shape();
+      auto new_ind = _tensor_reg->addNewClTensor(shape);
+
+      addClNode({input}, {new_ind}, std::move(gpu_op));
 
-      gpu_op->SetDst(new_tensor.get(), 0);
-      fn->add_operation(std::move(gpu_op));
-      op_def_1.precision = CalculationsPrecision::F32;
-      op_def_1.src_tensors.push_back(_tensor_reg->getClTensorReserver(output)->descriptor);
-      op_def_1.dst_tensors.push_back(_tensor_reg->getClTensorReserver(output)->descriptor);
+      op_def_1.precision = tflite::gpu::CalculationsPrecision::F32;
+      op_def_1.src_tensors.push_back(_tensor_reg->getClTensor(output)->get_info()._desc);
+      op_def_1.dst_tensors.push_back(_tensor_reg->getClTensor(output)->get_info()._desc);
 
-      //   - ReLU6: clip = 6, alpha = 0
-      ReLUAttributes attr_1;
-      attr_1.clip = 6;
+      tflite::gpu::ReLUAttributes attr_1;
+      if (activation == ir::Activation::RELU6)
+      {
+        attr_1.clip = 6;
+      }
+      else
+      {
+        attr_1.clip = 0;
+      }
       attr_1.alpha = 0;
       gpu_op_1 = SelectReLU(attr_1, op_def_1);
 
-      gpu_op_1->SetSrc(new_tensor.get(), 0);
-      gpu_op_1->SetDst(output_tensor->handle(), 0);
-      fn->add_operation(std::move(gpu_op_1));
+      addClNode({new_ind}, {output}, std::move(gpu_op_1));
       break;
     }
     default:
     {
-      throw std::runtime_error("gpu_cl KernelGenerator : Not supported operation yet");
+      throw std::runtime_error("gpu_cl KernelGenerator : Not supported Conv2D activiation");
     }
   }
-
   _return_fn = std::move(fn);
 }
 
@@ -292,28 +422,23 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
 
   const auto multiplier = node.param().multiplier;
 
-  auto ofm_tensor = _tensor_reg->getClTensor(ofm_index);
-  auto ifm_tensor = _tensor_reg->getClTensor(ifm_index);
-  auto ker_tensor = _tensor_reg->getClTensor(ker_index);
-  auto bias_tensor = _tensor_reg->getClTensor(bias_index);
-
   bool is_weight = (_ctx.at(ker_index).isConstant() ? true : false);
-  OperationDef op_def;
-  op_def.precision = CalculationsPrecision::F32;
+  tflite::gpu::OperationDef op_def;
+  op_def.precision = tflite::gpu::CalculationsPrecision::F32;
 
-  op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(ifm_index)->descriptor);
-  auto input_shape = _tensor_reg->getClTensorReserver(ifm_index)->shape;
+  op_def.src_tensors.push_back(_tensor_reg->getClTensor(ifm_index)->get_info()._desc);
+  auto input_shape = _tensor_reg->getClTensor(ifm_index)->get_info()._shape;
 
-  auto ker_shape = _tensor_reg->getClTensorReserver(ker_index)->shape;
+  auto ker_shape = _tensor_reg->getClTensor(ker_index)->get_info()._shape;
 
-  op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(ofm_index)->descriptor);
-  auto out_shape = _tensor_reg->getClTensorReserver(ofm_index)->shape;
-  auto bias_shape = _tensor_reg->getClTensorReserver(bias_index)->shape;
+  op_def.dst_tensors.push_back(_tensor_reg->getClTensor(ofm_index)->get_info()._desc);
+  auto out_shape = _tensor_reg->getClTensor(ofm_index)->get_info()._shape;
+  auto bias_shape = _tensor_reg->getClTensor(bias_index)->get_info()._shape;
 
-  DepthwiseConvolution2DAttributes attr;
+  tflite::gpu::DepthwiseConvolution2DAttributes attr;
   attr.strides = ToHW(stride.vertical, stride.horizontal);
-  attr.dilations = HW(std::max(static_cast<u_int32_t>(1), dilation.height_factor),
-                      std::max(static_cast<u_int32_t>(1), dilation.width_factor));
+  attr.dilations = tflite::gpu::HW(std::max(static_cast<u_int32_t>(1), dilation.height_factor),
+                                   std::max(static_cast<u_int32_t>(1), dilation.width_factor));
 
   if (is_weight)
   {
@@ -323,12 +448,14 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
     attr.weights.shape.w = ker_shape.w;
     attr.weights.shape.i = ker_shape.c;
     attr.weights.data.resize(ker_shape.DimensionsProduct());
-    memcpy(attr.weights.data.data(), _ctx.at(ker_index).data()->base(), ker_tensor->total_size());
+    memcpy(attr.weights.data.data(), _ctx.at(ker_index).data()->base(),
+           _ctx.at(ker_index).operandSize());
   }
   attr.bias.id = bias_index.value();
   attr.bias.shape.v = bias_shape.b != 1 ? bias_shape.b : bias_shape.c;
   attr.bias.data.resize(bias_shape.DimensionsProduct());
-  memcpy(attr.bias.data.data(), _ctx.at(bias_index).data()->base(), bias_tensor->total_size());
+  memcpy(attr.bias.data.data(), _ctx.at(bias_index).data()->base(),
+         _ctx.at(bias_index).operandSize());
   UpdatePadding(padding.type, input_shape, &attr);
 
   if (multiplier != 1)
@@ -338,7 +465,7 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
     const int filter_width = ker_shape.w;
     const int output_depth = out_shape.c;
 
-    tflite::gpu::Tensor<OHWI, DataType::FLOAT32> weights;
+    tflite::gpu::Tensor<tflite::gpu::OHWI, tflite::gpu::DataType::FLOAT32> weights;
     weights.id = attr.weights.id;
     weights.shape = tflite::gpu::OHWI(output_depth, filter_height, filter_width, input_depth);
     weights.data.resize(weights.shape.DimensionsProduct());
@@ -356,12 +483,12 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
     attr.weights = std::move(weights);
   }
 
-  auto fn = std::make_unique<ClFunction>();
-  std::unique_ptr<GPUOperation> gpu_op;
+  auto fn = std::make_unique<ClFunction>(_creation_context);
+  std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
 
   if (is_weight)
   {
-    gpu_op = SelectDWConvolution(attr, _creation_context->GetDeviceInfo(), op_def);
+    gpu_op = SelectDWConvolution(attr, _creation_context->GetGpuInfo(), op_def);
   }
   else
   {
@@ -370,57 +497,51 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
       throw std::runtime_error(
         "No support of depthwise runtime weights with channel multiplier != 1");
     }
-    gpu_op = SelectDWConvolutionDynamicWeights(attr, _creation_context->GetDeviceInfo(), op_def);
+    gpu_op = SelectDWConvolutionDynamicWeights(attr, _creation_context->GetGpuInfo(), op_def);
   }
 
-  gpu_op->SetSrc(ifm_tensor->handle(), ir::operation::DepthwiseConv2D::Input::INPUT);
-
-  fn->configure(_creation_context);
-
   const auto activation = node.param().activation;
 
   switch (activation)
   {
     case ir::Activation::NONE:
     {
-      gpu_op->SetDst(ofm_tensor->handle(), 0);
-      fn->add_operation(std::move(gpu_op));
+      addClNode({ifm_index}, {ofm_index}, std::move(gpu_op));
       break;
     }
+    case ir::Activation::RELU:
     case ir::Activation::RELU6:
     {
-      std::unique_ptr<GPUOperation> gpu_op_1;
-      OperationDef op_def_1;
-      std::shared_ptr<cl::Tensor> new_tensor = std::make_shared<cl::Tensor>();
-
-      _new_tensors[ofm_index] = new_tensor;
-      if (!CreateTensor(*_creation_context->context, out_shape,
-                        _tensor_reg->getClTensorReserver(ofm_index)->descriptor, new_tensor.get())
-             .ok())
-      {
-        throw std::runtime_error("Error CreateTensor.");
-      }
+      std::unique_ptr<tflite::gpu::GPUOperation> gpu_op_1;
+      tflite::gpu::OperationDef op_def_1;
+      const auto shape = _ctx.at(ofm_index).shape();
+      auto new_ind = _tensor_reg->addNewClTensor(shape);
+
+      addClNode({ifm_index}, {new_ind}, std::move(gpu_op));
 
-      gpu_op->SetDst(new_tensor.get(), 0);
-      fn->add_operation(std::move(gpu_op));
-      op_def_1.precision = CalculationsPrecision::F32;
-      op_def_1.src_tensors.push_back(_tensor_reg->getClTensorReserver(ofm_index)->descriptor);
-      op_def_1.dst_tensors.push_back(_tensor_reg->getClTensorReserver(ofm_index)->descriptor);
+      op_def_1.precision = tflite::gpu::CalculationsPrecision::F32;
 
-      //   - ReLU6: clip = 6, alpha = 0
-      ReLUAttributes attr_1;
-      attr_1.clip = 6;
+      op_def_1.src_tensors.push_back(_tensor_reg->getClTensor(ofm_index)->get_info()._desc);
+      op_def_1.dst_tensors.push_back(_tensor_reg->getClTensor(ofm_index)->get_info()._desc);
+
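+      // As in Conv2D, the fused activation becomes a separate ReLU kernel:
+      // clip = 6 for RELU6, clip = 0 (no upper bound) for plain RELU; alpha = 0.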
+      tflite::gpu::ReLUAttributes attr_1;
+      if (activation == ir::Activation::RELU6)
+      {
+        attr_1.clip = 6;
+      }
+      else
+      {
+        attr_1.clip = 0;
+      }
       attr_1.alpha = 0;
       gpu_op_1 = SelectReLU(attr_1, op_def_1);
 
-      gpu_op_1->SetSrc(new_tensor.get(), 0);
-      gpu_op_1->SetDst(ofm_tensor->handle(), 0);
-      fn->add_operation(std::move(gpu_op_1));
+      addClNode({new_ind}, {ofm_index}, std::move(gpu_op_1));
       break;
     }
     default:
     {
-      throw std::runtime_error("gpu_cl KernelGenerator : Not supported operation yet");
+      throw std::runtime_error("gpu_cl KernelGenerator : Not supported DepthwiseConv2D acvivation");
     }
   }
 
@@ -429,26 +550,23 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
 
 void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
 {
-  std::unique_ptr<GPUOperation> gpu_op;
-  auto fn = std::make_unique<ClFunction>();
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
+
+  tflite::gpu::OperationDef op_def;
+  op_def.precision = tflite::gpu::CalculationsPrecision::F32;
 
+  op_def.dst_tensors.push_back(_tensor_reg->getClTensor(output_index)->get_info()._desc);
+  op_def.src_tensors.push_back(_tensor_reg->getClTensor(input_index)->get_info()._desc);
+
+  std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
+  auto fn = std::make_unique<ClFunction>(_creation_context);
   switch (node.param().op_type)
   {
     case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
     case ir::operation::ElementwiseActivation::Type::RELU:
     {
-      const auto output_index{node.getOutputs().at(0)};
-      const auto input_index{
-        node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
-
-      OperationDef op_def;
-      op_def.precision = CalculationsPrecision::F32;
-      auto output_tensor = _tensor_reg->getClTensor(output_index);
-      auto input_tensor = _tensor_reg->getClTensor(input_index);
-      op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(output_index)->descriptor);
-      op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(input_index)->descriptor);
-
-      ReLUAttributes attr;
+      tflite::gpu::ReLUAttributes attr;
       if (ir::operation::ElementwiseActivation::Type::LEAKY_RELU == node.param().op_type)
       {
         attr.alpha = node.param().alpha;
@@ -460,17 +578,33 @@ void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
         attr.clip = node.param().alpha;
       }
       gpu_op = SelectReLU(attr, op_def);
-      gpu_op->SetSrc(input_tensor->handle(), ir::operation::ElementwiseActivation::Input::INPUT);
-      gpu_op->SetDst(output_tensor->handle(), 0);
-      fn->configure(_creation_context);
-      fn->add_operation(std::move(gpu_op));
-
-      _return_fn = std::move(fn);
+      break;
+    }
+    case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+    {
+      if (_ctx.at(input_index).typeInfo().type() != ir::DataType::FLOAT32)
+      {
+        throw std::runtime_error{"Unsupported data type of LOGISTIC"};
+      }
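+      // LOGISTIC maps to the delegate's SIGMOID element-wise operation
+      // (see convertElementwiseActivationType in Utils.h).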
+      tflite::gpu::GPUOperation operation =
+        CreateElementwiseOneInput(_creation_context->GetGpuInfo(), op_def,
+                                  convertElementwiseActivationType(node.param().op_type));
+      gpu_op = std::make_unique<tflite::gpu::GPUOperation>(std::move(operation));
+      break;
+    }
+    case ir::operation::ElementwiseActivation::Type::TANH:
+    {
+      tflite::gpu::GPUOperation operation = CreateElementwiseOneInput(
+        _creation_context->GetGpuInfo(), op_def, tflite::gpu::OperationType::TANH);
+      gpu_op = std::make_unique<tflite::gpu::GPUOperation>(std::move(operation));
       break;
     }
     default:
-      throw std::runtime_error("gpu_cl KernelGenerator : Not supported operation yet");
+      throw std::runtime_error(
+        "gpu_cl KernelGenerator : Not supported operation on ElementwiseActivation");
   }
+  addClNode({input_index}, {output_index}, std::move(gpu_op));
+  _return_fn = std::move(fn);
 }
 
 void KernelGenerator::visit(const ir::operation::Pool2D &node)
@@ -478,24 +612,24 @@ void KernelGenerator::visit(const ir::operation::Pool2D &node)
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
 
-  OperationDef op_def;
-  op_def.precision = CalculationsPrecision::F32;
+  tflite::gpu::OperationDef op_def;
+  op_def.precision = tflite::gpu::CalculationsPrecision::F32;
 
-  op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(input_index)->descriptor);
-  auto input_shape = _tensor_reg->getClTensorReserver(input_index)->shape;
+  op_def.src_tensors.push_back(_tensor_reg->getClTensor(input_index)->get_info()._desc);
+  auto input_shape = _tensor_reg->getClTensor(input_index)->get_info()._shape;
 
-  op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(output_index)->descriptor);
+  op_def.dst_tensors.push_back(_tensor_reg->getClTensor(output_index)->get_info()._desc);
 
   const auto kh = node.param().kh;
   const auto kw = node.param().kw;
   const auto stride = node.param().stride;
   const auto op_type = convertPoolType(node.param().op_type);
 
-  Pooling2DAttributes attributes;
+  tflite::gpu::Pooling2DAttributes attributes;
   attributes.type = op_type;
-  attributes.kernel = HW(kh > 0 ? kh : 1, kw > 0 ? kw : 1);
-  attributes.strides =
-    HW(stride.vertical > 0 ? stride.vertical : 1, stride.horizontal > 0 ? stride.horizontal : 1);
+  attributes.kernel = tflite::gpu::HW(kh > 0 ? kh : 1, kw > 0 ? kw : 1);
+  attributes.strides = tflite::gpu::HW(stride.vertical > 0 ? stride.vertical : 1,
+                                       stride.horizontal > 0 ? stride.horizontal : 1);
 
   if (node.param().padding.type == ir::PaddingType::SAME)
   {
@@ -503,23 +637,15 @@ void KernelGenerator::visit(const ir::operation::Pool2D &node)
   }
   else
   {
-    attributes.padding.prepended = HW(0, 0);
-    attributes.padding.appended = HW(0, 0);
+    attributes.padding.prepended = tflite::gpu::HW(0, 0);
+    attributes.padding.appended = tflite::gpu::HW(0, 0);
   }
 
-  auto fn = std::make_unique<ClFunction>();
-  std::unique_ptr<GPUOperation> gpu_op;
+  auto fn = std::make_unique<ClFunction>(_creation_context);
+  std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
   gpu_op = SelectPooling(attributes, op_def);
 
-  auto input_tensor = _tensor_reg->getClTensor(input_index);
-  auto output_tensor = _tensor_reg->getClTensor(output_index);
-
-  gpu_op->SetSrc(input_tensor->handle(), ir::operation::Pool2D::Input::INPUT);
-  gpu_op->SetDst(output_tensor->handle(), 0);
-
-  fn->configure(_creation_context);
-  fn->add_operation(std::move(gpu_op));
-
+  addClNode({input_index}, {output_index}, std::move(gpu_op));
   _return_fn = std::move(fn);
 }
 
@@ -528,31 +654,24 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
 
-  OperationDef op_def;
-  op_def.precision = CalculationsPrecision::F32;
+  tflite::gpu::OperationDef op_def;
+  op_def.precision = tflite::gpu::CalculationsPrecision::F32;
 
-  op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(input_index)->descriptor);
-  auto input_shape = _tensor_reg->getClTensorReserver(input_index)->shape;
+  op_def.src_tensors.push_back(_tensor_reg->getClTensor(input_index)->get_info()._desc);
+  auto input_shape = _tensor_reg->getClTensor(input_index)->get_info()._shape;
 
-  op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(output_index)->descriptor);
-  auto output_shape = _tensor_reg->getClTensorReserver(output_index)->shape;
+  op_def.dst_tensors.push_back(_tensor_reg->getClTensor(output_index)->get_info()._desc);
+  auto output_shape = _tensor_reg->getClTensor(output_index)->get_info()._shape;
 
-  ReshapeAttributes attr;
+  tflite::gpu::ReshapeAttributes attr;
   attr.new_shape = output_shape;
 
-  auto fn = std::make_unique<ClFunction>();
-  std::unique_ptr<GPUOperation> gpu_op;
+  auto fn = std::make_unique<ClFunction>(_creation_context);
+  std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
   const int src_channels = input_shape.c;
   SelectReshape(src_channels, attr.new_shape.c, op_def, &gpu_op);
 
-  auto input_tensor = _tensor_reg->getClTensor(input_index);
-  auto output_tensor = _tensor_reg->getClTensor(output_index);
-  gpu_op->SetSrc(input_tensor->handle(), ir::operation::Reshape::Input::INPUT);
-  gpu_op->SetDst(output_tensor->handle(), 0);
-
-  fn->configure(_creation_context);
-  fn->add_operation(std::move(gpu_op));
-
+  addClNode({input_index}, {output_index}, std::move(gpu_op));
   _return_fn = std::move(fn);
 }
 
@@ -568,27 +687,20 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
     throw std::runtime_error("Softmax.beta != 1 is not supported in gpu_cl");
   }
 
-  OperationDef op_def;
-  op_def.precision = CalculationsPrecision::F32;
+  tflite::gpu::OperationDef op_def;
+  op_def.precision = tflite::gpu::CalculationsPrecision::F32;
 
-  op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(output_index)->descriptor);
+  op_def.dst_tensors.push_back(_tensor_reg->getClTensor(output_index)->get_info()._desc);
 
-  op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(input_index)->descriptor);
-  auto input_shape = _tensor_reg->getClTensorReserver(input_index)->shape;
+  op_def.src_tensors.push_back(_tensor_reg->getClTensor(input_index)->get_info()._desc);
+  auto input_shape = _tensor_reg->getClTensor(input_index)->get_info()._shape;
 
-  auto fn = std::make_unique<ClFunction>();
+  auto fn = std::make_unique<ClFunction>(_creation_context);
 
-  std::unique_ptr<GPUOperation> gpu_op;
+  std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
   SelectSoftmax(input_shape, op_def, &gpu_op);
-  auto output_tensor = _tensor_reg->getClTensor(output_index);
-  auto input_tensor = _tensor_reg->getClTensor(input_index);
-
-  gpu_op->SetSrc(input_tensor->handle(), ir::operation::Softmax::Input::INPUT);
-  gpu_op->SetDst(output_tensor->handle(), 0);
-
-  fn->configure(_creation_context);
-  fn->add_operation(std::move(gpu_op));
 
+  addClNode({input_index}, {output_index}, std::move(gpu_op));
   _return_fn = std::move(fn);
 }
 
index 91fd3cd..5e8c262 100644 (file)
@@ -26,6 +26,7 @@
 
 #include <backend/CustomKernelBuilder.h>
 #include <backend/basic/KernelGeneratorBase.h>
+#include <backend/BackendContext.h>
 #include <ir/Operands.h>
 #include <ir/Operations.h>
 #include <ir/Operations.Include.h>
@@ -46,6 +47,8 @@ public:
 
   std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
 
+  void get_operation(FunctionMap &Functions);
+
 private:
   void visit(const ir::operation::BinaryArithmetic &) override;
   void visit(const ir::operation::Conv2D &) override;
@@ -54,6 +57,14 @@ private:
   void visit(const ir::operation::Pool2D &) override;
   void visit(const ir::operation::Reshape &) override;
   void visit(const ir::operation::Softmax &) override;
+  absl::Status readConstTensor(const ir::OperandIndex &index, tflite::gpu::TensorOrScalar *param);
+  absl::Status readConstTensor(
+    const ir::OperandIndex &index,
+    absl::variant<tflite::gpu::Tensor<tflite::gpu::Linear, tflite::gpu::DataType::FLOAT32>,
+                  tflite::gpu::Tensor<tflite::gpu::HWC, tflite::gpu::DataType::FLOAT32>> *alpha);
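+  // Queues a GPUOperation together with its input/output operand indices as a CLNode in _nodes.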
+  void addClNode(const std::vector<ir::OperandIndex> &inputs,
+                 const std::vector<ir::OperandIndex> &outputs,
+                 std::unique_ptr<tflite::gpu::GPUOperation> gpu_op);
 
 private:
   const ir::Operands &_ctx;
@@ -62,7 +73,9 @@ private:
   std::shared_ptr<TensorBuilder> _tensor_builder;
   std::shared_ptr<TensorRegistry> _tensor_reg;
   std::shared_ptr<tflite::gpu::cl::CreationContext> _creation_context;
-  ir::OperandIndexMap<std::shared_ptr<tflite::gpu::cl::Tensor>> _new_tensors;
+  std::vector<tflite::gpu::cl::CLNode> _nodes;
+  ir::OperationIndex _operation_index;
+  std::vector<ir::OperationIndex> _operation_indexes;
 };
 
 } // namespace gpu_cl
index a3b9b39..4b34c39 100644 (file)
 #ifndef __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__
 #define __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__
 
-#include "ex/InferenceContextEx.h"
 #include "operand/CLTensor.h"
 
 #include "ir/OperandIndexMap.h"
 #include "ir/OperandInfo.h"
 #include "util/logging.h"
 
+#include "tensorflow/lite/delegates/gpu/spi.h"
 #include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
-#include "tensorflow/lite/delegates/gpu/cl/storage_type_util.h"
-#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+#include "tensorflow/lite/delegates/gpu/common/task/storage_type_util.h"
 
 #include <cassert>
 
@@ -41,24 +42,31 @@ namespace gpu_cl
 class MemoryManager
 {
 public:
-  MemoryManager(tflite::gpu::cl::CLContext *context) : _context{context} {}
+  MemoryManager(tflite::gpu::cl::CLContext *context, tflite::gpu::CreateGpuModelInfo create_info,
+                const std::shared_ptr<tflite::gpu::cl::Environment> &environment)
+    : _context{context}, _create_info{create_info}, _environment{environment}
+  {
+  }
 
   ~MemoryManager() = default;
 
   void allocate(void)
   {
+    std::unique_ptr<tflite::gpu::TensorObjectConverterBuilder> converter_builder =
+      NewConverterBuilder(_environment.get());
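+    // The shared converter builder sets up CPU<->GPU layout converters for the
+    // input/output tensors below; intermediate tensors do not need converters.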
     for (const auto &tensor_entry : _tensors)
     {
       auto tensor = tensor_entry.second;
       auto type = tensor->get_type();
 
-      // if (type == TensorType::TENSOR_TYPE_DELETE) {
-      //   continue;
-      // }
+      if (type == TensorType::TENSOR_TYPE_DELETE)
+      {
+        continue;
+      }
+
+      const auto &shape = tensor->get_info()._shape;
+      const auto &descriptor = tensor->get_info()._desc;
 
-      const auto &t = tensor_reserver_.Get(tensor_entry.first.value());
-      const auto &shape = t->shape;
-      const auto &descriptor = t->descriptor;
       if (!CreateTensor(*_context, shape, descriptor, tensor->handle()).ok())
       {
         std::runtime_error("Failed to CreateTensor");
@@ -66,10 +74,10 @@ public:
       switch (type)
       {
         case TensorType::TENSOR_TYPE_INPUT:
-          tensor->writeConvertInit();
+          tensor->writeConvertInit(converter_builder.get(), _environment);
           break;
         case TensorType::TENSOR_TYPE_OUTPUT:
-          tensor->readConvertInit();
+          tensor->readConvertInit(converter_builder.get(), _environment);
           break;
         default:
           break;
@@ -89,65 +97,60 @@ public:
   { /* DO NOTHING */
   }
 
-  void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
-                   tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
-                   std::shared_ptr<tflite::gpu::cl::Environment> environment,
-                   tflite::gpu::cl::DeviceInfo &device_info, TensorType type)
+  void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info, TensorType type)
   {
-    tflite::gpu::ValueId max_id = 0;
-    auto data_type = DeduceDataTypeFromPrecision(create_info.precision);
-    const auto shape = info.shape();
+    auto data_type = DeduceDataTypeFromPrecision(_create_info.precision);
 
-    auto tensor = std::make_shared<operand::CLTensor>(shape.rank(), shape, environment, type);
-    _tensors[ind] = tensor;
-    tflite::gpu::BHWC t_shape;
-    switch (shape.rank())
+    tflite::gpu::BHWC BHWC_shape = ToBHWC(info.shape());
+
+    tflite::gpu::TensorStorageType storage_type = _create_info.storage_type;
+    tflite::gpu::Layout layout =
+      BHWC_shape.b == 1 ? tflite::gpu::Layout::HWC : tflite::gpu::Layout::BHWC;
+
+    if (!SelectBestStorageType(_environment->device().GetInfo(), BHWC_shape, storage_type,
+                               data_type, layout, &storage_type)
+           .ok())
     {
-      case 1:
-        // B layout
-        t_shape = tflite::gpu::BHWC(shape.dim(0), 1, 1, 1);
-        break;
-      case 2:
-        // BC layout
-        t_shape = tflite::gpu::BHWC(shape.dim(0), 1, 1, shape.dim(1));
-        break;
-      case 3:
-        // BWC layout
-        t_shape = tflite::gpu::BHWC(shape.dim(0), 1, shape.dim(1), shape.dim(2));
-        break;
-      case 4:
-        // BHWC layout
-        t_shape = tflite::gpu::BHWC(shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(3));
-        break;
-      default:
-        break;
+      throw std::runtime_error("Failed to SelectBestStorageType");
     }
+    auto tensor = std::make_shared<operand::CLTensor>(
+      info.shape().rank(), type, BHWC_shape,
+      tflite::gpu::TensorDescriptor{data_type, storage_type, layout});
+    _tensors[ind] = tensor;
+  }
 
-    tflite::gpu::cl::TensorStorageType storage_type = create_info.storage_type;
-    tflite::gpu::Layout layout =
-      t_shape.b == 1 ? tflite::gpu::Layout::HWC : tflite::gpu::Layout::BHWC;
+  ir::OperandIndex addTensor(const ir::Shape &shape)
+  {
+    auto data_type = DeduceDataTypeFromPrecision(_create_info.precision);
 
-    tflite::gpu::ValueId id = ind.value();
-    storage_type =
-      tflite::gpu::cl::SelectBestStorageType(device_info, t_shape, storage_type, data_type, layout);
-    auto dummy = std::make_shared<InferenceContextEx::DummyTensor>();
-    dummy->shape = t_shape;
-    dummy->descriptor = tflite::gpu::cl::TensorDescriptor{data_type, storage_type, layout};
-    tensor_reserver_.Add(id, dummy);
+    tflite::gpu::BHWC BHWC_shape = ToBHWC(shape);
 
-    max_id = std::max(max_id, id);
+    tflite::gpu::TensorStorageType storage_type = _create_info.storage_type;
+    tflite::gpu::Layout layout =
+      BHWC_shape.b == 1 ? tflite::gpu::Layout::HWC : tflite::gpu::Layout::BHWC;
 
-    tensor_reserver_.SetNext(max_id + 1);
+    if (!SelectBestStorageType(_environment->device().GetInfo(), BHWC_shape, storage_type,
+                               data_type, layout, &storage_type)
+           .ok())
+    {
+      throw std::runtime_error("Failed to SelectBestStorageType");
+    }
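+    // Indices for new intermediate tensors count down from UINT32_MAX, keeping them
+    // well clear of the operand indices assigned by the IR.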
+    auto ind = ir::OperandIndex(_new_id--);
+    auto tensor = std::make_shared<operand::CLTensor>(
+      shape.rank(), TensorType::TENSOR_TYPE_VALID, BHWC_shape,
+      tflite::gpu::TensorDescriptor{data_type, storage_type, layout});
+    _tensors[ind] = tensor;
+    return ind;
   }
 
   ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &tensors(void) { return _tensors; }
 
-  InferenceContextEx::TensorReserverEx &tensorReservers(void) { return tensor_reserver_; }
-
 private:
   ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> _tensors;
-  InferenceContextEx::TensorReserverEx tensor_reserver_;
   tflite::gpu::cl::CLContext *_context;
+  tflite::gpu::CreateGpuModelInfo _create_info;
+  std::shared_ptr<tflite::gpu::cl::Environment> _environment;
+  uint32_t _new_id = UINT32_MAX;
 };
 
 } // namespace gpu_cl
index e717334..3183354 100644 (file)
@@ -21,7 +21,6 @@
 
 #include "TensorManager.h"
 
-#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
 #include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
 #include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
 #include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
@@ -45,11 +44,8 @@ namespace gpu_cl
 
 using UsesType = cl_common::UsesType;
 
-TensorBuilder::TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr,
-                             tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
-                             const std::shared_ptr<tflite::gpu::cl::Environment> &environment)
-  : _operands{operands}, _tensor_mgr{tensor_mgr}, _create_info{create_info}, _environment{
-                                                                               environment}
+TensorBuilder::TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr)
+  : _operands{operands}, _tensor_mgr{tensor_mgr}
 {
   assert(_tensor_mgr);
 }
@@ -89,9 +85,9 @@ void TensorBuilder::allocate(void)
 {
   auto lifetime_map = cl_common::createLifetimeMap(_lifetime_seq, _parent_map);
 
-  for (auto &entry : lifetime_map)
+  for (const auto &entry : lifetime_map)
   {
-    auto &use = entry.second;
+    const auto &use = entry.second;
     auto use_type = use.first;
     auto use_index = use.second;
     assert(use_index.valid());
@@ -118,18 +114,22 @@ void TensorBuilder::buildTensors(void)
   assert(_tensor_mgr->constTensors().size() == 0);
   assert(_tensor_mgr->nonconstTensors().size() == 0);
   // Normal tensors
-  for (auto &entry : _tensor_info_map)
+  for (const auto &entry : _tensor_info_map)
   {
-    auto ind = entry.first;
+    const auto &ind = entry.first;
     if (_parent_map.count(ind) > 0)
       continue;
     auto type = _tensor_type_map.at(ind);
     const auto &info = entry.second;
-    _tensor_mgr->buildTensor(ind, info, _create_info, _environment, _environment->device().info_,
-                             type);
+    _tensor_mgr->buildTensor(ind, info, type);
   }
 }
 
+ir::OperandIndex TensorBuilder::addTensor(const ir::Shape &shape)
+{
+  return _tensor_mgr->addTensor(shape);
+}
+
 } // namespace gpu_cl
 } // namespace backend
 } // namespace onert
index 2a5cb8b..e0333fe 100644 (file)
@@ -34,9 +34,7 @@ namespace gpu_cl
 class TensorBuilder
 {
 public:
-  TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr,
-                tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
-                const std::shared_ptr<tflite::gpu::cl::Environment> &environment);
+  TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr);
 
   /**
    * @brief     Register tensor information to allocate on ACL-CL backend
@@ -83,6 +81,7 @@ public:
 private:
   void buildTensors(void);
   ir::OperandIndex findRootParent(ir::OperandIndex index);
+  ir::OperandIndex addTensor(const ir::Shape &shape);
 
 private:
   const ir::Operands &_operands;
@@ -92,8 +91,6 @@ private:
   ir::OperandIndexMap<size_t> _uses_count_map;
 
   std::unique_ptr<TensorManager> _tensor_mgr;
-  tflite::gpu::cl::InferenceContext::CreateInferenceInfo _create_info;
-  std::shared_ptr<tflite::gpu::cl::Environment> _environment;
 
   // for linear executor
   cl_common::LifetimeSeq _lifetime_seq;
diff --git a/runtime/onert/backend/gpu_cl/TensorBuilderHelper.h b/runtime/onert/backend/gpu_cl/TensorBuilderHelper.h
deleted file mode 100644 (file)
index 7290ff5..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
-#define __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
-
-#include "absl/status/status.h"
-#include "tensorflow/lite/delegates/gpu/common/shape.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum TensorType
-{
-  TENSOR_TYPE_VALID = 0,
-  TENSOR_TYPE_INPUT = 1,
-  TENSOR_TYPE_OUTPUT = 2,
-  TENSOR_TYPE_DELETE = 3
-};
-
-absl::Status ExtractAxisFromIndex(int dims, int index, tflite::gpu::Axis *axis);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
index 9fe0605..02e26ed 100644 (file)
@@ -42,23 +42,28 @@ void TensorManager::deallocateConsts(void) { _const_mgr->deallocate(); }
 void TensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
 
 void TensorManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
-                                tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
-                                std::shared_ptr<tflite::gpu::cl::Environment> environment,
-                                tflite::gpu::cl::DeviceInfo &device_info, TensorType type)
+                                TensorType type)
 {
   assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end());
 
   if (info.isConstant())
   {
-    _const_mgr->buildTensor(ind, info, create_info, environment, device_info, type);
+    _const_mgr->buildTensor(ind, info, type);
     _ind_to_mgr.insert({ind, *_const_mgr});
   }
   else
   {
-    _nonconst_mgr->buildTensor(ind, info, create_info, environment, device_info, type);
+    _nonconst_mgr->buildTensor(ind, info, type);
     _ind_to_mgr.insert({ind, *_nonconst_mgr});
   }
 }
+ir::OperandIndex TensorManager::addTensor(const ir::Shape &shape)
+{
+  auto ind = _nonconst_mgr->addTensor(shape);
+  _ind_to_mgr.insert({ind, *_nonconst_mgr});
+
+  return ind;
+}
 
 void TensorManager::startLifetime(const ir::OperandIndex &ind)
 {
@@ -96,29 +101,6 @@ ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &TensorManager::nonconst
   return _nonconst_mgr->tensors();
 }
 
-std::shared_ptr<InferenceContextEx::DummyTensor> TensorManager::atR(const ir::OperandIndex &ind)
-{
-  if (_nonconst_mgr->tensorReservers().HaveTensor(ind.value()))
-  {
-    return _nonconst_mgr->tensorReservers().Get(ind.value());
-  }
-  else if (_const_mgr->tensorReservers().HaveTensor(ind.value()))
-  {
-    return _const_mgr->tensorReservers().Get(ind.value());
-  }
-  return nullptr;
-}
-
-InferenceContextEx::TensorReserverEx &TensorManager::constTensorReservers(void)
-{
-  return _const_mgr->tensorReservers();
-}
-
-InferenceContextEx::TensorReserverEx &TensorManager::nonconstTensorReservers(void)
-{
-  return _nonconst_mgr->tensorReservers();
-}
-
 void TensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
 {
   for (auto it : _nonconst_mgr->tensors())
index 52abc57..5b09ac1 100644 (file)
 
 #include "MemoryManager.h"
 
+#include "Utils.h"
+
 #include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
-#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
 
 #include "ir/OperandInfo.h"
 #include "ir/OperandIndexMap.h"
@@ -44,10 +46,8 @@ public:
   void deallocateConsts(void);
   void deallocateNonconsts(void);
 
-  void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
-                   tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
-                   std::shared_ptr<tflite::gpu::cl::Environment> environment,
-                   tflite::gpu::cl::DeviceInfo &device_info, TensorType type);
+  void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info, TensorType type);
+  ir::OperandIndex addTensor(const ir::Shape &shape);
 
   std::shared_ptr<operand::ICLTensor> findTensorAsParent(const ir::OperandIndex &ind);
 
@@ -55,10 +55,6 @@ public:
   void finishLifetime(const ir::OperandIndex &ind);
 
   std::shared_ptr<operand::ICLTensor> at(const ir::OperandIndex &ind);
-  std::shared_ptr<InferenceContextEx::DummyTensor> atR(const ir::OperandIndex &ind);
-
-  InferenceContextEx::TensorReserverEx &constTensorReservers(void);
-  InferenceContextEx::TensorReserverEx &nonconstTensorReservers(void);
 
   ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &constTensors(void);
   ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &nonconstTensors(void);
@@ -73,10 +69,14 @@ private:
   ir::OperandIndexMap<MemoryManager &> _ind_to_mgr;
 };
 
-inline TensorManager *createTensorManager(tflite::gpu::cl::CLContext *context)
+inline TensorManager *
+createTensorManager(tflite::gpu::cl::CLContext *context,
+                    tflite::gpu::CreateGpuModelInfo create_info,
+                    const std::shared_ptr<tflite::gpu::cl::Environment> &environment)
 {
   VERBOSE(createTensorManager) << "GPU-CL TensorManager" << std::endl;
-  return new TensorManager(new MemoryManager(context), new MemoryManager(context));
+  return new TensorManager(new MemoryManager(context, create_info, environment),
+                           new MemoryManager(context, create_info, environment));
 }
 
 } // namespace gpu_cl
index 6f17aff..be342e9 100644 (file)
@@ -44,7 +44,7 @@ public:
 
   auto getClTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind).get(); }
 
-  auto getClTensorReserver(const ir::OperandIndex &ind) { return _tensor_mgr->atR(ind); }
+  ir::OperandIndex addNewClTensor(const ir::Shape &shape) { return _tensor_mgr->addTensor(shape); }
 
 private:
   TensorManager *_tensor_mgr;
diff --git a/runtime/onert/backend/gpu_cl/Utils.h b/runtime/onert/backend/gpu_cl/Utils.h
new file mode 100644 (file)
index 0000000..1953c0e
--- /dev/null
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
+#define __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
+
+#include "absl/status/status.h"
+
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
+#include "tensorflow/lite/delegates/gpu/common/operations.h"
+
+#include "ir/operation/BinaryArithmetic.h"
+#include "ir/operation/ElementwiseActivation.h"
+#include "ir/operation/ElementwiseBinary.h"
+#include "ir/operation/ElementwiseUnary.h"
+#include "ir/operation/Pool2D.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
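+// Convert to the delegate's HW pair, clamping non-positive sizes to 1.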
+inline tflite::gpu::HW ToHW(int32_t h, int32_t w)
+{
+  return tflite::gpu::HW(h > 0 ? h : 1, w > 0 ? w : 1);
+}
+
+template <typename AttrT>
+inline void UpdatePadding(const ir::PaddingType type, const tflite::gpu::BHWC &input_shape,
+                          AttrT *attr)
+{
+  if (type == ir::PaddingType::SAME)
+  {
+    attr->padding = CalculateSamePadding(input_shape, *attr);
+  }
+  else
+  {
+    attr->padding.prepended = tflite::gpu::HW(0, 0);
+    attr->padding.appended = tflite::gpu::HW(0, 0);
+  }
+}
+
+inline tflite::gpu::PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
+{
+  switch (type_ir)
+  {
+    case ir::operation::Pool2D::PoolType::AVG:
+      return tflite::gpu::PoolingType::AVERAGE;
+    case ir::operation::Pool2D::PoolType::MAX:
+      return tflite::gpu::PoolingType::MAX;
+    default:
+      throw std::runtime_error("gpu_Cl KernelGenerator : Not supported operation yet");
+  }
+}
+
+inline tflite::gpu::BHWC ToBHWC(ir::Shape shape)
+{
+  switch (shape.rank())
+  {
+    case 1:
+      // B layout
+      return tflite::gpu::BHWC(shape.dim(0), 1, 1, 1);
+    case 2:
+      // BC layout
+      return tflite::gpu::BHWC(shape.dim(0), 1, 1, shape.dim(1));
+    case 3:
+      // BWC layout
+      return tflite::gpu::BHWC(shape.dim(0), 1, shape.dim(1), shape.dim(2));
+    case 4:
+      // BHWC layout
+      return tflite::gpu::BHWC(shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(3));
+    default:
+      break;
+  }
+  return tflite::gpu::BHWC();
+}
+
+inline bool CheckIfLinearConvertible(const ir::Shape *shape)
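+// True when every dimension except the innermost (channel) one equals 1, i.e. the
+// tensor can be treated as a 1-D (Linear) constant.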
+{
+  if (shape->num_elements() <= 0)
+  {
+    return false;
+  }
+  for (int i = 0; i < shape->rank() - 1; ++i)
+  {
+    if (shape->dim(i) != 1)
+    {
+      return false;
+    }
+  }
+  return true;
+}
+
+inline tflite::gpu::OperationType
+convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
+{
+  switch (arithmetic_type_ir)
+  {
+    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+      return tflite::gpu::OperationType::ADD;
+    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+      return tflite::gpu::OperationType::SUB;
+    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+      return tflite::gpu::OperationType::MUL;
+    case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+      return tflite::gpu::OperationType::DIV;
+    default:
+      throw std::runtime_error("Unsupported ArithmeticType");
+  }
+}
+
+inline tflite::gpu::OperationType
+convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
+{
+  switch (type_ir)
+  {
+    case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+      return tflite::gpu::OperationType::SIGMOID;
+    default:
+      throw std::runtime_error("Unsupported ElementwiseActivationType");
+  }
+}
+
+enum TensorType
+{
+  TENSOR_TYPE_VALID = 0,
+  TENSOR_TYPE_INPUT = 1,
+  TENSOR_TYPE_OUTPUT = 2,
+  TENSOR_TYPE_DELETE = 3
+};
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
diff --git a/runtime/onert/backend/gpu_cl/ex/InferenceContextEx.h b/runtime/onert/backend/gpu_cl/ex/InferenceContextEx.h
deleted file mode 100644 (file)
index f673879..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__
-#define __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__
-
-#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
-#include "tensorflow/lite/delegates/gpu/common/model.h"
-#include "absl/strings/str_cat.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class InferenceContextEx : public tflite::gpu::cl::InferenceContext
-{
-public:
-  struct DummyTensor
-  {
-    tflite::gpu::BHWC shape;
-    tflite::gpu::cl::TensorDescriptor descriptor;
-
-    bool operator==(const DummyTensor &b) const
-    {
-      return shape == b.shape && descriptor == b.descriptor;
-    }
-  };
-
-  class TensorReserverEx
-  {
-  public:
-    tflite::gpu::ValueId Add(const std::shared_ptr<DummyTensor> &dummy)
-    {
-      reservations_[next_] = dummy;
-      return next_++;
-    }
-    void Add(tflite::gpu::ValueId id, const std::shared_ptr<DummyTensor> &dummy)
-    {
-      reservations_[id] = dummy;
-    }
-    void SetNext(tflite::gpu::ValueId id) { next_ = id; }
-    bool HaveTensor(tflite::gpu::ValueId id)
-    {
-      return reservations_.find(id) != reservations_.end();
-    }
-    std::shared_ptr<DummyTensor> Get(tflite::gpu::ValueId id) { return reservations_[id]; }
-
-    std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>>
-    GetTensorDescs() const
-    {
-      std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>> result;
-      for (auto &v : reservations_)
-      {
-        tflite::gpu::cl::TensorDescriptor desc = v.second->descriptor;
-        desc.shape.b = v.second->shape.b;
-        desc.shape.h = v.second->shape.h;
-        desc.shape.w = v.second->shape.w;
-        desc.shape.d = 1;
-        desc.shape.c = v.second->shape.c;
-        result.push_back({v.first, desc});
-      }
-      return result;
-    }
-
-    void Add(const std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>>
-               &tensors)
-    {
-      for (auto &v : tensors)
-      {
-        auto dummy = std::make_shared<DummyTensor>();
-        dummy->descriptor = v.second;
-        dummy->shape.b = v.second.shape.b;
-        dummy->shape.h = v.second.shape.h;
-        dummy->shape.w = v.second.shape.w;
-        dummy->shape.c = v.second.shape.c;
-        Add(v.first, dummy);
-      }
-    }
-
-  private:
-    // absl::flat_hash_map<ValueId, DummyTensor> reservations_;
-    std::unordered_map<tflite::gpu::ValueId, std::shared_ptr<DummyTensor>> reservations_;
-    tflite::gpu::ValueId next_ = 0;
-  };
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__
index d3ed102..1b19b10 100644 (file)
@@ -19,7 +19,7 @@
 #include "tensorflow/lite/delegates/gpu/cl/buffer.h"
 #include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
 #include "tensorflow/lite/delegates/gpu/cl/tensor.h"
-#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
 
 using namespace tflite::gpu::cl;
 
@@ -32,9 +32,9 @@ namespace gpu_cl
 namespace operand
 {
 
-CLTensor::CLTensor(size_t rank, ir::Shape shape,
-                   std::shared_ptr<tflite::gpu::cl::Environment> environment, TensorType type)
-  : ICLTensor{rank, shape, environment, type}, _tensor(std::make_shared<Tensor>())
+CLTensor::CLTensor(size_t rank, TensorType type, tflite::gpu::BHWC shape,
+                   tflite::gpu::TensorDescriptor desc)
+  : ICLTensor{rank, type, shape, desc}, _tensor(std::make_shared<Tensor>())
 {
 }
 
index f2153f4..269551d 100644 (file)
@@ -38,8 +38,8 @@ public:
   CLTensor() = delete;
 
 public:
-  CLTensor(size_t rank, ir::Shape shape, std::shared_ptr<tflite::gpu::cl::Environment> environment,
-           TensorType type);
+  CLTensor(size_t rank, TensorType type, tflite::gpu::BHWC shape,
+           tflite::gpu::TensorDescriptor desc);
 
 public:
   const tflite::gpu::cl::Tensor *handle() const override;
index a95f780..ef71bbc 100644 (file)
@@ -43,8 +43,10 @@ void ICLTensor::access(const std::function<void(ITensor &tensor)> &fn)
   fn(*this);
 }
 
-void ICLTensor::writeConvertInit()
+void ICLTensor::writeConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+                                 std::shared_ptr<tflite::gpu::cl::Environment> environment)
 {
+  _environment = environment;
   TensorObjectDef input_def;
   input_def.dimensions.b = handle()->Batch();
   input_def.dimensions.h = handle()->Height();
@@ -74,21 +76,20 @@ void ICLTensor::writeConvertInit()
   output_def.object_def.data_type = handle()->GetDataType();
   input_def.object_def.user_provided = false;
 
-  _converter_builder = NewConverterBuilder(_environment.get());
-  if (!_converter_builder->MakeConverter(input_def, permute_def, &_converter_to).ok())
+  if (!converter_builder->MakeConverter(input_def, permute_def, &_converter_to).ok())
   {
     throw std::runtime_error("Failed to make converter_to");
   }
-  if (!_converter_builder->MakeConverter(permute_def, output_def, &_converter_from).ok())
+  if (!converter_builder->MakeConverter(permute_def, output_def, &_converter_from).ok())
   {
     throw std::runtime_error("Failed to make converter_from");
   }
 }
 
-void ICLTensor::readConvertInit()
+void ICLTensor::readConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+                                std::shared_ptr<tflite::gpu::cl::Environment> environment)
 {
-  _converter_builder = NewConverterBuilder(_environment.get());
-
+  _environment = environment;
   TensorObjectDef input_def;
   input_def.dimensions.b = handle()->Batch();
   input_def.dimensions.h = handle()->Height();
@@ -118,20 +119,20 @@ void ICLTensor::readConvertInit()
   TensorObjectDef output_def = permute_def;
   output_def.object_def.object_type = ObjectType::CPU_MEMORY;
 
-  if (!_converter_builder->MakeConverter(input_def, permute_def, &_converter_from).ok())
+  if (!converter_builder->MakeConverter(input_def, permute_def, &_converter_from).ok())
   {
     throw std::runtime_error("Failed to make converter_from");
   }
-  if (!_converter_builder->MakeConverter(permute_def, output_def, &_converter_to).ok())
+  if (!converter_builder->MakeConverter(permute_def, output_def, &_converter_to).ok())
   {
     throw std::runtime_error("Failed to make converter_to");
   }
 }
 
-void ICLTensor::enqueueWriteBuffer(const void *ptr, bool)
+void ICLTensor::enqueueWriteBuffer(const void *ptr, bool blocking)
 {
-  TensorObject input_obj =
-    MakeReadableCpuMemory(absl::MakeSpan(static_cast<const float *>(ptr), _shape.num_elements()));
+  TensorObject input_obj = MakeReadableCpuMemory(
+    absl::MakeSpan(static_cast<const float *>(ptr), _info._shape.DimensionsProduct()));
 
   TensorObject output_obj;
 
@@ -162,13 +163,19 @@ void ICLTensor::enqueueWriteBuffer(const void *ptr, bool)
   {
     throw std::runtime_error("Failed to write cl buffer from cpu memory");
   }
+
+  if (blocking && !_environment->queue()->WaitForCompletion().ok())
+  {
+    throw std::runtime_error("Failed to WaitForCompletion");
+  }
+
   if (!_converter_from->Convert(permute_obj, output_obj).ok())
   {
     throw std::runtime_error("Failed to change layout");
   }
 }
 
-void ICLTensor::enqueueReadBuffer(void *ptr, bool)
+void ICLTensor::enqueueReadBuffer(void *ptr, bool blocking)
 {
   TensorObject input_obj;
 
@@ -196,7 +203,7 @@ void ICLTensor::enqueueReadBuffer(void *ptr, bool)
   }
 
   TensorObject output_obj =
-    MakeCpuMemory(absl::MakeSpan(static_cast<float *>(ptr), _shape.num_elements()));
+    MakeCpuMemory(absl::MakeSpan(static_cast<float *>(ptr), _info._shape.DimensionsProduct()));
 
   if (!_converter_from->Convert(input_obj, permute_obj).ok())
   {
@@ -206,6 +213,11 @@ void ICLTensor::enqueueReadBuffer(void *ptr, bool)
   {
     throw std::runtime_error("Failed to read cl buffer");
   }
+
+  if (blocking && !_environment->queue()->WaitForCompletion().ok())
+  {
+    throw std::runtime_error("Failed to WaitForCompletion");
+  }
 }
 
 } // namespace operand
index b8ad446..47420a1 100644 (file)
@@ -26,7 +26,7 @@
 #include "tensorflow/lite/delegates/gpu/cl/tensor.h"
 #include "tensorflow/lite/delegates/gpu/cl/environment.h"
 
-#include "TensorBuilderHelper.h"
+#include "Utils.h"
 
 namespace onert
 {
@@ -37,6 +37,12 @@ namespace gpu_cl
 namespace operand
 {
 
+struct TensorInfo
+{
+  tflite::gpu::BHWC _shape;
+  tflite::gpu::TensorDescriptor _desc;
+};
+
 class ICLTensor : public ITensor
 {
 public:
@@ -46,15 +52,15 @@ public:
   ICLTensor(ICLTensor &&) = default;
   ICLTensor &operator=(ICLTensor &&) = default;
 
-  ICLTensor(size_t rank, ir::Shape shape, std::shared_ptr<tflite::gpu::cl::Environment> environment,
-            TensorType type)
-    : _rank{rank}, _shape{shape}, _environment(environment), _type(type)
+  ICLTensor(size_t rank, TensorType type, tflite::gpu::BHWC shape,
+            tflite::gpu::TensorDescriptor desc)
+    : _rank{rank}, _type(type), _info{shape, desc}
   {
   }
 
 public:
   uint8_t *buffer() const final { return reinterpret_cast<uint8_t *>(handle()->GetMemoryPtr()); }
-  size_t total_size() const final { return _shape.num_elements() * sizeof(float); }
+  size_t total_size() const final { return _info._shape.DimensionsProduct() * sizeof(float); }
   size_t calcOffset(const ir::Coordinates &) const final
   {
     throw std::runtime_error("ICLTensor::calcOffset() is not supported.");
@@ -78,16 +84,38 @@ public:
     throw std::runtime_error("ICLTensor::data_zero_points() is not supported.");
   }
   bool is_dynamic() const override { return false; }
-  ir::Shape getShape() const override { return _shape; }
+  ir::Shape getShape() const override
+  {
+    tflite::gpu::BHWC shape = _info._shape;
+    switch (_rank)
+    {
+      case 1:
+        return ir::Shape{shape.b};
+      case 2:
+        return ir::Shape{shape.b, shape.c};
+      case 3:
+        return ir::Shape{shape.b, shape.w, shape.c};
+      case 4:
+        return ir::Shape{shape.b, shape.h, shape.w, shape.c};
+      default:
+        break;
+    }
+    return ir::Shape{};
+  }
   bool has_padding() const override { return false; }
   void access(const std::function<void(ITensor &tensor)> &fn) final;
   bool needMemoryMap() const final { return true; }
   void enqueueWriteBuffer(const void *ptr, bool blocking = true) final;
   void enqueueReadBuffer(void *ptr, bool blocking = true) final;
 
-  void writeConvertInit();
-  void readConvertInit();
+  void writeConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+                        std::shared_ptr<tflite::gpu::cl::Environment> environment);
+  void readConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+                       std::shared_ptr<tflite::gpu::cl::Environment> environment);
+
   TensorType get_type() { return _type; }
+  TensorType set_type(TensorType type) { return _type = type; }
+  const TensorInfo get_info() { return _info; }
 
 public:
   virtual const tflite::gpu::cl::Tensor *handle() const = 0;
@@ -96,11 +124,10 @@ public:
 private:
 protected:
   size_t _rank; // Actual rank (reflects extended rank)
-  ir::Shape _shape;
-  std::shared_ptr<tflite::gpu::cl::Environment> _environment;
   TensorType _type;
-  std::unique_ptr<tflite::gpu::TensorObjectConverterBuilder> _converter_builder;
+  TensorInfo _info;
   tflite::gpu::cl::CLMemory _cl_memory;
+  std::shared_ptr<tflite::gpu::cl::Environment> _environment;
   std::unique_ptr<tflite::gpu::TensorObjectConverter> _converter_to;
   std::unique_ptr<tflite::gpu::TensorObjectConverter> _converter_from;
 };
index 8777726..48da91b 100644 (file)
@@ -50,7 +50,7 @@ FunctionMap BackendContext::genKernels()
     .operands()
     .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
 
-  for (auto &it : ret)
+  for (auto &&it : ret)
   {
     auto &fn_seq = it.second;
     fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
index e46b11d..39048f2 100644 (file)
@@ -50,7 +50,7 @@ FunctionMap BackendContext::genKernels()
     .operands()
     .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
 
-  for (auto &it : ret)
+  for (auto &&it : ret)
   {
     auto &fn_seq = it.second;
     fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
diff --git a/runtime/onert/backend/trix/BatchThreadPool.cc b/runtime/onert/backend/trix/BatchThreadPool.cc
new file mode 100644 (file)
index 0000000..3c2001d
--- /dev/null
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BatchThreadPool.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+BatchThreadPool::BatchThreadPool(size_t num_threads) : _num_threads(num_threads), _stop_all(false)
+{
+  _worker_threads.reserve(_num_threads);
+  for (uint32_t thread_num = 0; thread_num < _num_threads; ++thread_num)
+  {
+    _worker_threads.emplace_back([this, thread_num]() { this->worker(thread_num); });
+  }
+}
+
+void BatchThreadPool::worker(uint32_t thread_num)
+{
+  while (true)
+  {
+    std::unique_lock<std::mutex> lock(_m_job_queue);
+    _cv_job_queue.wait(lock, [this]() { return !this->_job_queue.empty() || _stop_all; });
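+    // Wake up when a job arrives or shutdown is requested; exit only once the
+    // queue has been drained.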
+    if (_stop_all && this->_job_queue.empty())
+    {
+      return;
+    }
+
+    // Pop a job in front of queue
+    auto job = std::move(_job_queue.front());
+    _job_queue.pop();
+    lock.unlock();
+
+    // Run the job
+    job(thread_num);
+  }
+}
+
+BatchThreadPool::~BatchThreadPool()
+{
+  _stop_all = true;
+  _cv_job_queue.notify_all();
+
+  for (auto &&t : _worker_threads)
+  {
+    t.join();
+  }
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/trix/BatchThreadPool.h b/runtime/onert/backend/trix/BatchThreadPool.h
new file mode 100644 (file)
index 0000000..bc2936f
--- /dev/null
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_BATCH_THREAD_POOL_H__
+#define __ONERT_BACKEND_TRIX_BATCH_THREAD_POOL_H__
+
+#include <condition_variable>
+#include <functional>
+#include <future>
+#include <memory>
+#include <mutex>
+#include <queue>
+#include <thread>
+#include <vector>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+/**
+ * @brief Class that has a threadpool for batch-by-batch multi-threading
+ *
+ */
+class BatchThreadPool
+{
+public:
+  BatchThreadPool(size_t num_threads);
+  ~BatchThreadPool();
+
+  /**
+   * @brief Enqueue a job and get a future for its result
+   *
+   * @tparam F    Type of the job function
+   * @tparam Args Types of the job's arguments
+   * @param f     Job function; its first parameter receives the worker thread number
+   * @param args  Arguments forwarded to the job
+   * @return std::future<typename std::result_of<F(uint32_t, Args...)>::type> Future for the job's result
+   */
+  template <class F, class... Args>
+  std::future<typename std::result_of<F(uint32_t, Args...)>::type> enqueueJob(F &&f,
+                                                                              Args &&... args)
+  {
+    if (_stop_all)
+    {
+      throw std::runtime_error("Stop all threads in BatchThreadPool");
+    }
+
+    using return_type = typename std::result_of<F(uint32_t, Args...)>::type;
+    auto job = std::make_shared<std::packaged_task<return_type(uint32_t)>>(
+      std::bind(std::forward<F>(f), std::placeholders::_1, std::forward<Args>(args)...));
+    std::future<return_type> job_result_future = job->get_future();
+    {
+      // Push job in the assigned queue
+      std::lock_guard<std::mutex> lock(_m_job_queue);
+
+      // Push job
+      _job_queue.push([job](uint32_t thread_num) { (*job)(thread_num); });
+    }
+    _cv_job_queue.notify_one();
+
+    return job_result_future;
+  }
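+
+  // Usage sketch (hypothetical caller); each job receives the worker's thread number
+  // as its first argument via std::placeholders::_1:
+  //   BatchThreadPool pool(4);
+  //   auto fut = pool.enqueueJob([](uint32_t thread_num, int batch) { return batch; }, 0);
+  //   fut.get();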
+
+private:
+  /**
+   * @brief Worker to run jobs
+   *
+   * @param thread_num Thread number on which worker is running
+   */
+  void worker(uint32_t thread_num);
+
+private:
+  /**
+   * @brief The number of threads
+   *
+   */
+  size_t _num_threads;
+
+  /**
+   * @brief Worker threads that run jobs
+   *
+   */
+  std::vector<std::thread> _worker_threads;
+
+  /**
+   * @brief Queue for jobs
+   *
+   */
+  std::queue<std::function<void(uint32_t)>> _job_queue;
+
+  /**
+   * @brief Condition variable for _job_queue and _worker_threads
+   *
+   */
+  std::condition_variable _cv_job_queue;
+
+  /**
+   * @brief Mutex for the queue _job_queue
+   *
+   */
+  std::mutex _m_job_queue;
+
+  /**
+   * @brief Whether all threads should be stopped
+   *
+   */
+  bool _stop_all;
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_BATCH_THREAD_POOL_H__
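(Illustrative note, not part of the patch) A minimal usage sketch of the BatchThreadPool API above, assuming only this header and the standard library; the file name is hypothetical. The pool passes the worker's thread number as the job's first argument, and enqueueJob() returns a std::future for the job's result:

// usage_sketch.cc -- hypothetical example, not part of the onert sources
#include "BatchThreadPool.h"

#include <iostream>
#include <vector>

int main()
{
  // One worker per NPU device would be typical; two is an arbitrary choice here.
  onert::backend::trix::BatchThreadPool pool(2);

  std::vector<std::future<int>> results;
  for (int job_id = 0; job_id < 4; ++job_id)
  {
    // The first parameter (thread_num) is supplied by the pool; extra arguments are bound here.
    results.emplace_back(pool.enqueueJob(
      [](uint32_t thread_num, int id) { return static_cast<int>(thread_num) + id * 100; }, job_id));
  }

  for (auto &&result : results)
    std::cout << result.get() << std::endl; // blocks until the corresponding job finishes

  return 0;
}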
diff --git a/runtime/onert/backend/trix/Convert.cc b/runtime/onert/backend/trix/Convert.cc
new file mode 100644 (file)
index 0000000..fe003e7
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Convert.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+data_layout convertDataLayout(const ir::Layout layout)
+{
+  switch (layout)
+  {
+    case ir::Layout::NCHW:
+      return DATA_LAYOUT_NCHW;
+    case ir::Layout::NHWC:
+      return DATA_LAYOUT_NHWC;
+    default:
+      throw std::runtime_error("Unknown Layout");
+  }
+}
+
+data_type convertDataType(const ir::DataType type)
+{
+  switch (type)
+  {
+    case ir::DataType::QUANT_UINT8_ASYMM:
+      return DATA_TYPE_QASYMM8;
+    case ir::DataType::QUANT_INT16_SYMM:
+      return DATA_TYPE_QSYMM16;
+    default:
+      throw std::runtime_error("Unsupported data type");
+  }
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/trix/Convert.h b/runtime/onert/backend/trix/Convert.h
new file mode 100644 (file)
index 0000000..662ed44
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_CONVERT_H__
+#define __ONERT_BACKEND_TRIX_CONVERT_H__
+
+#include <backend/IPortableTensor.h>
+#include <ir/DataType.h>
+#include <ir/Layout.h>
+
+#include <libnpuhost.h>
+#include <type_traits>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+/**
+ * @brief Convert type of layout from onert type to npu type
+ *
+ * @param layout Layout type in onert
+ * @return data_layout Layout type in npu
+ */
+data_layout convertDataLayout(const ir::Layout layout);
+
+/**
+ * @brief Convert type of data from onert type to npu type
+ *
+ * @param type Data type in onert
+ * @return data_type Data type in npu
+ */
+data_type convertDataType(const ir::DataType type);
+
+/**
+ * @brief Set the tensors_data_info object
+ *
+ * @tparam T Type of tensor based on IPortableTensor
+ * @param tensors Tensors that have data information
+ * @param info    tensors_data_info to be set
+ */
+template <typename T, std::enable_if_t<std::is_base_of<IPortableTensor, T>::value, bool> = true>
+void setDataInfo(const std::vector<T *> &tensors, tensors_data_info *info)
+{
+  info->num_info = static_cast<uint32_t>(tensors.size());
+
+  for (uint32_t idx = 0; idx < info->num_info; ++idx)
+  {
+    info->info[idx].layout = convertDataLayout(tensors[idx]->layout());
+    info->info[idx].type = convertDataType(tensors[idx]->data_type());
+  }
+}
+
+/**
+ * @brief Set the generic_buffers object
+ *
+ * @tparam T Type of tensor based on IPortableTensor
+ * @param tensors Tensors that have buffer information
+ * @param buf     generic_buffers to be set
+ */
+template <typename T, std::enable_if_t<std::is_base_of<IPortableTensor, T>::value, bool> = true>
+void setBuffers(const std::vector<T *> &tensors, generic_buffers *buf)
+{
+  buf->num_buffers = static_cast<uint32_t>(tensors.size());
+
+  for (uint32_t idx = 0; idx < buf->num_buffers; ++idx)
+  {
+    buf->bufs[idx].addr = tensors[idx]->buffer();
+    buf->bufs[idx].size = static_cast<uint64_t>(tensors[idx]->total_size());
+    buf->bufs[idx].type = BUFFER_MAPPED;
+  }
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_CONVERT_H__
diff --git a/runtime/onert/backend/trix/DevContext.cc b/runtime/onert/backend/trix/DevContext.cc
new file mode 100644 (file)
index 0000000..0595148
--- /dev/null
@@ -0,0 +1,307 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DevContext.h"
+
+#include "Convert.h"
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+// All things related to the npu device handle are gathered in this class. When implementing the
+// npu daemon, everything except the context role should be separated out.
+DevContext::DevContext() : _dev_handles{}, _model_ids{}, _meta_map{}
+{
+  auto dev_count = getnumNPUdeviceByType(NPUCOND_TRIV2_CONN_SOCIP);
+  if (dev_count <= 0)
+  {
+    throw std::runtime_error("Unable to find TRIX NPU device");
+  }
+
+  // Get NPU device handles
+  for (int i = 0; i < dev_count; ++i)
+  {
+    npudev_h handle;
+    if (getNPUdeviceByType(&handle, NPUCOND_TRIV2_CONN_SOCIP, i) < 0)
+    {
+      throw std::runtime_error("Failed to get TRIX NPU device handle");
+    }
+    _dev_handles.emplace_back(handle);
+  }
+
+  // NOTE Do not change the number of threads as long as jobs in threads call
+  //      the synchronous APIs such as submitNPU_request()
+  _batch_thread_pool = std::make_unique<BatchThreadPool>(_dev_handles.size());
+  // We need to be careful not to create multiple `BatchThreadPool`s. In case of multiple models,
+  // having multiple `BatchThreadPool`s may be a problem in the current implementation. But if
+  // thread pool creation is moved to the npu daemon, this problem should be solved smoothly.
+}
+
+DevContext::~DevContext()
+{
+  // NOTE Must release _batch_thread_pool before releasing _dev_handles to wait for all threads to
+  //      be terminated
+  _batch_thread_pool.reset(nullptr);
+
+  for (const auto &dev_handle : _dev_handles)
+  {
+    unregisterNPUmodel_all(dev_handle);
+    putNPUdevice(dev_handle);
+  }
+}
+
+ModelID DevContext::registerModel(const std::string &model_file_path)
+{
+  auto meta = getNPUmodel_metadata(model_file_path.c_str(), false);
+
+  if (meta == nullptr)
+  {
+    throw std::runtime_error("Unable to extract the model metadata");
+  }
+
+  generic_buffer file_info;
+  file_info.type = BUFFER_FILE;
+  file_info.filepath = model_file_path.c_str();
+  file_info.size = meta->size;
+
+  ModelID model_id;
+
+  for (uint32_t dev_num = 0; dev_num < _dev_handles.size(); ++dev_num)
+  {
+    // Register model for each device
+    uint32_t model_id_at_device;
+    if (registerNPUmodel(_dev_handles.at(dev_num), &file_info, &model_id_at_device) < 0)
+    {
+      throw std::runtime_error("Failed to register npu model");
+    }
+
+    if (dev_num == 0)
+    {
+      model_id = model_id_at_device;
+      _meta_map[model_id_at_device] = std::shared_ptr<npubin_meta>(meta);
+    }
+    else
+    {
+      _meta_map[model_id_at_device] = _meta_map[model_id];
+    }
+
+    _model_ids[model_id].resize(dev_num + 1);
+    _model_ids[model_id].at(dev_num) = model_id_at_device;
+  }
+
+  // Return the model id for device 0 only
+  return model_id;
+}
+
+void DevContext::unRegisterModel(ModelID model_id)
+{
+  for (uint32_t dev_num = 0; dev_num < _dev_handles.size(); ++dev_num)
+  {
+    const auto model_id_at_device = _model_ids.at(model_id).at(dev_num);
+    const auto &dev_handle = _dev_handles.at(dev_num);
+
+    // Remove meta data
+    _meta_map.erase(model_id_at_device);
+
+    // Unregister Model for each device
+    unregisterNPUmodel(dev_handle, model_id_at_device);
+  }
+  // Remove model IDs
+  _model_ids.erase(model_id);
+}
+
+void DevContext::requestRun(ModelID model_id, input_buffers *input_bufs, tensors_data_info *in_info,
+                            output_buffers *output_bufs, tensors_data_info *out_info,
+                            size_t batch_size)
+{
+  if (batch_size > 1)
+  {
+    if (in_info->num_info != 1)
+    {
+      throw std::runtime_error("Supported only an input that has batch now");
+    }
+    if (out_info->num_info != 1)
+    {
+      throw std::runtime_error("Supported only one output now");
+    }
+
+    if (input_bufs->bufs[0].size % batch_size != 0)
+    {
+      throw std::runtime_error("Invalid batch size. batch size :" + std::to_string(batch_size) +
+                               ", input buffer size : " + std::to_string(input_bufs->bufs[0].size));
+    }
+
+    if (output_bufs->bufs[0].size % batch_size != 0)
+    {
+      throw std::runtime_error(
+        "Invalid batch size. batch size :" + std::to_string(batch_size) +
+        ", output tensor size : " + std::to_string(output_bufs->bufs[0].size));
+    }
+
+    // inputs/outputs for each batch
+    std::vector<input_buffers> in_buffers_vec(batch_size);
+    std::vector<output_buffers> out_buffers_vec(batch_size);
+
+    // Run on thread pool
+    std::vector<std::future<int32_t>> batch_futures;
+    for (uint32_t batch_num = 0; batch_num < batch_size; ++batch_num)
+    {
+      // Enqueue jobs
+      // The in_info and out_info are the same for every batch even though the buffers are split
+      // per batch, so they are used as they are.
+      auto future = _batch_thread_pool->enqueueJob(
+        [batch_size, in_info, out_info,
+         this](uint32_t dev_num, ModelID model_id, const input_buffers *input_bufs,
+               const output_buffers *output_bufs, uint32_t batch_num) -> int32_t {
+          // Set buffers of inputs/outputs for each batch
+          // TODO Support multiple inputs/outputs
+          input_buffers in_batch_buffers;
+          in_batch_buffers.num_buffers = input_bufs->num_buffers;
+          const uint64_t in_batch_offset = input_bufs->bufs[0].size / batch_size;
+          setBufferByBatch(input_bufs->bufs[0], batch_num, in_batch_offset,
+                           &in_batch_buffers.bufs[0]);
+
+          output_buffers out_batch_buffers;
+          out_batch_buffers.num_buffers = output_bufs->num_buffers;
+          const uint64_t out_batch_offset = output_bufs->bufs[0].size / batch_size;
+          setBufferByBatch(output_bufs->bufs[0], batch_num, out_batch_offset,
+                           &out_batch_buffers.bufs[0]);
+
+          try
+          {
+            // dev_num is the same as the thread number in _batch_thread_pool
+            this->runOneBatch(dev_num, model_id, &in_batch_buffers, in_info, &out_batch_buffers,
+                              out_info);
+          }
+          catch (...)
+          {
+            _eptr = std::current_exception();
+          }
+
+          return batch_num;
+        },
+        model_id, input_bufs, output_bufs, batch_num);
+      batch_futures.emplace_back(std::move(future));
+    }
+
+    for (auto &&future : batch_futures)
+    {
+      future.get();
+    }
+
+    if (_eptr)
+    {
+      std::exception_ptr eptr(nullptr);
+      _eptr.swap(eptr);
+      std::rethrow_exception(eptr);
+    }
+  }
+  else
+  {
+    runOneBatch(0, model_id, input_bufs, in_info, output_bufs, out_info);
+  }
+}
+
+void DevContext::runOneBatch(uint32_t dev_num, ModelID model_id, input_buffers *input_bufs,
+                             tensors_data_info *in_info, output_buffers *output_bufs,
+                             tensors_data_info *out_info)
+{
+  const auto &model_id_at_device = _model_ids.at(model_id).at(dev_num);
+
+  const auto meta = _meta_map.at(model_id_at_device);
+  if (meta->input_seg_num != in_info->num_info)
+  {
+    throw std::runtime_error("The number of inputs does not match to model input seg num");
+  }
+
+  if (meta->output_seg_num != out_info->num_info)
+  {
+    throw std::runtime_error("The number of outputs does not match to model output seg num");
+  }
+
+  const auto &dev_handle = _dev_handles.at(dev_num);
+  int req_id;
+
+  if (auto error_code = createNPU_request(dev_handle, model_id_at_device, &req_id))
+  {
+    throw std::runtime_error("Unable to create NPU request with model id (" +
+                             std::to_string(model_id_at_device) + ")" +
+                             " error code : " + std::to_string(error_code));
+  }
+
+  if (auto error_code =
+        setNPU_requestData(dev_handle, req_id, input_bufs, in_info, output_bufs, out_info))
+  {
+    removeNPU_request(dev_handle, req_id);
+    throw std::runtime_error("Unable to create NPU request for model id (" +
+                             std::to_string(model_id_at_device) + ")" +
+                             " error code : " + std::to_string(error_code));
+  }
+
+  // NOTE submitNPU_request may not be thread-safe; it rarely hangs (becomes unresponsive).
+  //      Ultimately, to solve this problem, we have to either use another thread-safe API or
+  //      make submitNPU_request thread-safe, but both take time.
+  //      As a workaround, allow the hanging thread for now.
+  // TODO Change submitNPU_request to be thread-safe or replaced with other thread-safe API
+  std::packaged_task<int(npudev_h, int)> task(submitNPU_request);
+  auto f = task.get_future();
+  std::thread thread_submit_request(std::move(task), dev_handle, req_id);
+  auto status = f.wait_until(std::chrono::system_clock::now() + std::chrono::seconds(60));
+  if (status == std::future_status::timeout)
+  {
+    // There is no way to terminate hanging submitNPU_request from the outside.
+    // If a hanging thread is detached, it will remain as a hanging thread. Even so, it's better
+    // than having the main thread hanging.
+    thread_submit_request.detach();
+
+    // TODO Enable removeNPU_request after resolving hanging.
+    // removeNPU_request(dev_handle, req_id);
+    throw std::runtime_error("The npu API \"submitNPU_request\" timeout");
+  }
+
+  auto error_code = f.get();
+  thread_submit_request.join();
+  if (error_code != 0)
+  {
+    removeNPU_request(dev_handle, req_id);
+    throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) +
+                             ")" + " error code : " + std::to_string(error_code));
+  }
+
+  if (auto error_code = removeNPU_request(dev_handle, req_id))
+  {
+    throw std::runtime_error("Unable to remove NPU request with req id (" + std::to_string(req_id) +
+                             ")" + " error code : " + std::to_string(error_code));
+  }
+}
+
+void DevContext::setBufferByBatch(const generic_buffer &origin_buf, uint32_t batch_num,
+                                  uint64_t batch_offset, generic_buffer *batch_buf)
+{
+  batch_buf->addr = reinterpret_cast<uint8_t *>(origin_buf.addr) + batch_num * batch_offset;
+  batch_buf->size = batch_offset;
+  batch_buf->type = BUFFER_MAPPED;
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
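(Illustrative note, not part of the patch) The batch split done by requestRun()/setBufferByBatch() above is plain offset arithmetic: each per-batch buffer starts at base address + batch_num * (total size / batch_size) and spans total size / batch_size bytes. A standalone sketch of that arithmetic using only the standard library (no libnpuhost types):

#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
  const uint64_t total_size = 4096; // e.g., a batched input buffer of 4096 bytes
  const uint64_t batch_size = 4;
  assert(total_size % batch_size == 0); // requestRun() rejects sizes that do not divide evenly

  std::vector<uint8_t> buffer(total_size);
  const uint64_t batch_offset = total_size / batch_size; // 1024 bytes per batch

  for (uint64_t batch_num = 0; batch_num < batch_size; ++batch_num)
  {
    // Same arithmetic as setBufferByBatch(): addr = base + batch_num * batch_offset
    const uint8_t *batch_addr = buffer.data() + batch_num * batch_offset;
    std::cout << "batch " << batch_num << ": offset " << (batch_addr - buffer.data()) << ", size "
              << batch_offset << std::endl;
  }
  return 0;
}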
index a7dbd7a..cd8de97 100644 (file)
 #ifndef __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__
 #define __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__
 
+#include "BatchThreadPool.h"
+
 #include <libnpuhost.h>
+#include <memory>
+#include <string>
+#include <unordered_map>
 
 namespace onert
 {
@@ -26,103 +31,117 @@ namespace backend
 namespace trix
 {
 
+using ModelID = uint32_t;
+
+/**
+ * @brief NPU device context of trix backend
+ *
+ */
 class DevContext
 {
 public:
-  DevContext()
-  {
-    auto device_count = getnumNPUdeviceByType(NPUCOND_TRIV2_CONN_SOCIP);
-    // TODO: x64 platform has 3 cores. We do not support more that 2 cores for now.
-    if (device_count > 2)
-    {
-      device_count = 2;
-    }
-
-    if (device_count <= 0)
-    {
-      throw std::runtime_error("Unable to find TRIX NPU device");
-    }
-
-    for (int i = 0; i < device_count; i++)
-    {
-      npudev_h h;
-      if (getNPUdeviceByType(&h, NPUCOND_TRIV2_CONN_SOCIP, i) < 0)
-      {
-        throw std::runtime_error("Failed to get TRIX NPU device handle");
-      }
-      _dev_handles.push_back(h);
-    }
-  }
-
-  ~DevContext()
-  {
-    for (auto h : _dev_handles)
-    {
-      if (h != nullptr)
-      {
-        unregisterNPUmodel_all(h);
-        putNPUdevice(h);
-      }
-    }
-  }
-
-  npudev_h getDev(int i) { return _dev_handles[i]; }
-  int getDevSize() { return _dev_handles.size(); }
-
-  template <typename T> void setDataInfo(tensors_data_info *info, std::vector<T *> &tensors)
-  {
-    info->num_info = static_cast<uint32_t>(tensors.size());
-
-    for (uint32_t idx = 0; idx < info->num_info; ++idx)
-    {
-      info->info[idx].layout = convertDataLayout(tensors[idx]->layout());
-      info->info[idx].type = convertDataType(tensors[idx]->data_type());
-    }
-  }
-
-  template <typename T>
-  void setBuffer(generic_buffers *buf, std::vector<T *> &tensors, int batch_size, int batch_index)
-  {
-    buf->num_buffers = static_cast<uint32_t>(tensors.size());
-
-    for (uint32_t idx = 0; idx < buf->num_buffers; ++idx)
-    {
-      buf->bufs[idx].size = static_cast<uint64_t>(tensors[idx]->total_size() / batch_size);
-      buf->bufs[idx].addr = tensors[idx]->buffer() + (batch_index * buf->bufs[idx].size);
-      buf->bufs[idx].type = BUFFER_MAPPED;
-    }
-  }
+  /**
+   * @brief Construct a new DevContext object
+   *
+   */
+  DevContext();
+
+  /**
+   * @brief Destroy the DevContext object
+   *
+   */
+  ~DevContext();
+
+  DevContext(const DevContext &) = delete;
+  DevContext &operator=(const DevContext &) = delete;
+
+  /**
+   * @brief Register a trix model for all NPU devices
+   *
+   * @param model_file_path File path of a trix model
+   * @return ModelID Internal ID of the trix model
+   */
+  ModelID registerModel(const std::string &model_file_path);
+
+  /**
+   * @brief Unregister a trix model
+   *
+   * @param model_id Internal ID of the trix model to be unregistered
+   */
+  void unRegisterModel(ModelID model_id);
+
+  /**
+   * @brief Request a trix model to be run on NPU
+   *
+   * @param model_id    Internal ID of a trix model
+   * @param input_bufs  Buffer data of inputs
+   * @param in_info     Data info of inputs
+   * @param output_bufs Buffer data of outputs
+   * @param out_info    Data info of outputs
+   * @param batch_size  Batch size
+   */
+  void requestRun(ModelID model_id, input_buffers *input_bufs, tensors_data_info *in_info,
+                  output_buffers *output_bufs, tensors_data_info *out_info, size_t batch_size);
 
 private:
-  data_layout convertDataLayout(const ir::Layout layout)
-  {
-    switch (layout)
-    {
-      case ir::Layout::NCHW:
-        return DATA_LAYOUT_NCHW;
-      case ir::Layout::NHWC:
-        return DATA_LAYOUT_NHWC;
-      default:
-        throw std::runtime_error("Unknown Layout");
-    }
-  }
-
-  data_type convertDataType(const ir::DataType type)
-  {
-    switch (type)
-    {
-      case ir::DataType::QUANT_UINT8_ASYMM:
-        return DATA_TYPE_QASYMM8;
-      case ir::DataType::QUANT_INT16_SYMM:
-        return DATA_TYPE_QSYMM16;
-      default:
-        throw std::runtime_error("Unsupported data type");
-    }
-  }
+  /**
+   * @brief Request one batch of a trix model to be run on an NPU device
+   *
+   * @param dev_num     Device number
+   * @param model_id    Internal ID of a trix model
+   * @param input_bufs  Buffer data of inputs
+   * @param in_info     Data info of inputs
+   * @param output_bufs Buffer data of outputs
+   * @param out_info    Data info of outputs
+   */
+  void runOneBatch(uint32_t dev_num, ModelID model_id, input_buffers *input_bufs,
+                   tensors_data_info *in_info, output_buffers *output_bufs,
+                   tensors_data_info *out_info);
+
+  /**
+   * @brief Set the buffer object by batch
+   *
+   * @param origin_buf   Buffer object that has all batches
+   * @param batch_num    Batch number
+   * @param batch_offset Size of a batch
+   * @param batch_buf    One batch buffer object to be set
+   */
+  void setBufferByBatch(const generic_buffer &origin_buf, uint32_t batch_num, uint64_t batch_offset,
+                        generic_buffer *batch_buf);
 
 private:
-  // NPU device handles
+  /**
+   * @brief NPU device handles
+   *
+   */
   std::vector<npudev_h> _dev_handles;
+
+  /**
+   * @brief Threadpool for batch-by-batch multi-threading
+   *
+   */
+  std::unique_ptr<BatchThreadPool> _batch_thread_pool;
+
+  // TODO Change key to internal trix model context(?) if it is needed
+  /**
+   * @brief Map for ID of models
+   *        Internal Model ID : Model ID array for each device
+   *
+   */
+  std::unordered_map<ModelID, std::vector<uint32_t>> _model_ids;
+
+  /**
+   * @brief Map for meta data
+   *        Model ID at each device : meta data
+   *
+   */
+  std::unordered_map<uint32_t, std::shared_ptr<npubin_meta>> _meta_map;
+
+  /**
+   * @brief Exception pointer captured within threads
+   *
+   */
+  std::exception_ptr _eptr;
 };
 
 } // namespace trix
index 68e6840..2783bd7 100644 (file)
@@ -61,11 +61,11 @@ void KernelGenerator::visit(const ir::operation::Bulk &node)
   using ir::operation::Bulk;
 
   std::vector<IPortableTensor *> output_tensors;
-  for (auto &ofm_idx : node.getOutputs())
+  for (const auto &ofm_idx : node.getOutputs())
     output_tensors.emplace_back(_tensor_reg->getPortableTensor(ofm_idx));
 
   std::vector<const IPortableTensor *> input_tensors;
-  for (auto &ifm_idx : node.getInputs())
+  for (const auto &ifm_idx : node.getInputs())
     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
 
   // parameters
index 3c49da9..db5c81b 100644 (file)
  */
 
 #include "BulkLayer.h"
-#include <util/logging.h>
 
-#include <libnpuhost.h>
-#include <future>
+#include "../Convert.h"
 
 namespace onert
 {
@@ -29,12 +27,12 @@ namespace trix
 namespace ops
 {
 
-BulkLayer::BulkLayer() : _inputs(), _outputs(), _model_id(0), _meta(nullptr), _dev_context(nullptr)
+BulkLayer::BulkLayer() : _inputs(), _outputs(), _model_id(0), _dev_context(nullptr)
 {
   // DO NOTHING
 }
 
-BulkLayer::~BulkLayer() { free(_meta); }
+BulkLayer::~BulkLayer() { _dev_context->unRegisterModel(_model_id); }
 
 void BulkLayer::configure(const std::vector<const IPortableTensor *> &inputs,
                           std::vector<IPortableTensor *> &outputs, std::string binary_path,
@@ -43,133 +41,28 @@ void BulkLayer::configure(const std::vector<const IPortableTensor *> &inputs,
   _inputs = inputs;
   _outputs = outputs;
   _dev_context = dev_context;
-
-  _meta = getNPUmodel_metadata(binary_path.c_str(), false);
-  if (_meta == nullptr)
-  {
-    throw std::runtime_error("Unable to extract the model metadata");
-  }
-
-  _model_id.resize(_dev_context->getDevSize());
-
-  generic_buffer model_file;
-  model_file.type = BUFFER_FILE;
-  model_file.filepath = binary_path.c_str();
-  model_file.size = _meta->size;
-
-  for (int i = 0; i < _dev_context->getDevSize(); i++)
-  {
-    if (registerNPUmodel(dev_context->getDev(i), &model_file, &_model_id[i]) < 0)
-    {
-      throw std::runtime_error("Failed to register npu model");
-    }
-  }
-}
-
-void single_job(npudev_h dev, int req_id, input_buffers *input_buf, tensors_data_info *in_info,
-                output_buffers *output_buf, tensors_data_info *out_info)
-{
-  if (setNPU_requestData(dev, req_id, input_buf, in_info, output_buf, out_info))
-  {
-    throw std::runtime_error("Unable to create NPU request for red_id (" + std::to_string(req_id) +
-                             ")");
-  }
-
-  if (submitNPU_request(dev, req_id))
-  {
-    throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) +
-                             ")");
-  }
+  _model_id = _dev_context->registerModel(binary_path);
 }
 
 void BulkLayer::run()
 {
-  // TODO: Remove too many assumption
-  // We assume user wants batch execution if user's input size is multiples of model's input size
-  int user_input_batch = (_inputs[0]->get_info().shape()).dim(0);
-  int model_input_batch = _meta->input_seg_dims[0][0];
-  int batch_size = user_input_batch / model_input_batch;
-  bool is_batch_execution = (batch_size != 1 ? true : false);
-
-  std::vector<int> req_id(_dev_context->getDevSize());
-
-  for (int i = 0; i < _dev_context->getDevSize(); i++)
-  {
-    if (createNPU_request(_dev_context->getDev(i), _model_id[i], &req_id[i]))
-    {
-      throw std::runtime_error("Unable to create NPU request with model id (" +
-                               std::to_string(_model_id[i]) + ")");
-    }
-  }
-
-  if (_meta->input_seg_num != _inputs.size())
-  {
-    throw std::runtime_error("input size does not match to model input seg num");
-  }
-
-  if (_meta->output_seg_num != _outputs.size())
-  {
-    throw std::runtime_error("output size does not match to model output seg num");
-  }
-
   tensors_data_info in_info;
   tensors_data_info out_info;
-  _dev_context->setDataInfo<const IPortableTensor>(&in_info, _inputs);
-  _dev_context->setDataInfo<IPortableTensor>(&out_info, _outputs);
+  setDataInfo(_inputs, &in_info);
+  setDataInfo(_outputs, &out_info);
 
-  std::vector<input_buffers> input_buf;
-  std::vector<output_buffers> output_buf;
-  input_buf.resize(_dev_context->getDevSize());
-  output_buf.resize(_dev_context->getDevSize());
-
-  std::vector<std::future<void>> f(_dev_context->getDevSize());
-
-  const int num_cores = _dev_context->getDevSize();
-  if (is_batch_execution)
-  {
-    // TODO: Support for general number of cores(>2)
-    // Here we assume that 2 trix cores
-    for (int i = 0; i < (batch_size); i = i + num_cores)
-    {
-      for (int core = 0; core < num_cores; core++)
-      {
-        _dev_context->setBuffer<const IPortableTensor>(&input_buf[core], _inputs, batch_size,
-                                                       i + core);
-        _dev_context->setBuffer<IPortableTensor>(&output_buf[core], _outputs, batch_size, i + core);
-      }
-      for (int core = 0; core < num_cores; core++)
-      {
-
-        if (i + core < batch_size)
-        {
-          f[core] =
-            std::async(std::launch::async, &single_job, _dev_context->getDev(core), req_id[core],
-                       &input_buf[core], &in_info, &output_buf[core], &out_info);
-        }
-      }
-      for (int core = 0; core < num_cores; core++)
-      {
-        f[core].wait();
-      }
-    }
-  }
-  else
-  {
-    _dev_context->setBuffer<const IPortableTensor>(&input_buf[0], _inputs, batch_size, 0);
-    _dev_context->setBuffer<IPortableTensor>(&output_buf[0], _outputs, batch_size, 0);
-
-    single_job(_dev_context->getDev(0), req_id[0], &input_buf[0], &in_info, &output_buf[0],
-               &out_info);
-  }
+  input_buffers input_bufs;
+  output_buffers output_bufs;
+  setBuffers(_inputs, &input_bufs);
+  setBuffers(_outputs, &output_bufs);
 
-  for (int i = 0; i < _dev_context->getDevSize(); i++)
+  size_t batch_size = 1;
+  // TODO Remove this assumption
+  if (_inputs.size() == 1 && _outputs.size() == 1 && _inputs.at(0)->getShape().dim(0) > 1)
   {
-    if (removeNPU_request(_dev_context->getDev(i), req_id[i]))
-    {
-      throw std::runtime_error("Unable to remove NPU request with req id (" +
-                               std::to_string(req_id[i]) + ")");
-    }
+    batch_size = _inputs.at(0)->getShape().dim(0);
   }
+  _dev_context->requestRun(_model_id, &input_bufs, &in_info, &output_bufs, &out_info, batch_size);
 }
 
 void BulkLayer::prepare()
index 614c0f7..6590b69 100644 (file)
@@ -50,8 +50,7 @@ private:
   std::vector<const IPortableTensor *> _inputs;
   std::vector<IPortableTensor *> _outputs;
 
-  std::vector<uint32_t> _model_id;
-  npubin_meta *_meta;
+  ModelID _model_id;
   std::shared_ptr<DevContext> _dev_context;
 };
 
index 42fffb6..c52e275 100644 (file)
@@ -50,7 +50,7 @@ FunctionMap BackendContext::genKernels()
     .operands()
     .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
 
-  for (auto &it : ret)
+  for (auto &&it : ret)
   {
     auto &fn_seq = it.second;
     fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
index 87c7a13..8041ab5 100644 (file)
@@ -57,4 +57,4 @@ target_link_libraries(${TEST_ONERT_CORE} nnfw_coverage)
 target_link_libraries(${TEST_ONERT_CORE} gtest gtest_main dl ${LIB_PTHREAD})
 
 add_test(${TEST_ONERT_CORE} ${TEST_ONERT_CORE})
-install(TARGETS ${TEST_ONERT_CORE} DESTINATION unittest_standalone)
+install(TARGETS ${TEST_ONERT_CORE} DESTINATION unittest)
index cf2da4c..970a9f7 100644 (file)
@@ -84,19 +84,23 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
     tensor_builder->notifyFirstUse(ind);
   }
 
-  for (auto &pair : def_map)
+  for (const auto &pair : def_map)
   {
-    if (pair.second == 0)
-      tensor_builder->notifyFirstUse(pair.first);
+    const auto &ind = pair.first;
+    const auto def_count = pair.second;
+    if (def_count == 0)
+      tensor_builder->notifyFirstUse(ind);
   }
 
   // This is a workaround to keep the operands over the execution
   // (the operands look like they are unused)
   std::vector<ir::OperandIndex> operands_last_until_end;
-  for (auto &pair : uses_map)
+  for (const auto &pair : uses_map)
   {
-    if (pair.second == 0)
-      operands_last_until_end.push_back(pair.first);
+    const auto &ind = pair.first;
+    const auto use_count = pair.second;
+    if (use_count == 0)
+      operands_last_until_end.push_back(ind);
   }
 
   // At each operation,
@@ -161,7 +165,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
     }
   }
 
-  for (auto &ind : operands_last_until_end)
+  for (const auto &ind : operands_last_until_end)
   {
     tensor_builder->notifyLastUse(ind);
   }
index f05d63c..9a86f40 100644 (file)
 #ifndef __ONERT_COMPILER_COMPILE_H_
 #define __ONERT_COMPILER_COMPILE_H_
 
+#include "CompilerOptions.h"
+#include "ICompiler.h"
 #include "ir/NNPkg.h"
-#include "exec/Executors.h"
-#include "util/TracingCtx.h"
 
 namespace onert
 {
-
 namespace compiler
 {
 
-enum class State
-{
-  CREATED, // Before compilation
-  COMPILED // Success compilation
-};
-
-struct ManualSchedulerOptions
-{
-public:
-  void setBackendMap(const std::string &str);
-
-public:
-  std::string backend_for_all;
-  std::unordered_map<ir::OpCode, std::string> opcode_to_backend;
-  std::unordered_map<ir::OperationIndex, std::string> index_to_backend;
-};
-
-struct PartialGraphOptions
-{
-  std::unordered_map<ir::OperationIndex, ir::SubgraphIndex> index_to_graph;
-};
-
-class CompilerOptions
-{
-public:
-  // Set default values for CompilerOptions
-  // All these default values should not be fetched from Env, when we stop supporting Android NNAPI.
-  static std::unique_ptr<CompilerOptions> fromGlobalConfig();
-
-public:
-  // GENERAL OPTIONS
-  std::vector<std::string> backend_list;
-
-  // OPTIONS ONLY FOR DEBUGGING/PROFILING
-  std::string trace_filepath; //< File path to save trace records
-  int graph_dump_level;       //< Graph dump level, values between 0 and 2 are valid
-  std::string executor;       //< Executor name to use
-  ManualSchedulerOptions manual_scheduler_options; //< Options for ManualScheduler
-  bool he_scheduler;      //< HEScheduler if true, ManualScheduler otherwise
-  bool he_profiling_mode; //< Whether HEScheduler profiling mode ON/OFF
-  bool disable_compile;   //< Run with Interpreter if true, try compilation otherwise
-  bool fp16_enable;       //< Whether fp16 mode ON/OFF
-  PartialGraphOptions partial_graph_options;
-};
-
-struct CompilerArtifact
-{
-  CompilerArtifact(void) = delete;
-  CompilerArtifact(std::shared_ptr<exec::Executors> executors,
-                   std::unique_ptr<const util::TracingCtx> tracing_ctx)
-    : _executors{executors}, _tracing_ctx{std::move(tracing_ctx)} {};
-
-  std::shared_ptr<exec::Executors> _executors;
-  std::unique_ptr<const util::TracingCtx> _tracing_ctx;
-};
-
 /**
  * @brief Class to compile NN package
  */
-class Compiler
+class Compiler : public ICompiler
 {
 public:
   /**
@@ -109,55 +52,25 @@ public:
   Compiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
            std::vector<std::unique_ptr<CompilerOptions>> &copts);
 
-public:
   /**
-   * @brief   Do compilation with the options
-   *
-   * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation
+   * @brief Destroy the Compiler object
    */
-  std::shared_ptr<CompilerArtifact> compile(void);
+  ~Compiler() = default;
 
+public:
   /**
    * @brief   Do compilation with the options
    *
-   * @return std::vector<std::shared_ptr<CompilerArtifact>> Executors as a result of compilation
-   * for pipeline
-   */
-  std::vector<std::shared_ptr<CompilerArtifact>> compile(const char *package_file_path,
-                                                         const char *map_file_path);
-
-  State state(void) const { return _state; }
-
-  /**
-   * @brief   Allow to compute float32 using float16 data type
-   */
-  void enableToFp16();
-
-  /**
-   * @brief   Build the partial graphs to compile with original graph
+   * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation
    */
-  bool buildPartialGraph(uint32_t num_graphs);
-
-private:
-  void checkProfilerConditions();
-  std::shared_ptr<ir::Graph> &primary_subgraph()
-  {
-    return _nnpkg->primary_model()->at(ir::SubgraphIndex{0});
-  }
+  std::shared_ptr<CompilerArtifact> compile(void);
 
 private:
-  std::shared_ptr<ir::NNPkg> _nnpkg;
-  // NOTE These executors does not have duplicated subgraph. This mean they do not allow support
-  // subgraphs being called recursively because data of non-constant tensor of parent executor will
-  // be updated by child executor. If you want to support subgraphs being called recursively, you
-  // have to add allocate non-constant tensor memory of executors in execution time when each
-  // subgraph is called.
-  State _state;
-  std::vector<CompilerOptions *> _voptions;
+  std::shared_ptr<ir::Model> _model;
+  CompilerOptions *_options;
 };
 
 } // namespace compiler
-
 } // namespace onert
 
 #endif // __ONERT_COMPILER_COMPILE_H_
diff --git a/runtime/onert/core/include/compiler/CompilerFactory.h b/runtime/onert/core/include/compiler/CompilerFactory.h
new file mode 100644 (file)
index 0000000..4894366
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_COMPILER_FACTORY_H__
+#define __ONERT_COMPILER_COMPILER_FACTORY_H__
+
+#include "ICompiler.h"
+#include "CompilerOptions.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+// TODO Support registering and using compiler plugins
+class CompilerFactory
+{
+public:
+  static CompilerFactory &get();
+
+public:
+  std::unique_ptr<ICompiler> create(const std::shared_ptr<ir::NNPkg> &nnpkg,
+                                    std::vector<std::unique_ptr<CompilerOptions>> &copts);
+
+private:
+  // It is not allowed to use CompilerFactory without get()
+  CompilerFactory() = default;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_COMPILER_FACTORY_H__
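(Illustrative note, not part of the patch) A rough sketch of how the CompilerFactory/ICompiler interface above might be driven, assuming the onert core headers are on the include path and the program is linked against the onert core library; loadNNPkg() is a hypothetical stand-in for however the caller builds an ir::NNPkg and is not part of this patch:

#include "compiler/CompilerFactory.h"
#include "compiler/CompilerOptions.h"
#include "ir/NNPkg.h"

#include <memory>
#include <vector>

// Hypothetical helper: obtain an NNPkg from somewhere (e.g., a loaded model). Not part of this patch.
std::shared_ptr<onert::ir::NNPkg> loadNNPkg();

int main()
{
  auto nnpkg = loadNNPkg();

  std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> copts;
  copts.emplace_back(onert::compiler::CompilerOptions::fromGlobalConfig());

  // create() picks a concrete ICompiler; compile() returns the executor set and tracing context.
  auto compiler = onert::compiler::CompilerFactory::get().create(nnpkg, copts);
  std::shared_ptr<onert::compiler::CompilerArtifact> artifact = compiler->compile();
  (void)artifact;
  return 0;
}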
diff --git a/runtime/onert/core/include/compiler/CompilerOptions.h b/runtime/onert/core/include/compiler/CompilerOptions.h
new file mode 100644 (file)
index 0000000..bbe15fc
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_COMPILER_OPTIONS_H_
+#define __ONERT_COMPILER_COMPILER_OPTIONS_H_
+
+#include "ir/OpCode.h"
+#include "ir/Index.h"
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace onert
+{
+namespace compiler
+{
+
+struct ManualSchedulerOptions
+{
+public:
+  void setBackendMap(const std::string &str);
+
+public:
+  std::string backend_for_all;
+  std::unordered_map<ir::OpCode, std::string> opcode_to_backend;
+  std::unordered_map<ir::OperationIndex, std::string> index_to_backend;
+};
+
+class CompilerOptions
+{
+public:
+  /**
+   * @brief   Set default values for CompilerOptions
+   * @return  Generated CompilerOptions
+   *
+   * @note    All these default values should not be fetched from Env
+   *          when we stop supporting Android NNAPI.
+   */
+  static std::unique_ptr<CompilerOptions> fromGlobalConfig();
+
+  /**
+   * @brief Allow to compute float32 using float16 data type
+   */
+  void enableToFp16() { fp16_enable = true; }
+
+  /**
+   * @brief Force default values of CompilerOptions for correct compilations
+   *
+   * @note  This should be called after CompilerOptions setting is finished
+   *        to prevent value overwriting
+   */
+  void forceInternalOptions();
+
+  /**
+   * @brief Print option value
+   */
+  void verboseOptions();
+
+public:
+  // GENERAL OPTIONS
+  std::vector<std::string> backend_list;
+
+  // OPTIONS ONLY FOR DEBUGGING/PROFILING
+  std::string trace_filepath; //< File path to save trace records
+  int graph_dump_level;       //< Graph dump level, values between 0 and 2 are valid
+  std::string executor;       //< Executor name to use
+  ManualSchedulerOptions manual_scheduler_options; //< Options for ManualScheduler
+  bool he_scheduler;      //< HEScheduler if true, ManualScheduler otherwise
+  bool he_profiling_mode; //< Whether HEScheduler profiling mode ON/OFF
+  bool fp16_enable;       //< Whether fp16 mode ON/OFF
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_COMPILER_OPTIONS_H_
diff --git a/runtime/onert/core/include/compiler/ICompiler.h b/runtime/onert/core/include/compiler/ICompiler.h
new file mode 100644 (file)
index 0000000..255e050
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file  ICompiler.h
+ * @brief This file contains ICompiler class to define and run compilation phase
+ */
+
+#ifndef __ONERT_COMPILER_I_COMPILER_H_
+#define __ONERT_COMPILER_I_COMPILER_H_
+
+#include "exec/IExecutors.h"
+#include "util/TracingCtx.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+struct CompilerArtifact
+{
+  CompilerArtifact(void) = delete;
+  CompilerArtifact(std::shared_ptr<exec::IExecutors> executors,
+                   std::unique_ptr<const util::TracingCtx> tracing_ctx)
+    : _executors{executors}, _tracing_ctx{std::move(tracing_ctx)} {};
+
+  std::shared_ptr<exec::IExecutors> _executors;
+  std::unique_ptr<const util::TracingCtx> _tracing_ctx;
+};
+
+class ICompiler
+{
+public:
+  /**
+   * @brief Virtual ICompiler destructor
+   * @note  Required so that derived class destructors are called
+   */
+  virtual ~ICompiler() = default;
+
+  /**
+   * @brief   Do compilation
+   * @return  std::shared_ptr<CompilerArtifact> Executors as a result of compilation
+   */
+  virtual std::shared_ptr<CompilerArtifact> compile(void) = 0;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_I_COMPILER_H_
index 7264f2a..e9f0ae0 100644 (file)
@@ -36,13 +36,9 @@ class LoweredGraph
 {
 public:
   LoweredGraph(const ir::Graph &graph, const compiler::CompilerOptions &options);
-  LoweredGraph(const ir::Graph &parent_graph, const ir::Graph &graph,
-               const compiler::CompilerOptions &options);
 
   ir::Graph &graph() { return _graph; }
   const ir::Graph &graph() const { return _graph; }
-  ir::Graph &parent_graph() { return _parent_graph; }
-  const ir::Graph &parent_graph() const { return _parent_graph; }
   const compiler::GraphLowerInfo &lower_info() const { return _lower_info_map; }
   compiler::GraphLowerInfo &lower_info() { return _lower_info_map; }
   std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks() { return _indexed_ranks; }
@@ -69,7 +65,6 @@ private:
    *          It allows the original graph can be compiled multiple times.
    */
   ir::Graph _graph;
-  ir::Graph _parent_graph;
   std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
   compiler::GraphLowerInfo _lower_info_map;
   ir::OperationIndexMap<bool> _has_dynamic_tensor_map;
index f701dc2..94d6ba1 100644 (file)
@@ -101,6 +101,15 @@ public:
 
   void dump();
 
+  /**
+   * @brief     Create a lowered model shape inferer map
+   * @param[in] lowered_subgs lowered model subgraph map
+   * @return    Shape inferer map
+   */
+  static std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>>
+  createStaticShapeInferers(
+    const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<LoweredGraph>> &lowered_subgs);
+
 private:
   bool checkDynamicInput(const ir::Operation &op);
   bool checkDynamicOutput(const ir::Operation &op);
index 1e8083c..ba3edcd 100644 (file)
@@ -22,7 +22,7 @@
 #define __ONERT_EXEC_EXECUTION_H__
 
 #include "ir/Layout.h"
-#include "exec/Executors.h"
+#include "exec/IExecutors.h"
 #include "IODescription.h"
 
 #include <thread>
@@ -46,16 +46,15 @@ public:
    * @brief     Construct a new Execution object
    * @param[in] executor  Model executor
    */
-  Execution(const std::shared_ptr<Executors> &executors);
+  Execution(const std::shared_ptr<IExecutors> &executors);
 
 public:
   /**
    * @brief   Returns primary graph object
    * @return  Graph object
    */
-  const ir::Graph &primary_subgraph() const { return primary_executor()->graph(); }
+  const ir::Graph &primary_subgraph() const { return entryExecutor()->graph(); }
 
-  const ir::Graph &primary_parentgraph() const { return primary_executor()->parent_graph(); }
   /**
    * @brief     Change input shape
    * @param[in] index   Input index
@@ -146,121 +145,15 @@ public:
   ir::Shape getInputShape(ir::IOIndex ind) const;
   ir::Shape getOutputShape(ir::IOIndex ind) const;
 
-  //
-  // Experimental API
-  //
-
-  // accessor
-  std::vector<
-    std::tuple<std::shared_ptr<onert::exec::Execution>, onert::ir::IOIndex, onert::ir::IOIndex>>
-  getNextExes()
-  {
-    return next_exes;
-  }
-  std::deque<std::pair<IODescription *, uint32_t>> *getAsyncIoDescs() { return &_async_io_descs; }
-  std::deque<std::vector<void *>> *getAsyncResults() { return &_async_results; }
-
-  /**
-   * @brief     Push IO information between related executions into next_exes
-   * @param[in] next   address of next execution
-   * @param[in] o_index  Output index of current execution (it will be the input of next execution)
-   * @param[in] i_index  Input index of next execution
-   */
-  void pushNextExe(std::shared_ptr<onert::exec::Execution> next, onert::ir::IOIndex o_index,
-                   onert::ir::IOIndex i_index)
-  {
-    next_exes.push_back({next, o_index, i_index});
-  }
-
-  /**
-   * @brief     Create New IODescription instance for new inputs outputs
-   * @param[in] index   instance count number
-   */
-  void createNewAsyncDesc(uint32_t count = 0);
-
-  /**
-   * @brief     Set async input data's information
-   * @param[in] index   Input index
-   * @param[in] buffer  Input data's buffer pointer
-   * @param[in] length  Input data's length
-   * @param[in] layout  Input data's data format
-   */
-  void executeAsyncInput(const ir::IOIndex &index, const void *buffer, size_t length,
-                         ir::Layout layout = ir::Layout::NHWC);
-
-  /**
-   * @brief     Set async output data's information
-   * @param[in] index   Output index
-   * @param[in] buffer  Output data's buffer pointer
-   * @param[in] length  Output data's length
-   * @param[in] layout  Output data's data format
-   */
-  void executeAsyncOutput(const ir::IOIndex &index, void *buffer, size_t length,
-                          ir::Layout layout = ir::Layout::NHWC);
-
-  /**
-   * @brief  Async execution
-   * @note   It should be called after setting input and output buffer
-   */
-  void AsyncExecute();
-
-  /**
-   * @brief   Set finish
-   */
-  void setFinish();
-
-  /**
-   * @brief   Check if input queue is empty
-   * @return  @c true if queue is empty, otherwise @c false
-   */
-  bool isEmptyQueue();
-
-  /**
-   * @brief   Wait semaphore to prevent race condition
-   */
-  void asyncIoDescSemWait();
-
-  /**
-   * @brief   Post semaphore to prevent race condition
-   */
-  void asyncIoDescSemPost();
-
-  /**
-   * @brief   Inference
-   * @note    this function provided to the thread for pipelining
-   */
-  void runInference();
-
-  /**
-   * @brief   Check if stop_wait is true
-   * @return  @c true if stop_wait is true, otherwise @c false
-   */
-  bool stopWait(void) const;
-
-  /**
-   * @brief   Set stop_wait to terminate consumer thread
-   */
-  void sholudStop();
-
 private:
-  const std::unique_ptr<IExecutor> &primary_executor() const
-  {
-    return _executors->at(ir::SubgraphIndex{0});
-  };
-  std::unique_ptr<IExecutor> &primary_executor() { return _executors->at(ir::SubgraphIndex{0}); };
+  const IExecutor *entryExecutor() const { return _executors->entryExecutor(); };
+  IExecutor *entryExecutor() { return _executors->entryExecutor(); };
 
 private:
-  const std::shared_ptr<Executors> _executors;
+  const std::shared_ptr<IExecutors> _executors;
   IODescription _io_desc;
-  std::deque<std::pair<IODescription *, uint32_t>> _async_io_descs;
-  sem_t _async_io_descs_sem;
-  std::deque<std::vector<void *>> _async_results;
-  std::vector<
-    std::tuple<std::shared_ptr<onert::exec::Execution>, onert::ir::IOIndex, onert::ir::IOIndex>>
-    next_exes;
   std::unique_ptr<std::thread> _exec_thread;
   bool finished{false};
-  bool stop_wait{false};
 };
 
 } // namespace exec
diff --git a/runtime/onert/core/include/exec/Executors.h b/runtime/onert/core/include/exec/Executors.h
deleted file mode 100644 (file)
index 5adb0ed..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_EXEC_EXECUTORS_H__
-#define __ONERT_EXEC_EXECUTORS_H__
-
-#include "IExecutor.h"
-#include "ir/NNPkg.h"
-
-namespace onert
-{
-namespace exec
-{
-
-/**
- * @brief Class to gather executors
- */
-class Executors
-{
-public:
-  Executors(void) = default;
-  Executors(std::unique_ptr<ir::ModelEdges> model_edges) { _model_edges = std::move(model_edges); }
-  Executors(const Executors &) = delete;
-  Executors(Executors &&) = default;
-
-  // TODO Use Executor index
-  void emplace(ir::SubgraphIndex idx, std::unique_ptr<IExecutor> exec)
-  {
-    _executors.emplace(idx, std::move(exec));
-  }
-
-  std::unique_ptr<IExecutor> &at(ir::SubgraphIndex idx) { return _executors.at(idx); }
-
-  uint32_t inputSize() const;
-
-  uint32_t outputSize() const;
-
-  const ir::OperandInfo inputInfo(const ir::IOIndex &index);
-
-  const ir::OperandInfo outputInfo(const ir::IOIndex &index);
-
-  void execute(const IODescription &desc);
-
-private:
-  void executeEntries(const IODescription &desc);
-
-private:
-  // TODO Use Executor index
-  //      Changing index will effect if/while compile and kernel implementation
-  std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>> _executors;
-  // NOTE _model_edges may use different struct type for executor implementation
-  std::unique_ptr<ir::ModelEdges> _model_edges;
-};
-
-} // namespace exec
-} // namespace onert
-
-#endif // __ONERT_EXEC_EXECUTORS_H__
index 7ff6d8b..a7020d4 100644 (file)
@@ -66,7 +66,7 @@ public:
 
   template <typename T, typename... Args> void wrap(Args &&... args)
   {
-    for (auto &function : _functions)
+    for (auto &&function : _functions)
     {
       function = std::make_unique<T>(std::move(function), args...);
     }
index bb5b5af..46dbcd0 100644 (file)
@@ -46,7 +46,6 @@ namespace onert
 {
 namespace exec
 {
-class IExecutionObserver;
 /**
  * @brief Struct to define interface of Executor
  */
@@ -66,14 +65,7 @@ struct IExecutor
    *
    * @return Graph object
    */
-  virtual const ir::Graph &graph() = 0;
-
-  /**
-   * @brief Returns parent graph object
-   *
-   * @return Graph object
-   */
-  virtual const ir::Graph &parent_graph() = 0;
+  virtual const ir::Graph &graph() const = 0;
 
   /**
    * @brief     Set an ordering on operations
@@ -100,6 +92,13 @@ struct IExecutor
                        const std::vector<backend::IPortableTensor *> &outputs) = 0;
 
   /**
+   * @brief Get input tensor objects
+   *
+   * @return Vector of @c IOTensor
+   */
+  virtual const std::vector<backend::builtin::IOTensor *> &getInputTensors() const = 0;
+
+  /**
    * @brief Get output tensor objects
    *
    * @return Vector of @c IOTensor
diff --git a/runtime/onert/core/include/exec/IExecutors.h b/runtime/onert/core/include/exec/IExecutors.h
new file mode 100644 (file)
index 0000000..013da71
--- /dev/null
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_I_EXECUTORS_H__
+#define __ONERT_EXEC_I_EXECUTORS_H__
+
+#include "IExecutor.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to gather NN package's executor set
+ */
+class IExecutors
+{
+public:
+  /**
+   * @brief Virtual IExecutors destructor
+   * @note  Require derived class destructor
+   */
+  virtual ~IExecutors() = default;
+
+public:
+  /**
+   * @brief     Insert executor in executor set
+   * @param[in] model_index Model index
+   * @param[in] subg_index  Subgraph index
+   * @param[in] exec        Executor to insert
+   *
+   * @todo      Use Executor index
+   */
+  virtual void emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+                       std::unique_ptr<IExecutor> exec) = 0;
+
+  /**
+   * @brief     Return executor of index
+   * @param[in] model_index Model index
+   * @param[in] subg_index  Subgraph index
+   * @return    Executor
+   */
+  virtual IExecutor *at(const ir::ModelIndex &model_index,
+                        const ir::SubgraphIndex &subg_index) const = 0;
+
+  IExecutor *entryExecutor() const { return at(ir::ModelIndex{0}, ir::SubgraphIndex{0}); }
+
+  /**
+   * @brief   Return executor set's number of input
+   * @return  Number of input
+   */
+  virtual uint32_t inputSize() const = 0;
+
+  /**
+   * @brief   Return executor set's number of output
+   * @return  Number of output
+   */
+  virtual uint32_t outputSize() const = 0;
+
+  /**
+   * @brief     Return NN package input tensor info
+   * @param[in] index Input index
+   * @return    Tensor info
+   */
+  virtual const ir::OperandInfo &inputInfo(const ir::IOIndex &index) const = 0;
+
+  /**
+   * @brief     Return NN package output tensor info
+   * @param[in] index Output index
+   * @return    Tensor info
+   */
+  virtual const ir::OperandInfo &outputInfo(const ir::IOIndex &index) const = 0;
+
+  /**
+   * @brief     Execute NN package executor set
+   * @param[in] desc  Input and output buffer description
+   */
+  virtual void execute(const IODescription &desc) = 0;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_I_EXECUTORS_H__
index 286caf7..1783cdc 100644 (file)
@@ -89,15 +89,6 @@ public:
   void verify(void);
   void removeOperand(const OperandIndex &ind) { _operands.remove(ind); }
   void setLayout(Layout layout) { _layout = layout; }
-  void setPartialModel(const std::shared_ptr<Model> &partial_model)
-  {
-    _partialgraphs = partial_model;
-  }
-  void
-  setTensorName(std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> &tensor_names)
-  {
-    _tensor_names = tensor_names;
-  }
 
 private:
   bool checkOperandsForOperation(const Operation &operation);
@@ -136,29 +127,6 @@ public:
   const Operations &operations() const { return _operations; }
   Operations &operations() { return _operations; }
   Layout layout() const { return _layout; }
-  std::shared_ptr<Model> &partialgraphs() { return _partialgraphs; }
-  std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> &tensor_names()
-  {
-    return _tensor_names;
-  }
-  std::unordered_map<std::string, IOIndex>::const_iterator _name_to_input_begin() const
-  {
-    return _name_to_input.begin();
-  }
-  std::unordered_map<std::string, IOIndex>::const_iterator _name_to_input_end() const
-  {
-    return _name_to_input.end();
-  }
-  std::unordered_map<std::string, IOIndex>::const_iterator _name_to_output_begin() const
-  {
-    return _name_to_output.begin();
-  }
-  std::unordered_map<std::string, IOIndex>::const_iterator _name_to_output_end() const
-  {
-    return _name_to_output.end();
-  }
-  void input_sort() { _inputs.sort(); }
-  void output_sort() { _outputs.sort(); }
 
   // Topological sort
 public:
@@ -173,10 +141,6 @@ private:
   std::unordered_map<std::string, IOIndex> _name_to_output;
   // TFLite and circle's default layout is NHWC;
   Layout _layout{Layout::NHWC};
-
-  // model for partial graphs
-  std::shared_ptr<ir::Model> _partialgraphs;
-  std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> _tensor_names;
 };
 
 } // namespace ir
index f01a4c8..1864c3b 100644 (file)
@@ -36,10 +36,10 @@ struct IOIndexTag;
 using IOIndex = ::onert::util::Index<uint32_t, IOIndexTag>;
 
 struct SubgraphIndexTag;
-using SubgraphIndex = ::onert::util::Index<uint32_t, SubgraphIndexTag>;
+using SubgraphIndex = ::onert::util::Index<uint16_t, SubgraphIndexTag>;
 
 struct ModelIndexTag;
-using ModelIndex = ::onert::util::Index<uint32_t, ModelIndexTag>;
+using ModelIndex = ::onert::util::Index<uint16_t, ModelIndexTag>;
 
 template <typename IndexType>
 std::ostream &_index_print_impl(std::ostream &o, const std::string &prefix, IndexType index)
index d9f825e..b23745d 100644 (file)
@@ -21,6 +21,7 @@
 #include <unordered_set>
 #include <vector>
 
+#include "ir/Graph.h"
 #include "ir/Index.h"
 #include "ir/Model.h"
 
@@ -89,7 +90,7 @@ public:
   ~NNPkg() = default;
 
   NNPkg(std::shared_ptr<Model> model) { _models[ModelIndex{0}] = model; }
-  std::shared_ptr<Model> primary_model() { return _models.at(onert::ir::ModelIndex{0}); }
+  std::shared_ptr<Model> primary_model() const { return _models.at(onert::ir::ModelIndex{0}); }
 
   /**
    * @brief Put model at index
@@ -180,6 +181,91 @@ public:
    */
   const ModelEdges &model_edges() { return _edges; }
 
+  /**
+   * @brief Verify NNPkg
+   *
+   */
+  void verify(void)
+  {
+    // Verify edges information
+    //
+    // Only duplicates of nnpkg output and Edge `from` are possible.
+    // | Whether duplicates are possible   | Edge `to` | Edge `from` |
+    // | nnpkg input  (input of subgraph)  | X (*1)    | X (*2)      |
+    // | nnpkg output (output of subgraph) | X (*2)    | O           |
+    // *1. The sources that determine each buffer's values are different.
+    //    - nnpkg input : set by the user
+    //    - Edge `to`   : output of another subgraph
+    // *2. The `IOIndex` sets of a subgraph's inputs and outputs are distinct.
+    //
+    for (const auto &edge : _edges.edges)
+    {
+      if (std::find(_edges.pkg_inputs.begin(), _edges.pkg_inputs.end(), edge.to) !=
+          _edges.pkg_inputs.end())
+      {
+        throw std::runtime_error{
+          "Invalid edge information. NNPkg inputs and Edge `to` cannot be duplicated"};
+      }
+    }
+  }
+
+  // TODO Find a better way to handle single-model and multi-model NNPackage in inputSize(),
+  //      outputSize(), inputInfo(), outputInfo()
+
+  /**
+   * @brief   Get model input size
+   */
+  uint32_t inputSize() const
+  {
+    return _models.size() == 1 ? primary_model()->primary_subgraph()->getInputs().size()
+                               : _edges.pkg_inputs.size();
+  }
+
+  /**
+   * @brief   Get model output size
+   */
+  uint32_t outputSize() const
+  {
+    return _models.size() == 1 ? primary_model()->primary_subgraph()->getOutputs().size()
+                               : _edges.pkg_outputs.size();
+  }
+
+  /**
+   * @brief   Get model input info
+   */
+  OperandInfo &inputInfo(uint32_t index) const
+  {
+    if (_models.size() == 1)
+    {
+      auto const graph = primary_model()->primary_subgraph();
+      auto const operand_index = graph->getInputs().at(index);
+      return graph->operands().at(operand_index).info();
+    }
+
+    auto const &desc = input(index);
+    auto const graph = model(std::get<ModelIndex>(desc))->primary_subgraph();
+    auto const operand_index = graph->getInputs().at(std::get<IOIndex>(desc).value());
+    return graph->operands().at(operand_index).info();
+  }
+
+  /**
+   * @brief   Get model output info
+   */
+  OperandInfo &outputInfo(uint32_t index) const
+  {
+    if (_models.size() == 1)
+    {
+      auto const graph = primary_model()->primary_subgraph();
+      auto const operand_index = graph->getOutputs().at(index);
+      return graph->operands().at(operand_index).info();
+    }
+
+    auto const &desc = output(index);
+    auto const graph = model(std::get<ModelIndex>(desc))->primary_subgraph();
+    auto const operand_index = graph->getOutputs().at(std::get<IOIndex>(desc).value());
+    return graph->operands().at(operand_index).info();
+  }
+
   // TODO: Add iterate() or getter for edges
 
 private:
@@ -190,4 +276,18 @@ private:
 } // namespace ir
 } // namespace onert
 
+namespace std
+{
+
+template <> struct hash<onert::ir::IODesc>
+{
+  size_t operator()(const ::onert::ir::IODesc &iodesc) const noexcept
+  {
+    return (std::get<0>(iodesc).value() << 24) | (std::get<1>(iodesc).value() << 16) |
+           std::get<2>(iodesc).value();
+  }
+};
+
+} // namespace std
+
 #endif // __ONERT_IR_NNPKG_H__
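
The std::hash specialization above packs the model, subgraph, and I/O indices of an IODesc into one value, so package I/O descriptors can be used directly as keys in unordered containers. A small sketch, assuming IODesc is the (ModelIndex, SubgraphIndex, IOIndex) tuple used by ModelEdges:

    #include <unordered_set>
    #include "ir/NNPkg.h"

    // Sketch: membership test for package inputs, relying on the std::hash<IODesc>
    // specialization defined above and std::tuple's operator==.
    bool isPackageInput(const std::unordered_set<onert::ir::IODesc> &pkg_inputs,
                        const onert::ir::IODesc &desc)
    {
      return pkg_inputs.count(desc) != 0;
    }
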
index dd39074..846c3f9 100644 (file)
@@ -19,7 +19,6 @@
 
 #include <initializer_list>
 #include <vector>
-#include <algorithm>
 
 #include "ir/Index.h"
 
@@ -46,12 +45,6 @@ public:
   void append(const OperandIndex &index) { _vec.emplace_back(index); }
   void append(const OperandIndexSequence &l) { _vec.insert(_vec.end(), l.begin(), l.end()); }
 
-  void sort()
-  {
-    std::sort(_vec.begin(), _vec.end(),
-              [](const auto &lhs, const auto &rhs) { return lhs.value() < rhs.value(); });
-  }
-
 public:
   uint32_t size() const { return static_cast<uint32_t>(_vec.size()); }
   const OperandIndex &at(IOIndex set_index) const { return _vec.at(set_index.value()); }
index ec6dd07..cf84e26 100644 (file)
@@ -70,8 +70,8 @@ struct FeatureShape
 struct Shape
 {
 public:
-  static int32_t const UNSPECIFIED_DIM;
-  static int32_t const MAX_RANK;
+  static int32_t const kUnspecifiedDim;
+  static int32_t const kMaxRank;
 
   Shape() = default;
 
@@ -126,7 +126,7 @@ public:
    */
   bool hasUnspecifiedDims() const
   {
-    return (std::find(_dimensions.begin(), _dimensions.end(), UNSPECIFIED_DIM) !=
+    return (std::find(_dimensions.begin(), _dimensions.end(), kUnspecifiedDim) !=
             _dimensions.end());
   }
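
With the renamed constants, kUnspecifiedDim still marks a dynamic dimension and hasUnspecifiedDims() reports whether any dimension is unknown. A short sketch, assuming ir::Shape keeps its initializer-list constructor:

    #include "ir/Shape.h"

    // Sketch: an NHWC shape whose batch dimension is still dynamic.
    bool needsDynamicAllocation()
    {
      onert::ir::Shape shape{onert::ir::Shape::kUnspecifiedDim, 224, 224, 3};
      return shape.hasUnspecifiedDims(); // true until the batch size is resolved
    }
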
 
index 4bbc02a..b9bad1b 100644 (file)
@@ -23,7 +23,6 @@ CONFIG(GRAPH_DOT_DUMP          , int          , "0")
 CONFIG(BACKENDS                , std::string  , "cpu;acl_cl;acl_neon;ruy;xnnpack;gpu_cl;trix;bcq") // FIXME Remove bcq
 CONFIG(OP_BACKEND_ALLOPS       , std::string  , "")
 CONFIG(OP_BACKEND_MAP          , std::string  , "")
-CONFIG(DISABLE_COMPILE         , bool         , "0")
 CONFIG(ONERT_LOG_ENABLE        , bool         , "0")
 CONFIG(CPU_MEMORY_PLANNER      , std::string  , "WIC")
 CONFIG(EXECUTOR                , std::string  , "Linear")
index d3f3dcb..49c5f4c 100644 (file)
@@ -138,6 +138,13 @@ public:
    */
   T value() const { return _index; }
 
+  /**
+   * @brief Return max index value
+   *
+   * @return Maximum valid index value
+   */
+  static T max() { return UNDEFINED - 1; }
+
 private:
   T _index;
 };
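
Since SubgraphIndex and ModelIndex are now 16-bit, max() gives the largest value an index may legally hold (UNDEFINED itself stays reserved). A sketch of a bounds check built on it, assuming the usual explicit value constructor of util::Index:

    #include <cstdint>
    #include <stdexcept>
    #include "ir/Index.h"

    // Sketch: reject subgraph counts that no longer fit the 16-bit SubgraphIndex.
    onert::ir::SubgraphIndex makeSubgraphIndex(uint32_t value)
    {
      if (value > onert::ir::SubgraphIndex::max())
        throw std::runtime_error{"subgraph index out of range"};
      return onert::ir::SubgraphIndex{static_cast<uint16_t>(value)};
    }
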
index 36b6c85..077a4c2 100644 (file)
@@ -202,12 +202,12 @@ public:
     // This implementation is a workaround in case of adding operands while iteration
     std::list<Index> l;
 
-    for (auto &e : _objects)
+    for (const auto &e : _objects)
     {
       l.push_back(e.first);
     }
 
-    for (auto &index : l)
+    for (const auto &index : l)
     {
       fn(index, *_objects[index]);
     }
index 8a4eea3..505f5a9 100644 (file)
@@ -29,9 +29,9 @@
 
 template <size_t from, size_t to, typename Enable = void> struct ForEachDimension
 {
-  template <typename L, typename... Args>
+  template <typename L>
   static void unroll(const onert::ir::Shape &shape, onert::ir::Coordinates &coords,
-                     L &&lambda_function, Args &&... args)
+                     L lambda_function)
   {
     static_assert(from < to, "from must be less than to");
     assert(static_cast<int>(to) <= shape.rank());
@@ -40,8 +40,7 @@ template <size_t from, size_t to, typename Enable = void> struct ForEachDimensio
     for (auto v = 0; v < d; v++)
     {
       coords.set(from, v);
-      ForEachDimension<from + 1, to>::unroll(shape, coords, std::forward<L>(lambda_function),
-                                             std::forward<Args>(args)...);
+      ForEachDimension<from + 1, to>::unroll(shape, coords, lambda_function);
     }
   }
 };
@@ -49,18 +48,17 @@ template <size_t from, size_t to, typename Enable = void> struct ForEachDimensio
 template <size_t from, size_t to>
 struct ForEachDimension<from, to, typename std::enable_if<from == to>::type>
 {
-  template <typename L, typename... Args>
+  template <typename L>
   static void unroll(const onert::ir::Shape &shape, onert::ir::Coordinates &coords,
-                     L &&lambda_function, Args &&... args)
+                     L lambda_function)
   {
     UNUSED_RELEASE(shape);
     assert(static_cast<int>(to) <= shape.rank());
-    lambda_function(coords, std::forward<Args>(args)...);
+    lambda_function(coords);
   }
 };
 
-template <typename L, typename... Args>
-inline void ShapeLoop(const onert::ir::Shape &shape, L &&lambda_function, Args &&... args)
+template <typename L> inline void ShapeLoop(const onert::ir::Shape &shape, L lambda_function)
 {
   assert(shape.rank() > 0);
   for (auto i = 0; i < shape.rank(); ++i)
@@ -73,32 +71,25 @@ inline void ShapeLoop(const onert::ir::Shape &shape, L &&lambda_function, Args &
   {
     case 0:
       coords.set(0, 0);
-      ForEachDimension<0, 0>::unroll(shape, coords, std::forward<L>(lambda_function),
-                                     std::forward<Args>(args)...);
+      ForEachDimension<0, 0>::unroll(shape, coords, lambda_function);
       break;
     case 1:
-      ForEachDimension<0, 1>::unroll(shape, coords, std::forward<L>(lambda_function),
-                                     std::forward<Args>(args)...);
+      ForEachDimension<0, 1>::unroll(shape, coords, lambda_function);
       break;
     case 2:
-      ForEachDimension<0, 2>::unroll(shape, coords, std::forward<L>(lambda_function),
-                                     std::forward<Args>(args)...);
+      ForEachDimension<0, 2>::unroll(shape, coords, lambda_function);
       break;
     case 3:
-      ForEachDimension<0, 3>::unroll(shape, coords, std::forward<L>(lambda_function),
-                                     std::forward<Args>(args)...);
+      ForEachDimension<0, 3>::unroll(shape, coords, lambda_function);
       break;
     case 4:
-      ForEachDimension<0, 4>::unroll(shape, coords, std::forward<L>(lambda_function),
-                                     std::forward<Args>(args)...);
+      ForEachDimension<0, 4>::unroll(shape, coords, lambda_function);
       break;
     case 5:
-      ForEachDimension<0, 5>::unroll(shape, coords, std::forward<L>(lambda_function),
-                                     std::forward<Args>(args)...);
+      ForEachDimension<0, 5>::unroll(shape, coords, lambda_function);
       break;
     case 6:
-      ForEachDimension<0, 6>::unroll(shape, coords, std::forward<L>(lambda_function),
-                                     std::forward<Args>(args)...);
+      ForEachDimension<0, 6>::unroll(shape, coords, lambda_function);
       break;
     default:
       assert(false && "ShapeLoop, 1 <= Shape's rank <= 6");
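
After this change ShapeLoop no longer forwards trailing arguments to the callback; any state the callback needs is captured by the lambda instead. A usage sketch under that assumption (the header providing ShapeLoop is assumed to be the utility header this hunk belongs to):

    #include "ir/Shape.h"
    #include "ir/Coordinates.h"

    // Sketch: visit every coordinate of a shape, keeping state in the capture list.
    size_t countElements(const onert::ir::Shape &shape)
    {
      size_t count = 0;
      ShapeLoop(shape, [&count](const onert::ir::Coordinates &coords) {
        (void)coords; // each call corresponds to one element position
        ++count;
      });
      return count; // equals the element count for a fully specified shape
    }
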
index c468ee4..05fd9cc 100644 (file)
@@ -94,7 +94,7 @@ void DynamicMemoryManager::deallocate(const ITensor *tensor)
 
 void DynamicMemoryManager::deallocate(void)
 {
-  for (auto &mem_alloc : _mem_alloc_map)
+  for (auto &&mem_alloc : _mem_alloc_map)
   {
     // Release memory buffer of mem_alloc
     mem_alloc.second->release();
index 1fda57b..1c04804 100644 (file)
@@ -58,7 +58,7 @@ void FirstFitPlanner::claim(const ir::OperandIndex &ind, size_t size)
 {
   // Find the right position for claiming
   uint32_t next_offset = 0;
-  for (auto &mem_claim : _claim_table)
+  for (const auto &mem_claim : _claim_table)
   {
     auto claimed_base_offset = mem_claim.first;
     auto claimed_size = _mem_plans[mem_claim.second].size;
index d891814..b03eb60 100644 (file)
@@ -39,7 +39,7 @@ void StaticTensorManager::allocateNonconsts(void)
 {
   _nonconst_mgr->allocate();
 
-  for (auto &pair : _tensors->native_tensors())
+  for (auto &&pair : _tensors->native_tensors())
   {
     const auto &ind = pair.first;
     auto tensor = pair.second.get();
index 8a6cddc..c1a2ed5 100644 (file)
@@ -44,7 +44,7 @@ FunctionMap BackendContext::genKernels()
   const_cast<ir::Graph *>(graph())->operands().iterate(
     [&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
 
-  for (auto &it : ret)
+  for (auto &&it : ret)
   {
     auto &fn_seq = it.second;
     fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
index a1b2064..d94ed0b 100644 (file)
@@ -47,7 +47,7 @@ public:
 public:
   void setTensor(IPortableTensor *tensor);
   void setUserTensor(uint8_t *buffer, size_t size);
-  ir::OperandInfo orig_info() const { return _orig_info; }
+  const ir::OperandInfo &orig_info() const { return _orig_info; }
   ir::Layout orig_layout() const { return _orig_layout; }
 
 public:
index fa2fc0b..4533703 100644 (file)
@@ -33,8 +33,8 @@ KernelGenerator::KernelGenerator(const ir::Graph &graph, DynamicTensorManager *d
                                  const std::shared_ptr<TensorRegistry> &tensor_reg,
                                  const std::shared_ptr<ExternalContext> &external_context)
   : basic::KernelGeneratorBase{graph}, _dyn_tensor_manager{dyn_tensor_manager},
-    _tensor_reg{tensor_reg}, _tensor_registries{}, _executors{nullptr}, _external_context{
-                                                                          external_context}
+    _tensor_reg{tensor_reg}, _tensor_registries{}, _executors{nullptr}, _model_index{},
+    _external_context{external_context}
 {
   UNUSED_RELEASE(_graph);
   UNUSED_RELEASE(_tensor_registries);
@@ -90,7 +90,7 @@ void KernelGenerator::visit(const ir::operation::If &node)
   input_tensors.erase(input_tensors.begin());
   auto fn = std::make_unique<::onert::backend::builtin::kernel::IfLayer>(
     cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executors,
-    _external_context);
+    _model_index, _external_context);
 
   _return_fn = std::move(fn);
 }
@@ -133,7 +133,7 @@ void KernelGenerator::visit(const ir::operation::While &node)
   // WhileLayer just sets Executors instead of cond and body executors to avoid the complexity of
   // creating executors recursively
   auto fn = std::make_unique<::onert::backend::builtin::kernel::WhileLayer>(
-    input_tensors, output_tensors, cond_subg_index, body_subg_index, _executors,
+    input_tensors, output_tensors, cond_subg_index, body_subg_index, _executors, _model_index,
     _dyn_tensor_manager->dynamic_mem_mgr().get(), _external_context);
 
   _return_fn = std::move(fn);
index d5931ca..3c86fe3 100644 (file)
@@ -23,7 +23,7 @@
 #include "../../compiler/TensorRegistries.h"
 
 #include "backend/basic/KernelGeneratorBase.h"
-#include "exec/Executors.h"
+#include "exec/IExecutors.h"
 #include "ir/Graph.h"
 
 namespace onert
@@ -44,12 +44,14 @@ public:
   {
     _tensor_registries = tensor_registries;
   }
-  void setExecutors(const std::shared_ptr<exec::Executors> &executors)
+  void setExecutors(const std::shared_ptr<exec::IExecutors> &executors)
   {
     // FIXME Using shared_ptr's raw pointer!
     _executors = executors.get();
   }
 
+  void setModelIndex(const ir::ModelIndex &index) { _model_index = index; }
+
   std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
 
 private:
@@ -65,7 +67,8 @@ private:
   DynamicTensorManager *_dyn_tensor_manager;
   std::shared_ptr<TensorRegistry> _tensor_reg;
   compiler::TensorRegistries _tensor_registries;
-  exec::Executors *_executors;
+  exec::IExecutors *_executors;
+  ir::ModelIndex _model_index;
   const std::shared_ptr<ExternalContext> _external_context;
 };
 
index cdb4196..51bc5a8 100644 (file)
@@ -29,11 +29,11 @@ IfLayer::IfLayer(backend::IPortableTensor *cond_tensor,
                  const std::vector<backend::IPortableTensor *> input_tensors,
                  const std::vector<backend::IPortableTensor *> output_tensors,
                  const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
-                 exec::Executors *executors,
+                 exec::IExecutors *executors, const ir::ModelIndex &model_index,
                  const std::shared_ptr<ExternalContext> &external_context)
   : _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors},
     _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index}, _executors{executors},
-    _external_context{external_context}
+    _model_index{model_index}, _external_context{external_context}
 {
   // At this point, executors may not have executors of then subg and else subg
 }
@@ -61,12 +61,12 @@ void IfLayer::run()
   if (cond_result)
   {
     VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl;
-    subg_exec = _executors->at(_then_subg_index).get();
+    subg_exec = _executors->at(_model_index, _then_subg_index);
   }
   else
   {
     VERBOSE(If) << "Call to $" << _else_subg_index << " (else)" << std::endl;
-    subg_exec = _executors->at(_else_subg_index).get();
+    subg_exec = _executors->at(_model_index, _else_subg_index);
   }
 
   subg_exec->execute(_input_tensors, _output_tensors);
index fa5537a..8f639ce 100644 (file)
@@ -18,7 +18,7 @@
 #define __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__
 
 #include <backend/IPortableTensor.h>
-#include <exec/Executors.h>
+#include <exec/IExecutors.h>
 #include "../ExternalContext.h"
 
 namespace onert
@@ -37,7 +37,8 @@ public:
           const std::vector<backend::IPortableTensor *> input_tensors,
           const std::vector<backend::IPortableTensor *> output_tensors,
           const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
-          exec::Executors *executors, const std::shared_ptr<ExternalContext> &external_context);
+          exec::IExecutors *executors, const ir::ModelIndex &model_index,
+          const std::shared_ptr<ExternalContext> &external_context);
 
 public:
   void run() override;
@@ -48,7 +49,8 @@ private:
   const std::vector<backend::IPortableTensor *> _output_tensors;
   const ir::SubgraphIndex _then_subg_index;
   const ir::SubgraphIndex _else_subg_index;
-  exec::Executors *_executors;
+  exec::IExecutors *_executors;
+  ir::ModelIndex _model_index;
   const std::shared_ptr<ExternalContext> _external_context;
 };
 
index ddaecdf..6001800 100644 (file)
@@ -64,7 +64,7 @@ void PermuteLayer::optimize()
       src_offsets_it->resize(0);
       dst_offsets_it->resize(0);
       if (underlying_type(src->data_type()) != underlying_type(dst->data_type()))
-        throw std::runtime_error("data type does not match");
+        continue;
       const auto permute_type = [&]() -> PermuteType {
         if (src->getShape().rank() == 4 && src->layout() == ir::Layout::NHWC &&
             dst->layout() == ir::Layout::NCHW)
@@ -81,6 +81,8 @@ void PermuteLayer::optimize()
           return PermuteType::COPY;
         }
       }();
+
+      // TODO Support different types
       auto fn = [&](backend::ITensor &src_tensor) {
         dst->access([&](backend::ITensor &dst_tensor) {
           // NOTE The buffer of both tensor can be nullptr in this step
@@ -260,8 +262,10 @@ void PermuteLayer::run()
         // 1. The tasks for multithreading were created
         // 2. The tasks' size > 1
         // 3. Both tensors are not dynamic
+        // 4. Data types of both tensors are different
         if (_tasks_map.find(src) == _tasks_map.end() || _tasks_map.at(src).size() == 1 ||
-            src->is_dynamic() || dst->is_dynamic())
+            src->is_dynamic() || dst->is_dynamic() ||
+            underlying_type(src->data_type()) != underlying_type(dst->data_type()))
         {
           permute(src, dst, src->getShape().rank(), src_offsets, dst_offsets);
         }
index 8e006c5..c0ca404 100644 (file)
@@ -35,12 +35,14 @@ namespace kernel
 WhileLayer::WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
                        const std::vector<backend::IPortableTensor *> output_tensors,
                        const ir::SubgraphIndex &cond_subg_index,
-                       const ir::SubgraphIndex &body_subg_index, exec::Executors *executors,
+                       const ir::SubgraphIndex &body_subg_index, exec::IExecutors *executors,
+                       const ir::ModelIndex &model_index,
                        basic::DynamicMemoryManager *dyn_memory_manager,
                        const std::shared_ptr<ExternalContext> &external_context)
   : _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index},
     _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executors{executors},
-    _dyn_memory_manager{dyn_memory_manager}, _external_context{external_context}
+    _model_index{model_index}, _dyn_memory_manager{dyn_memory_manager}, _external_context{
+                                                                          external_context}
 {
   // At this point, executors may not have executors of cond subg and body subg
 }
@@ -57,8 +59,8 @@ void WhileLayer::run()
   // // Run cond subg
   // If there is no loop copy "_input_tensors" -> "_dst_tensors", else copy "cond subg inputs" ->
   // "_dst_tensors"
-  auto cond_exec = _executors->at(_cond_subg_index).get();
-  auto body_exec = _executors->at(_body_subg_index).get();
+  auto cond_exec = _executors->at(_model_index, _cond_subg_index);
+  auto body_exec = _executors->at(_model_index, _body_subg_index);
 
   // Need a temp tensor to hold the cond subgraph output
   assert(cond_exec->getOutputTensors().size() == 1);
index 8551b3d..40ca4fe 100644 (file)
@@ -18,7 +18,7 @@
 #define __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__
 
 #include <backend/IPortableTensor.h>
-#include <exec/Executors.h>
+#include <exec/IExecutors.h>
 #include <exec/IFunction.h>
 #include <ir/OperandIndexSequence.h>
 #include <ir/Graph.h>
@@ -41,7 +41,8 @@ public:
   WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
              const std::vector<backend::IPortableTensor *> output_tensors,
              const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index,
-             exec::Executors *executors, basic::DynamicMemoryManager *dyn_memory_manager,
+             exec::IExecutors *executors, const ir::ModelIndex &model_index,
+             basic::DynamicMemoryManager *dyn_memory_manager,
              const std::shared_ptr<ExternalContext> &external_context);
 
 public:
@@ -52,7 +53,8 @@ private:
   const ir::SubgraphIndex _body_subg_index;
   const std::vector<backend::IPortableTensor *> _input_tensors;
   const std::vector<backend::IPortableTensor *> _output_tensors;
-  exec::Executors *_executors;
+  exec::IExecutors *_executors;
+  const ir::ModelIndex _model_index;
   basic::DynamicMemoryManager *_dyn_memory_manager; // For generating temp tensors
   const std::shared_ptr<ExternalContext> _external_context;
 };
index 7be9c1e..4512455 100644 (file)
 #include "pass/OddOutputPass.h"
 #include "pass/PassRunner.h"
 #include "pass/UnusedOperandEliminationPass.h"
-#include "../backend/builtin/Config.h"
 #include "../dumper/dot/DotDumper.h"
-#include "../interp/InterpExecutor.h"
-#include "../ir/OperationCloner.h"
+#include "../exec/SingleModelExecutors.h"
 #include "../ir/OperationDumper.h"
 #include "../ir/verifier/Verifier.h"
 
 #include "compiler/StaticShapeInferer.h"
-#include "util/ConfigSource.h"
-#include "util/logging.h"
 
-#include <misc/polymorphic_downcast.h>
 #include <misc/string_helpers.h>
-#include <json/json.h>
-
-// TODO Remove using fstream header
-#include <fstream>
-
-namespace
-{
-
-using namespace onert;
-
-std::string getOpBackends(std::unordered_map<ir::OpCode, std::string> &opcode_to_backend)
-{
-  std::unordered_map<ir::OpCode, std::string>::iterator it;
-  std::string opbackends;
-
-  for (it = opcode_to_backend.begin(); it != opcode_to_backend.end(); ++it)
-  {
-    if (!opbackends.empty())
-      opbackends = opbackends + ", ";
-
-    auto opcode = it->first;
-    const std::string opname = ir::toString(opcode);
-    opbackends += opname + "=" + it->second;
-  }
-  return opbackends;
-}
-
-void verboseOptions(compiler::CompilerOptions &options)
-{
-  VERBOSE(Compiler) << std::boolalpha << "==== Compiler Options ====" << std::endl;
-  VERBOSE(Compiler) << "backend_list             : "
-                    << nnfw::misc::join(options.backend_list.begin(), options.backend_list.end(),
-                                        "/")
-                    << std::endl;
-  VERBOSE(Compiler) << "trace_filepath           : " << options.trace_filepath << std::endl;
-  VERBOSE(Compiler) << "graph_dump_level         : " << options.graph_dump_level << std::endl;
-  VERBOSE(Compiler) << "executor                 : " << options.executor << std::endl;
-  VERBOSE(Compiler) << "manual backend_for_all   : "
-                    << options.manual_scheduler_options.backend_for_all << std::endl;
-  VERBOSE(Compiler) << "manual_scheduler_options : "
-                    << getOpBackends(options.manual_scheduler_options.opcode_to_backend)
-                    << std::endl;
-  VERBOSE(Compiler) << "he_scheduler             : " << options.he_scheduler << std::endl;
-  VERBOSE(Compiler) << "he_profiling_mode        : " << options.he_profiling_mode << std::endl;
-  VERBOSE(Compiler) << "disable_compile          : " << options.disable_compile << std::endl;
-  VERBOSE(Compiler) << "fp16_enable              : " << options.fp16_enable << std::endl
-                    << std::noboolalpha;
-}
-
-std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::StaticShapeInferer>>
-createStaticShapeInferers(
-  const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
-    &lowered_subgs)
-{
-  // Allocate StaticShapeInferer per each subgraph
-  std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::StaticShapeInferer>> inferers;
-  for (auto &pair : lowered_subgs)
-  {
-    const auto &subg_index = pair.first;
-    auto &lowered_subg = pair.second;
-    inferers[subg_index] = std::make_unique<compiler::StaticShapeInferer>(lowered_subg.get());
-  }
-
-  // Append observers in all StaticShapeInferers
-  for (auto &pair : lowered_subgs)
-  {
-    const auto &subg_index = pair.first;
-    auto &lowered_subg = pair.second;
-
-    // TODO: Change this iteration for all to controlflow iteration
-    lowered_subg->graph().operations().iterate([&](const ir::OperationIndex &,
-                                                   const ir::Operation &op) {
-      // A Function to append child inferers. These make it possible for a StaticShapeInferer to
-      // call StaticShapeInferes of child subgraphs recursively
-      auto appendChildInferer = [&](const ir::SubgraphIndex &child_subg_idx) {
-        auto *child_inferer = inferers.at(child_subg_idx).get();
-        inferers.at(subg_index)->appendChildInferer(child_subg_idx, child_inferer);
-      };
-
-      // A Function to appaend subg input observers. This makes it possible for a StaticShapeInferer
-      // to update inputs of child subgraphs
-      auto appendSubgraphInputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
-        std::vector<ir::Operand *> child_subg_inputs;
-        auto &child_subg = lowered_subgs.at(child_subg_idx)->graph();
-        for (const auto &input_idx : child_subg.getInputs())
-        {
-          auto operand_ptr = child_subg.operands().getRawPtr(input_idx);
-          child_subg_inputs.emplace_back(operand_ptr);
-        }
-        inferers.at(subg_index)
-          ->appendSubgInputObserver(child_subg_idx,
-                                    std::make_unique<compiler::OperandObserver>(child_subg_inputs));
-      };
-
-      // A Function to set controlflow output observers. This makes it possible for a
-      // StaticShapeInferer to update outputs of parent controlflow opeerations
-      auto setControlFlowOutputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
-        std::vector<ir::Operand *> cf_outputs;
-        auto &subg = lowered_subg->graph();
-        for (const auto &output_idx : op.getOutputs())
-        {
-          auto operand_ptr = subg.operands().getRawPtr(output_idx);
-          cf_outputs.emplace_back(operand_ptr);
-        }
-        inferers.at(child_subg_idx)
-          ->setControlflowOutputObserver(std::make_unique<compiler::OperandObserver>(cf_outputs));
-      };
-
-      // Append Observers in a StaticShapeInferer
-      if (op.opcode() == ir::OpCode::If)
-      {
-        const auto &if_op = nnfw::misc::polymorphic_downcast<const ir::operation::If &>(op);
-
-        appendChildInferer(if_op.param().then_subg_index);
-        appendChildInferer(if_op.param().else_subg_index);
-
-        appendSubgraphInputObserver(if_op.param().then_subg_index);
-        appendSubgraphInputObserver(if_op.param().else_subg_index);
-
-        setControlFlowOutputObserver(if_op.param().then_subg_index);
-      }
-      else if (op.opcode() == ir::OpCode::While)
-      {
-        const auto &while_op = nnfw::misc::polymorphic_downcast<const ir::operation::While &>(op);
-
-        appendChildInferer(while_op.param().cond_subg_index);
-        appendChildInferer(while_op.param().body_subg_index);
-
-        appendSubgraphInputObserver(while_op.param().cond_subg_index);
-        appendSubgraphInputObserver(while_op.param().body_subg_index);
-
-        setControlFlowOutputObserver(while_op.param().body_subg_index);
-      }
-    });
-  }
-
-  return inferers;
-}
-
-} // namespace
 
 namespace onert
 {
-
 namespace compiler
 {
-void ManualSchedulerOptions::setBackendMap(const std::string &str)
-{
-  // TODO Support multiple subgraphs for manual scheduling
-  auto key_val_list = nnfw::misc::split(str, ';');
-  for (const auto &key_val_str : key_val_list)
-  {
-    if (key_val_str.empty())
-    {
-      continue;
-    }
-
-    auto key_val = nnfw::misc::split(key_val_str, '=');
-    const auto &key_str = key_val.at(0);
-    const auto &val = key_val.at(1);
-    auto key = static_cast<uint32_t>(std::stoi(key_str));
-    this->index_to_backend.emplace(ir::OperationIndex{key}, val);
-  }
-}
-
-std::unique_ptr<CompilerOptions> CompilerOptions::fromGlobalConfig()
-{
-  auto o = std::make_unique<CompilerOptions>();
-  o->backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
-  o->trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
-  o->graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
-  o->executor = util::getConfigString(util::config::EXECUTOR);
-  o->he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER);
-  o->he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE);
-  o->disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE);
-  o->fp16_enable = util::getConfigBool(util::config::FP16_ENABLE);
-  {
-    // Backend for all
-    auto &ms_options = o->manual_scheduler_options;
-
-    // Default value for op_backend_all is first element in the backend list
-    ms_options.backend_for_all = util::getConfigString(util::config::OP_BACKEND_ALLOPS);
-
-// Opcode to Backend
-#define OP(OpName)                                                                      \
-  {                                                                                     \
-    const auto &backend_str = util::getConfigString(util::config::OP_BACKEND_##OpName); \
-    if (!backend_str.empty())                                                           \
-    {                                                                                   \
-      ms_options.opcode_to_backend[ir::OpCode::OpName] = backend_str;                   \
-    }                                                                                   \
-  }
-#include "ir/Operations.lst"
-#undef OP
-
-    // Index to Backend
-    auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP);
-    ms_options.setBackendMap(map_str);
-  }
-  return o;
-}
 
 Compiler::Compiler(const std::shared_ptr<ir::Model> &model, CompilerOptions &copt)
-  : _nnpkg{std::make_shared<ir::NNPkg>(model)}, _state{State::CREATED}, _voptions{&copt}
+  : _model{model}, _options{&copt}
 {
   // DO NOTHING
 }
 
 Compiler::Compiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
                    std::vector<std::unique_ptr<CompilerOptions>> &copts)
-  : _nnpkg{nnpkg}, _state{State::CREATED}, _voptions{}
+  : _model{nnpkg->primary_model()}, _options{copts[0].get()}
 {
-  for (uint32_t i = 0; i < copts.size(); i++)
-  {
-    _voptions.push_back(copts[i].get());
-  }
-}
-
-void Compiler::enableToFp16()
-{
-  for (auto options : _voptions)
-    options->fp16_enable = true;
-}
-
-void Compiler::checkProfilerConditions()
-{
-  if (_nnpkg->model_count() != 1)
-    throw std::runtime_error("NYI: Profiling mode for multiple model is not supported yet");
-
-  auto &options = *_voptions[0];
-
-  if (options.he_scheduler)
-    throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling.");
-
-  if (options.executor != "Dataflow")
-    throw std::runtime_error("Profiling mode works only with 'Dataflow' executor");
-}
-
-bool Compiler::buildPartialGraph(uint32_t num_graphs)
-{
-  // Use 1st model and options only on partial graph (pipeline) compile
-  assert(_nnpkg->model_count() == 1);
-  assert(_voptions.size() == 1);
-
-  auto model = _nnpkg->primary_model();
-  auto &options = *_voptions[0];
-
-  if (model->subgraphs_count() > 1)
-    return false;
-
-  auto partialgraphs = std::make_shared<ir::Model>();
-
-  for (uint32_t idx = 0; idx < num_graphs; idx++)
-  {
-    auto partialgraph = std::make_unique<ir::Graph>();
-    partialgraphs->push(ir::SubgraphIndex{idx}, std::move(partialgraph));
-  }
-  model->primary_subgraph()->setPartialModel(partialgraphs);
-
-  auto partial_graph = primary_subgraph()->partialgraphs();
-
-  primary_subgraph()->operands().iterate(
-    [&](const ir::OperandIndex &operand_index, const ir::Operand &operand) {
-      auto use_operations = operand.getUses();
-
-      for (auto use_operation : use_operations)
-      {
-        auto graph_index = options.partial_graph_options.index_to_graph.find(use_operation);
-        if (graph_index == options.partial_graph_options.index_to_graph.end())
-        {
-          throw std::runtime_error("Invalid Partition Map");
-        }
-        auto partition = partial_graph->at(graph_index->second);
-
-        if (partition->operands().exist(operand_index))
-        {
-          continue;
-        }
-
-        auto new_operand = std::make_unique<ir::Operand>(operand);
-        new_operand->clearDefUse();
-        auto new_operand_ind = partition->addOperand(operand_index, std::move(new_operand));
-        UNUSED_RELEASE(new_operand_ind);
-        assert(new_operand_ind == operand_index);
-      }
-    });
-
-  primary_subgraph()->operations().iterate(
-    [&](const ir::OperationIndex &operation_index, const ir::Operation &operation) {
-      auto graph_index = options.partial_graph_options.index_to_graph.find(operation_index);
-      if (graph_index == options.partial_graph_options.index_to_graph.end())
-      {
-        throw std::runtime_error("Invalid Partition Map");
-      }
-      auto partition = partial_graph->at(graph_index->second);
-
-      auto operand_io = (operation.getInputs() + operation.getOutputs()) | ir::Remove::DUPLICATED |
-                        ir::Remove::UNDEFINED;
-      for (auto operand_index : operand_io)
-      {
-        if (partition->operands().exist(operand_index))
-          continue;
-
-        const auto &operand = primary_subgraph()->operands().at(operand_index);
-
-        auto new_operand = std::make_unique<ir::Operand>(operand);
-        new_operand->clearDefUse();
-
-        auto new_operand_index = partition->addOperand(operand_index, std::move(new_operand));
-        UNUSED_RELEASE(new_operand_index);
-        assert(new_operand_index == operand_index);
-      }
-
-      auto new_operation_index = partition->addOperation(operation_index, clone(operation));
-      UNUSED_RELEASE(new_operation_index);
-      assert(new_operation_index == operation_index);
-    });
-
-  for (uint32_t idx = 0; idx < partial_graph->subgraphs_count(); idx++)
-  {
-    auto partition = partial_graph->at(ir::SubgraphIndex{idx});
-
-    partition->operands().iterate([&](const ir::OperandIndex &operand_index,
-                                      const ir::Operand &operand) {
-      if (primary_subgraph()->getInputs().contains(operand_index) ||
-          (!operand.getDef().valid() && !operand.isConstant()))
-      {
-        partition->addInput(operand_index, primary_subgraph()->tensor_names()->at(operand_index));
-      }
-      if (primary_subgraph()->getOutputs().contains(operand_index) || operand.getUses().size() == 0)
-      {
-        partition->addOutput(operand_index, primary_subgraph()->tensor_names()->at(operand_index));
-      }
-
-      if (primary_subgraph()->operands().at(operand_index).getUses().size() > 1 &&
-          !primary_subgraph()->operands().at(operand_index).isConstant() &&
-          !partition->getInputs().contains(operand_index))
-      {
-        auto use_operations = primary_subgraph()->operands().at(operand_index).getUses();
-        auto iter = use_operations.begin();
-        ir::SubgraphIndex graph_index =
-          options.partial_graph_options.index_to_graph.find(*iter++)->second;
-        while (iter != use_operations.end())
-        {
-          if (graph_index != options.partial_graph_options.index_to_graph.find(*iter)->second &&
-              !partition->getOutputs().contains(operand_index))
-          {
-            partition->addOutput(operand_index,
-                                 primary_subgraph()->tensor_names()->at(operand_index));
-          }
-          iter++;
-        }
-      }
-    });
-
-    partition->verify();
-
-    bool same = true;
-    if (partition->getInputs().size() == primary_subgraph()->getInputs().size())
-    {
-      for (auto iter = partition->getInputs().begin(); iter != partition->getInputs().end(); ++iter)
-      {
-        if (!primary_subgraph()->getInputs().contains(*iter))
-        {
-          same = false;
-          break;
-        }
-      }
-      if (same == true)
-      {
-        partition->getInputs() = primary_subgraph()->getInputs();
-      }
-      else
-      {
-        partition->input_sort();
-      }
-    }
-
-    same = true;
-    if (partition->getOutputs().size() == primary_subgraph()->getOutputs().size())
-    {
-      for (auto iter = partition->getOutputs().begin(); iter != partition->getOutputs().end();
-           ++iter)
-      {
-        if (!primary_subgraph()->getOutputs().contains(*iter))
-        {
-          same = false;
-          break;
-        }
-      }
-      if (same == true)
-      {
-        partition->getOutputs() = primary_subgraph()->getOutputs();
-      }
-      else
-      {
-        partition->output_sort();
-      }
-    }
-  }
-  return true;
+  // Use for single model only
+  assert(nnpkg->model_count() == 1);
 }
 
 std::shared_ptr<CompilerArtifact> Compiler::compile(void)
 {
-  for (auto options : _voptions)
-  {
-    // Set control flow backend for control flow operators
-    auto &builtin_id = backend::builtin::Config::ID;
-    options->manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
-    options->manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
-    options->manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
-
-    // FIXME This is a workaround for bcq operations, should remove it
-    options->manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
-    options->manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
-
-    // FIXME This is a workaround for bulk operations, should remove it
-    options->manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix";
-
-    verboseOptions(*options);
-  }
-
-  // NYI: allow one model compilation
-  auto const model_count = _nnpkg->model_count();
-  if (model_count != _voptions.size())
-    throw std::runtime_error{"Model count and option vector size mismatch"};
-
-  for (uint32_t i = 0; i < model_count; i++)
-  {
-    _nnpkg->model(ir::ModelIndex{i})->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
-      // Mandatory passes
-      pass::PassRunner{}
-        .append(std::make_unique<pass::ConstantOutputPass>(subg))
-        .append(std::make_unique<pass::OddOutputPass>(subg))
-        .run();
-
-      // Optimizations
-      pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
-    });
-  }
-
   /***************************************************
    * Prepare compilation phase
    ***************************************************/
-  // Compilable check
-  // TODO: Support hybrid execution -
-  //       execution between interpreter and compiled executor (including control flow)
-  if (_voptions[0]->disable_compile)
-  {
-    if (model_count > 1)
-      throw std::runtime_error{"NYI: Disable compilation for multi model is not supported yet"};
+  if (!_options)
+    throw std::runtime_error{"Empty compile option"};
 
-    auto executors = std::make_shared<exec::Executors>();
+  // Mode check
+  // TODO handle option for each model
+  if (_options->he_profiling_mode)
+  {
+    if (!_options->he_scheduler)
+      throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling.");
 
-    _nnpkg->primary_model()->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
-      executors->emplace(index, std::make_unique<interp::InterpExecutor>(subg));
-    });
-    _state = State::COMPILED;
-    return std::make_shared<CompilerArtifact>(executors, nullptr);
+    if (_options->executor != "Dataflow")
+      throw std::runtime_error("Profiling mode works only with 'Dataflow' executor");
   }
 
-  // Mode check
-  // TODO handle option for each model
-  if (_voptions[0]->he_profiling_mode)
-    checkProfilerConditions();
+  _options->forceInternalOptions();
+  _options->verboseOptions();
+
+  _model->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
+    // Mandatory passes
+    pass::PassRunner{}
+      .append(std::make_unique<pass::ConstantOutputPass>(subg))
+      .append(std::make_unique<pass::OddOutputPass>(subg))
+      .run();
+
+    // Optimizations
+    pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
+  });
 
   /***************************************************
    * Backend independent analysis & optimization phase
    ***************************************************/
   // TODO Handle dump level for each model
-  auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_voptions[0]->graph_dump_level);
+  auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options->graph_dump_level);
   onert::dumper::dot::DotDumper dot_dumper(dump_level);
 
   // Tracing context
   auto tracing_ctx = std::make_unique<util::TracingCtx>();
 
-  // Model edge context
-  std::unique_ptr<ir::ModelEdges> model_edges = nullptr;
-
   // Lower: Assign backend
   std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs;
-
-  if (model_count == 1)
   {
-    _nnpkg->primary_model()->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
-      dot_dumper.dump(subg, nnfw::misc::str("before_lower_subg-", index.value()));
+    _model->iterate([&](const ir::SubgraphIndex &subg_index, ir::Graph &subg) {
       // Lower: Assign backend
-      lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, *_voptions[0]);
+      lowered_subgs[subg_index] = std::make_unique<compiler::LoweredGraph>(subg, *_options);
       // Set tracing_ctx for copied graph
-      tracing_ctx->setSubgraphIndex(&(lowered_subgs[index]->graph()), index.value());
+      if (tracing_ctx != nullptr)
+        tracing_ctx->setSubgraphIndex(&(lowered_subgs[subg_index]->graph()), subg_index.value());
     });
   }
-  else
-  {
-    // TODO Support tracing_ctx for multiple model
-    tracing_ctx = nullptr;
 
-    // Copy model edge context
-    model_edges = std::make_unique<ir::ModelEdges>(_nnpkg->model_edges());
+  _model.reset();
 
-    for (uint32_t i = 0; i < model_count; i++)
-    {
-      auto model = _nnpkg->model(ir::ModelIndex{i});
-      if (model->subgraphs_count() != 1)
-        throw std::runtime_error{"NYI: Lowering subgraphs for multiple model is not supported yet"};
-      auto subg = model->primary_subgraph();
-      dot_dumper.dump(*subg, nnfw::misc::str("before_lower_model-", i));
-
-      // For multimodel, model index is used for lowered graph index in lowered graph map
-      // and index type is SubgraphIndex
-      // TODO Find better way to represent lowered graph index for multimodel's subgraph
-      lowered_subgs[ir::SubgraphIndex{i}] =
-        std::make_unique<compiler::LoweredGraph>(*model->primary_subgraph(), *_voptions[i]);
-    }
-  }
-
-  _nnpkg.reset();
-
-  for (auto &pair : lowered_subgs)
+  for (const auto &pair : lowered_subgs)
   {
     const auto &subg_index = pair.first;
-    auto &lowered_subg = pair.second;
-    dot_dumper.dump(*lowered_subg, "after_lower_subg-" + std::to_string(subg_index.value()));
+    const auto &lowered_subg = pair.second;
+    dot_dumper.dump(*lowered_subg, nnfw::misc::str("after_lower_subg-", subg_index.value()));
   }
 
   // Shape inference.
@@ -566,28 +119,15 @@ std::shared_ptr<CompilerArtifact> Compiler::compile(void)
     // Run the StaticShapeInfer of primary subg. All child StaticShapeInferers are called
     // recursively
     std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
-      createStaticShapeInferers(lowered_subgs);
+      StaticShapeInferer::createStaticShapeInferers(lowered_subgs);
 
-    if (model_count == 1)
-    {
-      const auto primary_subg_idx = ir::SubgraphIndex{0};
-      inferers.at(primary_subg_idx)->infer();
+    const auto primary_subg_idx = ir::SubgraphIndex{0};
+    inferers.at(primary_subg_idx)->infer();
 
-      for (const auto &pair : inferers)
-      {
-        const auto inferer = pair.second.get();
-        inferer->dump();
-      }
-    }
-    else
+    for (const auto &pair_inferer : inferers)
     {
-      // Assume multi model has only one subgraph on each model
-      for (const auto &pair : inferers)
-      {
-        const auto inferer = pair.second.get();
-        inferer->infer();
-        inferer->dump();
-      }
+      const auto inferer = pair_inferer.second.get();
+      inferer->dump();
     }
   }
 
@@ -598,7 +138,7 @@ std::shared_ptr<CompilerArtifact> Compiler::compile(void)
   //      - Check parameter value validation which valid value is depend on input tensor shape
   //      - Output tensor shape validation check is needless because
   //        static/dynamic shape inferer will make valid output shape
-  for (auto &pair : lowered_subgs)
+  for (const auto &pair : lowered_subgs)
   {
     auto &lowered_subg = pair.second;
     compiler::ShapeValidator{lowered_subg->graph()}();
@@ -607,240 +147,30 @@ std::shared_ptr<CompilerArtifact> Compiler::compile(void)
   /*************************************************************
    *  Backend independent analysis & optimization phase finished
    *************************************************************/
-  auto executors = std::make_shared<exec::Executors>(std::move(model_edges));
-  for (auto &pair : lowered_subgs)
+  auto executors = std::make_shared<exec::SingleModelExecutors>();
+  for (auto &&pair : lowered_subgs)
   {
-    const auto &subg_index = pair.first;
+    auto const model_index = ir::ModelIndex{0};
+    auto const subg_index = pair.first;
     auto &lowered_subg = pair.second;
-    auto indexed_ranks = lowered_subg->indexed_ranks();
+    auto const indexed_ranks = lowered_subg->indexed_ranks();
 
     ir::OperationDumper dumper("Executor generation of Subgraph " +
                                std::to_string(subg_index.value()));
     lowered_subg->graph().operations().iterate(
       [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
 
-    auto &options = (model_count > 1) ? *_voptions[subg_index.value()] : *_voptions[0];
     auto executor = std::unique_ptr<exec::IExecutor>{ExecutorFactory::get().create(
-      std::move(lowered_subg), tracing_ctx.get(), options, executors)};
+      std::move(lowered_subg), tracing_ctx.get(), *_options, executors, model_index)};
     executor->setIndexedRanks(indexed_ranks);
-    executors->emplace(subg_index, std::move(executor));
+    executors->emplace(model_index, subg_index, std::move(executor));
   }
 
   /********************************
    * Code generation phase finished
    ********************************/
-  _state = State::COMPILED;
   return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx));
 }
 
-std::vector<std::shared_ptr<CompilerArtifact>> Compiler::compile(const char *package_file_path,
-                                                                 const char *map_file_path)
-{
-  // Allow one model compilation for pipeline
-  if (_nnpkg->model_count() != 1)
-    throw std::runtime_error{"Multiple models compilation for pipeline is not supported yet."};
-  assert(_voptions.size() == 1);
-
-  auto model = _nnpkg->primary_model();
-  auto &options = *_voptions[0];
-
-  std::string package_path(package_file_path);
-  std::string partition_map_file;
-
-  if (map_file_path)
-  {
-    partition_map_file = map_file_path;
-  }
-  else
-  {
-    partition_map_file = package_path + "/partition_map.json";
-  }
-
-  std::ifstream pmfs(partition_map_file);
-  Json::Value root;
-  pmfs >> root;
-  const Json::Value &map = root["partition_map"];
-  const Json::Value &np = root["num_partitions"];
-
-  uint32_t num_graphs = 1;
-
-  if (pmfs.is_open())
-  {
-    num_graphs = np.asUInt();
-    for (uint32_t i = 0; i < (uint32_t)map.size(); ++i)
-    {
-      options.partial_graph_options.index_to_graph[ir::OperationIndex{i}] =
-        ir::SubgraphIndex{map[i].asUInt()};
-    }
-  }
-  else
-  {
-    throw std::runtime_error("There is no partition map file");
-  }
-
-  if (!buildPartialGraph(num_graphs))
-  {
-    throw std::runtime_error("It doesn't support in case there are subgraphs");
-  }
-
-  // Set control flow backend for control flow operators
-  {
-    auto &builtin_id = backend::builtin::Config::ID;
-    options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
-    options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
-    options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
-  }
-
-  // FIXME This is a workaround for bcq operations, should remove it
-  {
-    options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
-    options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
-  }
-
-  // FIXME This is a workaround for bulk operations, should remove it
-  {
-    options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix";
-  }
-
-  verboseOptions(options);
-
-  model->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
-    // Mandatory passes
-    auto part = subg.partialgraphs();
-    part->iterate([&](const ir::SubgraphIndex &, ir::Graph &partialgraph) {
-      pass::PassRunner{}
-        .append(std::make_unique<pass::ConstantOutputPass>(partialgraph))
-        .append(std::make_unique<pass::OddOutputPass>(partialgraph))
-        .run();
-
-      // Optimizations
-      pass::PassRunner{}
-        .append(std::make_unique<pass::UnusedOperandEliminationPass>(partialgraph))
-        .run();
-    });
-  });
-
-  /***************************************************
-   * Prepare compilation phase
-   ***************************************************/
-
-  // Compilable check
-  // TODO: Support hybrid execution -
-  //       execution between interpreter and compiled executor (including control flow)
-  if (options.disable_compile)
-  {
-    std::vector<std::shared_ptr<CompilerArtifact>> results;
-    auto executors = std::make_shared<exec::Executors>();
-
-    model->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
-      executors->emplace(index, std::make_unique<interp::InterpExecutor>(subg));
-    });
-    results.push_back(std::make_shared<CompilerArtifact>(executors, nullptr));
-    _state = State::COMPILED;
-    return results;
-  }
-
-  // Mode check
-  if (options.he_profiling_mode)
-    checkProfilerConditions();
-
-  /***************************************************
-   * Backend independent analysis & optimization phase
-   ***************************************************/
-  auto dump_level = static_cast<dumper::dot::DotDumper::Level>(options.graph_dump_level);
-  onert::dumper::dot::DotDumper dot_dumper_part(dump_level);
-
-  // Lower: Assign backend
-  std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
-    lowered_partialgraphs;
-  model->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
-    auto part = subg.partialgraphs();
-    part->iterate([&](const ir::SubgraphIndex &pindex, ir::Graph &partialgraph) {
-      dot_dumper_part.dump(partialgraph,
-                           nnfw::misc::str("before_lower_subg_partialgraph-", pindex.value()));
-
-      // // Lower: Assign backend
-      lowered_partialgraphs[pindex] =
-        std::make_unique<compiler::LoweredGraph>(subg, partialgraph, options);
-    });
-  });
-
-  for (auto &pair : lowered_partialgraphs)
-  {
-
-    const auto &partialgraph_index = pair.first;
-    auto &lowered_partialgraph = pair.second;
-    dot_dumper_part.dump(*lowered_partialgraph, "after_lower_subg_partialgraph-" +
-                                                  std::to_string(partialgraph_index.value()));
-  }
-
-  // Partial Graph shape inference
-  std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
-    createStaticShapeInferers(lowered_partialgraphs);
-  // NOTE If partialgraph has subgraphs StaticShapeInferer may be called multiple times
-  for (auto &pair : lowered_partialgraphs)
-  {
-    const auto &partialgraph_index = pair.first;
-    const auto partial_inferer = inferers.at(partialgraph_index).get();
-    partial_inferer->infer();
-    partial_inferer->dump();
-  }
-
-  // Shape validation
-  // TODO Move shape independent feature check from ShapeValidator to OperationValidator
-  // TODO Move ShapeValidator into shape inference
-  //      - Check input tensor shape validation
-  //      - Check parameter value validation which valid value is depend on input tensor shape
-  //      - Output tensor shape validation check is needless because
-  //        static/dynamic shape inferer will make valid output shape
-  for (auto &pair : lowered_partialgraphs)
-  {
-    auto &lowered_partialgraph = pair.second;
-    compiler::ShapeValidator{lowered_partialgraph->graph()}();
-  }
-
-  /*************************************************************
-   *  Backend independent analysis & optimization phase finished
-   *************************************************************/
-  std::map<uint32_t, std::unique_ptr<compiler::LoweredGraph>> ordered;
-  for (auto &pair : lowered_partialgraphs)
-  {
-    // const auto &partialgraph_index = pair.first;
-    auto &lowered_partialgraph = pair.second;
-
-    ordered.insert(make_pair(pair.first.value(), std::move(lowered_partialgraph)));
-  }
-
-  std::vector<std::shared_ptr<CompilerArtifact>> results;
-  for (auto &pair : ordered)
-  {
-    auto executors = std::make_shared<exec::Executors>();
-
-    const auto &partialgraph_index = ir::SubgraphIndex(pair.first);
-    auto &lowered_partialgraph = pair.second;
-    auto indexed_ranks = lowered_partialgraph->indexed_ranks();
-    ir::OperationDumper dumper("Executor generation of Subgraph " +
-                               std::to_string(partialgraph_index.value()));
-    lowered_partialgraph->graph().operations().iterate(
-      [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
-    auto executor = std::unique_ptr<exec::IExecutor>{
-      ExecutorFactory::get().create(std::move(lowered_partialgraph), nullptr, options, executors)};
-    executor->setIndexedRanks(indexed_ranks);
-    executors->emplace(ir::SubgraphIndex{0}, std::move(executor));
-
-    // It doesn't support tracing in case of partial graph
-    results.push_back(std::make_shared<CompilerArtifact>(executors, nullptr));
-  }
-
-  _nnpkg.reset();
-  /********************************
-   * Code generation phase finished
-   ********************************/
-  _state = State::COMPILED;
-
-  return results;
-}
-
 } // namespace compiler
-
 } // namespace onert
diff --git a/runtime/onert/core/src/compiler/CompilerFactory.cc b/runtime/onert/core/src/compiler/CompilerFactory.cc
new file mode 100644 (file)
index 0000000..d8d4bb2
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/CompilerFactory.h"
+
+#include "MultiModelCompiler.h"
+
+#include "compiler/Compiler.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+CompilerFactory &CompilerFactory::get()
+{
+  static CompilerFactory singleton;
+  return singleton;
+}
+
+std::unique_ptr<ICompiler>
+CompilerFactory::create(const std::shared_ptr<ir::NNPkg> &nnpkg,
+                        std::vector<std::unique_ptr<CompilerOptions>> &copts)
+{
+  if (nnpkg->model_count() == 1)
+    return std::make_unique<Compiler>(nnpkg, copts);
+
+  return std::make_unique<MultiModelCompiler>(nnpkg, copts);
+}
+
+} // namespace compiler
+} // namespace onert
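For context, a minimal caller-side sketch of the new factory follows. It assumes the caller already holds a populated ir::NNPkg and builds one CompilerOptions per model with CompilerOptions::fromGlobalConfig() (both appear elsewhere in this change); the helper name compileNNPkg and the exact include paths are illustrative, not part of this commit.

    // Sketch only: dispatching through CompilerFactory (caller-side code, assumed).
    #include "compiler/CompilerFactory.h"
    #include "compiler/CompilerOptions.h"
    #include "ir/NNPkg.h"

    #include <cstdint>
    #include <memory>
    #include <vector>

    std::shared_ptr<onert::compiler::CompilerArtifact>
    compileNNPkg(const std::shared_ptr<onert::ir::NNPkg> &nnpkg)
    {
      // One CompilerOptions instance per model in the package
      std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> copts;
      for (uint16_t i = 0; i < nnpkg->model_count(); ++i)
        copts.emplace_back(onert::compiler::CompilerOptions::fromGlobalConfig());

      // Single-model packages get the existing Compiler; otherwise MultiModelCompiler is chosen
      auto compiler = onert::compiler::CompilerFactory::get().create(nnpkg, copts);
      return compiler->compile();
    }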
diff --git a/runtime/onert/core/src/compiler/CompilerOptions.cc b/runtime/onert/core/src/compiler/CompilerOptions.cc
new file mode 100644 (file)
index 0000000..b5fd392
--- /dev/null
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/CompilerOptions.h"
+
+#include "../backend/builtin/Backend.h"
+
+#include "util/ConfigSource.h"
+#include "util/logging.h"
+
+#include <misc/string_helpers.h>
+
+namespace
+{
+
+using namespace onert;
+
+std::string getOpBackends(std::unordered_map<ir::OpCode, std::string> &opcode_to_backend)
+{
+  std::unordered_map<ir::OpCode, std::string>::iterator it;
+  std::string opbackends;
+
+  for (it = opcode_to_backend.begin(); it != opcode_to_backend.end(); ++it)
+  {
+    if (!opbackends.empty())
+      opbackends = opbackends + ", ";
+
+    auto opcode = it->first;
+    const std::string opname = ir::toString(opcode);
+    opbackends += opname + "=" + it->second;
+  }
+  return opbackends;
+}
+
+} // namespace
+
+namespace onert
+{
+namespace compiler
+{
+
+void ManualSchedulerOptions::setBackendMap(const std::string &str)
+{
+  // TODO Support multiple subgraphs for manual scheduling
+  auto key_val_list = nnfw::misc::split(str, ';');
+  for (const auto &key_val_str : key_val_list)
+  {
+    if (key_val_str.empty())
+    {
+      continue;
+    }
+
+    auto key_val = nnfw::misc::split(key_val_str, '=');
+    const auto &key_str = key_val.at(0);
+    const auto &val = key_val.at(1);
+    auto key = static_cast<uint32_t>(std::stoi(key_str));
+    this->index_to_backend.emplace(ir::OperationIndex{key}, val);
+  }
+}
+
+std::unique_ptr<CompilerOptions> CompilerOptions::fromGlobalConfig()
+{
+  auto o = std::make_unique<CompilerOptions>();
+  o->backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
+  o->trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
+  o->graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
+  o->executor = util::getConfigString(util::config::EXECUTOR);
+  o->he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER);
+  o->he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE);
+  o->fp16_enable = util::getConfigBool(util::config::FP16_ENABLE);
+  {
+    // Backend for all
+    auto &ms_options = o->manual_scheduler_options;
+
+    // Default value for op_backend_all is first element in the backend list
+    ms_options.backend_for_all = util::getConfigString(util::config::OP_BACKEND_ALLOPS);
+
+// Opcode to Backend
+#define OP(OpName)                                                                      \
+  {                                                                                     \
+    const auto &backend_str = util::getConfigString(util::config::OP_BACKEND_##OpName); \
+    if (!backend_str.empty())                                                           \
+    {                                                                                   \
+      ms_options.opcode_to_backend[ir::OpCode::OpName] = backend_str;                   \
+    }                                                                                   \
+  }
+#include "ir/Operations.lst"
+#undef OP
+
+    // Index to Backend
+    auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP);
+    ms_options.setBackendMap(map_str);
+  }
+  return o;
+}
+
+void CompilerOptions::forceInternalOptions()
+{
+  // Set control flow backend for control flow operators
+  auto &builtin_id = backend::builtin::Config::ID;
+  manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
+  manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
+  manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
+
+  // FIXME This is a workaround for bcq operations, should remove it
+  manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
+  manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
+
+  // FIXME This is a workaround for bulk operations, should remove it
+  manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix";
+}
+
+void CompilerOptions::verboseOptions()
+{
+  VERBOSE(Compiler) << std::boolalpha << "==== Compiler Options ====" << std::endl;
+  VERBOSE(Compiler) << "backend_list             : "
+                    << nnfw::misc::join(backend_list.begin(), backend_list.end(), "/") << std::endl;
+  VERBOSE(Compiler) << "trace_filepath           : " << trace_filepath << std::endl;
+  VERBOSE(Compiler) << "graph_dump_level         : " << graph_dump_level << std::endl;
+  VERBOSE(Compiler) << "executor                 : " << executor << std::endl;
+  VERBOSE(Compiler) << "manual backend_for_all   : " << manual_scheduler_options.backend_for_all
+                    << std::endl;
+  VERBOSE(Compiler) << "manual_scheduler_options : "
+                    << getOpBackends(manual_scheduler_options.opcode_to_backend) << std::endl;
+  VERBOSE(Compiler) << "he_scheduler             : " << he_scheduler << std::endl;
+  VERBOSE(Compiler) << "he_profiling_mode        : " << he_profiling_mode << std::endl;
+  VERBOSE(Compiler) << "fp16_enable              : " << fp16_enable << std::endl
+                    << std::noboolalpha;
+}
+
+} // namespace compiler
+} // namespace onert
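As a worked example of the string ManualSchedulerOptions::setBackendMap() parses: it is a ';'-separated list of "<operation index>=<backend id>" pairs, matching the two split calls above. The backend ids "cpu" and "acl_cl" below are illustrative only, not implied by this change.

    // Sketch only: an OP_BACKEND_MAP-style string ("cpu"/"acl_cl" are example backend ids)
    onert::compiler::ManualSchedulerOptions ms_options;
    ms_options.setBackendMap("0=cpu;3=acl_cl");
    // ms_options.index_to_backend now holds
    //   ir::OperationIndex{0} -> "cpu"
    //   ir::OperationIndex{3} -> "acl_cl"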
index 024556e..b09d6b0 100644 (file)
@@ -196,7 +196,7 @@ backend::BackendContexts createBackendContexts(compiler::LoweredGraph &lgraph, b
 
   // Create contexts
   auto whole_op_order = lgraph.graph().topolSortOperations();
-  for (auto &pair : context_data_map)
+  for (auto &&pair : context_data_map)
   {
     auto backend = pair.first;
     auto &data = pair.second;
@@ -240,18 +240,22 @@ ExecutorFactory &ExecutorFactory::get()
 ExecutorFactory::ExecutorFactory()
 {
   _map["Linear"] = createLinearExecutor;
-  _map["Dataflow"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
-                               std::placeholders::_3, std::placeholders::_4, false);
-  _map["Parallel"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
-                               std::placeholders::_3, std::placeholders::_4, true);
+  _map["Dataflow"] =
+    std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
+              std::placeholders::_3, std::placeholders::_4, std::placeholders::_5, false);
+  _map["Parallel"] =
+    std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
+              std::placeholders::_3, std::placeholders::_4, std::placeholders::_5, true);
 }
 
 exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                                          const util::TracingCtx *tracing_ctx,
                                          const compiler::CompilerOptions &options,
-                                         const std::shared_ptr<exec::Executors> &executors)
+                                         const std::shared_ptr<exec::IExecutors> &executors,
+                                         const ir::ModelIndex &index)
 {
-  return _map.at(options.executor)(std::move(lowered_graph), tracing_ctx, options, executors);
+  return _map.at(options.executor)(std::move(lowered_graph), tracing_ctx, options, executors,
+                                   index);
 }
 
 void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph,
@@ -282,10 +286,11 @@ void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_grap
 }
 
 void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs,
-                                            const std::shared_ptr<exec::Executors> &executors,
-                                            const backend::BackendContexts &backend_contexts)
+                                            const std::shared_ptr<exec::IExecutors> &executors,
+                                            const backend::BackendContexts &backend_contexts,
+                                            const ir::ModelIndex &index)
 {
-  for (auto &pair : backend_contexts)
+  for (auto &&pair : backend_contexts)
   {
     auto builtin_context = dynamic_cast<backend::builtin::BackendContext *>(pair.second.get());
     if (builtin_context != nullptr)
@@ -293,6 +298,7 @@ void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs,
       auto builtin_kernel_gen = builtin_context->kernel_gen;
       builtin_kernel_gen->setTensorRegistries(tensor_regs);
       builtin_kernel_gen->setExecutors(executors);
+      builtin_kernel_gen->setModelIndex(index);
     }
   }
 }
@@ -302,7 +308,7 @@ ExecutorFactory::orderBackendContext(const backend::BackendContexts &backend_con
 {
   std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
 
-  for (auto &pair : backend_contexts)
+  for (auto &&pair : backend_contexts)
   {
     // NOTE builtin backend must be processed lastly.
     // This is because of Permute layer's specialty which is the only operation that could have
@@ -319,7 +325,8 @@ ExecutorFactory::orderBackendContext(const backend::BackendContexts &backend_con
 
 exec::IExecutor *ExecutorFactory::createLinearExecutor(
   std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
-  const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors)
+  const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors,
+  const ir::ModelIndex &index)
 {
   auto &graph = lowered_graph->graph();
 
@@ -337,7 +344,7 @@ exec::IExecutor *ExecutorFactory::createLinearExecutor(
   auto order = Linear::linearize(*lowered_graph);
   Linear::dump(*lowered_graph, order);
 
-  for (auto &pair : backend_contexts)
+  for (auto &&pair : backend_contexts)
   {
     pair.second->genTensors();
   }
@@ -345,7 +352,7 @@ exec::IExecutor *ExecutorFactory::createLinearExecutor(
   prepareMigrantTensors(*lowered_graph, backend_contexts);
 
   // Give some runtime objects to builtin KernelGenerator
-  prepareBuiltinBackend(tensor_regs, executors, backend_contexts);
+  prepareBuiltinBackend(tensor_regs, executors, backend_contexts, index);
 
   ExecutionBuilder builder;
 
@@ -406,10 +413,10 @@ exec::IExecutor *ExecutorFactory::createLinearExecutor(
   }
 
   // Generate kernels
-  for (auto &pair : ordered_contexts)
+  for (auto &&pair : ordered_contexts)
   {
     auto codes = pair.second->genKernels();
-    for (auto &pair : codes)
+    for (auto &&pair : codes)
     {
       auto &op_ind = pair.first;
       auto &fn_seq = pair.second;
@@ -444,8 +451,8 @@ exec::IExecutor *ExecutorFactory::createLinearExecutor(
 
 exec::IExecutor *ExecutorFactory::createDataflowExecutor(
   std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
-  const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors,
-  bool parallel)
+  const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors,
+  const ir::ModelIndex &index, bool parallel)
 {
   backend::BackendContexts backend_contexts =
     createBackendContexts(*lowered_graph, options.executor == "Linear");
@@ -457,7 +464,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
     (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
       ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
 
-  for (auto &pair : backend_contexts)
+  for (auto &&pair : backend_contexts)
   {
     pair.second->genTensors();
   }
@@ -465,7 +472,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
   prepareMigrantTensors(*lowered_graph, backend_contexts);
 
   // Give some runtime objects to builtin KernelGenerator
-  prepareBuiltinBackend(tensor_regs, executors, backend_contexts);
+  prepareBuiltinBackend(tensor_regs, executors, backend_contexts, index);
 
   ExecutionBuilder builder;
 
@@ -473,10 +480,10 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
   auto ordered_contexts = orderBackendContext(backend_contexts);
 
   // Generate kernels
-  for (auto &pair : ordered_contexts)
+  for (auto &&pair : ordered_contexts)
   {
     auto codes = pair.second->genKernels();
-    for (auto &pair : codes)
+    for (auto &&pair : codes)
     {
       auto &op_ind = pair.first;
       auto &fn_seq = pair.second;
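The hunks above thread a new ir::ModelIndex parameter through every registered creator, so the "Dataflow" and "Parallel" entries now forward five placeholders and keep only the trailing parallel flag bound. A reduced, self-contained sketch of the same std::bind registry pattern (generic names, not onert's) is:

    // Sketch: a creator registry where one trailing argument is fixed at registration time
    #include <functional>
    #include <iostream>
    #include <string>
    #include <unordered_map>

    static int makeExecutor(int graph, int model_index, bool parallel)
    {
      return graph + model_index + (parallel ? 1000 : 0);
    }

    int main()
    {
      using namespace std::placeholders;
      std::unordered_map<std::string, std::function<int(int, int)>> registry;
      registry["Dataflow"] = std::bind(makeExecutor, _1, _2, false); // parallel fixed to false
      registry["Parallel"] = std::bind(makeExecutor, _1, _2, true);  // parallel fixed to true
      std::cout << registry.at("Parallel")(7, 2) << std::endl;       // prints 1009
      return 0;
    }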
index 70c089f..f8f9890 100644 (file)
@@ -21,7 +21,7 @@
 
 #include "backend/ITensor.h"
 #include "compiler/LoweredGraph.h"
-#include "exec/Executors.h"
+#include "exec/IExecutors.h"
 
 #include <deque>
 #include <unordered_map>
@@ -40,7 +40,8 @@ public:
   exec::IExecutor *create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                           const util::TracingCtx *tracing_ctx,
                           const compiler::CompilerOptions &options,
-                          const std::shared_ptr<exec::Executors> &executors);
+                          const std::shared_ptr<exec::IExecutors> &executors,
+                          const ir::ModelIndex &index);
 
 private:
   ExecutorFactory();
@@ -49,26 +50,28 @@ private:
   static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph,
                                     const backend::BackendContexts &backend_contexts);
   static void prepareBuiltinBackend(const TensorRegistries &tensor_regs,
-                                    const std::shared_ptr<exec::Executors> &executors,
-                                    const backend::BackendContexts &backend_contexts);
+                                    const std::shared_ptr<exec::IExecutors> &executors,
+                                    const backend::BackendContexts &backend_contexts,
+                                    const ir::ModelIndex &index);
   static std::deque<std::pair<const backend::Backend *, backend::BackendContext *>>
   orderBackendContext(const backend::BackendContexts &backend_contexts);
 
   static exec::IExecutor *createLinearExecutor(
     std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
-    const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors);
-  static exec::IExecutor *
-  createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
-                         const util::TracingCtx *tracing_ctx,
-                         const compiler::CompilerOptions &options,
-                         const std::shared_ptr<exec::Executors> &executors, bool parallel);
+    const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors,
+    const ir::ModelIndex &index);
+  static exec::IExecutor *createDataflowExecutor(
+    std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
+    const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors,
+    const ir::ModelIndex &index, bool parallel);
 
 private:
   std::unordered_map<
     std::string,
     std::function<exec::IExecutor *(
       std::unique_ptr<compiler::LoweredGraph>, const util::TracingCtx *tracing_ctx,
-      const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors)>>
+      const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors,
+      const ir::ModelIndex &index)>>
     _map;
 };
 
index 98dc906..fdf4e24 100644 (file)
@@ -393,10 +393,10 @@ void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq)
   const auto &op_seq_inputs = _lowered_graph.graph().getInputs();
   const auto &op_seq_outputs = _lowered_graph.graph().getOutputs();
 
-  for (auto &op_idx : op_seq)
+  for (const auto &op_idx : op_seq)
   {
     const auto &node = operations.at(op_idx);
-    for (auto &ind : node.getInputs() | ir::Remove::UNDEFINED)
+    for (const auto &ind : node.getInputs() | ir::Remove::UNDEFINED)
     {
       if (node.opcode() == ir::OpCode::ConvertFp32ToFp16 || op_seq_inputs.contains(ind))
         continue;
@@ -410,7 +410,7 @@ void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq)
       VERBOSE(Fp32ToFp16Converter) << "Input Operand " << ind << ": fp16" << std::endl;
     }
 
-    for (auto &ind : node.getOutputs())
+    for (const auto &ind : node.getOutputs())
     {
       if (node.opcode() == ir::OpCode::ConvertFp16ToFp32 || op_seq_outputs.contains(ind))
         continue;
@@ -747,7 +747,7 @@ Fp32ToFp16Converter::findOpSequencesContiguous(const InputToOpSeqs &input_to_op_
     //    |                       |
     // [OPERATION]             [OPERATION]
     //
-    for (auto &op_seq_ind : found_input_in_op_seqs->second)
+    for (const auto &op_seq_ind : found_input_in_op_seqs->second)
     {
       auto found_in_fp32_to_fp16 = _list_fp32_to_fp16.find(op_seq_ind);
       if (found_in_fp32_to_fp16 != _list_fp32_to_fp16.end())
@@ -799,13 +799,13 @@ Fp32ToFp16Converter::getListOpSequences(const OpSeqIndexToOpSeqIndexList &opseq_
   OpSeqIndexList list;
   for (const auto &it : opseq_map_to_delete)
   {
-    auto &opseq_ind_fp16_to_fp32 = it.first;
+    const auto &opseq_ind_fp16_to_fp32 = it.first;
     if (list.find(opseq_ind_fp16_to_fp32) == list.end())
     {
       list.emplace(opseq_ind_fp16_to_fp32);
     }
 
-    for (auto &opseq_ind_fp32_to_fp16 : it.second)
+    for (const auto &opseq_ind_fp32_to_fp16 : it.second)
     {
       if (list.find(opseq_ind_fp32_to_fp16) == list.end())
       {
@@ -869,7 +869,7 @@ void Fp32ToFp16Converter::manipulateContiguousOpSequences(
     auto &op_seq_fp16_to_fp32 = op_seqs.at(op_seq_ind_fp16_to_fp32);
     auto &input_ind_fp16_to_fp32 = op_seq_fp16_to_fp32.getInputs().at(0);
 
-    for (auto &op_seq_ind_fp32_to_fp16 : it.second)
+    for (const auto &op_seq_ind_fp32_to_fp16 : it.second)
     {
       auto &op_seq_fp32_to_fp16 = op_seqs.at(op_seq_ind_fp32_to_fp16);
       assert(op_seq_fp32_to_fp16.size() == 1);
@@ -879,7 +879,7 @@ void Fp32ToFp16Converter::manipulateContiguousOpSequences(
       auto found_next_to_fp16 = input_to_op_seqs.find(output_ind_fp32_to_fp16);
       assert(found_next_to_fp16 != input_to_op_seqs.end());
 
-      for (auto &op_seq_ind_next_to_fp16 : found_next_to_fp16->second)
+      for (const auto &op_seq_ind_next_to_fp16 : found_next_to_fp16->second)
       {
         manipulateInput(op_seq_ind_next_to_fp16, output_ind_fp32_to_fp16, input_ind_fp16_to_fp32);
       }
@@ -901,7 +901,7 @@ void Fp32ToFp16Converter::deleteContiguousOpSequences(
   auto &operations = _lowered_graph.graph().operations();
   auto &op_seqs = _lowered_graph.op_seqs();
 
-  for (auto &op_seq_ind : list_to_delete_op_seqs)
+  for (const auto &op_seq_ind : list_to_delete_op_seqs)
   {
     auto &op_seq = op_seqs.at(op_seq_ind);
     assert(op_seq.size() == 1);
@@ -914,7 +914,7 @@ void Fp32ToFp16Converter::deleteContiguousOpSequences(
     VERBOSE(Fp32ToFp16Converter) << "Delete Node " << first_node_ind << std::endl;
 
     // Uses
-    for (auto &ind : first_node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+    for (const auto &ind : first_node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
     {
       auto &obj = operands.at(ind);
       obj.removeUse(first_node_ind);
@@ -923,7 +923,7 @@ void Fp32ToFp16Converter::deleteContiguousOpSequences(
     }
 
     // Def
-    for (auto &ind : first_node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+    for (const auto &ind : first_node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
     {
       auto &obj = operands.at(ind);
       assert(obj.getDef() == first_node_ind);
@@ -942,7 +942,7 @@ void Fp32ToFp16Converter::deleteContiguousOpSequences(
   }
 
   // Operand
-  for (auto &ind : list_to_delete_ops)
+  for (const auto &ind : list_to_delete_ops)
   {
     operands.remove(ind);
     VERBOSE(Fp32ToFp16Converter) << "Operand " << ind << " is removed" << std::endl;
index c4bfddb..65fd4cd 100644 (file)
@@ -512,7 +512,7 @@ HEScheduler::ESTAndExecTime(const backend::Backend *backend, const ir::Operation
   // Find free time for data transferring and insert it into backend taskset. This is needed:
   //  1. Time for multiple permutations for this node's input is found correctly
   //  2. If backend==cpu, then free time for this node must come after permutations
-  for (auto &it : transfer_st_exec_time)
+  for (auto &&it : transfer_st_exec_time)
   {
     if (_is_parallel_exec)
     {
index c4a2df0..589331b 100644 (file)
@@ -163,7 +163,7 @@ void setOperationsExecutionTime(const std::vector<const Backend *> &backends,
   ExecTime et(backends);
   for (int i = 0; i < op_names.size(); ++i)
   {
-    for (auto &backend : backends)
+    for (const auto backend : backends)
       setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time);
   }
   et.storeOperationsExecTime();
@@ -189,7 +189,7 @@ void setPermutationsExecutionTime(const std::vector<const Backend *> &backends,
   ExecTime et(backends);
   for (const auto &backend : backends)
   {
-    for (auto &other_backend : backends)
+    for (const auto other_backend : backends)
     {
       if (backend == other_backend)
         continue;
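Many hunks in this change only switch loop variables from auto & to either const auto & (read-only iteration) or auto && (a forwarding reference that also binds to rvalue or proxy elements). A minimal illustration of the two spellings, using standard containers rather than onert's types:

    // Sketch: the range-for spellings adopted throughout this change
    #include <map>
    #include <string>

    int main()
    {
      std::map<int, std::string> m{{0, "a"}, {1, "b"}};
      for (const auto &pair : m) // read-only view of each element, no copies
        (void)pair.second;
      for (auto &&pair : m)      // forwarding reference: mutates in place, also binds proxies
        pair.second += "!";
      return 0;
    }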

index 9e84753..d53d0ed 100644 (file)
@@ -44,14 +44,6 @@ LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &option
   lowerGraph(options);
 }
 
-// TODO Design better class and constructor to represent parent_graph
-LoweredGraph::LoweredGraph(const ir::Graph &parent_graph, const ir::Graph &graph,
-                           const CompilerOptions &options)
-  : _graph{graph}, _parent_graph{parent_graph}
-{
-  lowerGraph(options);
-}
-
 void LoweredGraph::lowerGraph(const CompilerOptions &options)
 {
   // Build backend contexts
index af2d84c..621f0c7 100644 (file)
@@ -64,7 +64,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
 
   // 2. Backend per operation type
   std::unordered_map<ir::OpCode, backend::Backend *> op_type_map;
-  for (auto &pair : manual_options.opcode_to_backend)
+  for (const auto &pair : manual_options.opcode_to_backend)
   {
     op_type_map.emplace(pair.first, BackendManager::get().get(pair.second));
   }
@@ -80,7 +80,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
   });
 
   // 3. Backend per operation
-  for (auto &pair : manual_options.index_to_backend)
+  for (const auto &pair : manual_options.index_to_backend)
   {
     const auto &key = pair.first;
     const auto &val = pair.second;
diff --git a/runtime/onert/core/src/compiler/MultiModelCompiler.cc b/runtime/onert/core/src/compiler/MultiModelCompiler.cc
new file mode 100644 (file)
index 0000000..fea6a7f
--- /dev/null
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MultiModelCompiler.h"
+
+#include "ExecutorFactory.h"
+#include "ShapeValidator.h"
+#include "pass/ConstantOutputPass.h"
+#include "pass/OddOutputPass.h"
+#include "pass/PassRunner.h"
+#include "pass/UnusedOperandEliminationPass.h"
+#include "../dumper/dot/DotDumper.h"
+#include "../exec/Executors.h"
+#include "../ir/OperationDumper.h"
+#include "../ir/verifier/Verifier.h"
+
+#include "compiler/StaticShapeInferer.h"
+
+#include <misc/string_helpers.h>
+
+namespace onert
+{
+namespace compiler
+{
+
+MultiModelCompiler::MultiModelCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+                                       std::vector<std::unique_ptr<CompilerOptions>> &copts)
+  : _nnpkg{nnpkg}, _voptions{}
+{
+  assert(nnpkg->model_count() != 1);
+
+  for (uint32_t i = 0; i < copts.size(); i++)
+  {
+    _voptions.push_back(copts[i].get());
+  }
+}
+
+std::shared_ptr<CompilerArtifact> MultiModelCompiler::compile(void)
+{
+  /***************************************************
+   * Prepare compilation phase
+   ***************************************************/
+  for (auto options : _voptions)
+  {
+    if (!options)
+      throw std::runtime_error{"Empty compile option"};
+
+    // Mode check
+    // TODO handle option for each model
+    if (options->he_profiling_mode)
+      throw std::runtime_error("NYI: Profiling mode for multiple model is not supported yet");
+
+    options->forceInternalOptions();
+    options->verboseOptions();
+  }
+
+  // NYI: allow one model compilation
+  auto const model_count = _nnpkg->model_count();
+  if (model_count != _voptions.size())
+    throw std::runtime_error{"Model count and option vector size mismatch"};
+
+  for (uint16_t i = 0; i < model_count; i++)
+  {
+    _nnpkg->model(ir::ModelIndex{i})->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
+      // Mandatory passes
+      pass::PassRunner{}
+        .append(std::make_unique<pass::ConstantOutputPass>(subg))
+        .append(std::make_unique<pass::OddOutputPass>(subg))
+        .run();
+
+      // Optimizations
+      pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
+    });
+  }
+
+  /***************************************************
+   * Backend independent analysis & optimization phase
+   ***************************************************/
+  // TODO Handle dump level for each model
+  auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_voptions[0]->graph_dump_level);
+  onert::dumper::dot::DotDumper dot_dumper(dump_level);
+
+  // Tracing context
+  // TODO Support tracing_ctx for multiple models
+  std::unique_ptr<util::TracingCtx> tracing_ctx = nullptr;
+
+  // Model edge context: copy model edge context
+  auto model_edges = std::make_unique<ir::ModelEdges>(_nnpkg->model_edges());
+
+  // Lower: Assign backend
+  std::unordered_map<ir::ModelIndex,
+                     std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>>
+    lowered_subgs;
+
+  for (uint16_t i = 0; i < model_count; i++)
+  {
+    auto const model_index = ir::ModelIndex{i};
+    auto model = _nnpkg->model(model_index);
+
+    model->iterate([&](const ir::SubgraphIndex &subg_index, ir::Graph &subg) {
+      dot_dumper.dump(subg,
+                      nnfw::misc::str("before_lower_model-", i, "-subg-", subg_index.value()));
+      // Lower: Assign backend
+      lowered_subgs[model_index][subg_index] =
+        std::make_unique<compiler::LoweredGraph>(subg, *_voptions[i]);
+      // Set tracing_ctx for copied graph
+      if (tracing_ctx != nullptr)
+        tracing_ctx->setSubgraphIndex(&(lowered_subgs[model_index][subg_index]->graph()),
+                                      subg_index.value());
+    });
+  }
+
+  _nnpkg.reset();
+
+  for (const auto &pair : lowered_subgs)
+  {
+    const auto &model_index = pair.first;
+    const auto &model_lsubg = pair.second;
+
+    for (const auto &pair_inner : model_lsubg)
+    {
+      const auto &subg_index = pair_inner.first;
+      const auto &lowered_subg = pair_inner.second;
+      dot_dumper.dump(*lowered_subg, nnfw::misc::str("after_lower_model-", model_index.value(),
+                                                     "-subg-", subg_index.value()));
+    }
+  }
+
+  // Shape inference.
+  for (auto &&pair : lowered_subgs)
+  {
+    auto &model_lsubgs = pair.second;
+    // Run the StaticShapeInferer of the primary subgraph. All child StaticShapeInferers are
+    // called recursively
+    std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
+      StaticShapeInferer::createStaticShapeInferers(model_lsubgs);
+
+    const auto primary_subg_idx = ir::SubgraphIndex{0};
+    inferers.at(primary_subg_idx)->infer();
+
+    for (const auto &pair_inferer : inferers)
+    {
+      const auto inferer = pair_inferer.second.get();
+      inferer->dump();
+    }
+  }
+
+  // Shape validation
+  // TODO Move shape independent feature check from ShapeValidator to OperationValidator
+  // TODO Move ShapeValidator into shape inference
+  //      - Validate input tensor shapes
+  //      - Validate parameter values whose valid range depends on the input tensor shape
+  //      - Output tensor shape validation is unnecessary because the
+  //        static/dynamic shape inferers produce valid output shapes
+  for (const auto &pair : lowered_subgs)
+  {
+    const auto &model_lsubgs = pair.second;
+
+    for (const auto &pair_inner : model_lsubgs)
+    {
+      const auto &lowered_subg = pair_inner.second;
+      compiler::ShapeValidator{lowered_subg->graph()}();
+    }
+  }
+
+  /*************************************************************
+   *  Backend independent analysis & optimization phase finished
+   *************************************************************/
+  auto executors = std::make_shared<exec::Executors>(std::move(model_edges));
+  for (auto &&pair : lowered_subgs)
+  {
+    auto const &model_index = pair.first;
+    auto &model_lsubgs = pair.second;
+
+    for (auto &&pair_inner : model_lsubgs)
+    {
+      auto const subg_index = pair_inner.first;
+      auto &lowered_subg = pair_inner.second;
+      auto const indexed_ranks = lowered_subg->indexed_ranks();
+
+      ir::OperationDumper dumper("Executor generation of Subgraph " +
+                                 std::to_string(subg_index.value()));
+      lowered_subg->graph().operations().iterate(
+        [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
+
+      auto &options = *_voptions[model_index.value()];
+      auto executor = std::unique_ptr<exec::IExecutor>{ExecutorFactory::get().create(
+        std::move(lowered_subg), tracing_ctx.get(), options, executors, model_index)};
+      executor->setIndexedRanks(indexed_ranks);
+      executors->emplace(model_index, subg_index, std::move(executor));
+    }
+  }
+
+  /********************************
+   * Code generation phase finished
+   ********************************/
+  return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx));
+}
+
+} // namespace compiler
+} // namespace onert
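MultiModelCompiler targets packages whose models are wired together through package edges. A reduced sketch of assembling such a package (using the same NNPkg calls the CompiledMockUpMultiModel test fixture later in this change uses) might look like the following; g0, g1 and the helper name are assumptions for illustration only.

    // Sketch only: a two-model package where model 0's output 0 feeds model 1's input 0
    #include "ir/Graph.h"
    #include "ir/NNPkg.h"

    #include <memory>

    std::shared_ptr<onert::ir::NNPkg> makeTwoModelPkg(const std::shared_ptr<onert::ir::Graph> &g0,
                                                      const std::shared_ptr<onert::ir::Graph> &g1)
    {
      using namespace onert::ir;
      auto nnpkg = std::make_shared<NNPkg>();

      auto m0 = std::make_shared<Model>();
      m0->push(SubgraphIndex{0}, g0);
      auto m1 = std::make_shared<Model>();
      m1->push(SubgraphIndex{0}, g1);
      nnpkg->push(ModelIndex{0}, std::move(m0));
      nnpkg->push(ModelIndex{1}, std::move(m1));

      // Package-level I/O and the inter-model edge
      nnpkg->addInput(IODesc{ModelIndex{0}, SubgraphIndex{0}, IOIndex{0}});
      nnpkg->addOutput(IODesc{ModelIndex{1}, SubgraphIndex{0}, IOIndex{0}});
      nnpkg->addEdge(IODesc{ModelIndex{0}, SubgraphIndex{0}, IOIndex{0}},  // from: model 0, output 0
                     IODesc{ModelIndex{1}, SubgraphIndex{0}, IOIndex{0}}); // to:   model 1, input 0
      return nnpkg;
    }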
diff --git a/runtime/onert/core/src/compiler/MultiModelCompiler.h b/runtime/onert/core/src/compiler/MultiModelCompiler.h
new file mode 100644 (file)
index 0000000..89af664
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file  MultiModelCompiler.h
+ * @brief This file contains MultiModelCompiler class to define and run compilation phase
+ */
+
+#ifndef __ONERT_COMPILER_MULTI_MODEL_COMPILER_H__
+#define __ONERT_COMPILER_MULTI_MODEL_COMPILER_H__
+
+#include "compiler/CompilerOptions.h"
+#include "compiler/ICompiler.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+/**
+ * @brief Class to compile NN package
+ */
+class MultiModelCompiler final : public ICompiler
+{
+public:
+  /**
+   * @brief     Construct a new Compiler object for NN package
+   * @param[in] nnpkg    NN package to compile
+   * @param[in] copts    Compiler option vector for each model in the package
+   */
+  MultiModelCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+                     std::vector<std::unique_ptr<CompilerOptions>> &copts);
+
+  /**
+   * @brief Destroy the MultiModelCompiler object
+   */
+  ~MultiModelCompiler() = default;
+
+public:
+  /**
+   * @brief   Do compilation with the options
+   *
+   * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation
+   */
+  std::shared_ptr<CompilerArtifact> compile(void);
+
+private:
+  std::shared_ptr<ir::Graph> &primary_subgraph()
+  {
+    return _nnpkg->primary_model()->at(ir::SubgraphIndex{0});
+  }
+
+private:
+  std::shared_ptr<ir::NNPkg> _nnpkg;
+  std::vector<CompilerOptions *> _voptions;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_MULTI_MODEL_COMPILER_H__
index 4854505..25747d9 100644 (file)
@@ -18,6 +18,8 @@
 #include "util/ShapeInference.h"
 #include "util/logging.h"
 
+#include <misc/polymorphic_downcast.h>
+
 #include <sstream>
 #include <stdexcept>
 
@@ -188,6 +190,95 @@ void StaticShapeInferer::dump()
     });
 }
 
+std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>>
+StaticShapeInferer::createStaticShapeInferers(
+  const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<LoweredGraph>> &lowered_subgs)
+{
+  // Allocate StaticShapeInferer per each subgraph
+  std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers;
+  for (auto &&pair : lowered_subgs)
+  {
+    const auto &subg_index = pair.first;
+    auto &lowered_subg = pair.second;
+    inferers[subg_index] = std::make_unique<StaticShapeInferer>(lowered_subg.get());
+  }
+
+  // Append observers in all StaticShapeInferers
+  for (auto &&pair : lowered_subgs)
+  {
+    const auto &subg_index = pair.first;
+    auto &lowered_subg = pair.second;
+
+    // TODO: Iterate over control flow operations only instead of all operations
+    lowered_subg->graph().operations().iterate(
+      [&](const ir::OperationIndex &, const ir::Operation &op) {
+        // A function to append child inferers. These make it possible for a StaticShapeInferer to
+        // call StaticShapeInferers of child subgraphs recursively
+        auto appendChildInferer = [&](const ir::SubgraphIndex &child_subg_idx) {
+          auto *child_inferer = inferers.at(child_subg_idx).get();
+          inferers.at(subg_index)->appendChildInferer(child_subg_idx, child_inferer);
+        };
+
+        // A function to append subgraph input observers. This makes it possible for a
+        // StaticShapeInferer to update inputs of child subgraphs
+        auto appendSubgraphInputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
+          std::vector<ir::Operand *> child_subg_inputs;
+          auto &child_subg = lowered_subgs.at(child_subg_idx)->graph();
+          for (const auto &input_idx : child_subg.getInputs())
+          {
+            auto operand_ptr = child_subg.operands().getRawPtr(input_idx);
+            child_subg_inputs.emplace_back(operand_ptr);
+          }
+          inferers.at(subg_index)
+            ->appendSubgInputObserver(child_subg_idx,
+                                      std::make_unique<OperandObserver>(child_subg_inputs));
+        };
+
+        // A function to set control flow output observers. This makes it possible for a
+        // StaticShapeInferer to update outputs of parent control flow operations
+        auto setControlFlowOutputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
+          std::vector<ir::Operand *> cf_outputs;
+          auto &subg = lowered_subg->graph();
+          for (const auto &output_idx : op.getOutputs())
+          {
+            auto operand_ptr = subg.operands().getRawPtr(output_idx);
+            cf_outputs.emplace_back(operand_ptr);
+          }
+          inferers.at(child_subg_idx)
+            ->setControlflowOutputObserver(std::make_unique<OperandObserver>(cf_outputs));
+        };
+
+        // Append Observers in a StaticShapeInferer
+        if (op.opcode() == ir::OpCode::If)
+        {
+          const auto &if_op = nnfw::misc::polymorphic_downcast<const ir::operation::If &>(op);
+
+          appendChildInferer(if_op.param().then_subg_index);
+          appendChildInferer(if_op.param().else_subg_index);
+
+          appendSubgraphInputObserver(if_op.param().then_subg_index);
+          appendSubgraphInputObserver(if_op.param().else_subg_index);
+
+          setControlFlowOutputObserver(if_op.param().then_subg_index);
+        }
+        else if (op.opcode() == ir::OpCode::While)
+        {
+          const auto &while_op = nnfw::misc::polymorphic_downcast<const ir::operation::While &>(op);
+
+          appendChildInferer(while_op.param().cond_subg_index);
+          appendChildInferer(while_op.param().body_subg_index);
+
+          appendSubgraphInputObserver(while_op.param().cond_subg_index);
+          appendSubgraphInputObserver(while_op.param().body_subg_index);
+
+          setControlFlowOutputObserver(while_op.param().body_subg_index);
+        }
+      });
+  }
+
+  return inferers;
+}
+
 void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op)
 {
   auto &operands = _lowered_subg->graph().operands();
@@ -1306,8 +1397,11 @@ void StaticShapeInferer::visit(const ir::operation::Bulk &op)
   auto origin_output_shape = op.param().origin_output_shapes[0];
 
   // TODO: more check for valid batch request
-  assert(cur_input_shape.dim(0) >= origin_output_shape.dim(0));
-  assert(cur_input_shape.dim(0) % origin_output_shape.dim(0) == 0);
+  if ((cur_input_shape.dim(0) < origin_output_shape.dim(0)) ||
+      (cur_input_shape.dim(0) % origin_output_shape.dim(0) != 0))
+  {
+    throw std::runtime_error("StaticShapeInferer " + op.name() + ": Not supported batch size");
+  }
   size_t batch_multiplier = cur_input_shape.dim(0) / origin_output_shape.dim(0);
 
   ir::Shape new_shape;
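The runtime check that replaced the asserts above accepts a batch request only when the current batch is at least the origin batch and an exact multiple of it; the multiplier is then the quotient. In isolation (the helper name is illustrative):

    // Sketch only: the batch-size rule enforced above
    #include <cstddef>
    #include <cstdint>
    #include <stdexcept>

    size_t batchMultiplier(int32_t cur_batch, int32_t origin_batch)
    {
      if (cur_batch < origin_batch || cur_batch % origin_batch != 0)
        throw std::runtime_error("Not supported batch size");
      return static_cast<size_t>(cur_batch / origin_batch);
    }
    // batchMultiplier(8, 2) == 4; batchMultiplier(6, 4) throws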
index b3cc0bb..c7e06e8 100644 (file)
@@ -71,7 +71,7 @@ public:
 
   backend::ITensor *getITensor(ir::OperandIndex ind) const
   {
-    for (auto &tensor_reg : _tensor_regs)
+    for (auto &&tensor_reg : _tensor_regs)
     {
       auto tensor = tensor_reg->getITensor(ind);
       if (tensor)
index f50fae0..e2b3f61 100644 (file)
@@ -34,7 +34,7 @@ void OddOutputPass::run()
 
   VERBOSE(OddOutputPass) << "Case 1 : An operand which is a model output and a model input"
                          << std::endl;
-  for (auto &ind : outputs)
+  for (const auto &ind : outputs)
   {
     if (_graph.getInputs().contains(ind))
     {
@@ -46,7 +46,7 @@ void OddOutputPass::run()
 
   VERBOSE(OddOutputPass) << "Case 2 : Two or more duplicated outputs" << std::endl;
   std::unordered_set<ir::OperandIndex> occurence;
-  for (auto &ind : outputs)
+  for (auto &&ind : outputs)
   {
     auto &obj = _graph.operands().at(ind);
     if (occurence.count(ind) == 0)
index 1be6d77..2d11be2 100644 (file)
@@ -31,7 +31,7 @@ PassRunner &PassRunner::append(std::unique_ptr<Pass> pass)
 
 void PassRunner::run()
 {
-  for (auto &pass : _passes)
+  for (auto &&pass : _passes)
   {
     VERBOSE(PassRunner) << "Start running '" << pass->id() << "'" << std::endl;
     pass->run();
index 71efa1b..0da1e54 100644 (file)
@@ -105,9 +105,9 @@ void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Opera
       }
     }
 
-    for (auto &operation : remove_list)
+    for (const auto &operation_index : remove_list)
     {
-      object.removeUse(operation);
+      object.removeUse(operation_index);
     }
   }
 }
index 9d1e06d..7d5b406 100644 (file)
@@ -23,13 +23,12 @@ namespace onert
 namespace exec
 {
 
-Execution::Execution(const std::shared_ptr<Executors> &executors) : _executors{executors}
+Execution::Execution(const std::shared_ptr<IExecutors> &executors) : _executors{executors}
 {
   assert(executors != nullptr);
-  assert(executors->at(ir::SubgraphIndex{0}) != nullptr);
+  assert(executors->entryExecutor() != nullptr);
   _io_desc.inputs.resize(_executors->inputSize());
   _io_desc.outputs.resize(_executors->outputSize());
-  sem_init(&_async_io_descs_sem, 0, 1);
 }
 
 void Execution::changeInputShape(const ir::IOIndex &index, const ir::Shape &new_shape)
@@ -70,80 +69,6 @@ void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t le
   _io_desc.inputs.at(index.value()) = std::make_unique<InputDesc>(info, buffer, length, layout);
 }
 
-void Execution::createNewAsyncDesc(uint32_t count)
-{
-  IODescription *_async_io_desc = new IODescription;
-  _async_io_desc->inputs.resize(primary_subgraph().getInputs().size());
-  _async_io_desc->outputs.resize(primary_subgraph().getOutputs().size());
-
-  _async_io_descs.push_back({_async_io_desc, count});
-}
-
-void Execution::setFinish() { finished = true; }
-
-bool Execution::isEmptyQueue()
-{
-  asyncIoDescSemWait();
-  bool ret = _async_io_descs.empty();
-  if (!ret)
-  {
-    for (uint32_t idx = 0; idx < _async_io_descs.front().first->inputs.size(); idx++)
-    {
-      if (_async_io_descs.front().first->inputs.at(idx).get() == nullptr)
-      {
-        ret = true;
-        break;
-      }
-    }
-  }
-  asyncIoDescSemPost();
-  return ret;
-}
-
-void Execution::executeAsyncInput(const ir::IOIndex &index, const void *buffer, size_t length,
-                                  ir::Layout layout)
-{
-  const auto info = _executors->inputInfo(index);
-  IODescription *_async_io_desc = _async_io_descs.back().first;
-
-  {
-    auto input_shape_sig = _async_io_desc->dynamic_input_shapes.find(index);
-    auto size_required =
-      (input_shape_sig != _async_io_desc->dynamic_input_shapes.end())
-        ? input_shape_sig->second.num_elements() * onert::ir::sizeOfDataType(info.typeInfo().type())
-        : info.total_size();
-
-    if (length < size_required)
-    {
-      throw std::runtime_error{"Too small length"};
-    }
-  }
-  void *_buffer = (void *)malloc(length);
-  if (_buffer == NULL)
-  {
-    throw std::runtime_error{"malloc failed"};
-  }
-  memcpy(_buffer, buffer, length);
-
-  _async_io_desc->inputs.at(index.value()) =
-    std::make_unique<InputDesc>(info, _buffer, length, layout);
-}
-
-void Execution::executeAsyncOutput(const ir::IOIndex &index, void *buffer, size_t length,
-                                   ir::Layout layout)
-{
-  const auto info = _executors->outputInfo(index);
-  IODescription *_async_io_desc = _async_io_descs.front().first;
-
-  if (length < info.total_size())
-  {
-    throw std::runtime_error{"Too small length"};
-  }
-
-  _async_io_desc->outputs.at(index.value()) =
-    std::make_unique<OutputDesc>(info, buffer, length, layout);
-}
-
 // TODO Remove default parameter
 void Execution::setInput(const ir::IOIndex &index, const ir::TypeInfo &type, const ir::Shape &shape,
                          const void *buffer, size_t length, ir::Layout layout)
@@ -209,18 +134,6 @@ void Execution::execute()
   VERBOSE(Execution) << "Execution finished" << std::endl;
 }
 
-void Execution::AsyncExecute()
-{
-  VERBOSE(Execution) << "Start Async execution" << std::endl;
-  if (_async_io_descs.empty())
-  {
-    VERBOSE(Execution) << "The input is not ready" << std::endl;
-    return;
-  }
-
-  primary_executor()->execute(*_async_io_descs.front().first);
-}
-
 void Execution::startExecute()
 {
   VERBOSE(Execution) << "Create asynchronous execution thread" << std::endl;
@@ -251,163 +164,21 @@ ir::Shape Execution::getInputShape(ir::IOIndex ind) const
   }
 }
 
+// NNAPI returns failure if ANeuralNetworksExecution_getOutputOperandRank or
+// ANeuralNetworksExecution_getOutputOperandDimensions is called before execution.
+// On the other hand, the NNFW API returns the static shape inference result if
+// nnfw_output_tensorinfo is called before execution.
+// To handle both cases, this method returns the static shape inference result; the failure case
+// is handled on the NNAPI frontend.
 ir::Shape Execution::getOutputShape(ir::IOIndex ind) const
 {
   if (!isFinished())
-    throw std::runtime_error("Cannot get output shape before execution is finished");
+    return _executors->outputInfo(ind).shape();
 
   const auto &output_desc = _io_desc.outputs.at(ind.value());
 
   return output_desc->info.shape();
 }
 
-void Execution::asyncIoDescSemWait() { sem_wait(&_async_io_descs_sem); }
-
-void Execution::asyncIoDescSemPost() { sem_post(&_async_io_descs_sem); }
-
-void Execution::runInference()
-{
-  uint32_t inference_cnt;
-  uint32_t output_sz = primary_subgraph().getOutputs().size();
-  while (true)
-  {
-    if (isEmptyQueue())
-    {
-      if (isFinished())
-      {
-        if (!next_exes.empty())
-        {
-          for (uint32_t i = 0; i < next_exes.size(); i++)
-          {
-            std::get<0>(next_exes[i])->setFinish();
-          }
-        }
-        else
-        {
-          sholudStop();
-        }
-        break;
-      }
-    }
-    else
-    {
-      for (uint32_t i = 0; i < output_sz; i++)
-      {
-        auto opidx = primary_subgraph().getOutputs().at(i);
-        auto shape = primary_subgraph().operands().at(opidx).shape();
-        auto dtype = primary_subgraph().operands().at(opidx).typeInfo().type();
-        auto rank = shape.rank();
-        uint32_t tensor_size = 1;
-        for (int32_t j = 0; j < rank; j++)
-        {
-          tensor_size *= shape.dim(j);
-        }
-        if (dtype == onert::ir::DataType::FLOAT32 || dtype == onert::ir::DataType::INT32 ||
-            dtype == onert::ir::DataType::UINT32)
-          tensor_size *= 4;
-        else if (dtype == onert::ir::DataType::INT64)
-          tensor_size *= 8;
-        void *_buffer = (void *)malloc(tensor_size);
-        if (_buffer == NULL)
-        {
-          throw std::runtime_error{"malloc failed"};
-        }
-        executeAsyncOutput(onert::ir::IOIndex(i), _buffer, tensor_size);
-      }
-      AsyncExecute();
-
-      // set inputs of next execution
-      auto _io_desc = getAsyncIoDescs()->front().first;
-      inference_cnt = getAsyncIoDescs()->front().second;
-      getAsyncIoDescs()->pop_front();
-
-      for (uint32_t i = 0; i < next_exes.size(); i++)
-      {
-        auto next_exe = std::get<0>(next_exes[i]);
-        auto o_index = std::get<1>(next_exes[i]);
-        auto i_index = std::get<2>(next_exes[i]);
-
-        next_exe->asyncIoDescSemWait();
-        auto next_io_descs = next_exe->getAsyncIoDescs();
-        bool exist = false;
-        for (auto iter = next_io_descs->begin(); iter != next_io_descs->end(); iter++)
-        {
-          if (inference_cnt == iter->second)
-          {
-            exist = true;
-          }
-        }
-
-        if (!exist)
-        {
-          next_exe->createNewAsyncDesc(inference_cnt);
-        }
-        for (auto iter = next_io_descs->begin(); iter != next_io_descs->end(); iter++)
-        {
-          if (inference_cnt == iter->second)
-          {
-            const auto input_index = next_exe->primary_subgraph().getInputs().at(i_index.value());
-            const auto info = next_exe->primary_subgraph().operands().at(input_index).info();
-
-            size_t length = _io_desc->outputs[o_index.value()]->size;
-            void *_buffer = (void *)malloc(length);
-            if (_buffer == NULL)
-            {
-              throw std::runtime_error{"malloc failed"};
-            }
-            memcpy(_buffer, _io_desc->outputs[o_index.value()]->buffer, length);
-
-            iter->first->inputs.at(i_index.value()) = std::make_unique<onert::exec::InputDesc>(
-              info, _buffer, length, onert::ir::Layout::NHWC);
-            break;
-          }
-        }
-        next_exe->asyncIoDescSemPost();
-      }
-
-      if (next_exes.empty())
-      {
-        std::vector<void *> results;
-        for (uint32_t i = 0; i < _io_desc->outputs.size(); i++)
-        {
-          size_t length = _io_desc->outputs[i]->size;
-          void *_buffer = (void *)malloc(length);
-          if (_buffer == NULL)
-          {
-            throw std::runtime_error{"malloc failed"};
-          }
-          memcpy(_buffer, _io_desc->outputs[i]->buffer, length);
-          results.push_back(_buffer);
-        }
-        _async_results.push_back(results);
-      }
-
-      for (uint32_t i = 0; i < _io_desc->inputs.size(); i++)
-      {
-        auto p = _io_desc->inputs.at(i).release();
-        if (p)
-        {
-          free((void *)p->buffer);
-          delete p;
-        }
-      }
-      for (uint32_t i = 0; i < _io_desc->outputs.size(); i++)
-      {
-        auto p = _io_desc->outputs.at(i).release();
-        if (p)
-        {
-          free(p->buffer);
-          delete p;
-        }
-      }
-      delete _io_desc;
-    }
-  }
-}
-
-bool Execution::stopWait(void) const { return stop_wait; }
-
-void Execution::sholudStop() { stop_wait = true; }
-
 } // namespace exec
 } // namespace onert
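With the change above, querying an output shape no longer throws before the run finishes: it falls back to the statically inferred shape, and only after execution does it reflect the actual output description. A hedged usage sketch follows; the executors pointer, buffers, and sizes are assumed to exist (as in the tests below), and the wrapper function is illustrative only.

    // Sketch only: output-shape queries around execute()
    #include "exec/Execution.h"
    #include "exec/IExecutors.h"

    #include <cstddef>
    #include <memory>

    void runOnce(const std::shared_ptr<onert::exec::IExecutors> &executors,
                 const void *input_buffer, size_t input_size,
                 void *output_buffer, size_t output_size)
    {
      onert::exec::Execution execution{executors};

      // Before execute(): returns the static shape inference result
      auto static_shape = execution.getOutputShape(onert::ir::IOIndex{0});
      (void)static_shape;

      execution.setInput(onert::ir::IOIndex{0}, input_buffer, input_size);
      execution.setOutput(onert::ir::IOIndex{0}, output_buffer, output_size);
      execution.execute();

      // After execute(): returns the shape recorded in the output description
      auto actual_shape = execution.getOutputShape(onert::ir::IOIndex{0});
      (void)actual_shape;
    }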
index e3ea494..fefe8a3 100644 (file)
@@ -17,6 +17,7 @@
 #include "exec/Execution.h"
 
 #include "compiler/Compiler.h"
+#include "compiler/CompilerFactory.h"
 #include "ir/Graph.h"
 #include "ir/operation/BinaryArithmetic.h"
 #include "util/TracingCtx.h"
@@ -90,6 +91,161 @@ public:
   std::shared_ptr<onert::compiler::CompilerArtifact> artifact;
 };
 
+class CompiledMockUpMultiModel
+{
+public:
+  CompiledMockUpMultiModel()
+  {
+    // Model0: a float elementwise add operation
+    // Model0 input: lhs0, rhs0
+    // Model0 output: add result (result0)
+
+    // Model1: a qasymm8 elementwise add operation
+    // Model1 input: result0, rhs1
+    // Model1 output: add result (result1)
+
+    // Model2: a float elementwise add operation
+    // Model2 input: result0, result1
+    // Model2 output: add result (result2)
+
+    // constant: rhs2
+    // result0 <= (lhs0 + rhs0)
+    // result1 <= (result0 + rhs1)
+    // result2 <= (result0 + result1)
+    // lhs0, rhs0, rhs1, result0, result1, result2 shape: {1, 2, 2, 1}
+    // activation: none (constant)
+
+    // Update edge information
+    edges.pkg_inputs.emplace_back(ModelIndex{0}, SubgraphIndex{0}, IOIndex{0});
+    edges.pkg_inputs.emplace_back(ModelIndex{0}, SubgraphIndex{0}, IOIndex{1});
+    edges.pkg_outputs.emplace_back(ModelIndex{2}, SubgraphIndex{0}, IOIndex{0});
+    // From
+    const auto result0 = IODesc{ModelIndex{0}, SubgraphIndex{0}, IOIndex{0}};
+    const auto result1 = IODesc{ModelIndex{1}, SubgraphIndex{0}, IOIndex{0}};
+    // To
+    const auto lhs1 = IODesc{ModelIndex{1}, SubgraphIndex{0}, IOIndex{0}};
+    const auto lhs2 = IODesc{ModelIndex{2}, SubgraphIndex{0}, IOIndex{0}};
+    const auto rhs2 = IODesc{ModelIndex{2}, SubgraphIndex{0}, IOIndex{1}};
+    edges.edges.insert({result0, lhs1});
+    edges.edges.insert({result0, lhs2});
+    edges.edges.insert({result1, rhs2});
+
+    for (size_t i = 0; i < 3; ++i)
+    {
+      graphs.emplace_back(std::make_shared<Graph>());
+    }
+    Shape shape{1, 2, 2, 1};
+
+    // Model0's add operands (result0 <= lhs0 + rhs0)
+    DataType types[3] = {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::FLOAT32};
+    auto operand_lhs0 = graphs[0]->addOperand(shape, TypeInfo{types[0]});
+    auto operand_rhs0 = graphs[0]->addOperand(shape, TypeInfo{types[0]});
+    auto operand_result0 = graphs[0]->addOperand(shape, TypeInfo{types[0]});
+
+    // Model0's add operation
+    operation::BinaryArithmetic::Param param0;
+    param0.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+    param0.activation = Activation::NONE;
+    auto input_set0 = OperandIndexSequence{operand_lhs0, operand_rhs0};
+    auto output_set0 = OperandIndexSequence{operand_result0};
+    graphs[0]->addOperation(
+      std::make_unique<operation::BinaryArithmetic>(input_set0, output_set0, param0));
+
+    // Model0's inputs/outputs
+    graphs[0]->addInput(operand_lhs0);
+    graphs[0]->addInput(operand_rhs0);
+    graphs[0]->addOutput(operand_result0);
+    graphs[0]->verify();
+
+    // Model1's add operands (result1 <= Model0 result + rhs1)
+    // static float rhs1_data[4] = {3, 1, -1, 5};
+    static uint8_t rhs1_data[4] = {131, 129, 127, 133};
+    const float scale = 1;
+    const int32_t zero_point = 128;
+    auto operand_lhs1 = graphs[1]->addOperand(shape, TypeInfo{types[1], scale, zero_point});
+    auto operand_rhs1 = graphs[1]->addOperand(shape, TypeInfo{types[1], scale, zero_point});
+    auto operand_result1 = graphs[1]->addOperand(shape, TypeInfo{types[1], scale, zero_point});
+    graphs[1]
+      ->operands()
+      .at(operand_rhs1)
+      .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs1_data), 4));
+
+    // Model1's add operation
+    operation::BinaryArithmetic::Param param1;
+    param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+    param1.activation = Activation::NONE;
+    auto input_set1 = OperandIndexSequence{operand_lhs1, operand_rhs1};
+    auto output_set1 = OperandIndexSequence{operand_result1};
+    graphs[1]->addOperation(
+      std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+
+    // Model1's inputs/outputs
+    graphs[1]->addInput(operand_lhs1);
+    graphs[1]->addOutput(operand_result1);
+    graphs[1]->verify();
+
+    // Model2's add operands (result2 <= Model0 result + Model1 result)
+    auto operand_lhs2 = graphs[2]->addOperand(shape, TypeInfo{types[2]});
+    auto operand_rhs2 = graphs[2]->addOperand(shape, TypeInfo{types[2]});
+    auto operand_result2 = graphs[2]->addOperand(shape, TypeInfo{types[2]});
+
+    // Model2's add operation
+    operation::BinaryArithmetic::Param param2;
+    param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+    param2.activation = Activation::NONE;
+    auto input_set2 = OperandIndexSequence{operand_lhs2, operand_rhs2};
+    auto output_set2 = OperandIndexSequence{operand_result2};
+    graphs[2]->addOperation(
+      std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
+
+    // Model2's inputs/outputs
+    graphs[2]->addInput(operand_lhs2);
+    graphs[2]->addInput(operand_rhs2);
+    graphs[2]->addOutput(operand_result2);
+    graphs[2]->verify();
+
+    // Compile
+    compile();
+  }
+
+public:
+  void compile()
+  {
+    auto nnpkg = std::make_shared<onert::ir::NNPkg>();
+    coptions.clear();
+    for (uint16_t i = 0; i < graphs.size(); ++i)
+    {
+      coptions.emplace_back(onert::compiler::CompilerOptions::fromGlobalConfig());
+
+      auto model = std::make_shared<onert::ir::Model>();
+      model->push(SubgraphIndex{0}, graphs[i]);
+
+      nnpkg->push(onert::ir::ModelIndex{i}, std::move(model));
+    }
+    for (const auto &pkg_input : edges.pkg_inputs)
+    {
+      nnpkg->addInput(pkg_input);
+    }
+    for (const auto &pkg_output : edges.pkg_outputs)
+    {
+      nnpkg->addOutput(pkg_output);
+    }
+    for (const auto &edge : edges.edges)
+    {
+      nnpkg->addEdge(edge.from, edge.to);
+    }
+    auto compiler = onert::compiler::CompilerFactory::get().create(nnpkg, coptions);
+    nnpkg.reset();
+    artifact = compiler->compile();
+  }
+
+public:
+  std::vector<std::shared_ptr<Graph>> graphs;
+  std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> coptions;
+  std::shared_ptr<onert::compiler::CompilerArtifact> artifact;
+  ModelEdges edges;
+};
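+// Worked example of the arithmetic the three chained models above are expected to perform,
+// assuming the edges wire Model0's output into Model1 and both Model0's and Model1's outputs
+// into Model2 (Model1 computes in uint8 with scale 1 and zero point 128):
+//   Model0: {1, 0, -1, -2} + {1, -3, 2, -4} = {2, -3, 1, -6}
+//   Model1: {2, -3, 1, -6} + {3, 1, -1, 5}  = {5, -2, 0, -1}
+//   Model2: {2, -3, 1, -6} + {5, -2, 0, -1} = {7, -5, 1, -7}
+// which matches the expected outputs used by the multi_model_* tests below.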
+
 TEST(ExecInstance, simple)
 {
   auto mockup = CompiledMockUpModel();
@@ -209,7 +365,7 @@ class Inference
 {
 public:
   Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4],
-            std::shared_ptr<onert::exec::Executors> &executors)
+            std::shared_ptr<onert::exec::IExecutors> &executors)
     : _input1{input1}, _input2{input2}, _output{output}, _executors{executors}
   {
     // DO NOTHING
@@ -233,7 +389,7 @@ private:
   const float (&_input1)[4];
   const float (&_input2)[4];
   float (&_output)[4];
-  std::shared_ptr<onert::exec::Executors> &_executors;
+  std::shared_ptr<onert::exec::IExecutors> &_executors;
 };
 
 // Support multi-thread execution
@@ -299,4 +455,181 @@ TEST(ExecInstance, async)
   }
 }
 
+TEST(ExecInstance, multi_model_simple)
+{
+  auto mockup = CompiledMockUpMultiModel();
+  auto executors = mockup.artifact->_executors;
+
+  auto input1 = IOIndex{0};
+  auto input2 = IOIndex{1};
+  auto output = IOIndex{0};
+
+  const float input1_buffer[4] = {1, 0, -1, -2};
+  const float input2_buffer[4] = {1, -3, 2, -4};
+  float output_buffer[4] = {};
+  const float output_expected[4] = {7, -5, 1, -7};
+
+  onert::exec::Execution execution{executors};
+
+  execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+  execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+  execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+  execution.execute();
+
+  for (auto i = 0; i < 4; i++)
+  {
+    EXPECT_EQ(output_buffer[i], output_expected[i]);
+  }
+}
+
+TEST(ExecInstance, multi_model_twoCompile)
+{
+  auto mockup = CompiledMockUpMultiModel();
+  auto executors1 = mockup.artifact->_executors;
+  onert::exec::Execution execution1{executors1};
+
+  auto input1 = IOIndex{0};
+  auto input2 = IOIndex{1};
+  auto output = IOIndex{0};
+
+  const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+  const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+  float exe1_output_buffer[4] = {};
+  const float exe1_output_expected[4] = {7, -5, 1, -7};
+
+  execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+  execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+  execution1.setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16);
+
+  // Make new executor: compile again
+  mockup.compile();
+  onert::exec::Execution execution2{mockup.artifact->_executors};
+
+  const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+  const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+  float exe2_output_buffer[4] = {};
+  const float exe2_output_expected[4] = {1, 9, -3, 9};
+
+  execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+  execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+  execution2.setOutput(output, reinterpret_cast<void *>(exe2_output_buffer), 16);
+
+  execution1.execute();
+  execution2.execute();
+
+  for (auto i = 0; i < 4; i++)
+  {
+    EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+    EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+  }
+}
+
+// Support two initialized execution instances, then ordered execution
+TEST(ExecInstance, multi_model_twoExecution)
+{
+  auto mockup = CompiledMockUpMultiModel();
+  auto executors = mockup.artifact->_executors;
+  auto input1 = IOIndex{0};
+  auto input2 = IOIndex{1};
+  auto output1 = IOIndex{0};
+
+  const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+  const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+  float exe1_output_buffer[4] = {};
+  const float exe1_output_expected[4] = {7, -5, 1, -7};
+  const float exe2_output_expected[4] = {1, 9, -3, 9};
+
+  onert::exec::Execution execution1{executors};
+  execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+  execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+  execution1.setOutput(output1, reinterpret_cast<void *>(exe1_output_buffer), 16);
+
+  const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+  const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+  float exe2_output_buffer[4] = {};
+
+  // Make new execution
+  onert::exec::Execution execution2{executors};
+  execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+  execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+  execution2.setOutput(output1, reinterpret_cast<void *>(exe2_output_buffer), 16);
+
+  execution1.execute();
+  execution1.execute();
+  execution2.execute();
+  execution2.execute();
+
+  for (auto i = 0; i < 4; i++)
+  {
+    EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+    EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+  }
+}
+
+// Multi-model is not thread-safe yet
+
+// Support asynchronous execution
+TEST(ExecInstance, multi_model_async)
+{
+  auto mockup = CompiledMockUpMultiModel();
+  auto executors = mockup.artifact->_executors;
+
+  auto input1 = IOIndex{0};
+  auto input2 = IOIndex{1};
+  auto output = IOIndex{0};
+
+  const float input1_buffer[4] = {1, 0, -1, -2};
+  const float input2_buffer[4] = {1, -3, 2, -4};
+  float output_buffer[4] = {};
+  const float output_expected[4] = {7, -5, 1, -7};
+
+  onert::exec::Execution execution{executors};
+
+  execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+  execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+  execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+  execution.startExecute();
+  execution.waitFinish();
+
+  for (auto i = 0; i < 4; i++)
+  {
+    EXPECT_EQ(output_buffer[i], output_expected[i]);
+  }
+}
+
+TEST(ExecInstance, multi_model_dequant_input_quant_output)
+{
+  auto mockup = CompiledMockUpMultiModel();
+  auto executors = mockup.artifact->_executors;
+
+  auto input1 = IOIndex{0};
+  auto input2 = IOIndex{1};
+  auto output = IOIndex{0};
+
+  const uint8_t input1_buffer[4] = {138, 128, 118, 108}; // {1, 0, -1, -2}
+  const uint8_t input2_buffer[4] = {138, 98, 148, 88};   // {1, -3, 2, -4}
+  uint8_t output_buffer[4] = {};
+  const uint8_t output_expected[4] = {198, 78, 138, 58}; // {7, -5, 1, -7}
+  float scale = 0.1;
+  int32_t zero_point = 128;
+
+  onert::exec::Execution execution{executors};
+
+  onert::ir::TypeInfo type_info{onert::ir::DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+  execution.setInput(input1, type_info, execution.getInputShape(input1),
+                     reinterpret_cast<const void *>(input1_buffer), 4, onert::ir::Layout::NHWC);
+  execution.setInput(input2, type_info, execution.getInputShape(input2),
+                     reinterpret_cast<const void *>(input2_buffer), 4, onert::ir::Layout::NHWC);
+  execution.setOutput(output, type_info, execution.getOutputShape(output),
+                      reinterpret_cast<void *>(output_buffer), 4, onert::ir::Layout::NHWC);
+  execution.execute();
+
+  for (auto i = 0; i < 4; i++)
+  {
+    EXPECT_EQ(output_buffer[i], output_expected[i]);
+  }
+}
+
+// TODO Add an unittest multi_model_quant_input_dequant_output
+
 } // namespace
index d6a2bfd..66610f0 100644 (file)
@@ -28,7 +28,7 @@ void ExecutionObservee::add(std::unique_ptr<IExecutionObserver> observer)
 
 void ExecutionObservee::notifySubgraphBegin(ir::SubgraphIndex ind)
 {
-  for (auto &o : _observers)
+  for (auto &&o : _observers)
   {
     o->handleSubgraphBegin(ind);
   }
@@ -36,7 +36,7 @@ void ExecutionObservee::notifySubgraphBegin(ir::SubgraphIndex ind)
 
 void ExecutionObservee::notifySubgraphEnd(ir::SubgraphIndex ind)
 {
-  for (auto &o : _observers)
+  for (auto &&o : _observers)
   {
     o->handleSubgraphEnd(ind);
   }
@@ -45,7 +45,7 @@ void ExecutionObservee::notifySubgraphEnd(ir::SubgraphIndex ind)
 void ExecutionObservee::notifyJobBegin(IExecutor *executor, ir::SubgraphIndex subg_ind,
                                        ir::OperationIndex op_ind, const backend::Backend *backend)
 {
-  for (auto &o : _observers)
+  for (auto &&o : _observers)
   {
     o->handleJobBegin(executor, subg_ind, op_ind, backend);
   }
@@ -54,7 +54,7 @@ void ExecutionObservee::notifyJobBegin(IExecutor *executor, ir::SubgraphIndex su
 void ExecutionObservee::notifyJobEnd(IExecutor *executor, ir::SubgraphIndex subg_ind,
                                      ir::OperationIndex op_ind, const backend::Backend *backend)
 {
-  for (auto &o : _observers)
+  for (auto &&o : _observers)
   {
     o->handleJobEnd(executor, subg_ind, op_ind, backend);
   }
index 1aadac2..91fbac3 100644 (file)
@@ -22,7 +22,7 @@
 #include "../util/EventRecorder.h"
 #include "../util/EventWriter.h"
 
-#include "exec/Executors.h"
+#include "exec/IExecutor.h"
 #include "ir/Index.h"
 #include "ir/Operation.h"
 #include "util/ITimer.h"
index d2d204a..515cf8e 100644 (file)
@@ -29,8 +29,8 @@ ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_gra
                            backend::BackendContexts &&backend_contexts,
                            const compiler::TensorRegistries &tensor_regs,
                            const util::TracingCtx *tracing_ctx)
-  : _lowered_graph{std::move(lowered_graph)}, _backend_contexts{std::move(backend_contexts)},
-    _graph{_lowered_graph->graph()}, _parent_graph{_lowered_graph->parent_graph()}, _mutex(),
+  : _lowered_graph{std::move(lowered_graph)},
+    _backend_contexts{std::move(backend_contexts)}, _graph{_lowered_graph->graph()}, _mutex(),
     _tracing_ctx(tracing_ctx)
 {
   auto build_tensor_list = [&](const auto &ind_seq, auto &tensors) {
@@ -120,9 +120,27 @@ void ExecutorBase::execute(const IODescription &desc)
     {
       tensor->set_dynamic();
       tensor->setShape(input_shape->second);
+      /*
+       * Changes the tensor shape and allocates memory since its shape may have been
+       * changed by nnfw_set_input_tensorinfo()
+       *
+       * Cases are:
+       * 1) static operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
+       *                                                 (a)          (b)
+       *
+       * at (a), operand is static, tensor is static - memory dealloc is not needed
+       *   (DynamicTensorManager cannot dealloc memory allocated by StaticTensorManager)
+       * at (b), operand is static, tensor is dynamic - memory dealloc is needed
+       *
+       * 2) dynamic operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
+       *                                                  (a)          (b)
+       *
+       * at (a), operand is dynamic, tensor is dynamic - memory dealloc is not needed
+       *                                       since it has not been allocated yet
+       * at (b), operand is dynamic, tensor is dynamic - memory dealloc is needed
+       */
+      tensor->applyShape(input_shape->second);
     }
-
-    handleDynamicInputTensor(ir::IOIndex{i}, desc);
   }
 
   assert(_output_tensors.size() == desc.outputs.size());
@@ -156,38 +174,9 @@ void ExecutorBase::execute(const IODescription &desc)
   }
 }
 
-/**
- * @brief Changes tensor shape and allocate memory
- *        if input shape was changed by nnfw_set_input_tensorinfo()
- *
- * @note  Cases are:
- *        1) static operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
- *                                                        (a)          (b)
- *
- *           at (a), operand is static, tensor is static - memory dealloc is not needed
- *                   (DynamicTensorManager cannot dealloc memory allocated by StaticTensorManager)
- *           at (b), operand is static, tensor is dynamic - memory dealloc is needed
- *
- *        2) dynamic operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
- *                                                         (a)          (b)
- *
- *           at (a), operand is dynamic, tensor is dynamic - memory dealloc is not needed
- *                                                           since it has not been allocated yet
- *           at (b), operand is dynamic, tensor is dynamic - memory dealloc is needed
- */
-void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescription &desc)
-{
-  auto shape_sig_found = desc.dynamic_input_shapes.find(io_ind);
-  if (shape_sig_found != desc.dynamic_input_shapes.end())
-  {
-    auto changed_input_shape = shape_sig_found->second;
-    _input_tensors[io_ind.value()]->applyShape(changed_input_shape);
-  }
-}
-
 bool ExecutorBase::hasDynamicInput()
 {
-  for (auto &tensor : _input_tensors)
+  for (auto &&tensor : _input_tensors)
   {
     if (tensor->is_dynamic())
       return true;
index e4f9145..7aee3d9 100644 (file)
@@ -51,9 +51,7 @@ public:
 
   virtual ~ExecutorBase() = default;
 
-  const ir::Graph &graph() final { return _graph; }
-
-  const ir::Graph &parent_graph() final { return _parent_graph; }
+  const ir::Graph &graph() const final { return _graph; }
 
   void execute(const IODescription &desc) final;
 
@@ -70,6 +68,11 @@ public:
 
   void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); };
 
+  const std::vector<backend::builtin::IOTensor *> &getInputTensors() const override
+  {
+    return _input_tensors;
+  }
+
   const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const override
   {
     return _output_tensors;
@@ -87,14 +90,10 @@ protected:
   std::unique_ptr<compiler::LoweredGraph> _lowered_graph;
   backend::BackendContexts _backend_contexts;
   const ir::Graph &_graph;
-  const ir::Graph &_parent_graph;
   std::vector<backend::builtin::IOTensor *> _input_tensors;
   std::vector<backend::builtin::IOTensor *> _output_tensors;
   std::mutex _mutex;
   const util::TracingCtx *_tracing_ctx;
-
-private:
-  void handleDynamicInputTensor(ir::IOIndex input_index, const IODescription &desc);
 };
 
 } // namespace exec
index e0ee24f..3f4b3cc 100644 (file)
  * limitations under the License.
  */
 
-#include "exec/Executors.h"
+#include "Executors.h"
 
-namespace onert
-{
-namespace exec
+#include "../backend/builtin/IOTensor.h"
+
+namespace
 {
 
-uint32_t Executors::inputSize() const
+using namespace onert;
+
+int32_t find_input_index(const std::vector<ir::IODesc> &pkg_inputs,
+                         const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+                         const ir::IOIndex &io_index)
 {
-  return _model_edges ? _model_edges->pkg_inputs.size()
-                      : _executors.at(ir::SubgraphIndex{0})->graph().getInputs().size();
+  for (size_t i = 0; i < pkg_inputs.size(); i++)
+  {
+    auto &input_desc = pkg_inputs[i];
+    if ((std::get<ir::ModelIndex>(input_desc) == model_index) &&
+        (std::get<ir::SubgraphIndex>(input_desc) == subg_index) &&
+        (std::get<ir::IOIndex>(input_desc) == io_index))
+      return static_cast<int32_t>(i);
+  }
+  return -1;
 }
 
-uint32_t Executors::outputSize() const
+int32_t find_output_index(const std::vector<ir::IODesc> &pkg_outputs,
+                          const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+                          const ir::IOIndex &io_index)
 {
-  return _model_edges ? _model_edges->pkg_outputs.size()
-                      : _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().size();
+  for (size_t i = 0; i < pkg_outputs.size(); i++)
+  {
+    auto &input_desc = pkg_outputs[i];
+    if ((std::get<ir::ModelIndex>(input_desc) == model_index) &&
+        (std::get<ir::SubgraphIndex>(input_desc) == subg_index) &&
+        (std::get<ir::IOIndex>(input_desc) == io_index))
+      return static_cast<int32_t>(i);
+  }
+  return -1;
 }
 
-const ir::OperandInfo Executors::inputInfo(const ir::IOIndex &index)
+} // namespace
+
+namespace onert
+{
+namespace exec
+{
+
+class Executors::EdgeTensor : public backend::builtin::IOTensor
 {
-  if (_model_edges)
+public:
+  EdgeTensor(const ir::OperandInfo &info, ir::Layout layout)
+    : backend::builtin::IOTensor(info, layout), _buffer{nullptr}, _ref_count{0}
   {
-    // Assume that each model may have only one subgraph
-    // TODO handle general case
-    const auto desc = _model_edges->pkg_inputs[index.value()];
-    const auto model_idx = std::get<0>(desc);
-    const auto executor_idx = ir::SubgraphIndex{model_idx.value()};
-    const auto input_index = _executors.at(executor_idx)->graph().getInputs().at(std::get<2>(desc));
-    return _executors.at(executor_idx)->graph().operands().at(input_index).info();
   }
+  ~EdgeTensor() = default;
 
-  const auto input_index = _executors.at(ir::SubgraphIndex{0})->graph().getInputs().at(index);
-  return _executors.at(ir::SubgraphIndex{0})->graph().operands().at(input_index).info();
-}
+  void allocate_buffer()
+  {
+    const auto total_size = orig_info().total_size();
+    _buffer = std::make_unique<uint8_t[]>(total_size);
+    _ref_count = 1;
 
-const ir::OperandInfo Executors::outputInfo(const ir::IOIndex &index)
-{
-  if (_model_edges)
+    // NOTE An executor's inputs/outputs are always IPortableTensor. If a backend's
+    //      input/output tensor does not inherit IPortableTensor, a Permute operation is
+    //      added and all inputs/outputs become IPortableTensor at the compile stage.
+    //      This allows the user's buffers to be set as the executors' inputs/outputs.
+    setUserTensor(_buffer.get(), total_size);
+  }
+
+  void increase_ref() { _ref_count++; }
+
+  void decrease_ref()
   {
-    // Assume that each model may have only one subgraph
-    // TODO handle general case
-    auto desc = _model_edges->pkg_outputs[index.value()];
-    auto model_idx = std::get<0>(desc);
-    auto executor_idx = ir::SubgraphIndex{model_idx.value()};
-    auto output_index = _executors.at(executor_idx)->graph().getOutputs().at(std::get<2>(desc));
-    return _executors.at(executor_idx)->graph().operands().at(output_index).info();
+    assert(_ref_count > 0);
+    _ref_count--;
+    if (_ref_count == 0)
+    {
+      _buffer.reset();
+      setUserTensor(nullptr, orig_info().total_size());
+    }
   }
 
-  auto output_index = _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().at(index);
-  return _executors.at(ir::SubgraphIndex{0})->graph().operands().at(output_index).info();
+private:
+  std::unique_ptr<uint8_t[]> _buffer;
+  int32_t _ref_count;
+};
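+// The EdgeTensor above gives each edge buffer a reference-counted lifetime: the producing
+// executor allocates it via allocate_buffer() (count starts at 1), every additional consumer
+// adds a reference with increase_ref(), and each consumer drops its reference with
+// decrease_ref() once the data has been read, so the buffer is released right after its last
+// use in execute() below.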
+
+void Executors::emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+                        std::unique_ptr<IExecutor> exec)
+{
+  _executors.emplace(std::make_pair(model_index, subg_index), std::move(exec));
 }
 
-void Executors::execute(const IODescription &desc)
+IExecutor *Executors::at(const ir::ModelIndex &model_index,
+                         const ir::SubgraphIndex &subg_index) const
+{
+  return _executors.at(std::make_pair(model_index, subg_index)).get();
+}
+
+uint32_t Executors::inputSize() const { return _model_edges->pkg_inputs.size(); }
+
+uint32_t Executors::outputSize() const { return _model_edges->pkg_outputs.size(); }
+
+const ir::OperandInfo &Executors::inputInfo(const ir::IOIndex &index) const
 {
-  if (_model_edges)
-    return executeEntries(desc);
+  auto const desc = _model_edges->pkg_inputs[index.value()];
+  auto const model_index = std::get<0>(desc);
+  auto const subg_index = std::get<1>(desc);
+  auto const io_index = std::get<2>(desc);
+  auto const executor = at(model_index, subg_index);
+  return executor->getInputTensors().at(io_index.value())->orig_info();
+}
 
-  _executors.at(ir::SubgraphIndex{0})->execute(desc);
+const ir::OperandInfo &Executors::outputInfo(const ir::IOIndex &index) const
+{
+  auto const desc = _model_edges->pkg_outputs[index.value()];
+  auto const model_index = std::get<0>(desc);
+  auto const subg_index = std::get<1>(desc);
+  auto const io_index = std::get<2>(desc);
+  auto const executor = at(model_index, subg_index);
+  return executor->getOutputTensors().at(io_index.value())->orig_info();
 }
 
-void Executors::executeEntries(const IODescription &desc)
+// Allow only edges that satisfy the following:
+//  m1 < m2, s1 == 0 and s2 == 0 for every edge 'm1:s1:o1 -> m2:s2:o2'
+void Executors::checkSupportedMultimodel() const
 {
-  // Assume 2 executors only
-  // Assume that each model may have only one subgraph
-  // TODO Support general case
-  if (_executors.size() != 2)
-    throw std::runtime_error{"NYI: Multi model execution for this package is not supported yet"};
+  // If the package includes a model with no connection, model_count is less than the real model
+  // count in the package. Then this method throws an exception based on the model index:
+  //  1st model: input assumption
+  //  Otherwise: edges assumption
 
-  // Assume all edges are 0:0:x -> 1:0:x
+  // Assumption: edges
+  // m1 < m2, s1 == 0 and s2 == 0 if edge 'm1:s1:o1 -> m2:s2:o2'
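+  // For example, an edge set such as '0:0:0 -> 1:0:0', '0:0:0 -> 2:0:0' and '1:0:0 -> 2:0:1'
+  // satisfies this, while '1:0:0 -> 0:0:0' (model order reversed) and '0:0:0 -> 0:1:0'
+  // (both ends in the same model) are rejected by the loop below.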
   for (auto edge : _model_edges->edges)
   {
-    if ((std::get<ir::ModelIndex>(edge.from) != ir::ModelIndex{0}) ||
-        (std::get<ir::ModelIndex>(edge.to) != ir::ModelIndex{1}) ||
-        (std::get<ir::SubgraphIndex>(edge.from) != ir::SubgraphIndex{0}) ||
-        (std::get<ir::SubgraphIndex>(edge.to) != ir::SubgraphIndex{0}) ||
-        (std::get<ir::IOIndex>(edge.from) != std::get<ir::IOIndex>(edge.to)))
-      throw std::runtime_error{"NYI: Multi model execution for this edge is not supported yet"};
+    auto const model_from = std::get<ir::ModelIndex>(edge.from);
+    auto const model_to = std::get<ir::ModelIndex>(edge.to);
+    auto const subg_from = std::get<ir::SubgraphIndex>(edge.from);
+    auto const subg_to = std::get<ir::SubgraphIndex>(edge.to);
+
+    if (model_from.value() == model_to.value())
+    {
+      throw std::runtime_error{"Multi model's edge set has invalid edge"};
+    }
+
+    if ((model_from.value() > model_to.value()) || (subg_from != ir::SubgraphIndex{0}) ||
+        (subg_to != ir::SubgraphIndex{0}))
+      throw std::runtime_error{"NYI: Multi model execution for this edge set is not supported yet"};
   }
 
-  // Assume all package inputs are 0:0:x
-  for (uint32_t i = 0; i < _model_edges->pkg_inputs.size(); i++)
+  // Assumption: package inputs
+  //  Since m1 < m2 always holds, every input of the 1st model must come from a package input
   {
-    auto input = _model_edges->pkg_inputs[i];
-    if ((std::get<ir::ModelIndex>(input) != ir::ModelIndex{0}) ||
-        (std::get<ir::SubgraphIndex>(input) != ir::SubgraphIndex{0}) ||
-        (std::get<ir::IOIndex>(input) != ir::IOIndex{i}))
+    auto first_executor = at(ir::ModelIndex{0}, ir::SubgraphIndex{0});
+    auto search_first_model = [&](const ir::IOIndex &input_index) {
+      for (const auto &input : _model_edges->pkg_inputs)
+      {
+        if ((std::get<ir::ModelIndex>(input) == ir::ModelIndex{0}) &&
+            (std::get<ir::SubgraphIndex>(input) == ir::SubgraphIndex{0}) &&
+            (std::get<ir::IOIndex>(input) == input_index))
+          return true;
+      }
+
+      return false;
+    };
+
+    for (uint32_t i = 0; i < first_executor->getInputTensors().size(); i++)
     {
-      throw std::runtime_error{"NYI: Support package input to 1st model with same order"};
+      if (!search_first_model(ir::IOIndex{i}))
+        throw std::runtime_error{"Cannot find 1st model's input buffer"};
     }
   }
 
-  // Assume all package outputs are 1:0:x
-  for (uint32_t i = 0; i < _model_edges->pkg_outputs.size(); i++)
+  // Check whether nnpkg outputs and Edge `from` are duplicated
+  for (const auto &edge : _model_edges->edges)
   {
-    auto output = _model_edges->pkg_outputs[i];
-    if ((std::get<ir::ModelIndex>(output) != ir::ModelIndex{1}) ||
-        (std::get<ir::SubgraphIndex>(output) != ir::SubgraphIndex{0}) ||
-        (std::get<ir::IOIndex>(output) != ir::IOIndex{i}))
+    if (std::find(_model_edges->pkg_outputs.begin(), _model_edges->pkg_outputs.end(), edge.from) !=
+        _model_edges->pkg_outputs.end())
     {
-      throw std::runtime_error{"NYI: Support package output from 2nd model with same order"};
+      throw std::runtime_error{"Multi model execution does not support duplicating nnpkg outputs "
+                               "with `from` of edges yet"};
     }
   }
+}
+
+void Executors::createEdgeQuantLayers()
+{
+  if (_is_created_edge_quant_layers)
+  {
+    return;
+  }
 
-  const auto &executor1 = _executors.at(ir::SubgraphIndex{0});
-  const auto &graph1 = executor1->graph();
-  const auto &executor2 = _executors.at(ir::SubgraphIndex{1});
-  const auto &graph2 = executor2->graph();
+  // Create EdgeTensor for edges between executors
+  for (const auto &pair : _edge_map)
+  {
+    const auto &from_iodesc = pair.first;
+    const auto &from_model_index = std::get<ir::ModelIndex>(from_iodesc);
+    const auto &from_subg_index = std::get<ir::SubgraphIndex>(from_iodesc);
+    const auto &from_io_index = std::get<ir::IOIndex>(from_iodesc);
+
+    const auto from_executor = _executors.at({from_model_index, from_subg_index}).get();
+    const auto from_tensor = from_executor->getOutputTensors().at(from_io_index.value());
+
+    const auto &from_info = from_tensor->orig_info();
+    const auto from_layout = from_tensor->orig_layout();
+    _edge_tensors[from_iodesc] = std::make_unique<EdgeTensor>(from_info, from_layout);
+  }
 
-  if ((graph1.getInputs().size() != _model_edges->pkg_inputs.size()) ||
-      (graph2.getOutputs().size() != _model_edges->pkg_outputs.size()) ||
-      (graph1.getOutputs().size() != graph2.getInputs().size()) ||
-      (graph1.getOutputs().size() != _model_edges->edges.size()))
+  // Append type-aware quantization layer for edges between executors
+  for (const auto &executor_pair : _executors)
   {
-    throw std::runtime_error{"NYI: Unsupported model edge pattern"};
+    const auto &executor_index = executor_pair.first;
+    const auto &model_index = executor_index.first;
+    const auto &subg_index = executor_index.second;
+
+    std::vector<backend::ITensor *> inputs;
+    std::vector<backend::ITensor *> outputs;
+    for (const auto &pair : _edge_map)
+    {
+      const auto &from_iodesc = pair.first;
+      if (std::get<ir::ModelIndex>(from_iodesc) == model_index &&
+          std::get<ir::SubgraphIndex>(from_iodesc) == subg_index)
+      {
+        const auto from_tensor = _edge_tensors[from_iodesc].get();
+        const auto &to_list = pair.second;
+
+        for (const auto &to_iodesc : to_list)
+        {
+          const auto &to_model_index = std::get<ir::ModelIndex>(to_iodesc);
+          const auto &to_subg_index = std::get<ir::SubgraphIndex>(to_iodesc);
+          const auto &to_io_index = std::get<ir::IOIndex>(to_iodesc);
+
+          const auto to_executor = _executors.at({to_model_index, to_subg_index}).get();
+          const auto to_tensor = to_executor->getInputTensors().at(to_io_index.value());
+
+          // TODO Unify tensors with the same `from` tensor and same type
+          if (from_tensor->data_type() != to_tensor->data_type())
+          {
+            assert(inputs.size() == outputs.size());
+            const auto &to_info =
+              to_executor->getInputTensors().at(to_io_index.value())->orig_info();
+            const auto to_layout = to_tensor->orig_layout();
+            inputs.emplace_back(from_tensor);
+
+            auto type_aware_quant_tensor = std::make_unique<EdgeTensor>(to_info, to_layout);
+            outputs.emplace_back(type_aware_quant_tensor.get());
+
+            _edge_quant_tensors[to_iodesc] = std::move(type_aware_quant_tensor);
+          }
+        }
+      }
+    }
+
+    auto layer = std::make_unique<PermuteLayer>(inputs, outputs);
+    layer->prepare();
+    _edge_quant_layers[{model_index, subg_index}] = std::move(layer);
   }
 
-  // Prepare buffer
-  // Assume buffer layout is NHWC
-  std::vector<std::unique_ptr<uint8_t[]>> bufs(_model_edges->edges.size());
-  std::vector<const ir::OperandInfo *> buf_infos(_model_edges->edges.size());
-  const auto layout = ir::Layout::NHWC;
+  _is_created_edge_quant_layers = true;
+}
 
-  for (uint32_t i = 0; i < graph1.getOutputs().size(); i++)
+void Executors::CreatePkgIOTensors(const IODescription &desc)
+{
+  for (const auto &pkg_input : _model_edges->pkg_inputs)
   {
-    const auto buf_index =
-      _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().at(ir::IOIndex{i});
-    buf_infos[i] = &_executors.at(ir::SubgraphIndex{0})->graph().operands().at(buf_index).info();
-    const auto buf_size = buf_infos[i]->total_size();
-    bufs[i] = std::make_unique<uint8_t[]>(buf_size);
+    // Create IOTensor for nnpkg inputs
+    const auto &model_index = std::get<ir::ModelIndex>(pkg_input);
+    const auto &subg_index = std::get<ir::SubgraphIndex>(pkg_input);
+    const auto &io_index = std::get<ir::IOIndex>(pkg_input);
+    const auto input_pkg_index =
+      find_input_index(_model_edges->pkg_inputs, model_index, subg_index, io_index);
+    auto input_desc = desc.inputs[input_pkg_index].get();
+    _pkg_input_tensors[pkg_input] =
+      std::make_unique<backend::builtin::IOTensor>(input_desc->info, input_desc->layout);
   }
 
-  // 1st executor
+  for (const auto &pkg_output : _model_edges->pkg_outputs)
   {
-    IODescription desc1;
-    const auto input_size = graph1.getInputs().size();
-    const auto output_size = graph1.getOutputs().size();
-    desc1.inputs.resize(input_size);
-    desc1.outputs.resize(output_size);
-    for (uint32_t i = 0; i < input_size; i++)
-      desc1.inputs[i] = std::make_unique<InputDesc>(*desc.inputs[i].get());
-    for (uint32_t i = 0; i < output_size; i++)
-      desc1.outputs[i] = std::make_unique<OutputDesc>(*buf_infos[i], bufs[i].get(),
-                                                      buf_infos[i]->total_size(), layout);
+    // Create IOTensor for nnpkg outputs
+    const auto &model_index = std::get<ir::ModelIndex>(pkg_output);
+    const auto &subg_index = std::get<ir::SubgraphIndex>(pkg_output);
+    const auto &io_index = std::get<ir::IOIndex>(pkg_output);
+    const auto output_pkg_index =
+      find_output_index(_model_edges->pkg_outputs, model_index, subg_index, io_index);
+    auto output_desc = desc.outputs[output_pkg_index].get();
+    _pkg_output_tensors[pkg_output] =
+      std::make_unique<backend::builtin::IOTensor>(output_desc->info, output_desc->layout);
+  }
+}
 
-    executor1->execute(desc1);
+void Executors::createPkgIOQuantLayers(const IODescription &desc)
+{
+  // Append type-aware quantization layer for nnpkg inputs/outputs between executors
+  for (const auto &pair : _executors)
+  {
+    const auto &executor_index = pair.first;
+    const auto &model_index = executor_index.first;
+    const auto &subg_index = executor_index.second;
+    const auto executor = pair.second.get();
+
+    // Find pkg inputs of current executor
+    std::vector<ir::IODesc> pkg_inputs;
+    for (const auto &pkg_input : _model_edges->pkg_inputs)
+    {
+      if (std::get<ir::ModelIndex>(pkg_input) == model_index &&
+          std::get<ir::SubgraphIndex>(pkg_input) == subg_index)
+      {
+        pkg_inputs.emplace_back(pkg_input);
+      }
+    }
+    std::vector<backend::ITensor *> src_tensors;
+    std::vector<backend::ITensor *> dst_tensors;
+    for (const auto &pkg_input : pkg_inputs)
+    {
+      const auto &io_index = std::get<ir::IOIndex>(pkg_input);
+      const auto input_pkg_index =
+        find_input_index(_model_edges->pkg_inputs, model_index, subg_index, io_index);
+      auto input_desc = desc.inputs[input_pkg_index].get();
+
+      // Create EdgeTensor for nnpkg input if type is different
+      const auto input_tensor =
+        executor->getInputTensors().at(std::get<ir::IOIndex>(pkg_input).value());
+      const auto &orig_info = input_tensor->orig_info();
+      if (input_desc->info.typeInfo().type() != input_tensor->orig_info().typeInfo().type())
+      {
+        const auto orig_layout = input_tensor->orig_layout();
+        auto pkg_input_edge_tensor = std::make_unique<EdgeTensor>(orig_info, orig_layout);
+        _pkg_input_quant_tensors[pkg_input] = std::move(pkg_input_edge_tensor);
+
+        // Append type-aware quantization layer's inputs/outputs
+        src_tensors.emplace_back(_pkg_input_tensors[pkg_input].get());
+        dst_tensors.emplace_back(_pkg_input_quant_tensors[pkg_input].get());
+      }
+    }
+
+    // Create type-aware quantization layer for nnpkg inputs
+    auto pkg_input_layer = std::make_unique<PermuteLayer>(src_tensors, dst_tensors);
+    pkg_input_layer->prepare();
+    _pkg_input_quant_layers[{model_index, subg_index}] = std::move(pkg_input_layer);
+
+    // Find pkg outputs of current executor
+    std::vector<ir::IODesc> pkg_outputs;
+    for (const auto &pkg_output : _model_edges->pkg_outputs)
+    {
+      if (std::get<ir::ModelIndex>(pkg_output) == model_index &&
+          std::get<ir::SubgraphIndex>(pkg_output) == subg_index)
+      {
+        pkg_outputs.emplace_back(pkg_output);
+      }
+    }
+    src_tensors.clear();
+    dst_tensors.clear();
+    // Create Tensors of nnpkg outputs for type-aware quantization
+    for (const auto &pkg_output : pkg_outputs)
+    {
+      const auto &io_index = std::get<ir::IOIndex>(pkg_output);
+      const auto output_pkg_index =
+        find_output_index(_model_edges->pkg_outputs, model_index, subg_index, io_index);
+      auto output_desc = desc.outputs[output_pkg_index].get();
+
+      // Create EdgeTensor for nnpkg output if type is different
+      const auto output_tensor =
+        executor->getOutputTensors().at(std::get<ir::IOIndex>(pkg_output).value());
+      const auto &orig_info = output_tensor->orig_info();
+      if (output_desc->info.typeInfo().type() != output_tensor->orig_info().typeInfo().type())
+      {
+        const auto orig_layout = output_tensor->orig_layout();
+        auto pkg_output_edge_tensor = std::make_unique<EdgeTensor>(orig_info, orig_layout);
+        _pkg_output_quant_tensors[pkg_output] = std::move(pkg_output_edge_tensor);
+
+        // Append type-aware quantization layer's inputs/outputs
+        src_tensors.emplace_back(_pkg_output_quant_tensors[pkg_output].get());
+        dst_tensors.emplace_back(_pkg_output_tensors[pkg_output].get());
+      }
+    }
+
+    // Create type-aware quantization layer for nnpkg outputs
+    auto pkg_output_layer = std::make_unique<PermuteLayer>(src_tensors, dst_tensors);
+    pkg_output_layer->prepare();
+    _pkg_output_quant_layers[{model_index, subg_index}] = std::move(pkg_output_layer);
   }
+}
+
+void Executors::execute(const IODescription &desc)
+{
+  // Check supported multi model package
+  checkSupportedMultimodel();
+
+  // TODO Move creating type-aware quantization layers for edges in compilation stage
+  createEdgeQuantLayers();
+
+  // TODO Create IOTensors only once and recreate them only if nnpkg info changes
+  CreatePkgIOTensors(desc);
+
+  // TODO Create type-aware quantization layers only once and recreate them only if type changes
+  createPkgIOQuantLayers(desc);
 
-  // 2nd executor
+  // TODO Find better way to schedule order of executors
+  auto const model_count = modelCount();
+
+  auto find_from = [&](const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+                       const ir::IOIndex &io_index) {
+    for (const auto &edge : _model_edges->edges)
+    {
+      if ((std::get<ir::ModelIndex>(edge.to) == model_index) &&
+          (std::get<ir::SubgraphIndex>(edge.to) == subg_index) &&
+          (std::get<ir::IOIndex>(edge.to) == io_index))
+        return edge.from;
+    }
+
+    throw std::runtime_error{"Cannot find edge for model input"};
+  };
+
+  // Execute each model
+  // NOTE May be better to use vector instead of unordered_map for _executors
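+  // For each model, in increasing model index order: bind its inputs either to user buffers
+  // (nnpkg inputs) or to edge tensors filled by earlier models, bind its outputs either to
+  // user buffers (nnpkg outputs) or to newly allocated edge tensors, run the surrounding
+  // type-aware quantization layers, execute the model, and finally drop buffer references
+  // that are no longer needed.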
+  for (auto model_index = ir::ModelIndex{0}; model_index.value() < model_count; model_index++)
   {
-    IODescription desc2;
-    const auto input_size = graph2.getInputs().size();
-    const auto output_size = graph2.getOutputs().size();
-    desc2.inputs.resize(input_size);
-    desc2.outputs.resize(output_size);
+    // Find executor
+    auto executor = at(model_index, ir::SubgraphIndex{0});
+
+    // Set IOTensors
+    // TODO Set internal IOTensors only once
+    std::vector<backend::IPortableTensor *> inputs_inter;
+    std::vector<backend::IPortableTensor *> outputs_inter;
+    const auto &input_tensors = executor->getInputTensors();
+    const auto &output_tensors = executor->getOutputTensors();
+    auto const input_size = input_tensors.size();
+    auto const output_size = output_tensors.size();
+    inputs_inter.resize(input_size);
+    outputs_inter.resize(output_size);
+
+    // Set inputs of executor
+    // TODO Create layer to allocate/deallocate buffers of EdgeTensor for each executor
     for (uint32_t i = 0; i < input_size; i++)
-      desc2.inputs[i] = std::make_unique<InputDesc>(*buf_infos[i], bufs[i].get(),
-                                                    buf_infos[i]->total_size(), layout);
+    {
+      const auto input_pkg_index = find_input_index(_model_edges->pkg_inputs, model_index,
+                                                    ir::SubgraphIndex{0}, ir::IOIndex{i});
+      const auto input_io_desc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+      if (input_pkg_index != -1)
+      {
+        // Allocate type-aware quantization tensors for nnpkg inputs and set internal tensors
+        if (_pkg_input_quant_tensors.find(input_io_desc) != _pkg_input_quant_tensors.end())
+        {
+          _pkg_input_quant_tensors[input_io_desc]->allocate_buffer();
+
+          inputs_inter[i] = _pkg_input_quant_tensors[input_io_desc].get();
+        }
+        else
+        {
+          inputs_inter[i] = _pkg_input_tensors[input_io_desc].get();
+        }
+
+        // Set buffer of IOTensor
+        auto input_desc = desc.inputs[input_pkg_index].get();
+        // TODO Remove const_cast (we need const_cast as ITensor is writable)
+        _pkg_input_tensors[input_io_desc]->setUserTensor(
+          reinterpret_cast<uint8_t *>(const_cast<void *>(input_desc->buffer)), input_desc->size);
+      }
+      else
+      {
+        auto from_iodesc = find_from(model_index, ir::SubgraphIndex{0}, ir::IOIndex{i});
+        const auto &from_model_index = std::get<ir::ModelIndex>(from_iodesc);
+        const auto &from_subg_index = std::get<ir::SubgraphIndex>(from_iodesc);
+        const auto &from_ioindex = std::get<ir::IOIndex>(from_iodesc).value();
+
+        // Only sequential execution of models is supported
+        assert(from_model_index.value() < model_index.value());
+        assert(from_subg_index.value() == 0);
+        const auto from_executor = _executors.at({from_model_index, from_subg_index}).get();
+        const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+        if (_edge_quant_tensors.find(to_iodesc) == _edge_quant_tensors.end())
+        {
+          inputs_inter[i] = from_executor->getOutputTensors().at(from_ioindex);
+        }
+        else
+        {
+          inputs_inter[i] = _edge_quant_tensors.at(to_iodesc).get();
+        }
+        assert(inputs_inter[i]->buffer() != nullptr);
+      }
+    }
+
+    // Set outputs of executor
     for (uint32_t i = 0; i < output_size; i++)
-      desc2.outputs[i] = std::make_unique<OutputDesc>(*desc.outputs[i].get());
+    {
+      const auto output_pkg_index = find_output_index(_model_edges->pkg_outputs, model_index,
+                                                      ir::SubgraphIndex{0}, ir::IOIndex{i});
+      const auto output_io_desc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+      if (output_pkg_index != -1)
+      {
+        // Allocate type-aware quantization tensors for nnpkg outputs and set internal tensors
+        if (_pkg_output_quant_tensors.find(output_io_desc) != _pkg_output_quant_tensors.end())
+        {
+          _pkg_output_quant_tensors[output_io_desc]->allocate_buffer();
+
+          outputs_inter[i] = _pkg_output_quant_tensors[output_io_desc].get();
+        }
+        else
+        {
+          outputs_inter[i] = _pkg_output_tensors[output_io_desc].get();
+        }
+
+        // Set buffer of IOTensor
+        auto output_desc = desc.outputs[output_pkg_index].get();
+        _pkg_output_tensors[output_io_desc]->setUserTensor(
+          reinterpret_cast<uint8_t *>(output_desc->buffer), output_desc->size);
+      }
+      else
+      {
+        // Allocate buffer of `from` tensors
+        const auto from_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+        _edge_tensors[from_iodesc]->allocate_buffer();
+        outputs_inter[i] = _edge_tensors[from_iodesc].get();
 
-    executor2->execute(desc2);
+        // Allocate buffer of tensors for type-aware quantization
+        for (const auto &to_iodesc : _edge_map[from_iodesc])
+        {
+          _edge_tensors[from_iodesc]->increase_ref();
+          if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end())
+          {
+            auto type_aware_quant_tensor = _edge_quant_tensors.at(to_iodesc).get();
+            type_aware_quant_tensor->allocate_buffer();
+
+            _edge_tensors[from_iodesc]->decrease_ref();
+          }
+        }
+      }
+    }
+
+    _pkg_input_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
+
+    executor->execute(inputs_inter, outputs_inter);
+
+    _edge_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
+    _pkg_output_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
+
+    // Release input buffers that are no longer needed
+    for (uint32_t i = 0; i < input_size; i++)
+    {
+      const auto input_pkg_index = find_input_index(_model_edges->pkg_inputs, model_index,
+                                                    ir::SubgraphIndex{0}, ir::IOIndex{i});
+
+      const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+      if (input_pkg_index == -1)
+      {
+        if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end())
+        {
+          // Decrease reference count of the type-aware quantization tensor if the input tensor
+          // is that tensor
+          _edge_quant_tensors[to_iodesc]->decrease_ref();
+        }
+        else
+        {
+          // Decrease reference count of `from` tensor if input tensor is the `from` tensor
+          const auto from_iodesc = find_from(model_index, ir::SubgraphIndex{0}, ir::IOIndex{i});
+          _edge_tensors[from_iodesc]->decrease_ref();
+
+          // Decrease reference count of nnpkg inputs
+          if (_pkg_input_quant_tensors.find(to_iodesc) != _pkg_input_quant_tensors.end())
+          {
+            _pkg_input_quant_tensors[to_iodesc]->decrease_ref();
+          }
+        }
+      }
+    }
+
+    // Release output buffers if those buffers are no longer used by other executors because of
+    // type-aware quantization
+    // FIXME Simplify this once tensors for type-aware quantization are unified for the same
+    //       `from` tensor and the same type
+    for (uint32_t i = 0; i < output_size; i++)
+    {
+      auto from_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+
+      // Check if other executors will use the buffer of edge tensor
+      const auto &to_list = _edge_map[from_iodesc];
+      if (to_list.size() == 0)
+      {
+        // This condition means `from_iodesc` tensor is an output of nnpkg
+        continue;
+      }
+
+      bool to_be_release =
+        !std::any_of(to_list.begin(), to_list.end(), [&](const ir::IODesc &to_iodesc) {
+          // This condition means another executor uses the buffer of edge tensor
+          return _edge_quant_tensors.find(to_iodesc) == _edge_quant_tensors.end();
+        });
+
+      if (to_be_release)
+      {
+        // This edge tensor's buffer won't be used in other executors
+        // Tensors for type-aware quantization take over the role of this edge tensor instead
+        _edge_tensors[from_iodesc]->decrease_ref();
+      }
+
+      // Decrease reference count of nnpkg outputs
+      if (_pkg_output_quant_tensors.find(from_iodesc) != _pkg_output_quant_tensors.end())
+      {
+        _pkg_output_quant_tensors[from_iodesc]->decrease_ref();
+      }
+    }
   }
 }
 
+// modelCount() iterates _executors.
+// It assumes that the Compiler generates an Executor for every model and that _executors
+// includes all generated Executors.
+// If the nnpackage includes model(s) that have no connection and the Compiler does not
+// generate Executors for them, modelCount() returns a smaller value than the real model count.
+uint16_t Executors::modelCount() const
+{
+  uint16_t model_count = 0;
+  for (; _executors.find(std::make_pair(ir::ModelIndex{model_count}, ir::SubgraphIndex{0})) !=
+         _executors.end();
+       model_count++)
+    ;
+
+  return model_count;
+}
+
 } // namespace exec
 } // namespace onert
diff --git a/runtime/onert/core/src/exec/Executors.h b/runtime/onert/core/src/exec/Executors.h
new file mode 100644 (file)
index 0000000..ac74891
--- /dev/null
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_EXECUTORS_H__
+#define __ONERT_EXEC_EXECUTORS_H__
+
+#include "exec/IExecutors.h"
+#include "ir/NNPkg.h"
+#include "IPermuteFunction.h"
+
+namespace std
+{
+
+template <> struct hash<std::pair<::onert::ir::ModelIndex, ::onert::ir::SubgraphIndex>>
+{
+  size_t
+  operator()(const std::pair<::onert::ir::ModelIndex, ::onert::ir::SubgraphIndex> &pair) const
+    noexcept
+  {
+    return (hash<uint32_t>()(pair.first.value()) << 16) ^ hash<uint32_t>()(pair.second.value());
+  }
+};
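+// With the usual identity hash for integral types, this packs the model index into the upper
+// bits and the subgraph index into the lower 16 bits, e.g. {ModelIndex{1}, SubgraphIndex{0}}
+// hashes to 0x10000 and {ModelIndex{0}, SubgraphIndex{1}} to 0x1, so distinct
+// (model, subgraph) pairs collide only if a subgraph index exceeds 16 bits.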
+
+} // namespace std
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to gather executors
+ */
+class Executors : public IExecutors
+{
+public:
+  Executors(void) = delete;
+  Executors(std::unique_ptr<ir::ModelEdges> model_edges)
+    : _executors{}, _model_edges{std::move(model_edges)}, _edge_quant_layers{},
+      _edge_quant_tensors{}, _edge_tensors{}, _is_created_edge_quant_layers{false},
+      _pkg_input_quant_layers{}, _pkg_output_quant_layers{}, _pkg_input_quant_tensors{},
+      _pkg_output_quant_tensors{}, _pkg_input_tensors{}, _pkg_output_tensors{}
+  {
+    for (const auto &edge : _model_edges->edges)
+    {
+      _edge_map[edge.from].emplace_back(edge.to);
+    }
+  }
+  Executors(const Executors &) = delete;
+  Executors(Executors &&) = default;
+  ~Executors() = default;
+
+  // TODO Use Executor index
+  void emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+               std::unique_ptr<IExecutor> exec) override;
+
+  IExecutor *at(const ir::ModelIndex &model_index,
+                const ir::SubgraphIndex &subg_index) const override;
+
+  uint32_t inputSize() const override;
+
+  uint32_t outputSize() const override;
+
+  const ir::OperandInfo &inputInfo(const ir::IOIndex &index) const override;
+
+  const ir::OperandInfo &outputInfo(const ir::IOIndex &index) const override;
+
+  void execute(const IODescription &desc) override;
+
+private:
+  void checkSupportedMultimodel() const;
+  void createEdgeQuantLayers();
+  void CreatePkgIOTensors(const IODescription &desc);
+  void createPkgIOQuantLayers(const IODescription &desc);
+  uint16_t modelCount() const;
+
+private:
+  // TODO Remove this class
+  class PermuteLayer : public exec::IPermuteFunction
+  {
+  public:
+    PermuteLayer(const std::vector<backend::ITensor *> &inputs,
+                 const std::vector<backend::ITensor *> &outputs)
+    {
+      assert(inputs.size() == outputs.size());
+      _src_tensors = inputs;
+      _dst_tensors = outputs;
+    }
+    virtual ~PermuteLayer() {}
+    void optimize() override {}
+  };
+
+  class EdgeTensor;
+
+private:
+  std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<IExecutor>>
+    _executors;
+
+  // NOTE _model_edges may use different struct type for executor implementation
+  std::unique_ptr<ir::ModelEdges> _model_edges;
+  std::unordered_map<ir::IODesc, std::vector<ir::IODesc>> _edge_map;
+
+  /**
+   * @brief Type-aware quantization layers for edges between executors
+   *
+   */
+  // TODO Move variables related to type-aware quantization for edges into compilation stage
+  // TODO Replace PermuteLayer with backend::builtin::kernel::PermuteLayer
+  std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<PermuteLayer>>
+    _edge_quant_layers;
+
+  /**
+   * @brief Tensors for type-aware quantization of edges
+   *        Key: `to` IODesc, Value: EdgeTensor
+   */
+  // Q: Why is the key a `to` IODesc?
+  // A: These tensors are currently created depending on the type of `to`
+  // TODO Unify tensors with the same `from` tensor and same type
+  // NOTE The incomplete type 'EdgeTensor' cannot be declared as unique_ptr.
+  std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _edge_quant_tensors;
+
+  /**
+   * @brief Tensors for edges between executors that are not related to type-aware quantization
+   *        Key: `from` IODesc, Value: EdgeTensor
+   */
+  // Q: Why is the key a `from` IODesc?
+  // A: `from` can be connected to multiple `to`
+  // NOTE The incomplete type 'EdgeTensor' cannot be declared as unique_ptr.
+  std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _edge_tensors;
+  /**
+   * @brief Whether type-aware quantization layers for edges between executors are created
+   *
+   */
+  // TODO Remove this member after the creation of type-aware quantization layers for edges
+  //      is moved into compilation stage
+  bool _is_created_edge_quant_layers;
+
+  // TODO Replace PermuteLayer with backend::builtin::kernel::PermuteLayer
+  std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<PermuteLayer>>
+    _pkg_input_quant_layers;
+  // TODO Replace PermuteLayer with backend::builtin::kernel::PermuteLayer
+  std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<PermuteLayer>>
+    _pkg_output_quant_layers;
+  // Edge tensors of nnpkg inputs/outputs for type-aware quantization
+  std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _pkg_input_quant_tensors;
+  std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _pkg_output_quant_tensors;
+  // IOTensors for user buffer
+  std::unordered_map<ir::IODesc, std::unique_ptr<backend::builtin::IOTensor>> _pkg_input_tensors;
+  std::unordered_map<ir::IODesc, std::unique_ptr<backend::builtin::IOTensor>> _pkg_output_tensors;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_EXECUTORS_H__
diff --git a/runtime/onert/core/src/exec/IPermuteFunction.cc b/runtime/onert/core/src/exec/IPermuteFunction.cc
new file mode 100644 (file)
index 0000000..9d548e6
--- /dev/null
@@ -0,0 +1,320 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IPermuteFunction.h"
+
+#include <cker/operation/Quantize.h>
+#include <cker/operation/Dequantize.h>
+#include "backend/IPortableTensor.h"
+#include "exec/IFunction.h"
+#include "ir/Index.h"
+#include "ir/Shape.h"
+#include <memory>
+#include <misc/polymorphic_downcast.h>
+#include <typeinfo>
+#include "util/Utils.h"
+#include <vector>
+#include <unordered_map>
+
+namespace
+{
+using namespace onert;
+
+inline nnfw::cker::Shape getShape(const backend::ITensor *tensor)
+{
+  const ir::Shape shape = tensor->getShape();
+
+  assert(tensor->layout() == ir::Layout::NHWC);
+
+  auto rank = shape.rank();
+  nnfw::cker::Shape ret(rank);
+  auto data = ret.DimsData();
+  for (int i = 0; i < rank; ++i)
+  {
+    data[i] = shape.dim(i);
+  }
+  return ret;
+}
+
+// Quantize per element
+template <typename InputT, typename OutputT>
+void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+{
+  const auto scale = dst_tensor->data_scale();
+  const auto zero_point = dst_tensor->data_zero_point();
+
+  int min_val = std::numeric_limits<OutputT>::min();
+  int max_val = std::numeric_limits<OutputT>::max();
+
+  auto loop_shape = src_tensor->getShape();
+  const auto src_layout = src_tensor->layout();
+  const auto dst_layout = dst_tensor->layout();
+  const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4;
+  ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
+    const InputT *input_data =
+      reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords));
+    int32_t unclamped = static_cast<int32_t>(round(*input_data / scale)) + zero_point;
+    int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
+
+    ir::Coordinates dst_coords =
+      is_permutation ? ir::convertCoordinates(coords, src_layout, dst_layout) : coords;
+    OutputT *output_data =
+      reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords));
+    *output_data = clamped;
+  });
+}
+
+// TODO Optimize the case where tensors have the same layout
+template <typename InputT, typename OutputT>
+void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+{
+  if (!src_tensor->has_padding() && !dst_tensor->has_padding() &&
+      src_tensor->layout() == dst_tensor->layout() && !src_tensor->is_dynamic())
+  {
+    assert(!dst_tensor->is_dynamic());
+
+    // Call optimized neon kernel
+    nnfw::cker::Quantize(getShape(src_tensor),
+                         reinterpret_cast<const InputT *>(src_tensor->buffer()),
+                         getShape(dst_tensor), reinterpret_cast<OutputT *>(dst_tensor->buffer()),
+                         dst_tensor->data_scale(), dst_tensor->data_zero_point());
+  }
+  else
+  {
+    elementwiseQuantize<InputT, OutputT>(src_tensor, dst_tensor);
+  }
+}
+
+// Dequantize per element
+template <typename InputT, typename OutputT>
+void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+{
+  const auto scale = src_tensor->data_scale();
+  const auto zero_point = src_tensor->data_zero_point();
+
+  auto loop_shape = src_tensor->getShape();
+  const auto src_layout = src_tensor->layout();
+  const auto dst_layout = dst_tensor->layout();
+  const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4;
+  ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
+    const InputT *input_data =
+      reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords));
+    const OutputT result = static_cast<OutputT>(scale * (*input_data - zero_point));
+
+    ir::Coordinates dst_coords =
+      is_permutation ? ir::convertCoordinates(coords, src_layout, dst_layout) : coords;
+    OutputT *output_data =
+      reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords));
+    *output_data = result;
+  });
+}
+
+// TODO Optimize the case where tensors have the same layout
+template <typename InputT, typename OutputT>
+void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+{
+  if (!src_tensor->has_padding() && !dst_tensor->has_padding() &&
+      src_tensor->layout() == dst_tensor->layout() && !src_tensor->is_dynamic())
+  {
+    assert(!dst_tensor->is_dynamic());
+
+    // Call optimized neon kernel
+    nnfw::cker::Dequantize(getShape(src_tensor),
+                           reinterpret_cast<const InputT *>(src_tensor->buffer()),
+                           getShape(dst_tensor), reinterpret_cast<OutputT *>(dst_tensor->buffer()),
+                           src_tensor->data_scale(), src_tensor->data_zero_point());
+  }
+  else
+  {
+    elementwiseDequantize<InputT, OutputT>(src_tensor, dst_tensor);
+  }
+}
+
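+// Entry point for permutations that also change the element type: float sources are quantized
+// and quantized sources are dequantized, for the QUANT_UINT8_ASYMM, QUANT_INT8_SYMM and
+// QUANT_INT16_SYMM types handled below.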
+template <typename SRC_T, typename DST_T,
+          std::enable_if_t<std::is_base_of<backend::ITensor, SRC_T>::value &&
+                             std::is_base_of<backend::ITensor, DST_T>::value,
+                           bool> = true>
+void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor)
+{
+  // TODO Support other types
+  if (src_tensor->data_type() == ir::DataType::FLOAT32)
+  {
+    switch (dst_tensor->data_type())
+    {
+      case ir::DataType::QUANT_UINT8_ASYMM:
+      {
+        quantize<float, uint8_t>(src_tensor, dst_tensor);
+        break;
+      }
+      case ir::DataType::QUANT_INT8_SYMM:
+      {
+        quantize<float, int8_t>(src_tensor, dst_tensor);
+        break;
+      }
+      case ir::DataType::QUANT_INT16_SYMM:
+      {
+        quantize<float, int16_t>(src_tensor, dst_tensor);
+        break;
+      }
+      default:
+      {
+        throw std::runtime_error("IPermuteFunction: Unsupported quantization type");
+        break;
+      }
+    }
+  }
+  else if (dst_tensor->data_type() == ir::DataType::FLOAT32)
+  {
+    switch (src_tensor->data_type())
+    {
+      case ir::DataType::QUANT_UINT8_ASYMM:
+      {
+        dequantize<uint8_t, float>(src_tensor, dst_tensor);
+        break;
+      }
+      case ir::DataType::QUANT_INT8_SYMM:
+      {
+        dequantize<int8_t, float>(src_tensor, dst_tensor);
+        break;
+      }
+      case ir::DataType::QUANT_INT16_SYMM:
+      {
+        dequantize<int16_t, float>(src_tensor, dst_tensor);
+        break;
+      }
+      default:
+      {
+        throw std::runtime_error("IPermuteFunction: Unsupported dequantization type");
+        break;
+      }
+    }
+  }
+  else
+  {
+    throw std::runtime_error("IPermuteFunction: Unsupported type for type-aware quantization yet");
+  }
+}
+
+} // namespace
+
+namespace onert
+{
+namespace exec
+{
+
+void IPermuteFunction::run()
+{
+  // TODO Optimization: Make sure control does not reach here when (_src_tensors.size() == 0)
+  assert(_src_tensors.size() == _dst_tensors.size());
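+  // The per-tensor offset caches are allocated lazily on the first run.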
+  if (_src_tensors_offsets.size() == 0)
+  {
+    _src_tensors_offsets.resize(_src_tensors.size());
+    _dst_tensors_offsets.resize(_dst_tensors.size());
+  }
+  assert(_src_tensors.size() == _src_tensors_offsets.size());
+  assert(_src_tensors_offsets.size() == _dst_tensors_offsets.size());
+
+  for (size_t i = 0; i < _src_tensors.size(); ++i)
+  {
+    auto src_tensor = _src_tensors.at(i);
+    auto dst_tensor = _dst_tensors.at(i);
+    auto &src_offsets = _src_tensors_offsets.at(i);
+    auto &dst_offsets = _dst_tensors_offsets.at(i);
+    if (src_tensor != dst_tensor)
+    {
+      const auto rank = src_tensor->getShape().rank();
+      permute(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+    }
+  }
+}
+
+void IPermuteFunction::permute(backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
+                               size_t rank, std::vector<size_t> &src_offsets,
+                               std::vector<size_t> &dst_offsets)
+{
+  if (src_tensor->total_size() == 0)
+  {
+    assert(dst_tensor->total_size() == 0);
+    return;
+  }
+
+  assert(src_tensor != dst_tensor);
+  if (underlying_type(src_tensor->data_type()) != underlying_type(dst_tensor->data_type()))
+  {
+    typeAwareQuantize(src_tensor, dst_tensor);
+    return;
+  }
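+  // Both tensors share the same underlying element type here, so a layout/offset-only
+  // permutation of raw elements is sufficient.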
+
+  switch (src_tensor->data_type())
+  {
+    case ir::DataType::FLOAT32:
+      permute<float>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+      break;
+    case ir::DataType::INT32:
+      permute<int32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+      break;
+    case ir::DataType::UINT32:
+      permute<uint32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+      break;
+    case ir::DataType::BOOL8:
+    case ir::DataType::QUANT_UINT8_ASYMM:
+    case ir::DataType::UINT8:
+      permute<uint8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+      break;
+    case ir::DataType::QUANT_INT8_ASYMM:
+    case ir::DataType::QUANT_INT8_SYMM:
+      permute<int8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+      break;
+    case ir::DataType::INT64:
+      permute<int64_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+      break;
+    case ir::DataType::QUANT_INT16_SYMM:
+      permute<int16_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+      break;
+    default:
+      throw std::runtime_error("IPermuteFunction: Not supported data type");
+      break;
+  }
+}
+
+const std::type_info &IPermuteFunction::underlying_type(ir::DataType type) const
+{
+  switch (type)
+  {
+    case ir::DataType::FLOAT32:
+      return typeid(float);
+    case ir::DataType::INT32:
+      return typeid(int32_t);
+    case ir::DataType::UINT32:
+      return typeid(uint32_t);
+    case ir::DataType::INT64:
+      return typeid(int64_t);
+    case ir::DataType::BOOL8:
+    case ir::DataType::QUANT_UINT8_ASYMM:
+    case ir::DataType::UINT8:
+      return typeid(uint8_t);
+    case ir::DataType::QUANT_INT8_ASYMM:
+    case ir::DataType::QUANT_INT8_SYMM:
+      return typeid(int8_t);
+    case ir::DataType::QUANT_INT16_SYMM:
+      return typeid(int16_t);
+    default:
+      throw std::runtime_error("IPermuteFunction: Not supported data type");
+  }
+}
+
+} // namespace exec
+} // namespace onert
index eb54b67..e790f32 100644 (file)
 
 #include "backend/ITensor.h"
 #include "exec/IFunction.h"
-#include "ir/Index.h"
-#include "ir/Shape.h"
 #include <memory>
-#include <typeinfo>
-#include "util/Utils.h"
 #include <vector>
 #include <unordered_map>
 
@@ -79,31 +75,7 @@ protected:
   };
 
 public:
-  virtual void run() override
-  {
-    // TODO Optimization : Make control does not reach here? when (_src_tensors.size() == 0)
-    assert(_src_tensors.size() == _dst_tensors.size());
-    if (_src_tensors_offsets.size() == 0)
-    {
-      _src_tensors_offsets.resize(_src_tensors.size());
-      _dst_tensors_offsets.resize(_dst_tensors.size());
-    }
-    assert(_src_tensors.size() == _src_tensors_offsets.size());
-    assert(_src_tensors_offsets.size() == _dst_tensors_offsets.size());
-
-    for (size_t i = 0; i < _src_tensors.size(); ++i)
-    {
-      auto src_tensor = _src_tensors.at(i);
-      auto dst_tensor = _dst_tensors.at(i);
-      auto &src_offsets = _src_tensors_offsets.at(i);
-      auto &dst_offsets = _dst_tensors_offsets.at(i);
-      if (src_tensor != dst_tensor)
-      {
-        const auto rank = src_tensor->getShape().rank();
-        permute(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
-      }
-    }
-  }
+  virtual void run() override;
 
   virtual void prepare() override { optimize(); }
 
@@ -111,48 +83,7 @@ public:
 
 protected:
   void permute(backend::ITensor *src_tensor, backend::ITensor *dst_tensor, size_t rank,
-               std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets)
-  {
-    if (src_tensor->total_size() == 0)
-    {
-      assert(dst_tensor->total_size() == 0);
-      return;
-    }
-
-    assert(src_tensor != dst_tensor);
-    if (underlying_type(src_tensor->data_type()) != underlying_type(dst_tensor->data_type()))
-      throw std::runtime_error("data type does not match");
-    switch (src_tensor->data_type())
-    {
-      case ir::DataType::FLOAT32:
-        permute<float>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
-        break;
-      case ir::DataType::INT32:
-        permute<int32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
-        break;
-      case ir::DataType::UINT32:
-        permute<uint32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
-        break;
-      case ir::DataType::BOOL8:
-      case ir::DataType::QUANT_UINT8_ASYMM:
-      case ir::DataType::UINT8:
-        permute<uint8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
-        break;
-      case ir::DataType::QUANT_INT8_ASYMM:
-      case ir::DataType::QUANT_INT8_SYMM:
-        permute<int8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
-        break;
-      case ir::DataType::INT64:
-        permute<int64_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
-        break;
-      case ir::DataType::QUANT_INT16_SYMM:
-        permute<int16_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
-        break;
-      default:
-        throw std::runtime_error("IPermuteFunction: Not supported data type");
-        break;
-    }
-  }
+               std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets);
 
 private:
   // TODO make src const by proving const access()
@@ -322,31 +253,7 @@ protected:
   // NOTE The typeid expression is lvalue expression which refers to an object with static storage
   //      duration, of the polymorphic type const std::type_info or of some type derived from it.
   //      So std::type_info is non-copyable
-  const std::type_info &underlying_type(ir::DataType type) const
-  {
-    switch (type)
-    {
-      case ir::DataType::FLOAT32:
-        return typeid(float);
-      case ir::DataType::INT32:
-        return typeid(int32_t);
-      case ir::DataType::UINT32:
-        return typeid(uint32_t);
-      case ir::DataType::INT64:
-        return typeid(int64_t);
-      case ir::DataType::BOOL8:
-      case ir::DataType::QUANT_UINT8_ASYMM:
-      case ir::DataType::UINT8:
-        return typeid(uint8_t);
-      case ir::DataType::QUANT_INT8_ASYMM:
-      case ir::DataType::QUANT_INT8_SYMM:
-        return typeid(int8_t);
-      case ir::DataType::QUANT_INT16_SYMM:
-        return typeid(int16_t);
-      default:
-        throw std::runtime_error("IPermuteFunction: Not supported data type");
-    }
-  }
+  const std::type_info &underlying_type(ir::DataType type) const;
 
 protected:
   std::vector<backend::ITensor *> _src_tensors;
diff --git a/runtime/onert/core/src/exec/IPermuteFunction.test.cc b/runtime/onert/core/src/exec/IPermuteFunction.test.cc
new file mode 100644 (file)
index 0000000..1009f19
--- /dev/null
@@ -0,0 +1,902 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IPermuteFunction.h"
+
+#include <ir/Layout.h>
+#include <ir/Shape.h>
+#include <ir/TypeInfo.h>
+
+#include <cmath>
+#include <gtest/gtest.h>
+
+namespace
+{
+using namespace onert;
+using namespace ir;
+using namespace backend;
+using namespace exec;
+
+class MockUpTensor : public ITensor
+{
+public:
+  MockUpTensor(const Shape &shape, const TypeInfo &type_info, Layout layout, size_t pad)
+    : _shape(shape), _type_info(type_info), _layout(layout), _data(nullptr)
+  {
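+    // Row-major strides; `pad` extra elements are appended to the innermost dimension so that
+    // has_padding() reports true whenever pad != 0.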
+    _strides.resize(shape.rank());
+
+    std::vector<size_t> pads(shape.rank(), 0);
+    pads[shape.rank() - 1] = pad;
+    size_t stride = 1;
+    for (int32_t i = _shape.rank() - 1; i >= 0; --i)
+    {
+      _strides.at(i) = stride;
+      stride = stride * (_shape.dim(i) + pads.at(i));
+    }
+  }
+  virtual ~MockUpTensor() {}
+
+  void setBuffer(uint8_t *data) { _data = data; }
+
+  size_t total_size() const override
+  {
+    size_t total_size = _strides[0] * _shape.dim(0);
+    total_size *= sizeOfDataType(data_type());
+    return total_size;
+  }
+
+  size_t calcOffset(const ir::Coordinates &coords) const override
+  {
+    size_t offset = 0;
+    for (size_t i = 0; i < _shape.rank(); ++i)
+    {
+      offset += (_strides[i] * coords[i]);
+    }
+    offset *= sizeOfDataType(data_type());
+    return offset;
+  }
+
+  uint8_t *buffer() const override { return _data; }
+
+  ir::Layout layout() const override { return _layout; }
+  ir::DataType data_type() const override { return _type_info.type(); }
+  float data_scale() const override { return _type_info.scale(); }
+  int32_t data_zero_point() const override { return _type_info.zero_point(); }
+  const std::vector<float> &data_scales() const override { return _type_info.scales(); }
+  const std::vector<int32_t> &data_zero_points() const override { return _type_info.zero_points(); }
+  bool has_padding() const override
+  {
+    return total_size() / sizeOfDataType(data_type()) != _shape.num_elements();
+  }
+  void access(const std::function<void(ITensor &tensor)> &fn) final { fn(*this); }
+
+  bool is_dynamic() const override { return false; }
+  Shape getShape() const override { return _shape; }
+
+private:
+  Shape _shape;
+  TypeInfo _type_info;
+  Layout _layout;
+  uint8_t *_data;
+  std::vector<size_t> _strides;
+};
+
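+// Test double that feeds the given tensors straight into IPermuteFunction; optimize() is a no-op
+// so run() exercises IPermuteFunction's permutation logic directly.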
+class MockUpLayer : public IPermuteFunction
+{
+public:
+  MockUpLayer(const std::vector<ITensor *> &inputs, const std::vector<ITensor *> &outputs)
+  {
+    assert(inputs.size() == outputs.size());
+    _src_tensors = inputs;
+    _dst_tensors = outputs;
+  }
+  virtual ~MockUpLayer() {}
+  void optimize() override {}
+};
+
+TEST(IPermuteFunction, float_rank1)
+{
+  const size_t input_pads[4] = {0, 1, 0, 2};
+  const size_t output_pads[4] = {0, 0, 2, 1};
+  const std::vector<Shape> shapes{{1}, {4}, {5}, {2}};
+  float expected_buffer[] = {1, 0, -1, -2, 3};
+  const auto type_info = TypeInfo(DataType::FLOAT32);
+
+  std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+
+  std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+  for (size_t i = 0; i < 4; ++i)
+  {
+    inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+    inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+    outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+    output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+    outputs[i]->setBuffer(output_buffers[i].get());
+  }
+
+  auto mockup_layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  mockup_layer->run();
+
+  for (size_t i = 0; i < 4; ++i)
+  {
+    for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+    {
+      Coordinates coords{j};
+      float result =
+        *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+      float expected =
+        *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+      EXPECT_EQ(result, expected);
+    }
+  }
+}
+
+TEST(IPermuteFunction, float_rank2)
+{
+  const size_t input_pads[4] = {0, 1, 0, 2};
+  const size_t output_pads[4] = {0, 0, 2, 1};
+  const std::vector<Shape> shapes{{1, 4}, {2, 2}, {1, 5}, {2, 3}};
+  float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8};
+  const auto type_info = TypeInfo(DataType::FLOAT32);
+
+  std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+  std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+  for (size_t i = 0; i < 4; ++i)
+  {
+    inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+    inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+    outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+    output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+    outputs[i]->setBuffer(output_buffers[i].get());
+  }
+
+  auto mockup_layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  mockup_layer->run();
+
+  for (size_t i = 0; i < 4; ++i)
+  {
+    for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+    {
+      for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+      {
+        Coordinates coords{j, k};
+        float result =
+          *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+        float expected =
+          *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+        EXPECT_EQ(result, expected);
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, float_rank3)
+{
+  const size_t input_pads[4] = {0, 5, 0, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  const std::vector<Shape> shapes{{1, 4, 1}, {1, 2, 1}, {2, 1, 5}, {1, 2, 3}};
+  float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8, 9, -10};
+  const auto type_info = TypeInfo(DataType::FLOAT32);
+
+  std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+  std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+  for (size_t i = 0; i < 4; ++i)
+  {
+    inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+    inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+    outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+    output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+    outputs[i]->setBuffer(output_buffers[i].get());
+  }
+
+  auto mockup_layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  mockup_layer->run();
+
+  for (size_t i = 0; i < 4; ++i)
+  {
+    for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+    {
+      for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+      {
+        for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+        {
+          Coordinates coords{j, k, l};
+          float result =
+            *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+          float expected =
+            *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+          EXPECT_EQ(result, expected);
+        }
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, float_rank4)
+{
+  const size_t input_pads[4] = {0, 0, 1, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+  float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8, 9, -10};
+  const auto type_info = TypeInfo(DataType::FLOAT32);
+
+  std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+  std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+  for (size_t i = 0; i < 4; ++i)
+  {
+    inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+    inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+    outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+    output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+    outputs[i]->setBuffer(output_buffers[i].get());
+  }
+
+  auto mockup_layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  mockup_layer->run();
+
+  for (size_t i = 0; i < 4; ++i)
+  {
+    for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+    {
+      for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+      {
+        for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+        {
+          for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+          {
+            Coordinates coords{j, k, l, m};
+            float result =
+              *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+            float expected =
+              *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+            EXPECT_EQ(result, expected);
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, float_rank4_layout)
+{
+  const size_t input_pads[4] = {0, 0, 1, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+  float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16};
+  const auto type_info = TypeInfo(DataType::FLOAT32);
+
+  std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+  std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+  for (size_t i = 0; i < 4; ++i)
+  {
+    Layout layout = Layout::NHWC;
+    Shape shape = shapes[i];
+    if (i % 2 == 1)
+    {
+      layout = Layout::NCHW;
+      shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+    }
+    inputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, input_pads[i]);
+    inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+    if (layout == Layout::NHWC)
+    {
+      layout = Layout::NCHW;
+      shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+    }
+    else
+    {
+      layout = Layout::NHWC;
+      shape = shapes[i];
+    }
+    outputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, output_pads[i]);
+    output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+    outputs[i]->setBuffer(output_buffers[i].get());
+  }
+
+  auto mockup_layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  mockup_layer->run();
+
+  for (size_t i = 0; i < 4; ++i)
+  {
+    for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+    {
+      for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+      {
+        for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+        {
+          for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+          {
+            Coordinates input_coords;
+            Coordinates output_coords;
+            if (inputs[i]->layout() == Layout::NHWC)
+            {
+              input_coords = Coordinates{j, k, l, m};
+            }
+            else
+            {
+              input_coords = Coordinates{j, m, k, l};
+            }
+            if (outputs[i]->layout() == Layout::NHWC)
+            {
+              output_coords = Coordinates{j, k, l, m};
+            }
+            else
+            {
+              output_coords = Coordinates{j, m, k, l};
+            }
+            float result = *reinterpret_cast<float *>(outputs[i]->buffer() +
+                                                      outputs[i]->calcOffset(output_coords));
+            float expected =
+              *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(input_coords));
+            EXPECT_EQ(result, expected);
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, float_to_qasymm8)
+{
+  const size_t input_pads[4] = {0, 0, 1, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+  float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+  float scale = 10;
+  int32_t zero_point = 128;
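+  // Expected values are exact multiples of the scale, so quantizing and dequantizing with the
+  // same (scale, zero_point) pair below round-trips them losslessly.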
+
+  std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+  std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+  for (size_t i = 0; i < 4; ++i)
+  {
+    inputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32), Layout::NHWC,
+                                               input_pads[i]);
+    inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+    TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+    outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+    output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+    outputs[i]->setBuffer(output_buffers[i].get());
+  }
+
+  auto mockup_layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  mockup_layer->run();
+
+  for (size_t i = 0; i < 4; ++i)
+  {
+    for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+    {
+      for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+      {
+        for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+        {
+          for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+          {
+            Coordinates coords{j, k, l, m};
+            uint8_t qasymm8 =
+              *reinterpret_cast<uint8_t *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+            float result = (qasymm8 - zero_point) * scale;
+            float expected =
+              *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+            EXPECT_EQ(result, expected);
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, float_to_qsymm8)
+{
+  const size_t input_pads[4] = {0, 0, 1, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+  float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+  float scale = 10;
+  int32_t zero_point = 0;
+
+  std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+  std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+  for (size_t i = 0; i < 4; ++i)
+  {
+    inputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32), Layout::NHWC,
+                                               input_pads[i]);
+    inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+    TypeInfo type_info{DataType::QUANT_INT8_SYMM, scale, zero_point};
+    outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+    output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+    outputs[i]->setBuffer(output_buffers[i].get());
+  }
+
+  auto mockup_layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  mockup_layer->run();
+
+  for (size_t i = 0; i < 4; ++i)
+  {
+    for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+    {
+      for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+      {
+        for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+        {
+          for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+          {
+            Coordinates coords{j, k, l, m};
+            int8_t qsymm8 =
+              *reinterpret_cast<int8_t *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+            float result = (qsymm8 - zero_point) * scale;
+            float expected =
+              *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+            EXPECT_EQ(result, expected);
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, float_to_qsymm16)
+{
+  const size_t input_pads[4] = {0, 0, 1, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+  float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+  float scale = 10;
+  int32_t zero_point = 0;
+
+  std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+  std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+  for (size_t i = 0; i < 4; ++i)
+  {
+    inputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32), Layout::NHWC,
+                                               input_pads[i]);
+    inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+    TypeInfo type_info{DataType::QUANT_INT16_SYMM, scale, zero_point};
+    outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+    output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+    outputs[i]->setBuffer(output_buffers[i].get());
+  }
+
+  auto mockup_layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  mockup_layer->run();
+
+  for (size_t i = 0; i < 4; ++i)
+  {
+    for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+    {
+      for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+      {
+        for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+        {
+          for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+          {
+            Coordinates coords{j, k, l, m};
+            int16_t qsymm16 =
+              *reinterpret_cast<int16_t *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+            float result = (qsymm16 - zero_point) * scale;
+            float expected =
+              *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+            EXPECT_EQ(result, expected);
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, qasymm8_to_float)
+{
+  const size_t input_pads[4] = {0, 0, 1, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+  float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+  float scale = 10;
+  int32_t zero_point = 128;
+  uint8_t input_buffer[12];
+
+  int32_t min_val = std::numeric_limits<uint8_t>::min();
+  int32_t max_val = std::numeric_limits<uint8_t>::max();
+  for (int32_t i = 0; i < sizeof(expected_buffer) / sizeof(float); ++i)
+  {
+    int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point;
+    input_buffer[i] = std::min(std::max(unclamped, min_val), max_val);
+  }
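+  // input_buffer now holds the QUANT_UINT8_ASYMM representation of expected_buffer.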
+
+  std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+  std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+  for (size_t i = 0; i < 4; ++i)
+  {
+    TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+    inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+    inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(input_buffer));
+
+    outputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32),
+                                                Layout::NHWC, output_pads[i]);
+    output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+    outputs[i]->setBuffer(output_buffers[i].get());
+  }
+
+  auto mockup_layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  mockup_layer->run();
+
+  for (size_t i = 0; i < 4; ++i)
+  {
+    for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+    {
+      for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+      {
+        for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+        {
+          for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+          {
+            Coordinates coords{j, k, l, m};
+            float result =
+              *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+            uint8_t qasymm8 =
+              *reinterpret_cast<uint8_t *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+            float expected = (qasymm8 - zero_point) * scale;
+            EXPECT_EQ(result, expected);
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, qsymm8_to_float)
+{
+  const size_t input_pads[4] = {0, 0, 1, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+  float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+  float scale = 10;
+  int32_t zero_point = 0;
+  uint8_t input_buffer[12];
+
+  int32_t min_val = std::numeric_limits<int8_t>::min();
+  int32_t max_val = std::numeric_limits<int8_t>::max();
+  for (int32_t i = 0; i < sizeof(expected_buffer) / sizeof(float); ++i)
+  {
+    int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point;
+    input_buffer[i] = std::min(std::max(unclamped, min_val), max_val);
+  }
+
+  std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+  std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+  for (size_t i = 0; i < 4; ++i)
+  {
+    TypeInfo type_info{DataType::QUANT_INT8_SYMM, scale, zero_point};
+    inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+    inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(input_buffer));
+
+    outputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32),
+                                                Layout::NHWC, output_pads[i]);
+    output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+    outputs[i]->setBuffer(output_buffers[i].get());
+  }
+
+  auto mockup_layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  mockup_layer->run();
+
+  for (size_t i = 0; i < 4; ++i)
+  {
+    for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+    {
+      for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+      {
+        for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+        {
+          for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+          {
+            Coordinates coords{j, k, l, m};
+            float result =
+              *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+            int8_t qsymm8 =
+              *reinterpret_cast<int8_t *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+            float expected = (qsymm8 - zero_point) * scale;
+            EXPECT_EQ(result, expected);
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, qsymm16_to_float)
+{
+  const size_t input_pads[4] = {0, 0, 1, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+  float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+  float scale = 10;
+  int32_t zero_point = 0;
+  uint8_t input_buffer[12];
+
+  int32_t min_val = std::numeric_limits<int16_t>::min();
+  int32_t max_val = std::numeric_limits<int16_t>::max();
+  for (int32_t i = 0; i < sizeof(expected_buffer) / sizeof(float); ++i)
+  {
+    int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point;
+    input_buffer[i] = std::min(std::max(unclamped, min_val), max_val);
+  }
+
+  std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+  std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+  for (size_t i = 0; i < 4; ++i)
+  {
+    TypeInfo type_info{DataType::QUANT_INT16_SYMM, scale, zero_point};
+    inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+    inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(input_buffer));
+
+    outputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32),
+                                                Layout::NHWC, output_pads[i]);
+    output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+    outputs[i]->setBuffer(output_buffers[i].get());
+  }
+
+  auto mockup_layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  mockup_layer->run();
+
+  for (size_t i = 0; i < 4; ++i)
+  {
+    for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+    {
+      for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+      {
+        for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+        {
+          for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+          {
+            Coordinates coords{j, k, l, m};
+            float result =
+              *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+            int16_t qsymm16 =
+              *reinterpret_cast<int16_t *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+            float expected = (qsymm16 - zero_point) * scale;
+            EXPECT_EQ(result, expected);
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, float_to_qasymm8_layout)
+{
+  const size_t input_pads[4] = {0, 0, 1, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+  float expected_buffer[] = {10,  0,  -10,  -20, 30,   -40, 50,   -60, 70,
+                             -80, 90, -100, 110, -120, 130, -140, 150, -160};
+  float scale = 10;
+  int32_t zero_point = 128;
+
+  std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+  std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+  for (size_t i = 0; i < 4; ++i)
+  {
+    Layout layout = Layout::NHWC;
+    Shape shape = shapes[i];
+    if (i % 2 == 1)
+    {
+      layout = Layout::NCHW;
+      shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+    }
+    inputs[i] =
+      std::make_unique<MockUpTensor>(shape, TypeInfo(DataType::FLOAT32), layout, input_pads[i]);
+    inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+    if (layout == Layout::NHWC)
+    {
+      layout = Layout::NCHW;
+      shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+    }
+    else
+    {
+      layout = Layout::NHWC;
+      shape = shapes[i];
+    }
+    TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+    outputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, output_pads[i]);
+    output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+    outputs[i]->setBuffer(output_buffers[i].get());
+  }
+
+  auto mockup_layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  mockup_layer->run();
+
+  for (size_t i = 0; i < 4; ++i)
+  {
+    for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+    {
+      for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+      {
+        for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+        {
+          for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+          {
+            Coordinates input_coords;
+            Coordinates output_coords;
+            if (inputs[i]->layout() == Layout::NHWC)
+            {
+              input_coords = Coordinates{j, k, l, m};
+            }
+            else
+            {
+              input_coords = Coordinates{j, m, k, l};
+            }
+            if (outputs[i]->layout() == Layout::NHWC)
+            {
+              output_coords = Coordinates{j, k, l, m};
+            }
+            else
+            {
+              output_coords = Coordinates{j, m, k, l};
+            }
+            uint8_t qasymm8 = *reinterpret_cast<uint8_t *>(outputs[i]->buffer() +
+                                                           outputs[i]->calcOffset(output_coords));
+            float result = (qasymm8 - zero_point) * scale;
+            float expected =
+              *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(input_coords));
+            EXPECT_EQ(result, expected);
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, asymm8_to_float_layout)
+{
+  const size_t input_pads[4] = {0, 0, 1, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+  float expected_buffer[] = {10,  0,  -10,  -20, 30,   -40, 50,   -60, 70,
+                             -80, 90, -100, 110, -120, 130, -140, 150, -160};
+  float scale = 10;
+  int32_t zero_point = 128;
+  uint8_t input_buffer[18];
+
+  int32_t min_val = std::numeric_limits<uint8_t>::min();
+  int32_t max_val = std::numeric_limits<uint8_t>::max();
+  for (int32_t i = 0; i < sizeof(expected_buffer) / sizeof(float); ++i)
+  {
+    int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point;
+    input_buffer[i] = std::min(std::max(unclamped, min_val), max_val);
+  }
+
+  std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+  std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+  for (size_t i = 0; i < 4; ++i)
+  {
+    Layout layout = Layout::NHWC;
+    Shape shape = shapes[i];
+    if (i % 2 == 1)
+    {
+      layout = Layout::NCHW;
+      shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+    }
+    TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+    inputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, input_pads[i]);
+    inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(input_buffer));
+
+    if (layout == Layout::NHWC)
+    {
+      layout = Layout::NCHW;
+      shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+    }
+    else
+    {
+      layout = Layout::NHWC;
+      shape = shapes[i];
+    }
+    outputs[i] =
+      std::make_unique<MockUpTensor>(shape, TypeInfo(DataType::FLOAT32), layout, output_pads[i]);
+    output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+    outputs[i]->setBuffer(output_buffers[i].get());
+  }
+
+  auto mockup_layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  mockup_layer->run();
+
+  for (size_t i = 0; i < 4; ++i)
+  {
+    for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+    {
+      for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+      {
+        for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+        {
+          for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+          {
+            Coordinates input_coords;
+            Coordinates output_coords;
+            if (inputs[i]->layout() == Layout::NHWC)
+            {
+              input_coords = Coordinates{j, k, l, m};
+            }
+            else
+            {
+              input_coords = Coordinates{j, m, k, l};
+            }
+            if (outputs[i]->layout() == Layout::NHWC)
+            {
+              output_coords = Coordinates{j, k, l, m};
+            }
+            else
+            {
+              output_coords = Coordinates{j, m, k, l};
+            }
+            float result = *reinterpret_cast<float *>(outputs[i]->buffer() +
+                                                      outputs[i]->calcOffset(output_coords));
+            uint8_t qasymm8 = *reinterpret_cast<uint8_t *>(inputs[i]->buffer() +
+                                                           inputs[i]->calcOffset(input_coords));
+            float expected = (qasymm8 - zero_point) * scale;
+            EXPECT_EQ(result, expected);
+          }
+        }
+      }
+    }
+  }
+}
+
+} // namespace
index 70c9c3d..456663f 100644 (file)
@@ -45,7 +45,7 @@ void ParallelScheduler::assign(std::unique_ptr<IFunction> &&fn, const backend::B
 
 void ParallelScheduler::finish()
 {
-  for (auto &itr : _thread_pools)
+  for (auto &&itr : _thread_pools)
   {
     itr.second->finish();
   }
diff --git a/runtime/onert/core/src/exec/SingleModelExecutors.cc b/runtime/onert/core/src/exec/SingleModelExecutors.cc
new file mode 100644 (file)
index 0000000..4b954ba
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SingleModelExecutors.h"
+
+#include "../backend/builtin/IOTensor.h"
+
+namespace onert
+{
+namespace exec
+{
+
+void SingleModelExecutors::emplace(const ir::ModelIndex &, const ir::SubgraphIndex &subg_index,
+                                   std::unique_ptr<IExecutor> exec)
+{
+  _executors.emplace(subg_index, std::move(exec));
+}
+
+IExecutor *SingleModelExecutors::at(const ir::ModelIndex &,
+                                    const ir::SubgraphIndex &subg_index) const
+{
+  return _executors.at(subg_index).get();
+}
+
+uint32_t SingleModelExecutors::inputSize() const
+{
+  return entryExecutor()->getInputTensors().size();
+}
+
+uint32_t SingleModelExecutors::outputSize() const
+{
+  return entryExecutor()->getOutputTensors().size();
+}
+
+const ir::OperandInfo &SingleModelExecutors::inputInfo(const ir::IOIndex &index) const
+{
+  return entryExecutor()->getInputTensors().at(index.value())->orig_info();
+}
+
+const ir::OperandInfo &SingleModelExecutors::outputInfo(const ir::IOIndex &index) const
+{
+  return entryExecutor()->getOutputTensors().at(index.value())->orig_info();
+}
+
+void SingleModelExecutors::execute(const IODescription &desc) { entryExecutor()->execute(desc); }
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/SingleModelExecutors.h b/runtime/onert/core/src/exec/SingleModelExecutors.h
new file mode 100644 (file)
index 0000000..98d629e
--- /dev/null
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_SINGLE_MODEL_EXECUTORS_H__
+#define __ONERT_EXEC_SINGLE_MODEL_EXECUTORS_H__
+
+#include "exec/IExecutors.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to gather executor set for single model NN package
+ */
+class SingleModelExecutors : public IExecutors
+{
+public:
+  /**
+   * @brief Construct a new SingleModelExecutors object
+   */
+  SingleModelExecutors(void) = default;
+  SingleModelExecutors(const SingleModelExecutors &) = delete;
+  SingleModelExecutors(SingleModelExecutors &&) = default;
+
+  /**
+   * @brief Destroy the SingleModelExecutors object
+   */
+  ~SingleModelExecutors() = default;
+
+public:
+  void emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+               std::unique_ptr<IExecutor> exec) override;
+
+  IExecutor *at(const ir::ModelIndex &model_index,
+                const ir::SubgraphIndex &subg_index) const override;
+
+  uint32_t inputSize() const override;
+
+  uint32_t outputSize() const override;
+
+  const ir::OperandInfo &inputInfo(const ir::IOIndex &index) const override;
+
+  const ir::OperandInfo &outputInfo(const ir::IOIndex &index) const override;
+
+  void execute(const IODescription &desc) override;
+
+private:
+  std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>> _executors;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_SINGLE_MODEL_EXECUTORS_H__
index c8e0e32..bf85e59 100644 (file)
@@ -48,7 +48,7 @@ uint32_t ThreadPool::numJobsInQueue() { return _worker.numJobsInQueue(); }
 
 void ThreadPool::join()
 {
-  for (auto &thread : _threads)
+  for (auto &&thread : _threads)
   {
     thread.join();
   }
diff --git a/runtime/onert/core/src/interp/Buffer.h b/runtime/onert/core/src/interp/Buffer.h
deleted file mode 100644 (file)
index 24938f7..0000000
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file  Buffer.h
- * @brief This file contains Buffer interface and InternalBuffer, ExternalBuffer class
- */
-#ifndef __ONERT_INTERP_BUFFER_H__
-#define __ONERT_INTERP_BUFFER_H__
-
-#include <memory>
-
-#include "ir/Data.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Interface for writable data area
- */
-class Buffer : public ir::Data
-{
-public:
-  /**
-   * @brief   Return writable pointer for data area
-   * @return  Writable pointer
-   */
-  virtual uint8_t *baseWritable(void) const = 0;
-};
-
-/**
- * @brief Class for internally allocated data area
- */
-class InternalBuffer final : public Buffer
-{
-public:
-  InternalBuffer(size_t size) : _base{std::make_unique<uint8_t[]>(size)}, _size{size}
-  {
-    // DO NOTHING
-  }
-
-public:
-  size_t size(void) const override { return _size; }
-  const uint8_t *base(void) const override { return _base.get(); }
-  uint8_t *baseWritable(void) const override { return _base.get(); }
-
-private:
-  std::unique_ptr<uint8_t[]> _base;
-  size_t _size;
-};
-
-/**
- * @brief Class for data area from outside
- */
-class ExternalBuffer final : public Buffer
-{
-public:
-  ExternalBuffer(uint8_t *base, size_t size) : _base{base}, _size{size}
-  {
-    // DO NOTHING
-  }
-
-public:
-  size_t size(void) const override { return _size; }
-  const uint8_t *base(void) const override { return _base; }
-  uint8_t *baseWritable(void) const override { return _base; }
-
-private:
-  uint8_t *_base;
-  size_t _size;
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_BUFFER_H__
diff --git a/runtime/onert/core/src/interp/ExecEnv.h b/runtime/onert/core/src/interp/ExecEnv.h
deleted file mode 100644 (file)
index 7f577ea..0000000
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file  ExecEnv.h
- * @brief This file contains ExecEnv to access interpreter tensor and execution status
- */
-#ifndef __ONERT_INTERP_EXEC_ENV_H_
-#define __ONERT_INTERP_EXEC_ENV_H_
-
-#include <unordered_set>
-
-#include "ir/Graph.h"
-#include "Tensor.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Class to gather interpreter execution environment
- *        Each interpreter instance own execution environment
- */
-class ExecEnv
-{
-public:
-  /**
-   * @brief Construct a new Exec Env object (deleted)
-   */
-  ExecEnv(void) = delete;
-  /**
-   * @brief Construct a new ExecEnv object
-   * @param[in] graph Graph to execute by interpreter
-   */
-  explicit ExecEnv(const ir::Graph &graph) : _graph(graph)
-  {
-    // DO NOTHING
-  }
-
-public:
-  /**
-   * @brief   Return graph to execute
-   * @return  Graph
-   */
-  const ir::Graph &graph(void) const { return _graph; }
-  /**
-   * @brief     Assign tensor to environment which have allocated or assigned buffer
-   * @param[in] index   Tensor index
-   * @param[in] tensor  Tensor
-   */
-  void assignTensor(const ir::OperandIndex index, std::shared_ptr<ITensor> tensor)
-  {
-    assert(tensor->bufferRO() != nullptr);
-    _tensors.emplace(index, tensor);
-  }
-
-  /**
-   * @brief     Return tensor pointer in environment
-   * @param[in] index         Tensor index
-   *            can_optional  @c True if tensor can be optional input, otherwise @c false
-   * @return    Tensor pointer
-   */
-  const ITensor *tensorAt(const ir::OperandIndex index, bool can_optional = false) const
-  {
-    if (_tensors.find(index) == _tensors.end())
-    {
-      // It may optional input,
-      // otherwise input is not set by runtime user
-      if (can_optional)
-      {
-        return nullptr;
-      }
-
-      throw std::runtime_error{"ExecEnv: Input is not set"};
-    }
-
-    return _tensors.at(index).get();
-  }
-
-  /**
-   * @brief     Check environment contains tensor
-   * @param[in] index Tensor index
-   * @return    @c true if environment contain tensor, otherwise @c false
-   */
-  bool contains(const ir::OperandIndex index) const
-  {
-    return (_tensors.find(index) != _tensors.end());
-  }
-
-  /**
-   * @brief     Allocate tensor using operand info
-   * @param[in] index     Tensor index
-   * @param[in] info      Operand info
-   * @note      If already allocated, just return
-   * @TODO      More smart allocation policy
-   */
-  void allocateIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info)
-  {
-    // already allocated, or constant
-    if (contains(index))
-    {
-      return;
-    }
-
-    // Buffer from external (ex. model output)
-    auto tensor = std::make_shared<Tensor>(info);
-    if (isExtBuffer(index))
-    {
-      tensor->setBuffer(_external_buffers.at(index));
-      assignTensor(index, tensor);
-
-      return;
-    }
-
-    tensor->setBuffer(std::make_shared<InternalBuffer>(tensor->total_size()));
-    assignTensor(index, tensor);
-    _buffers.insert(index);
-  }
-
-  /**
-   * @brief     Allocate read-only tensor and share data with other tensor
-   * @param[in] index           Tensor index
-   * @param[in] info            Operand info
-   * @param[in] index_to_share  Tensor index that have data to share
-   */
-  void allocateAndShareIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info,
-                                const ir::OperandIndex index_to_share)
-  {
-    if (!contains(index_to_share))
-    {
-      throw std::runtime_error{"Cannot find tensor to share data"};
-    }
-
-    // already allocated
-    if (contains(index))
-    {
-      return;
-    }
-
-    if (isExtBuffer(index))
-    {
-      auto tensor = std::make_shared<Tensor>(info);
-      tensor->setBuffer(_external_buffers.at(index));
-      assignTensor(index, tensor);
-    }
-    else
-    {
-      auto tensor = std::make_shared<ROTensor>(info);
-      tensor->setData(tensorAt(index_to_share)->shareData());
-      assignTensor(index, tensor);
-      _buffers.insert(index);
-    }
-  }
-
-  /**
-   * @brief     Free the buffer if it was allocated by allocateIfNeeded
-   * @param[in] index Tensor index
-   * @note      If the buffer was allocated externally, do nothing
-   */
-  void freeIfAllocated(const ir::OperandIndex index)
-  {
-    if (_buffers.find(index) != _buffers.end())
-    {
-      _tensors.at(index)->releaseData();
-    }
-  }
-
-  /**
-   * @brief     Assign ExternalBuffer into external buffer map
-   * @param[in] index   Tensor index
-   * @param[in] buffer  External buffer
-   */
-  void assignExternalBuffer(const ir::OperandIndex index, std::shared_ptr<ExternalBuffer> buffer)
-  {
-    _external_buffers.emplace(index, buffer);
-  }
-
-private:
-  bool isExtBuffer(const ir::OperandIndex index)
-  {
-    return (_external_buffers.find(index) != _external_buffers.end());
-  }
-
-private:
-  const ir::Graph &_graph;
-  // Tensor map to use in interpreter
-  // It should map only tensors that have an allocated or assigned buffer pointer
-  std::unordered_map<ir::OperandIndex, std::shared_ptr<ITensor>> _tensors;
-  // Tensors allocated by allocateIfNeeded (buffer)
-  std::unordered_set<ir::OperandIndex> _buffers;
-  // Tensor buffer from external
-  std::unordered_map<ir::OperandIndex, std::shared_ptr<ExternalBuffer>> _external_buffers;
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_EXEC_ENV_H_
diff --git a/runtime/onert/core/src/interp/InterpExecutor.cc b/runtime/onert/core/src/interp/InterpExecutor.cc
deleted file mode 100644 (file)
index f047771..0000000
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "InterpExecutor.h"
-
-#include "ExecEnv.h"
-#include "Interpreter.h"
-
-#include "util/logging.h"
-
-#include <memory>
-
-namespace onert
-{
-namespace interp
-{
-
-void InterpExecutor::execute(const exec::IODescription &desc)
-{
-  /************************************************************************
-   * Prepare execution model (submodel)
-     It may execute a partitioned submodel,
-     but for now assume the whole model inference is done by the interpreter
-   ***********************************************************************/
-  ir::OperandIndexMap<std::shared_ptr<ITensor>> tensor_map;
-
-  for (uint32_t n = 0; n < _graph.getInputs().size(); n++)
-  {
-    ir::IOIndex index{n};
-    const auto input_index = _graph.getInputs().at(index);
-
-    const auto input = desc.inputs.at(n).get();
-    if (input == nullptr)
-    {
-      // Optional input
-      continue;
-    }
-
-    auto input_tensor = std::make_shared<ROTensor>(input->info);
-    input_tensor->setData(std::make_shared<const ir::ExternalData>(
-      reinterpret_cast<const uint8_t *>(input->buffer), input->size));
-    tensor_map[input_index] = input_tensor;
-  }
-
-  /************************************************************************
-   * Prepare execution environment
-     The execution environment will be assigned to the invoked interpreter instance
-   ***********************************************************************/
-
-  std::unique_ptr<ExecEnv> interp_env = std::make_unique<ExecEnv>(_graph);
-
-  // Assign input/output tensor into interpreter execution environment
-  for (auto index : _graph.getInputs())
-  {
-    if (tensor_map.find(index) != tensor_map.end())
-    {
-      VERBOSE(INTERPRETER) << "Assign input tensor. operand index:" << index << std::endl;
-      interp_env->assignTensor(index, tensor_map.at(index));
-    }
-  }
-
-  for (uint32_t n = 0; n < _graph.getOutputs().size(); n++)
-  {
-    ir::IOIndex index{n};
-    const auto output_index = _graph.getOutputs().at(index);
-    const auto output = desc.outputs.at(n).get();
-    if (output == nullptr)
-    {
-      // Optional output
-      continue;
-    }
-
-    VERBOSE(INTERPRETER) << "Set out buffer to ExecEnv. operand index:" << output_index.value()
-                         << std::endl;
-
-    interp_env->assignExternalBuffer(
-      output_index,
-      std::make_shared<ExternalBuffer>(reinterpret_cast<uint8_t *>(output->buffer), output->size));
-  }
-
-  // Allocate constant tensor
-  _graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
-    if (obj.isConstant())
-    {
-      VERBOSE(INTERPRETER) << "Allocate and assign constant tensor. operand index:" << ind
-                           << std::endl;
-
-      assert(obj.data());
-      auto const_tensor = std::make_shared<ROTensor>(obj.info());
-      // Assume that the interpreter's tensor layout is the same as the model's (NHWC)
-      const_tensor->setData(
-        std::make_shared<ir::ExternalData>(obj.data()->base(), obj.info().total_size()));
-      interp_env->assignTensor(ind, const_tensor);
-    }
-  });
-
-  /*****************************************************************************
-   * Invoke interpreter
-   ****************************************************************************/
-
-  interp::Interpreter interp(std::move(interp_env));
-  interp.run();
-
-  /*****************************************************************************
-   * Invoked interpreter run is finished
-   ****************************************************************************/
-
-  // If the interpreter executes a submodel
-  //  1. Get the submodel's output tensors into tensor_map to save the result
-  //  2. Generate a new ExecEnv for the next interpretation
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/InterpExecutor.h b/runtime/onert/core/src/interp/InterpExecutor.h
deleted file mode 100644 (file)
index d6d5dd0..0000000
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file  InterpExecutor.h
- * @brief This file contains InterpExecutor class\n
- *        to manage interpreter execution and environment
- */
-#ifndef __ONERT_INTERP_INTERP_EXECUTOR_H__
-#define __ONERT_INTERP_INTERP_EXECUTOR_H__
-
-#include "ir/OperandIndexMap.h"
-#include "ir/Graph.h"
-#include "exec/IExecutor.h"
-
-namespace onert
-{
-namespace interp
-{
-
-class ITensor;
-
-/**
- * @brief Class to execute model using interpreter
- */
-class InterpExecutor final : public exec::IExecutor
-{
-public:
-  explicit InterpExecutor(const ir::Graph &graph) : _graph(graph)
-  {
-    // DO NOTHING
-  }
-
-public:
-  /**
-   * @brief   Return graph object
-   * @return  Graph object
-   */
-  const ir::Graph &graph() final { return _graph; }
-
-  const ir::Graph &parent_graph() final
-  {
-    throw new std::runtime_error{"Interpreter does not support this function."};
-  }
-  void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) override{
-    // Not implemented
-  };
-  /**
-   * @brief  Start execution
-   * @note   It should be called after setting input and output buffer
-   */
-  void execute(const exec::IODescription &desc) final;
-  void execute(const std::vector<backend::IPortableTensor *> &,
-               const std::vector<backend::IPortableTensor *> &) final
-  {
-    throw new std::runtime_error{"Interpreter does not support subgraph calls(control flow ops)"};
-  }
-  const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const final
-  {
-    throw new std::runtime_error{"Interpreter does not support this function."};
-  }
-
-private:
-  /**
-   * @brief Copy of the target graph for lowering
-   * @note  It uses a copy of the graph, not a reference,
-   *        because the original graph may be deallocated by the frontend.
-   */
-  const ir::Graph _graph;
-  ir::OperandIndexMap<std::shared_ptr<ITensor>> _tensor_map;
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_INTERP_EXECUTOR_H__
diff --git a/runtime/onert/core/src/interp/InterpExecutor.test.cc b/runtime/onert/core/src/interp/InterpExecutor.test.cc
deleted file mode 100644 (file)
index 9f95ffe..0000000
+++ /dev/null
@@ -1,355 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "InterpExecutor.h"
-
-#include "exec/Execution.h"
-#include "ir/Graph.h"
-#include "ir/operation/BinaryArithmetic.h"
-
-#include <gtest/gtest.h>
-
-#include <memory>
-
-namespace
-{
-
-using namespace onert::ir;
-using InterpExecutor = onert::interp::InterpExecutor;
-using Execution = onert::exec::Execution;
-using Executors = onert::exec::Executors;
-
-class InterpExecutorTest : public ::testing::Test
-{
-protected:
-  virtual void SetUp() {}
-  void CreateSimpleModel()
-  {
-    // Model: one elementwise add operation
-    // model input: lhs, rhs
-    // model output: add result
-    // lhs, rhs, result shape: {1, 2, 2, 1}
-    // activation: none (constant)
-    _graph = std::make_unique<Graph>();
-
-    // Add operands
-
-    Shape shape{1, 2, 2, 1};
-    TypeInfo type{DataType::INT32};
-    Shape shape_scalar(0);
-    TypeInfo type_scalar{DataType::INT32};
-
-    auto operand_lhs = _graph->addOperand(shape, type);
-    auto operand_rhs = _graph->addOperand(shape, type);
-    auto operand_result = _graph->addOperand(shape, type);
-
-    // Add operations
-
-    operation::BinaryArithmetic::Param param;
-    param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
-    param.activation = Activation::NONE;
-    auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
-    auto output_set = OperandIndexSequence{operand_result};
-    _graph->addOperation(
-      std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
-
-    // Identify model inputs and outputs
-
-    _graph->getInputs().append(operand_lhs);
-    _graph->getInputs().append(operand_rhs);
-    _graph->getOutputs().append(operand_result);
-
-    _graph->verify();
-
-    auto model = std::make_shared<onert::ir::Model>();
-    model->push(onert::ir::SubgraphIndex{0}, _graph);
-
-    _executors = std::make_shared<Executors>();
-    _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph));
-  }
-
-  void CreateTwoStepModel()
-  {
-    // Model: two elementwise add operations
-    // model input: lhs, rhs1
-    // model output: second add result (result2)
-    // constant: rhs2
-    // result1 <= (lhs + rhs1)
-    // result2 <= (result1 + rhs2)
-    // lhs, rhs1, rhs2, result1, result2 shape: {1, 2, 2, 1}
-    // activation: none (constant)
-    _graph = std::make_unique<Graph>();
-
-    // Add operands (rhs2 is a constant)
-
-    Shape shape{1, 2, 2, 1};
-    TypeInfo type{DataType::INT32};
-    Shape shape_scalar(0);
-    TypeInfo type_scalar{DataType::INT32};
-
-    static int32_t rhs2_data[4] = {3, 1, -1, 5};
-
-    auto operand_lhs = _graph->addOperand(shape, type);
-    auto operand_rhs1 = _graph->addOperand(shape, type);
-    auto operand_result1 = _graph->addOperand(shape, type);
-    auto operand_rhs2 = _graph->addOperand(shape, type);
-    auto operand_result2 = _graph->addOperand(shape, type);
-    _graph->operands()
-      .at(operand_rhs2)
-      .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
-
-    // Add operations (result1 <= lhs + rhs1, result2 <= result1 + rhs2)
-
-    operation::BinaryArithmetic::Param param1;
-    param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
-    param1.activation = Activation::NONE;
-    auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
-    auto output_set1 = OperandIndexSequence{operand_result1};
-    _graph->addOperation(
-      std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
-
-    operation::BinaryArithmetic::Param param2;
-    param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
-    param2.activation = Activation::NONE;
-    auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
-    auto output_set2 = OperandIndexSequence{operand_result2};
-    _graph->addOperation(
-      std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
-
-    // Identify model inputs and outputs
-
-    _graph->getInputs().append(operand_lhs);
-    _graph->getInputs().append(operand_rhs1);
-    _graph->getOutputs().append(operand_result2);
-
-    _graph->verify();
-
-    auto model = std::make_shared<onert::ir::Model>();
-    model->push(onert::ir::SubgraphIndex{0}, _graph);
-
-    _executors = std::make_shared<Executors>();
-    _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph));
-  }
-
-  void CreateUnspecifiedDimensionsModel()
-  {
-    // Model: one elementwise add operation
-    // model input: lhs, rhs
-    // model output: add result
-    // lhs, rhs, result shape: {1, unknown, 2, 1}
-    // activation: none (constant)
-    _graph = std::make_unique<Graph>();
-
-    // Add operands
-
-    Shape shape{1, 0, 2, 1};
-    TypeInfo type{DataType::INT32};
-    Shape shape_scalar(0);
-    TypeInfo type_scalar{DataType::INT32};
-
-    auto operand_lhs = _graph->addOperand(shape, type);
-    auto operand_rhs = _graph->addOperand(shape, type);
-
-    auto operand_activation = _graph->addOperand(shape_scalar, type_scalar);
-    _graph->operands()
-      .at(operand_activation)
-      .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&_activation_value), 4));
-
-    auto operand_result = _graph->addOperand(shape, type);
-
-    // Add operations
-
-    operation::BinaryArithmetic::Param param;
-    param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
-    param.activation = Activation::NONE;
-    auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
-    auto output_set = OperandIndexSequence{operand_result};
-    _graph->addOperation(
-      std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
-
-    // Identify model inputs and outputs
-
-    _graph->getInputs().append(operand_lhs);
-    _graph->getInputs().append(operand_rhs);
-    _graph->getOutputs().append(operand_result);
-
-    _graph->verify();
-
-    auto model = std::make_shared<onert::ir::Model>();
-    model->push(onert::ir::SubgraphIndex{0}, _graph);
-
-    _executors = std::make_shared<Executors>();
-    _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph));
-  }
-
-  void createExecution() { _execution = std::make_unique<Execution>(_executors); }
-
-  virtual void TearDown() { _executors = nullptr; }
-
-  std::shared_ptr<Graph> _graph{nullptr};
-  std::shared_ptr<Executors> _executors{nullptr};
-  std::unique_ptr<Execution> _execution{nullptr};
-  const int32_t _activation_value{0};
-};
-
-TEST_F(InterpExecutorTest, create_empty)
-{
-  Graph graph;
-  graph.verify();
-  auto executor = std::make_unique<InterpExecutor>(graph);
-  ASSERT_NE(executor, nullptr);
-}
-
-TEST_F(InterpExecutorTest, create_simple)
-{
-  CreateSimpleModel();
-  ASSERT_NE(_executors, nullptr);
-  ASSERT_NE(_executors->at(onert::ir::SubgraphIndex{0}), nullptr);
-}
-
-TEST_F(InterpExecutorTest, neg_setInput)
-{
-  CreateSimpleModel();
-  createExecution();
-
-  auto input1 = IOIndex{0};
-  const int32_t input1_buffer[4] = {1, 0, -1, -2};
-
-  EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 4),
-               std::runtime_error);
-  EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 12),
-               std::runtime_error);
-  EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, neg_setOutput)
-{
-  CreateSimpleModel();
-  createExecution();
-
-  auto output = IOIndex{0};
-  auto output_idx = _graph->getOutputs().at(output);
-
-  int32_t output_buffer[4] = {};
-
-  EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 4),
-               std::runtime_error);
-  EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 12),
-               std::runtime_error);
-  EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, neg_setInputForUnspecifiedDimensions)
-{
-  CreateUnspecifiedDimensionsModel();
-  createExecution();
-
-  auto input1 = IOIndex{0};
-  const int32_t input1_buffer[4] = {1, 0, -1, -2};
-
-  TypeInfo operand_type{DataType::INT32};
-  Shape operand_shape{1, 2, 2, 1};
-
-  EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape,
-                                    reinterpret_cast<const void *>(input1_buffer), 4),
-               std::runtime_error);
-  EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape,
-                                    reinterpret_cast<const void *>(input1_buffer), 12),
-               std::runtime_error);
-  EXPECT_NO_THROW(_execution->setInput(input1, operand_type, operand_shape,
-                                       reinterpret_cast<const void *>(input1_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, neg_setOutputForUnspecifiedDimensions)
-{
-  CreateUnspecifiedDimensionsModel();
-  createExecution();
-
-  auto output = IOIndex{0};
-  auto output_idx = _graph->getOutputs().at(output);
-
-  TypeInfo operand_type{DataType::INT32};
-  Shape operand_shape{1, 2, 2, 1};
-
-  int32_t output_buffer[4] = {};
-
-  EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape,
-                                     reinterpret_cast<void *>(output_buffer), 4),
-               std::runtime_error);
-  EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape,
-                                     reinterpret_cast<void *>(output_buffer), 12),
-               std::runtime_error);
-  EXPECT_NO_THROW(_execution->setOutput(output, operand_type, operand_shape,
-                                        reinterpret_cast<void *>(output_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, execute)
-{
-  CreateSimpleModel();
-  createExecution();
-
-  auto input1 = IOIndex{0};
-  auto input2 = IOIndex{1};
-  auto input1_idx = _graph->getInputs().at(input1);
-  auto input2_idx = _graph->getInputs().at(input2);
-
-  const int32_t input1_buffer[4] = {1, 0, -1, -2};
-  const int32_t input2_buffer[4] = {1, -3, 2, -4};
-
-  auto output = IOIndex{0};
-  auto output_idx = _graph->getOutputs().at(output);
-
-  int32_t output_buffer[4] = {};
-
-  EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
-  EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16));
-  EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
-  EXPECT_NO_THROW(_execution->execute());
-  EXPECT_EQ(output_buffer[0], 2);
-  EXPECT_EQ(output_buffer[1], -3);
-  EXPECT_EQ(output_buffer[2], 1);
-  EXPECT_EQ(output_buffer[3], -6);
-}
-
-TEST_F(InterpExecutorTest, executeTwoStep)
-{
-  CreateTwoStepModel();
-  createExecution();
-
-  auto input1 = IOIndex{0};
-  auto input2 = IOIndex{1};
-  auto input1_idx = _graph->getInputs().at(input1);
-  auto input2_idx = _graph->getInputs().at(input2);
-
-  const int32_t input1_buffer[4] = {1, 0, -1, -2};
-  const int32_t input2_buffer[4] = {1, -3, 2, -4};
-
-  auto output = IOIndex{0};
-  auto output_idx = _graph->getOutputs().at(output);
-
-  int32_t output_buffer[4] = {};
-
-  EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
-  EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16));
-  EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
-  EXPECT_NO_THROW(_execution->execute());
-  EXPECT_EQ(output_buffer[0], 5);
-  EXPECT_EQ(output_buffer[1], -2);
-  EXPECT_EQ(output_buffer[2], 0);
-  EXPECT_EQ(output_buffer[3], -1);
-}
-
-} // namespace
diff --git a/runtime/onert/core/src/interp/InterpOps.lst b/runtime/onert/core/src/interp/InterpOps.lst
deleted file mode 100644 (file)
index 0714df3..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef INTERP_OP
-#error Define INTERP_OP before including this file
-#endif
-
-// Supported operation names in the interpreter
-//
-// Same list as Operations.lst
-// Comment out an operation if it is not supported in the interpreter
-INTERP_OP(BinaryArithmetic)
-//INTERP_OP(BatchToSpaceND)
-//INTERP_OP(Cast)
-INTERP_OP(Conv2D)
-INTERP_OP(DepthwiseConv2D)
-INTERP_OP(Pool2D)
-INTERP_OP(Concat)
-INTERP_OP(FullyConnected)
-//INTERP_OP(Reduce)
-INTERP_OP(Reshape)
-INTERP_OP(Softmax)
-//INTERP_OP(Squeeze)
-//INTERP_OP(Slice)
-//INTERP_OP(StridedSlice)
-INTERP_OP(ElementwiseActivation)
-//INTERP_OP(Transpose)
-//INTERP_OP(Exp)
-//INTERP_OP(Comparison)
-//INTERP_OP(LogicalNot)
-//INTERP_OP(LSTM)
-//INTERP_OP(RSQRT)
-//INTERP_OP(ResizeBilinear)
-//INTERP_OP(RNN)
-//INTERP_OP(Floor)
-//INTERP_OP(SpaceToBatchND)
-//INTERP_OP(SpaceToDepth)
-//INTERP_OP(EmbeddingLookup)
-//INTERP_OP(L2Normalization)
-//INTERP_OP(HashtableLookup)
-INTERP_OP(InstanceNorm)
-//INTERP_OP(PReLU)
-INTERP_OP(TransposeConv)
-//INTERP_OP(SQRT)
-//INTERP_OP(SquaredDifference)
-//INTERP_OP(TopKV2)
-INTERP_OP(Gather)
-//INTERP_OP(Neg)
-//INTERP_OP(Abs)
-//INTERP_OP(ArgMax)
-//INTERP_OP(Dequantize)
-//INTERP_OP(LocalResponseNormalization)
-//INTERP_OP(DepthToSpace)
-//INTERP_OP(Pack)
-//INTERP_OP(Split)
-//INTERP_OP(Unpack)
-INTERP_OP(Pad)
-//INTERP_OP(Custom)
-//INTERP_OP(Permute)
-//INTERP_OP(OneHot)
diff --git a/runtime/onert/core/src/interp/Interpreter.cc b/runtime/onert/core/src/interp/Interpreter.cc
deleted file mode 100644 (file)
index e01afb8..0000000
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Interpreter.h"
-
-#include <stack>
-#include <unordered_set>
-
-#include "Registration.h"
-
-#include "ir/OperandIndexMap.h"
-#include "util/logging.h"
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace interp
-{
-
-// TODO more structured execution kernel implementation
-// TODO use cker for execution
-// TODO divide tensor prepare and execution
-// TODO introduce memory manager (buffer allocate and free)
-class OperationExecutor
-{
-public:
-  OperationExecutor(ExecEnv *env) : _env{env}
-  {
-#define INTERP_OP(InternalName) _kernels[ir::OpCode::InternalName] = get##InternalName();
-#include "InterpOps.lst"
-#undef INTERP_OP
-  }
-
-  void execute(const ir::OperationIndex &idx)
-  {
-    const ir::Operation &node = _env->graph().operations().at(idx);
-    const auto nodeName = node.name();
-    VERBOSE(INTERPRETER) << "Prepare output operands and execute " << nodeName
-                         << " operation (id: " << idx << ")" << std::endl;
-
-    const auto nodeOpCode = node.opcode();
-    if (_kernels.find(nodeOpCode) == _kernels.end())
-    {
-      throw std::runtime_error{"Interpreter: Operation " + nodeName + " is not yet implemented"};
-    }
-
-    if (_kernels[nodeOpCode]->prepare != nullptr)
-    {
-      _kernels[nodeOpCode]->prepare(_env, node);
-    }
-    _kernels[nodeOpCode]->invoke(_env, node);
-  }
-
-private:
-  ExecEnv *_env;
-  std::unordered_map<ir::OpCode, OpKernel *> _kernels;
-};
-
-void Interpreter::run()
-{
-  VERBOSE(INTERPRETER) << "Interpreter is invoked " << std::endl;
-
-  // operand_stack: save operands prepared to use
-  std::stack<ir::OperandIndex> operand_stack;
-
-  // Note: We should push inputs first, then constants.
-  //       We use use-def chains to find operations that are ready to execute,
-  //       but use-def cannot handle parameters (usually constants, but not always)
-  // Note: If all model inputs are constant, it may not work (depending on the tensors' order).
-  //       But that scenario may not exist
-  for (auto ind : _env->graph().getInputs())
-  {
-    VERBOSE(INTERPRETER) << "Input: Push to operand stack " << ind << std::endl;
-
-    operand_stack.push(ind);
-  }
-
-  _env->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
-    if (obj.isConstant())
-    {
-      VERBOSE(INTERPRETER) << "Constant: Push to operand stack " << ind << std::endl;
-
-      operand_stack.push(ind);
-    }
-  });
-
-  // Execution
-  std::unordered_set<ir::OperandIndex> ready_check;
-  std::unordered_set<ir::OperationIndex> executed;
-  OperationExecutor executor{_env.get()};
-  while (!operand_stack.empty())
-  {
-    const auto current_operand_index = operand_stack.top();
-    operand_stack.pop();
-    VERBOSE(INTERPRETER) << "Poped operand " << current_operand_index.value()
-                         << " is checked ready to use" << std::endl;
-
-    assert(ready_check.find(current_operand_index) == ready_check.end());
-    ready_check.insert(current_operand_index);
-
-    // Find prepared operations by scan use of current operand
-    std::stack<ir::OperationIndex> operation_stack;
-    const auto use_operators = _env->graph().operands().at(current_operand_index).getUses();
-    for (const auto &use_operator : use_operators)
-    {
-      // Assumption: all parameters are ready to use
-      bool operator_ready = true;
-      for (auto input_index : _env->graph().operations().at(use_operator).getInputs())
-      {
-        if (ready_check.find(input_index) == ready_check.end())
-        {
-          operator_ready = false;
-          break;
-        }
-      }
-
-      if (operator_ready)
-      {
-        VERBOSE(INTERPRETER) << "Ready to execute operation " << use_operator << std::endl;
-        operation_stack.push(use_operator);
-      }
-    }
-
-    while (!operation_stack.empty())
-    {
-      const auto current_operation_index = operation_stack.top();
-      operation_stack.pop();
-      VERBOSE(INTERPRETER) << "Poped operation: " << current_operation_index << "("
-                           << _env->graph().operations().at(current_operation_index).name() << ")"
-                           << std::endl;
-
-      // execution
-      // 1. Prepare output tensor
-      // 2. Call operation kernel
-      executor.execute(current_operation_index);
-      executed.insert(current_operation_index);
-
-      // 3. Push each output into operand stack
-      const auto def_operands = _env->graph().operations().at(current_operation_index).getOutputs();
-      for (auto def_operand : def_operands)
-      {
-        VERBOSE(INTERPRETER) << "Buffer: Push to operand stack " << def_operand.value()
-                             << std::endl;
-        operand_stack.push(def_operand);
-      }
-
-      // 4. Free input buffer operands whose lifetime has ended
-      for (auto input_index : _env->graph().operations().at(current_operation_index).getInputs())
-      {
-        const auto use_operators = _env->graph().operands().at(input_index).getUses();
-        bool dead_buffer = true;
-        for (const auto &use_operator : use_operators)
-        {
-          if (executed.find(use_operator) == executed.end())
-          {
-            dead_buffer = false;
-            break;
-          }
-        }
-
-        if (dead_buffer)
-        {
-          _env->freeIfAllocated(input_index);
-        }
-      }
-    }
-  }
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/Interpreter.h b/runtime/onert/core/src/interp/Interpreter.h
deleted file mode 100644 (file)
index d2165f5..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file  Interpreter.h
- * @brief This file contains Interpreter class for interpretation
- */
-#ifndef __ONERT_INTERP_INTERPRETER_H__
-#define __ONERT_INTERP_INTERPRETER_H__
-
-#include "ExecEnv.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Class for interpretation
- */
-class Interpreter
-{
-
-public:
-  /**
-   * @brief Construct a new Interpreter object (deleted)
-   */
-  Interpreter() = delete;
-  /**
-   * @brief     Construct a new Interpreter object
-   * @param[in] env Execution environment for the interpreter object
-   */
-  Interpreter(std::unique_ptr<ExecEnv> env) : _env{std::move(env)}
-  {
-    // DO NOTHING
-  }
-
-public:
-  /**
-   * @brief Run interpreter until there is no operation to execute
-   */
-  void run();
-
-private:
-  std::unique_ptr<ExecEnv> _env;
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_INTERPRETER_H__
diff --git a/runtime/onert/core/src/interp/Registration.h b/runtime/onert/core/src/interp/Registration.h
deleted file mode 100644 (file)
index 956b92a..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_INTERP_REGISTRATION_H__
-#define __ONERT_INTERP_REGISTRATION_H__
-
-#include "ExecEnv.h"
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace interp
-{
-
-struct OpKernel
-{
-  std::function<void(ExecEnv *, const ir::Operation &)> prepare;
-  std::function<void(const ExecEnv *, const ir::Operation &)> invoke;
-};
-
-// Defined in operations/ directory
-#define INTERP_OP(InternalName) OpKernel *get##InternalName();
-#include "InterpOps.lst"
-#undef INTERP_OP
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_REGISTRATION_H__
diff --git a/runtime/onert/core/src/interp/Tensor.cc b/runtime/onert/core/src/interp/Tensor.cc
deleted file mode 100644 (file)
index de095c9..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Tensor.h"
-
-#define NO_USE(a) (void)(a)
-
-namespace onert
-{
-namespace interp
-{
-
-void ITensor::access(const std::function<void(backend::ITensor &tensor)> &fn) { fn(*this); }
-
-size_t ROTensor::calcOffset(const ir::Coordinates &coords) const
-{
-  NO_USE(coords);
-  throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now.");
-}
-
-size_t Tensor::calcOffset(const ir::Coordinates &coords) const
-{
-  NO_USE(coords);
-  throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now.");
-}
-
-ir::Layout ROTensor::layout() const
-{
-  // TODO Change to return the frontend layout
-  return ir::Layout::NHWC;
-}
-
-ir::Layout Tensor::layout() const
-{
-  // TODO Change to return the frontend layout
-  return ir::Layout::NHWC;
-}
-
-ir::Shape Tensor::getShape() const { return _info.shape(); }
-
-ir::Shape ROTensor::getShape() const { return _info.shape(); }
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/Tensor.h b/runtime/onert/core/src/interp/Tensor.h
deleted file mode 100644 (file)
index 642fdc1..0000000
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file  Tensor.h
- * @brief This file contains ITensor interface, ROTensor class, and Tensor class
- */
-#ifndef __ONERT_INTERP_TENSOR_H__
-#define __ONERT_INTERP_TENSOR_H__
-
-#include "Buffer.h"
-
-#include "ir/OperandInfo.h"
-#include "backend/ITensor.h"
-#include "ir/Layout.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Interface to handle Tensor in interpreter
- */
-class ITensor : public backend::ITensor
-{
-public:
-  virtual ~ITensor() = default;
-
-public:
-  virtual uint8_t *buffer() const = 0;
-  /**
-   * @brief   Return shared pointer for buffer
-   * @return  Buffer shared pointer
-   */
-  virtual std::shared_ptr<const Buffer> shareBuffer() const = 0;
-  /**
-   * @brief   Return read-only buffer pointer
-   * @return  Read-only buffer pointer
-   */
-  virtual const uint8_t *bufferRO() const = 0;
-  /**
-   * @brief   Return shared pointer for data
-   * @return  Data shared pointer
-   */
-  virtual std::shared_ptr<const ir::Data> shareData() const = 0;
-  /**
-   * @brief     Set internal/external buffer
-   * @param[in] buffer  Buffer pointer
-   */
-  virtual void setBuffer(std::shared_ptr<const Buffer> buffer) = 0;
-  /**
-   * @brief     Set data reference (including constant, input)
-   * @param[in] data  Data pointer
-   */
-  virtual void setData(std::shared_ptr<const ir::Data> data) = 0;
-  virtual void releaseData() = 0;
-
-  virtual size_t total_size() const = 0;
-  virtual size_t calcOffset(const ir::Coordinates &coords) const = 0;
-
-  virtual bool has_padding() const = 0;
-  /**
-   * @brief   Return data type of tensor
-   * @return  Data type of tensor
-   */
-  virtual ir::DataType data_type() const = 0;
-  /**
-   * @brief   Return TensorInfo
-   * @return  TensorInfo
-   */
-  virtual const ir::OperandInfo &tensorInfo() const = 0;
-  /**
-   * @brief   Return number of elements
-   * @return  Number of elements
-   */
-  virtual uint64_t num_elements() const = 0;
-  void access(const std::function<void(backend::ITensor &tensor)> &fn) final;
-};
-
-/**
- * @brief Class to handle tensor in interpreter as read-only
- */
-class ROTensor final : public ITensor
-{
-public:
-  ROTensor() = delete;
-  ROTensor(const ir::OperandInfo &info) : _info(info)
-  {
-    // DO NOTHING
-  }
-
-public:
-  uint8_t *buffer() const override { throw std::runtime_error{"Read only tensor"}; }
-  std::shared_ptr<const Buffer> shareBuffer() const override
-  {
-    throw std::runtime_error{"Read only tensor"};
-  }
-  const uint8_t *bufferRO() const override { return _data->base(); }
-  std::shared_ptr<const ir::Data> shareData() const override { return _data; }
-  void setBuffer(std::shared_ptr<const Buffer> buffer) override { _data = buffer; }
-  void setData(std::shared_ptr<const ir::Data> data) override { _data = data; }
-  void releaseData() override { _data = nullptr; }
-
-  size_t total_size() const override { return _info.total_size(); }
-  size_t calcOffset(const ir::Coordinates &coords) const override;
-  ir::Layout layout() const override;
-  bool is_dynamic() const override { return false; }
-  bool has_padding() const override { return false; }
-  ir::DataType data_type() const override { return _info.typeInfo().type(); }
-  float data_scale() const override { return _info.typeInfo().scale(); }
-  int32_t data_zero_point() const override { return _info.typeInfo().zero_point(); }
-  const std::vector<float> &data_scales() const override { return _info.typeInfo().scales(); }
-  const std::vector<int32_t> &data_zero_points() const override
-  {
-    return _info.typeInfo().zero_points();
-  }
-  const ir::OperandInfo &tensorInfo() const override { return _info; }
-  uint64_t num_elements() const override { return _info.shape().num_elements(); };
-  ir::Shape getShape() const override;
-
-private:
-  const ir::OperandInfo _info;
-  std::shared_ptr<const ir::Data> _data{nullptr};
-};
-
-/**
- * @brief Class to handle tensor in interpreter as writable
- */
-class Tensor final : public ITensor
-{
-public:
-  Tensor() = delete;
-  Tensor(const ir::OperandInfo &info) : _info(info)
-  {
-    // DO NOTHING
-  }
-
-public:
-  uint8_t *buffer() const override { return _buffer->baseWritable(); }
-  std::shared_ptr<const Buffer> shareBuffer() const override { return _buffer; };
-  const uint8_t *bufferRO() const override { return _buffer->base(); }
-  std::shared_ptr<const ir::Data> shareData() const override { return _buffer; }
-  void setBuffer(std::shared_ptr<const Buffer> buffer) override { _buffer = buffer; }
-  void setData(std::shared_ptr<const ir::Data>) override
-  {
-    throw std::runtime_error{"Passed data may read-only"};
-  }
-  void releaseData() override { _buffer = nullptr; }
-
-  size_t total_size() const override { return _info.total_size(); }
-  size_t calcOffset(const ir::Coordinates &coords) const override;
-  ir::Layout layout() const override;
-  bool is_dynamic() const override { return false; }
-  bool has_padding() const override { return false; }
-  ir::DataType data_type() const override { return _info.typeInfo().type(); }
-  float data_scale() const override { return _info.typeInfo().scale(); }
-  int32_t data_zero_point() const override { return _info.typeInfo().zero_point(); }
-  const std::vector<float> &data_scales() const override { return _info.typeInfo().scales(); }
-  const std::vector<int32_t> &data_zero_points() const override
-  {
-    return _info.typeInfo().zero_points();
-  }
-  const ir::OperandInfo &tensorInfo() const override { return _info; }
-  uint64_t num_elements() const override { return _info.shape().num_elements(); };
-  ir::Shape getShape() const override;
-
-private:
-  const ir::OperandInfo _info;
-  std::shared_ptr<const Buffer> _buffer{nullptr};
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_TENSOR_H__
diff --git a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
deleted file mode 100644 (file)
index fe4acd3..0000000
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/BinaryArithmetic.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-#include <cker/Types.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-enum class OpType
-{
-  ADD,
-  SUB,
-  MUL
-};
-
-void prepare(ExecEnv *env, const ir::Operation &node)
-{
-  const auto &arithmetic_node =
-    nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
-
-  const auto lhs_index = node.getInputs().at(arithmetic_node.LHS);
-  const auto rhs_index = node.getInputs().at(arithmetic_node.RHS);
-  const auto out_index = node.getOutputs().at(0);
-
-  const auto lhs_tensor = env->tensorAt(lhs_index);
-  const auto rhs_tensor = env->tensorAt(rhs_index);
-
-  // Check that lhs has the same shape and type as rhs
-  // TODO Util function to compare TensorInfo
-  if (lhs_tensor->data_type() != rhs_tensor->data_type())
-  {
-    throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Different input types"};
-  }
-
-  bool try_broadcast = (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape());
-  if (try_broadcast)
-  {
-    bool success = true;
-    auto out_shape = calcBroadcastShape(lhs_tensor->tensorInfo().shape(),
-                                        rhs_tensor->tensorInfo().shape(), success);
-    if (!success)
-    {
-      throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Fail to brodcasting"};
-    }
-
-    auto output_info =
-      ir::OperandInfo::createStaticInfo(out_shape, lhs_tensor->tensorInfo().typeInfo());
-    // The output may already be allocated (e.g. model output)
-    env->allocateIfNeeded(out_index, output_info);
-  }
-  else
-  {
-    // Output's shape and type are the same as the input's
-    auto output_info = lhs_tensor->tensorInfo();
-    // The output may already be allocated (e.g. model output)
-    env->allocateIfNeeded(out_index, output_info);
-  }
-
-  auto out_tensor = env->tensorAt(out_index);
-  // Check that lhs has the same shape and type as the output
-  // TODO Util function to compare TensorInfo
-  if (lhs_tensor->data_type() != out_tensor->data_type())
-  {
-    throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Invalid output type"};
-  }
-}
-
-inline void setActivationParams(float min, float max, nnfw::cker::BinaryArithmeticOpParam *params)
-{
-  params->float_activation_min = min;
-  params->float_activation_max = max;
-}
-
-inline void setActivationParams(int32_t min, int32_t max,
-                                nnfw::cker::BinaryArithmeticOpParam *params)
-{
-  params->quantized_activation_min = min;
-  params->quantized_activation_max = max;
-}
-
-template <typename raw_type, OpType op_type>
-void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor *out_tensor,
-            const ir::operation::BinaryArithmetic::Param &param)
-{
-  const auto lhs_buffer = lhs_tensor->bufferRO();
-  const auto rhs_buffer = rhs_tensor->bufferRO();
-  auto out_buffer = out_tensor->buffer();
-
-  nnfw::cker::BinaryArithmeticOpParam cker_param;
-  raw_type activation_min, activation_max;
-  calculateActivationRange(param.activation, &activation_min, &activation_max);
-  setActivationParams(activation_min, activation_max, &cker_param);
-  const raw_type *lhs_ptr = reinterpret_cast<const raw_type *>(lhs_buffer);
-  const raw_type *rhs_ptr = reinterpret_cast<const raw_type *>(rhs_buffer);
-  raw_type *out_ptr = reinterpret_cast<raw_type *>(out_buffer);
-
-  const auto cker_op_type =
-    (op_type == OpType::ADD) ? nnfw::cker::BinaryArithmeticOpType::ADD
-                             : ((op_type == OpType::SUB) ? nnfw::cker::BinaryArithmeticOpType::SUB
-                                                         : nnfw::cker::BinaryArithmeticOpType::MUL);
-
-  const bool need_broadcast =
-    nnfw::cker::ProcessBroadcastShapes(convertShape(lhs_tensor->tensorInfo().shape()),
-                                       convertShape(rhs_tensor->tensorInfo().shape()), &cker_param);
-
-  if (need_broadcast)
-  {
-    const auto lhs_shape = convertShape(lhs_tensor->tensorInfo().shape());
-    const auto rhs_shape = convertShape(rhs_tensor->tensorInfo().shape());
-    const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
-    nnfw::cker::BroadcastBinaryArithmeticOp<cker_op_type>(cker_param, lhs_shape, lhs_ptr, rhs_shape,
-                                                          rhs_ptr, out_shape, out_ptr);
-    return;
-  }
-
-  const auto lhs_shape = convertShape(lhs_tensor->tensorInfo().shape());
-  const auto rhs_shape = convertShape(rhs_tensor->tensorInfo().shape());
-  const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
-  nnfw::cker::BinaryArithmeticOp<cker_op_type>(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr,
-                                               out_shape, out_ptr);
-}
-
-template <OpType op_type>
-void invokeBinaryArithmetic(const ExecEnv *env, const ir::operation::BinaryArithmetic &node)
-{
-  const auto lhs_index = node.getInputs().at(node.LHS);
-  const auto rhs_index = node.getInputs().at(node.RHS);
-  const auto out_index = node.getOutputs().at(0);
-  const auto lhs_tensor = env->tensorAt(lhs_index);
-  const auto rhs_tensor = env->tensorAt(rhs_index);
-  const auto out_tensor = env->tensorAt(out_index);
-  const auto data_type = lhs_tensor->data_type();
-
-  if (data_type == ir::DataType::INT32)
-  {
-    invoke<int32_t, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param());
-  }
-  else if (data_type == ir::DataType::FLOAT32)
-  {
-    invoke<float, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param());
-  }
-  else
-  {
-    throw std::runtime_error{"NYI: Unsupported data type"};
-  }
-}
-
-void invokeBinaryArithmeticOps(const ExecEnv *env, const ir::Operation &node)
-{
-  const auto &arithmetic_node =
-    nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
-
-  switch (arithmetic_node.param().arithmetic_type)
-  {
-    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
-      invokeBinaryArithmetic<OpType::ADD>(env, arithmetic_node);
-      break;
-    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
-      invokeBinaryArithmetic<OpType::SUB>(env, arithmetic_node);
-      break;
-    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
-      invokeBinaryArithmetic<OpType::MUL>(env, arithmetic_node);
-      break;
-    default:
-      throw std::runtime_error{"Interp(BinaryArithmetic): NYI unsupported operation " +
-                               arithmetic_node.name()};
-      break;
-  }
-}
-
-} // namespace
-
-OpKernel *getBinaryArithmetic()
-{
-  static OpKernel kernel = {prepare, invokeBinaryArithmeticOps};
-  return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Concat.cc b/runtime/onert/core/src/interp/operations/Concat.cc
deleted file mode 100644 (file)
index 1036046..0000000
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/Concat.h"
-
-#include <cker/operation/Concatenation.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace concat
-{
-
-void prepareConcat(ExecEnv *env, const ir::Operation &node)
-{
-  const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node);
-
-  const auto first_index = node.getInputs().at(0);
-  const auto out_index = node.getOutputs().at(0);
-
-  const auto first_tensor = env->tensorAt(first_index);
-  uint32_t out_axis_dimension = 0;
-  const int32_t axis_raw = concat_node.param().axis;
-  const int32_t axis = (axis_raw < 0) ? (axis_raw + first_tensor->getShape().rank()) : axis_raw;
-
-  // All input shapes should be the same except for the axis dimension
-  // All input types should be the same
-  for (auto input : node.getInputs())
-  {
-    assert(first_tensor->getShape().rank() == env->tensorAt(input)->getShape().rank());
-    assert(first_tensor->data_type() == env->tensorAt(input)->data_type());
-    for (int i = 0; i < first_tensor->getShape().rank(); i++)
-    {
-      if (i == axis)
-      {
-        out_axis_dimension += env->tensorAt(input)->getShape().dim(i);
-        continue;
-      }
-      assert(first_tensor->getShape().dim(i) == env->tensorAt(input)->getShape().dim(i));
-    }
-  }
-
-  // Make output tensor info using the first input tensor info and the accumulated axis dimension value
-  auto out_shape = first_tensor->tensorInfo().shape();
-  out_shape.dim(axis) = out_axis_dimension;
-  env->allocateIfNeeded(
-    out_index, ir::OperandInfo::createStaticInfo(out_shape, first_tensor->tensorInfo().typeInfo()));
-
-  auto out_tensor = env->tensorAt(out_index);
-  UNUSED_RELEASE(out_tensor);
-
-  // Output shape should be the same as the input's except for the axis dimension
-  // Output type should be the same as the input's
-  assert(first_tensor->data_type() == out_tensor->data_type());
-  for (int i = 0; i < first_tensor->getShape().rank(); i++)
-  {
-    if (i == axis)
-    {
-      continue;
-    }
-    assert(first_tensor->getShape().dim(i) == out_tensor->getShape().dim(i));
-  }
-}
-
-void invoke(const std::vector<const ITensor *> &in_tensors, const ITensor *out_tensor, uint32_t axis)
-{
-  const uint32_t count = in_tensors.size();
-
-  // Calculate
-  nnfw::cker::ConcatenationParams cker_param;
-  cker_param.axis = (int8_t)axis;
-  cker_param.inputs_count = count;
-
-  const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
-
-  std::vector<nnfw::cker::Shape> in_shapes;
-  std::vector<const nnfw::cker::Shape *> in_shape_ptrs;
-  in_shapes.reserve(count);
-  in_shape_ptrs.reserve(count);
-  std::vector<const float *> in_ptrs;
-  for (uint32_t i = 0; i < count; i++)
-  {
-    in_shapes.push_back(convertShape(in_tensors[i]->tensorInfo().shape()));
-    in_shape_ptrs.push_back(&in_shapes[i]);
-    in_ptrs.push_back(reinterpret_cast<const float *>(in_tensors[i]->bufferRO()));
-  }
-
-  auto out_buffer = out_tensor->buffer();
-  float *out_ptr = reinterpret_cast<float *>(out_buffer);
-
-  nnfw::cker::Concatenation<float>(cker_param, in_shape_ptrs.data(), in_ptrs.data(), out_shape,
-                                   out_ptr);
-}
-
-void invokeConcat(const ExecEnv *env, const ir::Operation &node)
-{
-  const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node);
-  const int32_t axis_raw = concat_node.param().axis;
-
-  std::vector<const ITensor *> in_tensors;
-  for (const auto &e : concat_node.getInputs())
-  {
-    in_tensors.emplace_back(env->tensorAt(e));
-  }
-
-  const auto out_index = node.getOutputs().at(0);
-  const auto out_tensor = env->tensorAt(out_index);
-  const uint32_t axis = (axis_raw < 0) ? (axis_raw + out_tensor->getShape().rank()) : axis_raw;
-
-  const auto data_type = in_tensors[0]->data_type();
-  if (data_type == ir::DataType::FLOAT32)
-  {
-    invoke(in_tensors, out_tensor, axis);
-  }
-  else
-  {
-    throw std::runtime_error{"NYI: Support float32 only"};
-  }
-}
-} // namespace concat
-
-OpKernel *getConcat()
-{
-  static OpKernel kernel = {concat::prepareConcat, concat::invokeConcat};
-  return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Conv2D.cc b/runtime/onert/core/src/interp/operations/Conv2D.cc
deleted file mode 100644 (file)
index 72c2057..0000000
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/Conv2D.h"
-#include "util/ShapeInference.h"
-#include "util/Utils.h"
-
-#include <cker/operation/Conv.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace conv2d
-{
-
-void prepareConv2D(ExecEnv *env, const ir::Operation &node)
-{
-  const auto in_index = node.getInputs().at(ir::operation::Conv2D::INPUT);
-  const auto kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
-  const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
-  const auto out_index = node.getOutputs().at(0);
-
-  const auto in_tensor = env->tensorAt(in_index);
-  const auto kernel_tensor = env->tensorAt(kernel_index);
-  const auto bias_tensor = env->tensorAt(bias_index);
-
-  assert(in_tensor->getShape().rank() == 4);
-  assert(kernel_tensor->getShape().rank() == 4);
-  assert(bias_tensor->getShape().rank() == 1);
-
-  UNUSED_RELEASE(in_tensor);
-  UNUSED_RELEASE(kernel_tensor);
-  UNUSED_RELEASE(bias_tensor);
-
-  const auto output_info = env->graph().operands().at(out_index).info();
-  if (output_info.total_size() == 0)
-  {
-    // Handle unspecified output shape
-    const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);
-    const auto infered_output_shape = shape_inference::inferConv2DShape(
-      in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(), conv_node.param());
-    env->allocateIfNeeded(
-      out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
-  }
-  else
-  {
-    env->allocateIfNeeded(out_index, output_info);
-  }
-
-  auto out_tensor = env->tensorAt(out_index);
-  UNUSED_RELEASE(out_tensor);
-
-  // Handle same ifm & ofm data type only
-  assert(in_tensor->data_type() == out_tensor->data_type());
-  assert(out_tensor->getShape().rank() == 4);
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
-            const ITensor *ofm_tensor, const ir::operation::Conv2D::Param &param)
-{
-  // TODO Support NCHW frontend
-  const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
-  const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
-  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
-  const auto &ker_shape = ker_tensor->tensorInfo().shape();
-  const auto ker_height = ker_shape.dim(1);
-  const auto ker_width = ker_shape.dim(2);
-  const auto padding =
-    ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, ker_width, ker_height);
-
-  // Calculate
-  float activation_min, activation_max;
-  calculateActivationRange(param.activation, &activation_min, &activation_max);
-
-  nnfw::cker::ConvParams cker_param;
-  cker_param.padding_type = convertPaddingType(param.padding.type);
-  cker_param.padding_values.width = padding.left;
-  cker_param.padding_values.height = padding.top;
-  cker_param.stride_width = param.stride.horizontal;
-  cker_param.stride_height = param.stride.vertical;
-  cker_param.dilation_width_factor = 1;
-  cker_param.dilation_height_factor = 1;
-  cker_param.float_activation_min = activation_min;
-  cker_param.float_activation_max = activation_max;
-
-  const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
-  const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
-  const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
-  const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
-  const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
-  const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
-  const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO());
-  float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
-
-  nnfw::cker::Conv conv_kernel;
-  conv_kernel(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, cker_bias_shape,
-              bias_ptr, cker_ofm_shape, ofm_ptr);
-}
-
-void invokeConv2D(const ExecEnv *env, const ir::Operation &node)
-{
-  const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);
-
-  const auto ifm_index = node.getInputs().at(ir::operation::Conv2D::INPUT);
-  const auto ker_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
-  const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
-  const auto ofm_index = node.getOutputs().at(0);
-
-  const auto ifm_tensor = env->tensorAt(ifm_index);
-  const auto ker_tensor = env->tensorAt(ker_index);
-  const auto bias_tensor = env->tensorAt(bias_index);
-  const auto ofm_tensor = env->tensorAt(ofm_index);
-
-  const auto data_type = ifm_tensor->data_type();
-  if (data_type == ir::DataType::FLOAT32)
-  {
-    invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
-  }
-  else
-  {
-    throw std::runtime_error{"NYI: Support float32 only"};
-  }
-}
-} // namespace conv2d
-
-OpKernel *getConv2D()
-{
-  static OpKernel kernel = {conv2d::prepareConv2D, conv2d::invokeConv2D};
-  return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
deleted file mode 100644 (file)
index 9f52744..0000000
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/DepthwiseConv2D.h"
-#include "util/ShapeInference.h"
-#include "util/Utils.h"
-
-#include <cker/operation/DepthwiseConv.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-
-namespace
-{
-
-void prepareDepthwiseConv(ExecEnv *env, const ir::Operation &node)
-{
-  const auto in_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT);
-  const auto kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
-  const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
-  const auto out_index = node.getOutputs().at(0);
-
-  const auto in_tensor = env->tensorAt(in_index);
-  const auto kernel_tensor = env->tensorAt(kernel_index);
-  const auto bias_tensor = env->tensorAt(bias_index);
-
-  assert(in_tensor->getShape().rank() == 4);
-  assert(kernel_tensor->getShape().rank() == 4);
-  assert(bias_tensor->getShape().rank() == 1);
-
-  UNUSED_RELEASE(in_tensor);
-  UNUSED_RELEASE(kernel_tensor);
-  UNUSED_RELEASE(bias_tensor);
-
-  // TODO handle unspecified output shape:
-  //      calculate output shape using ifm shape, kernel shape, padding, stride
-  const auto output_info = env->graph().operands().at(out_index).info();
-  if (output_info.total_size() == 0)
-  {
-    // Handle unspecified output shape
-    const auto &depth_conv_node =
-      nnfw::misc::polymorphic_downcast<const ir::operation::DepthwiseConv2D &>(node);
-    const auto infered_output_shape = shape_inference::inferDepthwiseConv2DShape(
-      in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(),
-      depth_conv_node.param());
-    env->allocateIfNeeded(
-      out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
-  }
-  else
-  {
-    env->allocateIfNeeded(out_index, output_info);
-  }
-
-  auto out_tensor = env->tensorAt(out_index);
-  UNUSED_RELEASE(out_tensor);
-
-  // Handle same ifm & ofm data type only
-  assert(in_tensor->data_type() == out_tensor->data_type());
-  assert(out_tensor->getShape().rank() == 4);
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
-            const ITensor *ofm_tensor, const ir::operation::DepthwiseConv2D::Param &param)
-{
-  // TODO Support NCHW frontend
-  const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
-  const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
-  // Kernel format is [1, kernel_height, kernel_width, depth_out].
-  const auto &ker_shape = ker_tensor->tensorInfo().shape();
-  const auto ker_height = ker_shape.dim(1);
-  const auto ker_width = ker_shape.dim(2);
-  const auto padding =
-    ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, ker_width, ker_height);
-
-  // Calculate
-  float activation_min, activation_max;
-  calculateActivationRange(param.activation, &activation_min, &activation_max);
-
-  nnfw::cker::DepthwiseConvParams cker_param;
-  cker_param.padding_values.width = padding.left;
-  cker_param.padding_values.height = padding.top;
-  cker_param.depth_multiplier = param.multiplier;
-  cker_param.stride_width = param.stride.horizontal;
-  cker_param.stride_height = param.stride.vertical;
-  cker_param.dilation_width_factor = 1;
-  cker_param.dilation_height_factor = 1;
-  cker_param.float_activation_min = activation_min;
-  cker_param.float_activation_max = activation_max;
-
-  const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
-  const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
-  const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
-  const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
-  const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
-  const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
-  const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO());
-  float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
-
-  nnfw::cker::DepthwiseConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
-                            cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr, nullptr);
-}
-
-void invokeDepthwiseConv(const ExecEnv *env, const ir::Operation &node)
-{
-  const auto &conv_node = static_cast<const ir::operation::DepthwiseConv2D &>(node);
-
-  const auto ifm_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT);
-  const auto ker_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
-  const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
-  const auto ofm_index = node.getOutputs().at(0);
-
-  const auto ifm_tensor = env->tensorAt(ifm_index);
-  const auto ker_tensor = env->tensorAt(ker_index);
-  const auto bias_tensor = env->tensorAt(bias_index);
-  const auto ofm_tensor = env->tensorAt(ofm_index);
-
-  const auto data_type = ifm_tensor->data_type();
-  if (data_type == ir::DataType::FLOAT32)
-  {
-    invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
-  }
-  else
-  {
-    throw std::runtime_error{"NYI: Support float32 only"};
-  }
-}
-
-} // namespace
-
-OpKernel *getDepthwiseConv2D()
-{
-  static OpKernel kernel = {prepareDepthwiseConv, invokeDepthwiseConv};
-  return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc
deleted file mode 100644 (file)
index e13080e..0000000
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/ElementwiseActivation.h"
-
-#include <cker/operation/Logistic.h>
-#include <cker/operation/Tanh.h>
-#include <misc/polymorphic_downcast.h>
-
-#include <cmath>
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-enum class ActivationType
-{
-  Logistic,
-  ReLU,
-  Tanh
-};
-
-void prepare(ExecEnv *env, const ir::Operation &node)
-{
-  const auto input_index = node.getInputs().at(0);
-  const auto output_index = node.getOutputs().at(0);
-
-  const auto input_tensor = env->tensorAt(input_index);
-
-  const auto output_info = env->graph().operands().at(output_index).info();
-  if (output_info.total_size() == 0)
-  {
-    // Output's shape and type are the same as the input's
-    auto input_info = input_tensor->tensorInfo();
-    // We can handle already allocated (ex. model output)
-    env->allocateIfNeeded(output_index, input_info);
-  }
-  else
-  {
-    env->allocateIfNeeded(output_index, output_info);
-  }
-
-  const auto output_tensor = env->tensorAt(output_index);
-  // Check shape and type lhs is same with output
-  // TODO Util function to compare TensorInfo
-  if (input_tensor->data_type() != output_tensor->data_type())
-  {
-    throw std::runtime_error{"Interp(ElementwiseActivation): Invalid output type"};
-  }
-}
-
-template <ActivationType act_type>
-void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements, float alpha,
-               float beta)
-{
-  std::function<float(const float &)> fn = [](const float &) { return std::nanf(""); };
-  switch (act_type)
-  {
-    case ActivationType::ReLU:
-      fn = [alpha, beta](const float &in) { return std::min(std::max(beta, in), alpha); };
-      break;
-    case ActivationType::Tanh:
-      fn = [](const float &in) { return std::tanh(in); };
-      break;
-    default:
-      throw std::runtime_error{"Interp(ElementwiseActivation): NYI - Unsupported activation"};
-      break;
-  }
-
-  const float *input_end = input_ptr + num_elements;
-  for (; input_ptr < input_end; input_ptr++, output_ptr++)
-  {
-    *output_ptr = fn(*input_ptr);
-  }
-}
-
-template <ActivationType act_type> void invoke(const ExecEnv *env, const ir::Operation &node)
-{
-  const auto input_index = node.getInputs().at(0);
-  const auto output_index = node.getOutputs().at(0);
-
-  // Check lhs shape is same with rhs (with broadcast)
-  const auto input_tensor = env->tensorAt(input_index);
-  const auto output_tensor = env->tensorAt(output_index);
-
-  const auto data_type = input_tensor->data_type();
-  if (data_type == ir::DataType::FLOAT32)
-  {
-    uint64_t elements = input_tensor->num_elements();
-    const float *input_start = reinterpret_cast<const float *>(input_tensor->bufferRO());
-    float *out = reinterpret_cast<float *>(output_tensor->buffer());
-    if (act_type == ActivationType::Logistic)
-    {
-      const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
-      const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
-      nnfw::cker::Logistic(cker_input_shape, input_start, cker_output_shape, out);
-    }
-    else
-    {
-      const auto &act_node =
-        nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
-      evalFloat<act_type>(input_start, out, elements, act_node.param().alpha,
-                          act_node.param().beta);
-    }
-  }
-  else
-  {
-    throw std::runtime_error{"Interp(" + node.name() + "): NYI - Support float only"};
-  }
-}
-
-void invokeElementwiseActivation(const ExecEnv *env, const ir::Operation &node)
-{
-  const auto &act_node =
-    nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
-  switch (act_node.param().op_type)
-  {
-    case ir::operation::ElementwiseActivation::Type::LOGISTIC:
-      invoke<ActivationType::Logistic>(env, node);
-      break;
-    case ir::operation::ElementwiseActivation::Type::RELU:
-      invoke<ActivationType::ReLU>(env, node);
-      break;
-    case ir::operation::ElementwiseActivation::Type::TANH:
-      invoke<ActivationType::Tanh>(env, node);
-      break;
-    default:
-      throw std::runtime_error("Interp(" + node.name() + "): NYI - Unsupported activation");
-  }
-}
-
-} // namespace
-
-OpKernel *getElementwiseActivation()
-{
-  static OpKernel kernel = {prepare, invokeElementwiseActivation};
-  return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/FullyConnected.cc b/runtime/onert/core/src/interp/operations/FullyConnected.cc
deleted file mode 100644 (file)
index 2bc9f51..0000000
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/FullyConnected.h"
-
-#include <cker/operation/FullyConnected.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace fc
-{
-
-void prepareFC(ExecEnv *env, const ir::Operation &node)
-{
-  const auto in_index = node.getInputs().at(ir::operation::FullyConnected::INPUT);
-  const auto kernel_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
-  const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
-  const auto out_index = node.getOutputs().at(0);
-
-  const auto in_tensor = env->tensorAt(in_index);
-  const auto kernel_tensor = env->tensorAt(kernel_index);
-  const auto bias_tensor = env->tensorAt(bias_index);
-
-  UNUSED_RELEASE(in_tensor);
-  UNUSED_RELEASE(kernel_tensor);
-  UNUSED_RELEASE(bias_tensor);
-
-  assert(in_tensor->getShape().rank() >= 2);
-  assert(kernel_tensor->getShape().rank() == 2);
-  assert(bias_tensor->getShape().rank() == 1);
-
-  const auto input_size_with_batch = in_tensor->num_elements();
-  const auto num_units = kernel_tensor->getShape().dim(0);
-  const auto input_size = kernel_tensor->getShape().dim(1);
-  const int32_t batch_size = input_size_with_batch / input_size;
-  assert(input_size_with_batch % input_size == 0);
-  assert(num_units == bias_tensor->getShape().dim(0));
-
-  // Make output tensor info
-  ir::Shape output_shape(2);
-  output_shape.dim(0) = batch_size;
-  output_shape.dim(1) = num_units;
-  const auto out_info =
-    ir::OperandInfo::createStaticInfo(output_shape, in_tensor->tensorInfo().typeInfo());
-  env->allocateIfNeeded(out_index, out_info);
-
-  auto out_tensor = env->tensorAt(out_index);
-  UNUSED_RELEASE(out_tensor);
-
-  // Handle same ifm & ofm data type only
-  assert(in_tensor->data_type() == out_tensor->data_type());
-  assert(out_tensor->getShape().rank() == 2);
-  assert(out_tensor->getShape().dim(0) == batch_size);
-  assert(out_tensor->getShape().dim(1) == num_units);
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
-            const ITensor *ofm_tensor, const ir::operation::FullyConnected::Param &param)
-{
-  const auto ifm_buffer = ifm_tensor->bufferRO();
-  const auto ker_buffer = ker_tensor->bufferRO();
-  const auto bias_buffer = bias_tensor->bufferRO();
-  auto ofm_buffer = ofm_tensor->buffer();
-
-  // Calculate
-  nnfw::cker::FullyConnectedParams cker_param;
-  cker_param.activation = convertActivationType(param.activation);
-  const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
-  const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
-  const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
-  const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
-  const float *ifm_ptr = reinterpret_cast<const float *>(ifm_buffer);
-  const float *ker_ptr = reinterpret_cast<const float *>(ker_buffer);
-  const float *bias_ptr = reinterpret_cast<const float *>(bias_buffer);
-  float *ofm_ptr = reinterpret_cast<float *>(ofm_buffer);
-
-  nnfw::cker::FullyConnected(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
-                             cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr);
-}
-
-void invokeFC(const ExecEnv *env, const ir::Operation &node)
-{
-  const auto &conv_node =
-    nnfw::misc::polymorphic_downcast<const ir::operation::FullyConnected &>(node);
-
-  const auto ifm_index = node.getInputs().at(ir::operation::FullyConnected::INPUT);
-  const auto ker_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
-  const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
-  const auto ofm_index = node.getOutputs().at(0);
-
-  const auto ifm_tensor = env->tensorAt(ifm_index);
-  const auto ker_tensor = env->tensorAt(ker_index);
-  const auto bias_tensor = env->tensorAt(bias_index);
-  const auto ofm_tensor = env->tensorAt(ofm_index);
-
-  const auto data_type = ifm_tensor->data_type();
-  if (data_type == ir::DataType::FLOAT32)
-  {
-    invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
-  }
-  else
-  {
-    throw std::runtime_error{"NYI: Support float only"};
-  }
-}
-} // namespace fc
-
-OpKernel *getFullyConnected()
-{
-  static OpKernel kernel = {fc::prepareFC, fc::invokeFC};
-  return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Gather.cc b/runtime/onert/core/src/interp/operations/Gather.cc
deleted file mode 100644 (file)
index d686cfc..0000000
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/Gather.h"
-
-#include <cker/operation/Gather.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareGather(ExecEnv *env, const ir::Operation &node)
-{
-  const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT);
-  const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES);
-  const auto output_index = node.getOutputs().at(0);
-
-  const auto input_tensor = env->tensorAt(input_index);
-  const auto indices_tensor = env->tensorAt(indices_index);
-
-  // TODO handle unspecified output shape:
-  //      calculate output shape using input shape, indices shape, and axis
-  const auto output_info = env->graph().operands().at(output_index).info();
-  if (output_info.total_size() == 0)
-  {
-    throw std::runtime_error{"Interp(Gather): NYI for unspecified output shape"};
-  }
-  else
-  {
-    env->allocateIfNeeded(output_index, output_info);
-  }
-
-  if (indices_tensor->data_type() != ir::DataType::INT32)
-  {
-    throw std::runtime_error{"Interp(Gather): Invalid indices data type"};
-  }
-
-  auto output_tensor = env->tensorAt(output_index);
-  auto output_rank = input_tensor->getShape().rank() + indices_tensor->getShape().rank() - 1;
-
-  if (output_rank != output_tensor->getShape().rank())
-  {
-    throw std::runtime_error{"Interp(Gather): Invalid output rank"};
-  }
-  if (output_tensor->data_type() != input_tensor->data_type())
-  {
-    throw std::runtime_error{"Interp(Gather): Invalid output data type"};
-  }
-
-  if (input_tensor->data_type() == ir::DataType::QUANT_UINT8_ASYMM &&
-      input_tensor->tensorInfo().typeInfo() != output_tensor->tensorInfo().typeInfo())
-  {
-    throw std::runtime_error{
-      "Interp(Gather): Cannot handle different I/O QUANT_UINT8_ASYMM scale/offset"};
-  }
-}
-
-template <typename raw_type>
-void invoke(const ITensor *input_tensors, const ITensor *indices_tensors,
-            const ITensor *output_tensor, uint32_t axis)
-{
-  // Calculate
-  nnfw::cker::GatherParams cker_param;
-  cker_param.axis = (int8_t)axis;
-
-  const auto cker_input_shapes = convertShape(input_tensors->tensorInfo().shape());
-  const auto cker_indices_shape = convertShape(indices_tensors->tensorInfo().shape());
-  const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
-  const raw_type *input_ptr = reinterpret_cast<const raw_type *>(input_tensors->bufferRO());
-  const int32_t *indices_ptr = reinterpret_cast<const int32_t *>(indices_tensors->bufferRO());
-  raw_type *output_ptr = reinterpret_cast<raw_type *>(output_tensor->buffer());
-
-  nnfw::cker::Gather<raw_type>(cker_param, cker_input_shapes, input_ptr, cker_indices_shape,
-                               indices_ptr, cker_output_shape, output_ptr);
-}
-
-void invokeGather(const ExecEnv *env, const ir::Operation &node)
-{
-  const auto &gather_node = nnfw::misc::polymorphic_downcast<const ir::operation::Gather &>(node);
-  const int32_t axis_raw = gather_node.param().axis;
-
-  const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT);
-  const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES);
-  const auto output_index = node.getOutputs().at(0);
-
-  const auto input_tensor = env->tensorAt(input_index);
-  const auto indices_tensor = env->tensorAt(indices_index);
-  const auto output_tensor = env->tensorAt(output_index);
-  const uint32_t axis = (axis_raw < 0) ? (axis_raw + input_tensor->getShape().rank()) : axis_raw;
-
-  const auto data_type = input_tensor->data_type();
-
-  switch (data_type)
-  {
-    case ir::DataType::FLOAT32:
-      invoke<float>(input_tensor, indices_tensor, output_tensor, axis);
-      break;
-    case ir::DataType::INT32:
-      invoke<int32_t>(input_tensor, indices_tensor, output_tensor, axis);
-      break;
-    case ir::DataType::QUANT_UINT8_ASYMM:
-      invoke<uint8_t>(input_tensor, indices_tensor, output_tensor, axis);
-      break;
-    default:
-      throw std::runtime_error{"Interp(Gather): NYI - Not supported type"};
-  }
-}
-
-} // namespace
-
-OpKernel *getGather()
-{
-  static OpKernel kernel = {prepareGather, invokeGather};
-  return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/InstanceNorm.cc b/runtime/onert/core/src/interp/operations/InstanceNorm.cc
deleted file mode 100644 (file)
index 3180884..0000000
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/InstanceNorm.h"
-
-#include <cker/operation/InstanceNorm.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace instancenorm
-{
-
-void prepareInstanceNorm(ExecEnv *env, const ir::Operation &node)
-{
-  const auto &instancenorm_node =
-    nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
-
-  const auto input_index = node.getInputs().at(instancenorm_node.INPUT);
-  const auto output_index = node.getOutputs().at(0);
-  const auto input_tensor = env->tensorAt(input_index);
-
-  if (input_tensor->getShape().rank() != 4)
-  {
-    throw std::runtime_error{"Interp(InstanceNorm): Input should be 4D-tensor"};
-  }
-
-  // Output shape should be same with input
-  env->allocateIfNeeded(output_index, input_tensor->tensorInfo());
-
-  auto output_tensor = env->tensorAt(output_index);
-  UNUSED_RELEASE(output_tensor);
-
-  // Handle same ifm & ofm data type only
-  assert(input_tensor->data_type() == output_tensor->data_type());
-  assert(input_tensor->tensorInfo().shape() == output_tensor->tensorInfo().shape());
-}
-
-inline void setActivationParams(float min, float max, nnfw::cker::InstanceNormParams *params)
-{
-  params->float_activation_min = min;
-  params->float_activation_max = max;
-}
-
-void invoke(const ITensor *input_tensor, const ITensor *gamma_tensor, const ITensor *beta_tensor,
-            const ITensor *output_tensor, const ir::operation::InstanceNorm::Param &param)
-{
-  // Calculate
-  float activation_min, activation_max;
-  calculateActivationRange(param.activation, &activation_min, &activation_max);
-
-  nnfw::cker::InstanceNormParams cker_param;
-  cker_param.epsilon = param.epsilon;
-  cker_param.float_activation_min = activation_min;
-  cker_param.float_activation_max = activation_max;
-
-  const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
-  const auto cker_gamma_shape = convertShape(gamma_tensor->tensorInfo().shape());
-  const auto cker_beta_shape = convertShape(beta_tensor->tensorInfo().shape());
-  const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
-  const float *input_ptr = reinterpret_cast<const float *>(input_tensor->bufferRO());
-  const float *gamma_ptr = reinterpret_cast<const float *>(gamma_tensor->bufferRO());
-  const float *beta_ptr = reinterpret_cast<const float *>(beta_tensor->bufferRO());
-  float *output_ptr = reinterpret_cast<float *>(output_tensor->buffer());
-
-  nnfw::cker::InstanceNorm(cker_param, cker_input_shape, input_ptr, cker_gamma_shape, gamma_ptr,
-                           cker_beta_shape, beta_ptr, cker_output_shape, output_ptr);
-}
-
-void invokeInstanceNorm(const ExecEnv *env, const ir::Operation &node)
-{
-  const auto &instancenorm_node =
-    nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
-
-  const auto input_index = node.getInputs().at(instancenorm_node.INPUT);
-  const auto gamma_index = node.getInputs().at(instancenorm_node.GAMMA);
-  const auto beta_index = node.getInputs().at(instancenorm_node.BETA);
-  const auto out_index = node.getOutputs().at(0);
-  const auto input_tensor = env->tensorAt(input_index);
-  const auto gamma_tensor = env->tensorAt(gamma_index);
-  const auto beta_tensor = env->tensorAt(beta_index);
-  const auto out_tensor = env->tensorAt(out_index);
-  const auto data_type = input_tensor->data_type();
-
-  if (data_type == ir::DataType::FLOAT32)
-  {
-    invoke(input_tensor, gamma_tensor, beta_tensor, out_tensor, instancenorm_node.param());
-  }
-  else
-  {
-    throw std::runtime_error{"NYI: Unsupported data type"};
-  }
-}
-} // namespace instancenorm
-
-OpKernel *getInstanceNorm()
-{
-  static OpKernel kernel = {instancenorm::prepareInstanceNorm, instancenorm::invokeInstanceNorm};
-  return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/OperationUtil.h b/runtime/onert/core/src/interp/operations/OperationUtil.h
deleted file mode 100644 (file)
index 2fdf098..0000000
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_
-#define __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_
-
-#include "ir/Shape.h"
-#include "ir/InternalType.h"
-#include "ir/Padding.h"
-
-#include <cker/Shape.h>
-#include <cker/Types.h>
-
-namespace onert
-{
-namespace interp
-{
-
-inline nnfw::cker::Shape convertShape(const ir::Shape &shape)
-{
-  auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end());
-
-  std::vector<int32_t> raw_shape;
-  raw_shape.resize(dimensions.size());
-
-  for (uint32_t i = 0; i < dimensions.size(); ++i)
-  {
-    raw_shape[i] = dimensions[i];
-  }
-
-  return nnfw::cker::GetShape(raw_shape);
-}
-
-inline nnfw::cker::Shape convertExtendShape(const ir::Shape &shape)
-{
-  auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end());
-
-  const int32_t extended_rank = 4;
-  int32_t raw_shape[extended_rank];
-  uint32_t start = extended_rank - dimensions.size();
-
-  for (uint32_t i = 0; i < extended_rank; ++i)
-  {
-    if (i < start)
-    {
-      raw_shape[i] = 1;
-    }
-    else
-    {
-      raw_shape[i] = dimensions[i - start];
-    }
-  }
-
-  return nnfw::cker::Shape(extended_rank, raw_shape);
-}
-
-inline nnfw::cker::FusedActivationFunctionType
-convertActivationType(const ir::Activation activation)
-{
-  switch (activation)
-  {
-    case ir::Activation::NONE:
-      return nnfw::cker::FusedActivationFunctionType::kNone;
-    case ir::Activation::RELU:
-      return nnfw::cker::FusedActivationFunctionType::kRelu;
-    case ir::Activation::RELU1:
-      return nnfw::cker::FusedActivationFunctionType::kRelu1;
-    case ir::Activation::RELU6:
-      return nnfw::cker::FusedActivationFunctionType::kRelu6;
-    default:
-      throw std::runtime_error{"CPU backend: Cannot convert activation type"};
-  }
-}
-
-template <typename T>
-void calculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
-{
-  if (activation == ir::Activation::RELU)
-  {
-    *activation_min = 0;
-    *activation_max = std::numeric_limits<T>::max();
-  }
-  else if (activation == ir::Activation::RELU6)
-  {
-    *activation_min = 0;
-    *activation_max = 6;
-  }
-  else if (activation == ir::Activation::RELU1)
-  {
-    *activation_min = -1;
-    *activation_max = 1;
-  }
-  else if (activation == ir::Activation::NONE)
-  {
-    *activation_min = std::numeric_limits<T>::lowest();
-    *activation_max = std::numeric_limits<T>::max();
-  }
-  else
-  {
-    throw std::runtime_error{"Unsupported activation type"};
-  }
-}
-
-inline ir::Shape calcBroadcastShape(const ir::Shape &lhs, const ir::Shape &rhs, bool &success)
-{
-  int lhs_rank = lhs.rank();
-  int rhs_rank = rhs.rank();
-
-  int out_rank = (lhs_rank > rhs_rank ? lhs_rank : rhs_rank);
-  ir::Shape out_shape(out_rank);
-
-  int lhs_idim = lhs_rank - 1;
-  int rhs_idim = rhs_rank - 1;
-  success = true;
-  for (int out_idim = out_rank - 1; out_idim >= 0; out_idim--)
-  {
-    if (lhs_idim == -1 && rhs_idim == -1)
-    {
-      // invalid result
-      success = false;
-      break;
-    }
-
-    if (lhs_idim == -1)
-    {
-      out_shape.dim(out_idim) = rhs.dim(rhs_idim);
-      rhs_idim--;
-    }
-    else if (rhs_idim == -1)
-    {
-      out_shape.dim(out_idim) = lhs.dim(lhs_idim);
-      lhs_idim--;
-    }
-    else
-    {
-      if (lhs.dim(lhs_idim) == rhs.dim(rhs_idim))
-      {
-        out_shape.dim(out_idim) = lhs.dim(lhs_idim);
-        lhs_idim--;
-        rhs_idim--;
-      }
-      else if (lhs.dim(lhs_idim) == 1)
-      {
-        out_shape.dim(out_idim) = rhs.dim(rhs_idim);
-        lhs_idim--;
-        rhs_idim--;
-      }
-      else if (rhs.dim(rhs_idim) == 1)
-      {
-        out_shape.dim(out_idim) = lhs.dim(lhs_idim);
-        lhs_idim--;
-        rhs_idim--;
-      }
-      else
-      {
-        // invalid result
-        success = false;
-        break;
-      }
-    }
-  }
-
-  if (lhs_idim != -1 || rhs_idim != -1)
-  {
-    // invalid result
-    success = false;
-  }
-  return out_shape;
-}
-
-inline nnfw::cker::PaddingType convertPaddingType(ir::PaddingType ir_padding_type)
-{
-  switch (ir_padding_type)
-  {
-    case ir::PaddingType::EXPLICIT:
-      return nnfw::cker::PaddingType::kNone;
-    case ir::PaddingType::SAME:
-      return nnfw::cker::PaddingType::kSame;
-    case ir::PaddingType::VALID:
-      return nnfw::cker::PaddingType::kValid;
-    default:
-      throw std::runtime_error("Wrong padding type.");
-      break;
-  }
-}
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_
diff --git a/runtime/onert/core/src/interp/operations/Pad.cc b/runtime/onert/core/src/interp/operations/Pad.cc
deleted file mode 100644 (file)
index 3db0828..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/Pad.h"
-
-#include <cker/operation/Pad.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void preparePad(ExecEnv *env, const ir::Operation &node)
-{
-  const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT);
-  const auto output_index = node.getOutputs().at(0);
-
-  const auto input_tensor = env->tensorAt(input_index);
-
-  const auto output_info = env->graph().operands().at(output_index).info();
-
-  // Check shape and type lhs is same with rhs
-  // TODO Util function to compare TensorInfo
-  if (output_info.total_size() == 0)
-  {
-    throw std::runtime_error{"Interp(Pad): NYI unspecified output shape"};
-  }
-  else
-  {
-    env->allocateIfNeeded(output_index, output_info);
-  }
-
-  const auto output_tensor = env->tensorAt(output_index);
-  if (input_tensor->data_type() != output_tensor->data_type())
-  {
-    throw std::runtime_error{"Interp(Pad): Invalid output type"};
-  }
-}
-
-void invoke(const ITensor *input_tensor, const ITensor *pad_tensor, const ITensor *output_tensor)
-{
-  const auto input_buffer = input_tensor->bufferRO();
-  const auto pad_buffer = pad_tensor->bufferRO();
-  auto output_buffer = output_tensor->buffer();
-
-  int32_t pad_rank = pad_tensor->getShape().dim(0);
-
-  const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
-  const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
-  const float *input_ptr = reinterpret_cast<const float *>(input_buffer);
-  const int32_t *pad_ptr = reinterpret_cast<const int32_t *>(pad_buffer);
-  float *output_ptr = reinterpret_cast<float *>(output_buffer);
-
-  nnfw::cker::Pad<float>(pad_ptr, pad_rank, cker_input_shape, input_ptr, cker_output_shape,
-                         output_ptr, nullptr);
-}
-
-void invokePad(const ExecEnv *env, const ir::Operation &node)
-{
-  const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT);
-  const auto pad_index = node.getInputs().at(ir::operation::Pad::PAD);
-  const auto output_index = node.getOutputs().at(0);
-
-  const auto input_tensor = env->tensorAt(input_index);
-  const auto pad_tensor = env->tensorAt(pad_index);
-  const auto output_tensor = env->tensorAt(output_index);
-
-  const auto data_type = input_tensor->data_type();
-
-  if (data_type == ir::DataType::FLOAT32)
-  {
-    invoke(input_tensor, pad_tensor, output_tensor);
-  }
-  else
-  {
-    throw std::runtime_error{"Interp(Pad): NYI - Unsupported data type"};
-  }
-}
-} // namespace
-
-OpKernel *getPad()
-{
-  static OpKernel kernel = {preparePad, invokePad};
-  return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Pool2D.cc b/runtime/onert/core/src/interp/operations/Pool2D.cc
deleted file mode 100644 (file)
index 3935d47..0000000
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/Pool2D.h"
-#include "util/ShapeInference.h"
-#include "util/Utils.h"
-
-#include <cker/operation/AveragePool.h>
-#include <cker/operation/MaxPool.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace pool2d
-{
-
-void preparePool2D(ExecEnv *env, const ir::Operation &node)
-{
-  const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node);
-  const auto in_index = node.getInputs().at(pool_node.INPUT);
-  const auto out_index = node.getOutputs().at(0);
-
-  const auto in_tensor = env->tensorAt(in_index);
-  UNUSED_RELEASE(in_tensor);
-
-  assert(in_tensor->getShape().rank() == 4);
-
-  const auto output_info = env->graph().operands().at(out_index).info();
-  if (output_info.total_size() == 0)
-  {
-    // Handle unspecified output shape
-    const auto infered_output_shape =
-      shape_inference::inferPoolShape(in_tensor->tensorInfo().shape(), pool_node.param());
-    env->allocateIfNeeded(
-      out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
-  }
-  else
-  {
-    env->allocateIfNeeded(out_index, output_info);
-  }
-
-  auto out_tensor = env->tensorAt(out_index);
-  UNUSED_RELEASE(out_tensor);
-
-  // Handle same ifm & ofm data type only
-  assert(in_tensor->data_type() == out_tensor->data_type());
-  assert(out_tensor->getShape().rank() == 4);
-}
-
-template <typename T>
-void invoke(const nnfw::cker::PoolParams &params, const nnfw::cker::Shape &in_shape,
-            const T *in_ptr, const nnfw::cker::Shape &out_shape, T *out_ptr,
-            ir::operation::Pool2D::PoolType op_type)
-{
-  switch (op_type)
-  {
-    case ir::operation::Pool2D::PoolType::AVG:
-      nnfw::cker::AveragePool<T>(params, in_shape, in_ptr, out_shape, out_ptr);
-      break;
-    case ir::operation::Pool2D::PoolType::MAX:
-      nnfw::cker::MaxPool<T>(params, in_shape, in_ptr, out_shape, out_ptr);
-      break;
-    default:
-      throw std::runtime_error{"Interp(Pool2D): NYI unsupported operation"};
-      break;
-  }
-}
-
-void invokePool2DOps(const ExecEnv *env, const ir::Operation &node)
-{
-  const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node);
-
-  const auto in_index = node.getInputs().at(0);
-  const auto out_index = node.getOutputs().at(0);
-
-  // Check lhs shape is same with rhs (with broadcast)
-  const auto in_tensor = env->tensorAt(in_index);
-  const auto out_tensor = env->tensorAt(out_index);
-
-  // TODO support NCHW frontend
-  const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
-  const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
-  const auto param = pool_node.param();
-  const auto padding =
-    ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh);
-  // Calculate
-  nnfw::cker::PoolParams cker_param;
-  cker_param.filter_width = param.kw;
-  cker_param.filter_height = param.kh;
-  cker_param.padding_values.width = padding.left;
-  cker_param.padding_values.height = padding.top;
-  cker_param.stride_width = param.stride.horizontal;
-  cker_param.stride_height = param.stride.vertical;
-
-  const auto data_type = in_tensor->data_type();
-  if (data_type == ir::DataType::FLOAT32)
-  {
-    calculateActivationRange(param.activation, &cker_param.float_activation_min,
-                             &cker_param.float_activation_max);
-
-    const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
-    const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
-    const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
-    float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
-    // Now, invoke() supports only Pool2D in float
-    invoke<float>(cker_param, in_shape, in_ptr, out_shape, out_ptr, param.op_type);
-  }
-  else
-  {
-    throw std::runtime_error{"NYI: Support float only"};
-  }
-}
-} // namespace pool2d
-
-OpKernel *getPool2D()
-{
-  static OpKernel kernel = {pool2d::preparePool2D, pool2d::invokePool2DOps};
-  return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Reshape.cc b/runtime/onert/core/src/interp/operations/Reshape.cc
deleted file mode 100644 (file)
index 1de5a57..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "../Registration.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepare(ExecEnv *env, const ir::Operation &node)
-{
-  const auto in_index = node.getInputs().at(0);
-  const auto out_index = node.getOutputs().at(0);
-
-  // Unspecified shape is not supported in operation node spec now
-  const auto output_info = env->graph().operands().at(out_index).info();
-  env->allocateAndShareIfNeeded(out_index, output_info, in_index);
-
-  assert(output_info.total_size() == env->graph().operands().at(in_index).info().total_size());
-}
-
-void invoke(const ExecEnv *env, const ir::Operation &node)
-{
-  const auto in_index = node.getInputs().at(0);
-  const auto out_index = node.getOutputs().at(0);
-
-  if (env->tensorAt(in_index)->bufferRO() == env->tensorAt(out_index)->bufferRO())
-  {
-    // Same data
-    return;
-  }
-
-  const auto output_info = env->graph().operands().at(out_index).info();
-  memcpy(env->tensorAt(out_index)->buffer(), env->tensorAt(in_index)->bufferRO(),
-         output_info.total_size());
-}
-
-} // namespace
-
-OpKernel *getReshape()
-{
-  static OpKernel kernel = {prepare, invoke};
-  return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Softmax.cc b/runtime/onert/core/src/interp/operations/Softmax.cc
deleted file mode 100644 (file)
index 8be2f22..0000000
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/Softmax.h"
-
-#include <cker/operation/SoftMax.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareSoftMax(ExecEnv *env, const ir::Operation &node)
-{
-  const auto in_index = node.getInputs().at(0);
-  const auto out_index = node.getOutputs().at(0);
-
-  const auto in_tensor = env->tensorAt(in_index);
-  UNUSED_RELEASE(in_tensor);
-
-  assert((in_tensor->getShape().rank() == 4) || (in_tensor->getShape().rank() == 2));
-
-  // Output shape should be same with input
-  // Output type is pre-defined in model
-  const auto output_shape = env->graph().operands().at(in_index).info().shape();
-  const auto output_type = env->graph().operands().at(out_index).info().typeInfo();
-
-  const auto output_info = ir::OperandInfo::createStaticInfo(output_shape, output_type);
-  env->allocateIfNeeded(out_index, output_info);
-
-  auto out_tensor = env->tensorAt(out_index);
-  UNUSED_RELEASE(out_tensor);
-
-  // Check output shape is same with input
-  assert(in_tensor->getShape().rank() == out_tensor->getShape().rank());
-  for (int32_t i = 0; i < in_tensor->getShape().rank(); i++)
-  {
-    assert(in_tensor->getShape().dim(i) == out_tensor->getShape().dim(i));
-  }
-}
-
-void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
-            const ir::operation::Softmax::Param &param)
-{
-  const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
-  float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
-
-  float beta = param.beta;
-
-  if (in_tensor->getShape().rank() == 2)
-  {
-    uint32_t batch_size = in_tensor->getShape().dim(0);
-    uint32_t input_size = in_tensor->getShape().dim(1);
-
-    nnfw::cker::Softmax(in_ptr, input_size, batch_size, beta, out_ptr);
-  }
-  else if (in_tensor->getShape().rank() == 4)
-  {
-    const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
-    const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
-
-    nnfw::cker::SoftmaxParams cker_param;
-    cker_param.beta = beta;
-
-    nnfw::cker::Softmax(cker_param, in_shape, in_ptr, out_shape, out_ptr);
-  }
-  else
-  {
-    throw std::runtime_error{"Unsuported input dimension: support 2D or 4D"};
-  }
-}
-
-void invokeSoftMax(const ExecEnv *env, const ir::Operation &node)
-{
-  const auto &softmax_node = nnfw::misc::polymorphic_downcast<const ir::operation::Softmax &>(node);
-
-  const auto in_index = node.getInputs().at(0);
-  const auto out_index = node.getOutputs().at(0);
-
-  const auto in_tensor = env->tensorAt(in_index);
-  const auto out_tensor = env->tensorAt(out_index);
-
-  const auto in_data_type = in_tensor->data_type();
-  const auto out_data_type = out_tensor->data_type();
-  if ((in_data_type == ir::DataType::FLOAT32) && (out_data_type == ir::DataType::FLOAT32))
-  {
-    invoke(in_tensor, out_tensor, softmax_node.param());
-  }
-  else
-  {
-    throw std::runtime_error{"NYI: Support float32 only"};
-  }
-}
-
-} // namespace
-
-OpKernel *getSoftmax()
-{
-  static OpKernel kernel = {prepareSoftMax, invokeSoftMax};
-  return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/TransposeConv.cc b/runtime/onert/core/src/interp/operations/TransposeConv.cc
deleted file mode 100644 (file)
index 59c8e8c..0000000
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/TransposeConv.h"
-
-#include <cker/operation/TransposeConv.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareTransposeConv(ExecEnv *env, const ir::Operation &node)
-{
-  const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT);
-  const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
-  const auto ofm_shape_index = node.getInputs().at(ir::operation::TransposeConv::OUTPUT_SHAPE);
-  const auto ofm_index = node.getOutputs().at(0);
-
-  const auto ifm_tensor = env->tensorAt(ifm_index);
-  const auto ker_tensor = env->tensorAt(ker_index);
-  const auto ofm_shape_tensor = env->tensorAt(ofm_shape_index);
-
-  assert(ifm_tensor->getShape().rank() == 4);
-  assert(ker_tensor->getShape().rank() == 4);
-  assert(ofm_shape_tensor->getShape().rank() == 1);
-
-  UNUSED_RELEASE(ifm_tensor);
-  UNUSED_RELEASE(ker_tensor);
-  UNUSED_RELEASE(ofm_shape_tensor);
-
-  const auto output_info = env->graph().operands().at(ofm_index).info();
-  if (output_info.total_size() == 0)
-  {
-    // TODO: Handle unspecified output shape
-    throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"};
-  }
-  else
-  {
-    env->allocateIfNeeded(ofm_index, output_info);
-  }
-
-  auto ofm_tensor = env->tensorAt(ofm_index);
-  UNUSED_RELEASE(ofm_tensor);
-
-  // Handle same ifm & ofm data type only
-  if (ifm_tensor->data_type() != ofm_tensor->data_type())
-  {
-    throw std::runtime_error{"Interp(TConv): Different I/O data dype"};
-  }
-
-  if (ofm_tensor->getShape().rank() != 4)
-  {
-    throw std::runtime_error{"Interp(TConv): Invalid output rank"};
-  }
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *ofm_tensor,
-            const ir::operation::TransposeConv::Param &param)
-{
-  const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
-  const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
-  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
-  const auto ker_shape = ker_tensor->tensorInfo().shape();
-  const auto ker_height = ker_shape.dim(1);
-  const auto ker_width = ker_shape.dim(2);
-  const auto padding =
-    ir::calculatePadding(param.padding, ofm_shape, ifm_shape, param.stride, ker_width, ker_height);
-
-  nnfw::cker::TransposeConvParams cker_param;
-  cker_param.padding_values.width = padding.left;
-  cker_param.padding_values.height = padding.top;
-  cker_param.stride_width = param.stride.horizontal;
-  cker_param.stride_height = param.stride.vertical;
-  cker_param.dilation_width_factor = 1;
-  cker_param.dilation_height_factor = 1;
-
-  const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
-  const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
-  const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
-  const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
-  const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
-  float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
-
-  nnfw::cker::TransposeConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
-                            cker_ofm_shape, ofm_ptr);
-}
-
-void invokeTransposeConv(const ExecEnv *env, const ir::Operation &node)
-{
-  const auto &tconv_node =
-    nnfw::misc::polymorphic_downcast<const ir::operation::TransposeConv &>(node);
-
-  const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT);
-  const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
-  const auto ofm_index = node.getOutputs().at(0);
-
-  const auto ifm_tensor = env->tensorAt(ifm_index);
-  const auto ker_tensor = env->tensorAt(ker_index);
-  const auto ofm_tensor = env->tensorAt(ofm_index);
-
-  const auto data_type = ifm_tensor->data_type();
-  if (data_type == ir::DataType::FLOAT32)
-  {
-    invoke(ifm_tensor, ker_tensor, ofm_tensor, tconv_node.param());
-  }
-  else
-  {
-    throw std::runtime_error{"Interp(TConv): Support float32 only"};
-  }
-}
-
-} // namespace
-
-OpKernel *getTransposeConv()
-{
-  static OpKernel kernel = {prepareTransposeConv, invokeTransposeConv};
-  return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
index a7c50a2..e4e4c15 100644 (file)
@@ -26,10 +26,10 @@ namespace onert
 namespace ir
 {
 
-int32_t const Shape::UNSPECIFIED_DIM = -1;
+int32_t const Shape::kUnspecifiedDim = -1;
 
 // NNFW_MAX_RANK is 6
-int32_t const Shape::MAX_RANK = 6;
+int32_t const Shape::kMaxRank = 6;
 
 FeatureShape Shape::asFeature(Layout layout) const
 {
@@ -80,7 +80,7 @@ uint64_t Shape::num_elements() const
 {
   // if dimension is 0, it means unspecified and cannot calculate the total number of elements
   if (std::any_of(_dimensions.begin(), _dimensions.end(),
-                  [](const int32_t &v) { return v == UNSPECIFIED_DIM; }))
+                  [](const int32_t &v) { return v == kUnspecifiedDim; }))
     throw std::runtime_error("num_elements() cannot calculate when any dimension is unspecified");
 
   return std::accumulate(_dimensions.cbegin(), _dimensions.cend(), UINT64_C(1),
@@ -89,7 +89,7 @@ uint64_t Shape::num_elements() const
 
 Shape permuteShape(const Shape &shape, Layout from, Layout to)
 {
-  assert(shape.rank() <= Shape::MAX_RANK);
+  assert(shape.rank() <= Shape::kMaxRank);
   Shape ret{shape};
   if (from == to)
     return ret;
index afdb292..4788522 100644 (file)
@@ -48,7 +48,7 @@ TEST(ShapeTest, neg_basic_test)
     onert::ir::Shape shape(2);
 
     shape.dim(0) = 1;
-    shape.dim(1) = onert::ir::Shape::UNSPECIFIED_DIM;
+    shape.dim(1) = onert::ir::Shape::kUnspecifiedDim;
 
     ASSERT_EQ(shape.rank(), 2);
     ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
index d868efe..c3f5179 100644 (file)
@@ -168,7 +168,7 @@ void ChromeTracingWriter::flush(const std::vector<std::unique_ptr<EventRecorder>
   _os << "{\n";
   _os << "  " << quote("traceEvents") << ": [\n";
 
-  for (auto &recorder : recorders)
+  for (const auto &recorder : recorders)
   {
     flushOneRecord(*recorder);
   }
@@ -180,7 +180,7 @@ void ChromeTracingWriter::flush(const std::vector<std::unique_ptr<EventRecorder>
 
 void ChromeTracingWriter::flushOneRecord(const EventRecorder &recorder)
 {
-  for (auto &evt : recorder.duration_events())
+  for (const auto &evt : recorder.duration_events())
   {
     const std::string name = getLabel(*evt);
     const std::string tid = getTid(*evt);
@@ -188,7 +188,7 @@ void ChromeTracingWriter::flushOneRecord(const EventRecorder &recorder)
     _os << "    " << object(*evt, name, tid) << ",\n";
   }
 
-  for (auto &evt : recorder.counter_events())
+  for (const auto &evt : recorder.counter_events())
   {
     _os << "    " << object(evt) << ",\n";
   }
index 7a8b9f2..13dab5b 100644 (file)
@@ -32,7 +32,7 @@ namespace
 void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list)
 {
   os << "| ";
-  for (auto &key : list)
+  for (const auto &key : list)
   {
     os << key << " | ";
   }
@@ -227,7 +227,7 @@ struct MDTableBuilder
 
   MDTableBuilder &build()
   {
-    for (auto &it : divideGraph())
+    for (const auto &it : divideGraph())
     {
       size_t begin_idx = it.first;
       size_t end_idx = it.second;
@@ -314,7 +314,7 @@ struct MDTableBuilder
     graph.end_ts = std::stoull(_duration_events[end_idx]->ts);
     graph.setOperations(name_to_op);
 
-    for (auto &arg : _duration_events[end_idx]->args)
+    for (const auto &arg : _duration_events[end_idx]->args)
     {
       if (arg.first == "session")
         graph.session_index = arg.second;
@@ -358,7 +358,7 @@ struct MDTableBuilder
 
 void MDTableWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &records)
 {
-  for (auto &recorder : records)
+  for (const auto &recorder : records)
   {
     MDTableBuilder(recorder->duration_events(), recorder->counter_events()).build().write(_os);
   }
index 4dea6d1..87bbfc6 100644 (file)
@@ -103,9 +103,9 @@ void SNPEWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &record
   // Memory
   {
     std::unordered_map<std::string, Stat> mem_stats;
-    for (auto &recorder : recorders)
+    for (const auto &recorder : recorders)
     {
-      for (auto &evt : recorder->counter_events())
+      for (const auto &evt : recorder->counter_events())
       {
         auto &mem_stat = mem_stats[evt.name];
         uint64_t val = std::stoull(evt.values.at("value"));
@@ -114,7 +114,7 @@ void SNPEWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &record
     }
 
     auto &mem = exec_data["memory"] = Json::Value{Json::objectValue};
-    for (auto &kv : mem_stats)
+    for (const auto &kv : mem_stats)
     {
       auto &key = kv.first;
       auto &val = kv.second;
@@ -132,9 +132,9 @@ void SNPEWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &record
     // 2D keys : stats[tid][name]
     std::unordered_map<std::string, std::unordered_map<std::string, Stat>> stats;
     std::unordered_map<std::string, std::unordered_map<std::string, uint64_t>> begin_timestamps;
-    for (auto &recorder : recorders)
+    for (const auto &recorder : recorders)
     {
-      for (auto &evt : recorder->duration_events())
+      for (const auto &evt : recorder->duration_events())
       {
         std::string evt_name = getLabel(*evt);
         std::string evt_tid = getBackend(*evt);
@@ -160,17 +160,17 @@ void SNPEWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &record
       }
     }
 
-    for (auto &kv : begin_timestamps)
-      for (auto &kv2 : kv.second)
+    for (const auto &kv : begin_timestamps)
+      for (const auto &kv2 : kv.second)
         if (kv2.second != 0)
           throw std::runtime_error{"Invalid Data - B and E pair does not match."};
 
-    for (auto &kv : stats)
+    for (const auto &kv : stats)
     {
-      auto &tid = kv.first;
-      auto &map = kv.second;
+      const auto &tid = kv.first;
+      const auto &map = kv.second;
       auto &json_tid = exec_data[tid] = Json::Value{Json::objectValue};
-      for (auto &kv : map)
+      for (const auto &kv : map)
       {
         auto &name = kv.first;
         auto &val = kv.second;
index 173de29..862d6f7 100644 (file)
@@ -608,12 +608,12 @@ ir::Shape inferReshapeShape(const int32_t *shape_buf, const int32_t shape_num_el
                             const size_t total_num_elements)
 {
   ir::Shape ret(shape_num_elements);
-  int32_t flatten_dim = ir::Shape::UNSPECIFIED_DIM;
+  int32_t flatten_dim = ir::Shape::kUnspecifiedDim;
   for (int32_t i = 0; i < shape_num_elements; ++i)
   {
     if (shape_buf[i] < 0)
     {
-      if (flatten_dim != ir::Shape::UNSPECIFIED_DIM)
+      if (flatten_dim != ir::Shape::kUnspecifiedDim)
         throw std::runtime_error("Reshape: 2nd param has special dim(for flatten) more than twice");
       flatten_dim = i;
       ret.dim(i) = 1;
@@ -623,7 +623,7 @@ ir::Shape inferReshapeShape(const int32_t *shape_buf, const int32_t shape_num_el
       ret.dim(i) = shape_buf[i];
     }
   }
-  if (flatten_dim != ir::Shape::UNSPECIFIED_DIM)
+  if (flatten_dim != ir::Shape::kUnspecifiedDim)
     ret.dim(flatten_dim) = total_num_elements / ret.num_elements();
 
   // Check reshapable
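Editorial note on the hunk above: inferReshapeShape() copies every explicit dimension and resolves a single -1 entry from the remaining element count. A minimal sketch of the expected behaviour, assuming it is compiled inside the onert shape-inference namespace where inferReshapeShape() and ir::Shape are visible:

```cpp
// Illustration only (not part of the patch).
// A 12-element tensor reshaped with {-1, 4}: the -1 slot starts as 1, so
// num_elements() is 4 and the flatten dimension resolves to 12 / 4 = 3.
const int32_t shape_buf[] = {-1, 4};
const ir::Shape inferred =
  inferReshapeShape(shape_buf, /*shape_num_elements=*/2, /*total_num_elements=*/12);
// inferred is {3, 4}
```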
index cf080ab..878a594 100644 (file)
@@ -68,8 +68,7 @@ public:
    * @param model reference to model
    */
   explicit BaseLoader(std::unique_ptr<ir::Model> &model)
-    : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _model(model), _domain_model{nullptr},
-      _tensor_names(std::make_shared<std::unordered_map<ir::OperandIndex, std::string>>())
+    : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _model(model), _domain_model{nullptr}
   {
     _use_mmaped_data = util::getConfigBool(util::config::USE_MMAPED_DATA);
   }
@@ -194,7 +193,7 @@ protected:
   const Model *_domain_model;
   // Maps Tensor indices to onert Operands.
   std::vector<ir::OperandIndex> _tensor_to_operand;
-  std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> _tensor_names;
+  std::unordered_map<ir::OperandIndex, std::string> _tensor_names;
   // Verifier
   std::unique_ptr<Verifier> _verifier;
   // Boolean flag to use MMAPED_DATA
@@ -411,7 +410,7 @@ ir::OperandIndex BaseLoader<LoaderDomain>::loadOperand(const Tensor *tensor, ir:
     subg.setOperandValue(operand_index, std::move(data_obj));
   }
 
-  _tensor_names->emplace(operand_index, tensor->name()->str());
+  _tensor_names.emplace(operand_index, tensor->name()->str());
 
   // Variable
   if (tensor->is_variable())
@@ -1297,8 +1296,8 @@ void BaseLoader<LoaderDomain>::loadIf(const Operator *op, ir::Graph &subg)
   verifySubgraphIndex(else_index);
 
   ir::operation::If::Param param;
-  param.then_subg_index = ir::SubgraphIndex{static_cast<uint32_t>(then_index)};
-  param.else_subg_index = ir::SubgraphIndex{static_cast<uint32_t>(else_index)};
+  param.then_subg_index = ir::SubgraphIndex{static_cast<uint16_t>(then_index)};
+  param.else_subg_index = ir::SubgraphIndex{static_cast<uint16_t>(else_index)};
 
   loadOperationTo<ir::operation::If>(op, subg, param);
 }
@@ -1314,8 +1313,8 @@ void BaseLoader<LoaderDomain>::loadWhile(const Operator *op, ir::Graph &subg)
   verifySubgraphIndex(body_index);
 
   ir::operation::While::Param param;
-  param.cond_subg_index = ir::SubgraphIndex{static_cast<uint32_t>(cond_index)};
-  param.body_subg_index = ir::SubgraphIndex{static_cast<uint32_t>(body_index)};
+  param.cond_subg_index = ir::SubgraphIndex{static_cast<uint16_t>(cond_index)};
+  param.body_subg_index = ir::SubgraphIndex{static_cast<uint16_t>(body_index)};
 
   loadOperationTo<ir::operation::While>(op, subg, param);
 }
@@ -1663,6 +1662,12 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg
     case BuiltinOperator::BuiltinOperator_DEPTH_TO_SPACE:
       loadDepthToSpace(op, subg);
       return;
+    case BuiltinOperator::BuiltinOperator_EMBEDDING_LOOKUP:
+      loadOperationTo<ir::operation::EmbeddingLookup>(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_HASHTABLE_LOOKUP:
+      loadOperationTo<ir::operation::HashtableLookup>(op, subg);
+      return;
     default:
       throw std::runtime_error(
         std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
@@ -1682,10 +1687,15 @@ template <typename LoaderDomain> void BaseLoader<LoaderDomain>::loadModel()
   // Load subgraphs and map operations on subgraph
   const auto subgraphs = _domain_model->subgraphs();
   auto model = std::make_unique<ir::Model>();
-  for (uint32_t subgraph_index = 0; subgraph_index < subgraphs->size(); ++subgraph_index)
+  if (subgraphs->size() - 1 > ir::SubgraphIndex::max())
+    throw std::runtime_error{"The number of subgraphs cannot exceed " +
+                             std::to_string(ir::SubgraphIndex::max() + 1)};
+  for (uint16_t subgraph_index = 0; subgraph_index < subgraphs->size(); ++subgraph_index)
   {
     auto subg = loadSubgraph((*_domain_model->subgraphs())[subgraph_index]);
-    model->push(ir::SubgraphIndex{subgraph_index}, std::move(subg));
+    // NOTE: Used () instead of {}, which does not check narrowing.
+    // It is okay since overflow is checked by the above if-statement.
+    model->push(ir::SubgraphIndex(subgraph_index), std::move(subg));
   }
   _model = std::move(model);
 }
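Editorial note on the NOTE comment in the hunk above: it matters because ir::SubgraphIndex is assumed to wrap a 16-bit index, so brace initialization would reject the narrowing at compile time while parentheses accept it silently; that is why the explicit range check comes first. A hedged sketch of the difference, under that assumption:

```cpp
// Sketch only; assumes ir::SubgraphIndex stores a uint16_t, as the max() check implies.
uint32_t wide = 70000;           // larger than ir::SubgraphIndex::max()
// ir::SubgraphIndex bad{wide};  // ill-formed: {} rejects the narrowing conversion
ir::SubgraphIndex ok(wide);      // compiles, but the value would silently wrap --
                                 // harmless in loadModel() only because of the guard above
```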
index 5abcc9c..5bf626d 100644 (file)
@@ -112,13 +112,13 @@ private:
     for (const std::int32_t input_ind : *circle_subg->inputs())
     {
       subg->addInput(tensorIdxToOperandIdx(input_ind),
-                     _tensor_names->at(_tensor_to_operand[input_ind]));
+                     _tensor_names.at(_tensor_to_operand[input_ind]));
     }
     // Set outputs
     for (const std::int32_t output_ind : *circle_subg->outputs())
     {
       subg->addOutput(tensorIdxToOperandIdx(output_ind),
-                      _tensor_names->at(_tensor_to_operand[output_ind]));
+                      _tensor_names.at(_tensor_to_operand[output_ind]));
     }
     // Create operations
     for (const auto *op : *circle_subg->operators())
index dafd84c..b66b32e 100644 (file)
@@ -24,4 +24,4 @@ target_link_libraries(test_onert_frontend_nnapi PRIVATE ${LIB_ONERT} dl)
 target_link_libraries(test_onert_frontend_nnapi PRIVATE gtest)
 target_link_libraries(test_onert_frontend_nnapi PRIVATE gtest_main)
 
-install(TARGETS test_onert_frontend_nnapi DESTINATION unittest_standalone)
+install(TARGETS test_onert_frontend_nnapi DESTINATION unittest)
index 871c040..2c56f06 100644 (file)
@@ -58,7 +58,7 @@ int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation *compilation)
     return ANEURALNETWORKS_UNEXPECTED_NULL;
   }
 
-  if (compilation->state() != ::onert::compiler::State::CREATED)
+  if (compilation->isFinished())
   {
     VERBOSE(NNAPI::Compilation) << "finish: Already finished" << std::endl;
     return ANEURALNETWORKS_BAD_STATE;
@@ -87,7 +87,7 @@ int ANeuralNetworksCompilation_setPreference(ANeuralNetworksCompilation *compila
     return ANEURALNETWORKS_UNEXPECTED_NULL;
   }
 
-  if (compilation->state() != ::onert::compiler::State::CREATED)
+  if (compilation->isFinished())
   {
     VERBOSE(NNAPI::Compilation) << "setPreference: Already finished" << std::endl;
     return ANEURALNETWORKS_BAD_STATE;
index 19636a8..4e1a985 100644 (file)
@@ -37,7 +37,7 @@ int ANeuralNetworksExecution_create(ANeuralNetworksCompilation *compilation,
     return ANEURALNETWORKS_UNEXPECTED_NULL;
   }
 
-  std::shared_ptr<onert::exec::Executors> executors;
+  std::shared_ptr<onert::exec::IExecutors> executors;
 
   compilation->publish(executors);
 
index bb247b9..3b5edc1 100644 (file)
@@ -26,9 +26,7 @@ ANeuralNetworksCompilation::ANeuralNetworksCompilation(const ANeuralNetworksMode
     _compiler{std::make_shared<compiler::Compiler>(_model, *_coptions)}
 {
   if (model->allowedToFp16())
-  {
-    _compiler->enableToFp16();
-  }
+    _coptions->enableToFp16();
 }
 
 bool ANeuralNetworksCompilation::finish() noexcept
@@ -36,6 +34,7 @@ bool ANeuralNetworksCompilation::finish() noexcept
   try
   {
     _artifact = _compiler->compile();
+    _compiler = nullptr;
   }
   catch (const std::exception &e)
   {
index dff5c6d..3898f1d 100644 (file)
@@ -22,7 +22,7 @@
 #include "compiler/Compiler.h"
 #include "ir/Graph.h"
 #include "ir/Model.h"
-#include "exec/Executors.h"
+#include "exec/IExecutors.h"
 #include "util/TracingCtx.h"
 
 struct ANeuralNetworksCompilation
@@ -32,9 +32,9 @@ public:
 
 public:
   bool finish() noexcept;
+  bool isFinished() noexcept { return _compiler == nullptr; }
 
-  onert::compiler::State state(void) noexcept { return _compiler->state(); }
-  void publish(std::shared_ptr<onert::exec::Executors> &executors) noexcept
+  void publish(std::shared_ptr<onert::exec::IExecutors> &executors) noexcept
   {
     executors = _artifact ? _artifact->_executors : nullptr;
   }
index 110c7cd..6fbc4c2 100644 (file)
@@ -26,7 +26,7 @@
 struct ANeuralNetworksExecution
 {
 public:
-  ANeuralNetworksExecution(const std::shared_ptr<onert::exec::Executors> &executors)
+  ANeuralNetworksExecution(const std::shared_ptr<onert::exec::IExecutors> &executors)
     : _execution{std::make_shared<onert::exec::Execution>(executors)}
   {
     // DO NOTHING
index fe69e4e..dc85646 100644 (file)
@@ -99,13 +99,13 @@ private:
     for (const std::int32_t input_ind : *tflite_subg->inputs())
     {
       subg->addInput(tensorIdxToOperandIdx(input_ind),
-                     _tensor_names->at(_tensor_to_operand[input_ind]));
+                     _tensor_names.at(_tensor_to_operand[input_ind]));
     }
     // Set outputs
     for (const std::int32_t output_ind : *tflite_subg->outputs())
     {
       subg->addOutput(tensorIdxToOperandIdx(output_ind),
-                      _tensor_names->at(_tensor_to_operand[output_ind]));
+                      _tensor_names.at(_tensor_to_operand[output_ind]));
     }
     // Create operations
     for (const auto *op : *tflite_subg->operators())
@@ -113,7 +113,6 @@ private:
       loadOperation(op, *subg);
     }
 
-    subg->setTensorName(_tensor_names);
     subg->verify();
 
     return subg;
index 8cf5164..9cf433d 100644 (file)
@@ -2,20 +2,41 @@ if(NOT BUILD_NPUD)
   return()
 endif(NOT BUILD_NPUD)
 
-nnfw_find_package(GLib2.0 REQUIRED)
+set(NPUD_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR})
 
-file(GLOB_RECURSE SOURCES "*.cc")
+nnfw_find_package(Gio2.0 REQUIRED)
+nnfw_find_package(Giounix2.0 REQUIRED)
 
-add_executable(npud ${SOURCES})
-set_target_properties(npud PROPERTIES LINKER_LANGUAGE CXX)
-target_include_directories(npud PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
-target_include_directories(npud PUBLIC ${GLIB2.0_INCLUDE_DIRS})
-target_link_libraries(npud PRIVATE nnfw_lib_misc)
-target_link_libraries(npud PRIVATE ${GLIB2.0_LIBRARIES})
-target_link_libraries(npud PRIVATE ${LIB_PTHREAD})
+find_program(GDBUS_CODEGEN NAMES gdbus-codegen)
+if (NOT GDBUS_CODEGEN)
+    message(SEND_ERROR "Could not find gdbus-codegen")
+endif(NOT GDBUS_CODEGEN)
 
-if(ENVVAR_NPUD_CONFIG)
-  target_compile_definitions(npud PRIVATE ENVVAR_FOR_DEFAULT_CONFIG)
-endif(ENVVAR_NPUD_CONFIG)
+set(DBUS_INCLUDE_DIRS "${CMAKE_CURRENT_BINARY_DIR}")
+set(DBUS_INTERFACE "org.tizen.npud")
+set(DBUS_NAMESPACE "Npud")
+set(DBUS_INTROSPECTION_XML "org.tizen.npud.xml")
+set(DBUS_CORE "dbus-core")
+set(DBUS_CORE_SOURCE "${DBUS_CORE}.c")
+set(DBUS_CONFIG_FILE "org.tizen.npud.conf")
 
-install(TARGETS npud DESTINATION bin)
+add_custom_command(OUTPUT ${DBUS_CORE_SOURCE}
+  COMMAND ${GDBUS_CODEGEN}
+  --generate-c-code ${DBUS_CORE}
+  --interface-prefix ${DBUS_INTERFACE}
+  --c-namespace ${DBUS_NAMESPACE}
+  ${CMAKE_CURRENT_SOURCE_DIR}/${DBUS_INTROSPECTION_XML}
+  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${DBUS_INTROSPECTION_XML})
+
+add_library(npud_dbus STATIC ${DBUS_CORE_SOURCE})
+
+target_include_directories(npud_dbus PUBLIC ${GIO2.0_INCLUDE_DIRS})
+target_include_directories(npud_dbus PUBLIC ${GIO_UNIX_2.0_INCLUDE_DIRS})
+target_link_libraries(npud_dbus PRIVATE ${GIO2.0_LIBRARIES})
+target_link_libraries(npud_dbus PRIVATE ${GIO_UNIX_2.0_LIBRARIES})
+
+install(FILES ${DBUS_CONFIG_FILE} DESTINATION share)
+
+add_subdirectory(core)
+add_subdirectory(tests)
+add_subdirectory(backend)
diff --git a/runtime/service/npud/backend/CMakeLists.txt b/runtime/service/npud/backend/CMakeLists.txt
new file mode 100644 (file)
index 0000000..53e3d7a
--- /dev/null
@@ -0,0 +1,2 @@
+# Backends
+add_subdirectory(trix)
diff --git a/runtime/service/npud/backend/trix/CMakeLists.txt b/runtime/service/npud/backend/trix/CMakeLists.txt
new file mode 100644 (file)
index 0000000..fa7aaa9
--- /dev/null
@@ -0,0 +1,19 @@
+nnfw_find_package(TRIXEngine QUIET 2.5.0)
+
+if(NOT TRIXEngine_FOUND)
+  return()
+endif(NOT TRIXEngine_FOUND)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_library(npud_backend_trix SHARED ${SOURCES})
+
+target_include_directories(npud_backend_trix PUBLIC ${NPUD_INCLUDE_DIRS})
+target_link_libraries(npud_backend_trix PRIVATE nnfw_lib_misc)
+target_link_libraries(npud_backend_trix PRIVATE trix_engine)
+
+if(ENVVAR_NPUD_CONFIG)
+  target_compile_definitions(npud_backend_trix PRIVATE ENVVAR_FOR_DEFAULT_CONFIG)
+endif(ENVVAR_NPUD_CONFIG)
+
+install(TARGETS npud_backend_trix DESTINATION lib)
diff --git a/runtime/service/npud/backend/trix/TrixBackend.cc b/runtime/service/npud/backend/trix/TrixBackend.cc
new file mode 100644 (file)
index 0000000..58264e6
--- /dev/null
@@ -0,0 +1,418 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TrixBackend.h"
+
+#include <algorithm>
+
+#if defined(__linux__)
+extern "C" {
+using namespace ::npud::backend::trix;
+
+TrixBackend *allocate() { return new TrixBackend(); }
+
+void deallocate(TrixBackend *trix) { delete trix; }
+}
+#endif
+
+namespace npud
+{
+namespace backend
+{
+namespace trix
+{
+
+TrixBackend::TrixBackend() : _devType(NPUCOND_TRIV2_CONN_SOCIP)
+{
+  auto coreNum = getnumNPUdeviceByType(_devType);
+  if (coreNum <= 0)
+  {
+    return;
+  }
+
+  std::vector<npudev_h> handles;
+  for (int i = 0; i < coreNum; ++i)
+  {
+    npudev_h handle;
+    if (getNPUdeviceByType(&handle, _devType, i) < 0)
+    {
+      // NOTE: Keep going so that all remaining cores are tried.
+      continue;
+    }
+    handles.emplace_back(handle);
+  }
+
+  if (handles.size() == 0)
+  {
+    return;
+  }
+
+  _dev = std::make_unique<TrixDevice>();
+  _dev->handles = std::move(handles);
+}
+
+TrixBackend::~TrixBackend()
+{
+  for (const auto &ctx : _dev->ctxs)
+  {
+    npudev_h handle = _dev->handles.at(ctx->defaultCore);
+    for (const auto id : ctx->requests)
+    {
+      removeNPU_request(handle, id);
+    }
+  }
+
+  for (const auto &handle : _dev->handles)
+  {
+    unregisterNPUmodel_all(handle);
+    putNPUdevice(handle);
+  }
+}
+
+NpuStatus TrixBackend::getVersion(std::string &version)
+{
+  // TODO Implement details
+  return NPU_STATUS_ERROR_NOT_SUPPORTED;
+}
+
+NpuStatus TrixBackend::createContext(int deviceId, int priority, NpuContext **ctx)
+{
+  if (deviceId >= _dev->handles.size())
+  {
+    return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+  auto context = std::make_unique<NpuContext>();
+  context->defaultCore = deviceId;
+  // TODO Consider priority.
+  *ctx = context.get();
+  _dev->ctxs.emplace_back(std::move(context));
+  return NPU_STATUS_SUCCESS;
+}
+
+NpuStatus TrixBackend::destroyContext(NpuContext *ctx)
+{
+  if (ctx == nullptr)
+  {
+    return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  auto citer = std::find_if(_dev->ctxs.begin(), _dev->ctxs.end(),
+                            [&](std::unique_ptr<NpuContext> &c) { return c.get() == ctx; });
+  if (citer == _dev->ctxs.end())
+  {
+    return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  npudev_h handle = _dev->handles.at(ctx->defaultCore);
+
+  for (auto rid : ctx->requests)
+  {
+    if (removeNPU_request(handle, rid) < 0)
+    {
+      return NPU_STATUS_ERROR_OPERATION_FAILED;
+    }
+    _dev->requests.erase(rid);
+  }
+
+  for (auto mid : ctx->models)
+  {
+    auto &minfo = _dev->models.at(mid);
+    if (--minfo->refCount == 0)
+    {
+      if (unregisterNPUmodel(handle, mid) < 0)
+      {
+        return NPU_STATUS_ERROR_OPERATION_FAILED;
+      }
+      _dev->models.erase(mid);
+    }
+  }
+
+  _dev->ctxs.erase(citer);
+  return NPU_STATUS_SUCCESS;
+}
+
+NpuStatus TrixBackend::createBuffer(NpuContext *ctx, GenericBuffer *buffer)
+{
+  // TODO Implement details
+  return NPU_STATUS_ERROR_NOT_SUPPORTED;
+}
+
+NpuStatus TrixBackend::destroyBuffer(NpuContext *ctx, GenericBuffer *buffer)
+{
+  // TODO Implement details
+  return NPU_STATUS_ERROR_NOT_SUPPORTED;
+}
+
+NpuStatus TrixBackend::registerModel(NpuContext *ctx, const std::string &modelPath,
+                                     ModelID *modelId)
+{
+  if (ctx == nullptr)
+  {
+    return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  ModelID id = 0;
+  auto iter =
+    std::find_if(_dev->models.begin(), _dev->models.end(),
+                 [&](const std::pair<const ModelID, std::unique_ptr<TrixModelInfo>> &p) {
+                   return p.second->core == ctx->defaultCore && p.second->path == modelPath;
+                 });
+  // Already registered model.
+  if (iter != _dev->models.end())
+  {
+    _dev->models.at(iter->first)->refCount++;
+    ctx->models.emplace_back(iter->first);
+  }
+  else
+  {
+    auto meta = getNPUmodel_metadata(modelPath.c_str(), false);
+    if (meta == nullptr)
+    {
+      return NPU_STATUS_ERROR_OPERATION_FAILED;
+    }
+
+    generic_buffer fileInfo;
+    fileInfo.type = BUFFER_FILE;
+    fileInfo.filepath = modelPath.c_str();
+    fileInfo.size = meta->size;
+
+    npudev_h handle = _dev->handles.at(ctx->defaultCore);
+    if (registerNPUmodel(handle, &fileInfo, &id) < 0)
+    {
+      return NPU_STATUS_ERROR_OPERATION_FAILED;
+    }
+
+    _dev->models.insert(std::make_pair(id, std::unique_ptr<TrixModelInfo>(new TrixModelInfo{
+                                             id, modelPath, ctx->defaultCore, meta, 1})));
+    ctx->models.emplace_back(id);
+  }
+
+  *modelId = id;
+  return NPU_STATUS_SUCCESS;
+}
+
+NpuStatus TrixBackend::unregisterModel(NpuContext *ctx, ModelID modelId)
+{
+  if (ctx == nullptr)
+  {
+    return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  auto miter = std::find(ctx->models.begin(), ctx->models.end(), modelId);
+  if (miter == ctx->models.end())
+  {
+    return NPU_STATUS_ERROR_INVALID_MODEL;
+  }
+
+  npudev_h handle = _dev->handles.at(ctx->defaultCore);
+
+  for (auto riter = ctx->requests.begin(); riter != ctx->requests.end();)
+  {
+    auto &rinfo = _dev->requests.at(*riter);
+    if (rinfo->modelId == modelId)
+    {
+      if (removeNPU_request(handle, rinfo->id) < 0)
+      {
+        return NPU_STATUS_ERROR_OPERATION_FAILED;
+      }
+      _dev->requests.erase(rinfo->id);
+      riter = ctx->requests.erase(riter);
+    }
+    else
+    {
+      ++riter;
+    }
+  }
+
+  auto &minfo = _dev->models.at(modelId);
+  if (--minfo->refCount == 0)
+  {
+    if (unregisterNPUmodel(handle, modelId) < 0)
+    {
+      return NPU_STATUS_ERROR_OPERATION_FAILED;
+    }
+    _dev->models.erase(modelId);
+  }
+
+  ctx->models.erase(miter);
+  return NPU_STATUS_SUCCESS;
+}
+
+NpuStatus TrixBackend::createRequest(NpuContext *ctx, ModelID modelId, RequestID *requestId)
+{
+  if (ctx == nullptr)
+  {
+    return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  auto miter = std::find(ctx->models.begin(), ctx->models.end(), modelId);
+  if (miter == ctx->models.end())
+  {
+    return NPU_STATUS_ERROR_INVALID_MODEL;
+  }
+
+  int id = 0;
+  npudev_h handle = _dev->handles.at(ctx->defaultCore);
+  if (createNPU_request(handle, modelId, &id) < 0)
+  {
+    return NPU_STATUS_ERROR_OPERATION_FAILED;
+  }
+
+  _dev->requests.insert(std::make_pair(id, std::unique_ptr<TrixRequestInfo>(new TrixRequestInfo{
+                                             static_cast<RequestID>(id), modelId})));
+  ctx->requests.emplace_back(id);
+
+  *requestId = id;
+  return NPU_STATUS_SUCCESS;
+}
+
+NpuStatus TrixBackend::destroyRequest(NpuContext *ctx, RequestID requestId)
+{
+  if (ctx == nullptr)
+  {
+    return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  auto riter = std::find(ctx->requests.begin(), ctx->requests.end(), requestId);
+  if (riter == ctx->requests.end())
+  {
+    return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  npudev_h handle = _dev->handles.at(ctx->defaultCore);
+  if (removeNPU_request(handle, requestId) < 0)
+  {
+    return NPU_STATUS_ERROR_OPERATION_FAILED;
+  }
+
+  _dev->requests.erase(requestId);
+  ctx->requests.erase(riter);
+  return NPU_STATUS_SUCCESS;
+}
+
+NpuStatus TrixBackend::setRequestData(NpuContext *ctx, RequestID requestId, InputBuffers *inputBufs,
+                                      TensorDataInfos *inputInfos, OutputBuffers *outputBufs,
+                                      TensorDataInfos *outputInfos)
+{
+  auto citer = std::find_if(_dev->ctxs.begin(), _dev->ctxs.end(),
+                            [&](std::unique_ptr<NpuContext> &c) { return c.get() == ctx; });
+  if (citer == _dev->ctxs.end())
+  {
+    return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  auto riter = std::find(ctx->requests.begin(), ctx->requests.end(), requestId);
+  if (riter == ctx->requests.end())
+  {
+    return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  auto &req = _dev->requests.at(requestId);
+  auto miter = std::find(ctx->models.begin(), ctx->models.end(), req->modelId);
+  if (miter == ctx->models.end())
+  {
+    return NPU_STATUS_ERROR_INVALID_MODEL;
+  }
+
+  // TODO Exception control of `at`
+  auto &minfo = _dev->models.at(req->modelId);
+  if (minfo->meta->input_seg_num != inputBufs->numBuffers ||
+      minfo->meta->output_seg_num != outputBufs->numBuffers)
+  {
+    return NPU_STATUS_ERROR_INVALID_DATA;
+  }
+
+  auto &inInfos = req->inInfos;
+  auto &outInfos = req->outInfos;
+
+  inInfos->num_info = inputBufs->numBuffers;
+  for (auto i = 0; i < inInfos->num_info; ++i)
+  {
+    inInfos->info[i].layout = DATA_LAYOUT_MODEL;
+    inInfos->info[i].type = minfo->meta->input_seg_quant_type[i];
+  }
+
+  outInfos->num_info = outputBufs->numBuffers;
+  for (auto i = 0; i < outInfos->num_info; ++i)
+  {
+    outInfos->info[i].layout = DATA_LAYOUT_MODEL;
+    outInfos->info[i].type = minfo->meta->output_seg_quant_type[i];
+  }
+
+  auto &inBufs = req->inBufs;
+  auto &outBufs = req->outBufs;
+
+  inBufs->num_buffers = inputBufs->numBuffers;
+  for (auto i = 0; i < inBufs->num_buffers; ++i)
+  {
+    if (inputBufs->buffers[i].type == NPU_BUFFER_MAPPED)
+    {
+      inBufs->bufs[i].addr = inputBufs->buffers[i].addr;
+    }
+    else if (inputBufs->buffers[i].type == NPU_BUFFER_DMABUF)
+    {
+      // TODO Implement details
+      // inBufs.bufs[i].dmabuf = inputBufs->buffers[i].dmabuf;
+      // inBufs.bufs[i].offset = inputBufs->buffers[i].offset;
+    }
+    else
+    {
+      continue;
+    }
+    inBufs->bufs[i].size = inputBufs->buffers[i].size;
+    inBufs->bufs[i].type = static_cast<buffer_types>(inputBufs->buffers[i].type);
+  }
+
+  outBufs->num_buffers = outputBufs->numBuffers;
+  for (auto i = 0; i < outBufs->num_buffers; ++i)
+  {
+    if (outputBufs->buffers[i].type == NPU_BUFFER_MAPPED)
+    {
+      outBufs->bufs[i].addr = outputBufs->buffers[i].addr;
+    }
+    else if (outputBufs->buffers[i].type == NPU_BUFFER_DMABUF)
+    {
+      // TODO Implement details
+      // outBufs.bufs[i].dmabuf = outputBufs->buffers[i].dmabuf;
+      // outBufs.bufs[i].offset = outputBufs->buffers[i].offset;
+    }
+    else
+    {
+      continue;
+    }
+    outBufs->bufs[i].size = outputBufs->buffers[i].size;
+    outBufs->bufs[i].type = static_cast<buffer_types>(outputBufs->buffers[i].type);
+  }
+
+  npudev_h handle = _dev->handles.at(ctx->defaultCore);
+  if (setNPU_requestData(handle, requestId, inBufs.get(), inInfos.get(), outBufs.get(),
+                         outInfos.get()) < 0)
+  {
+    return NPU_STATUS_ERROR_OPERATION_FAILED;
+  }
+
+  return NPU_STATUS_SUCCESS;
+}
+
+NpuStatus TrixBackend::submitRequest(NpuContext *ctx, RequestID requestId)
+{
+  // TODO Implement details
+  return NPU_STATUS_ERROR_NOT_SUPPORTED;
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace npud
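Editorial note on TrixBackend.cc above: the backend expects context/model/request objects to be created and torn down in pairs, with registerModel() reference-counting models shared by several contexts. A rough usage sketch, with a placeholder model path and status checks omitted:

```cpp
// Rough usage sketch, not part of the patch; "/tmp/model.tvn" is a placeholder.
using namespace npud::backend::trix;

TrixBackend backend;
NpuContext *ctx = nullptr;
backend.createContext(/*deviceId=*/0, /*priority=*/0, &ctx);

ModelID model = 0;
backend.registerModel(ctx, "/tmp/model.tvn", &model);   // refCount becomes 1

RequestID request = 0;
backend.createRequest(ctx, model, &request);
// ... fill InputBuffers/OutputBuffers, call setRequestData(), then submitRequest()
// (submitRequest() still returns NPU_STATUS_ERROR_NOT_SUPPORTED in this revision).

backend.destroyRequest(ctx, request);
backend.unregisterModel(ctx, model);    // refCount drops to 0, model is unregistered
backend.destroyContext(ctx);
```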
diff --git a/runtime/service/npud/backend/trix/TrixBackend.h b/runtime/service/npud/backend/trix/TrixBackend.h
new file mode 100644 (file)
index 0000000..0920d9a
--- /dev/null
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_BACKEND_TRIX_BACKEND_H__
+#define __ONE_SERVICE_NPUD_BACKEND_TRIX_BACKEND_H__
+
+#include <core/Backend.h>
+#include <libnpuhost.h>
+#include <memory>
+#include <vector>
+#include <map>
+
+namespace npud
+{
+namespace backend
+{
+namespace trix
+{
+
+using namespace ::npud::core;
+
+using Handle = void *;
+
+/**
+ * @brief Trix model information.
+ *
+ * @param id The model identifier.
+ * @param path The model path.
+ * @param core The core number where the model is registered.
+ * @param meta The meta data of model.
+ * @param refCount The reference count of model users.
+ */
+struct TrixModelInfo
+{
+  ModelID id;
+  std::string path;
+  int core;
+  npubin_meta *meta;
+  int refCount;
+
+  TrixModelInfo() : meta(nullptr), refCount(0) {}
+  TrixModelInfo(ModelID _id, const std::string &_path, int _core, npubin_meta *_meta, int _refCount)
+    : id(_id), path(_path), core(_core), meta(_meta), refCount(_refCount)
+  {
+  }
+  ~TrixModelInfo() { free(meta); }
+};
+
+/**
+ * @brief Trix request information
+ *
+ * @param id The request identifier.
+ * @param modelId The model id of request.
+ */
+struct TrixRequestInfo
+{
+  RequestID id;
+  ModelID modelId;
+  std::unique_ptr<input_buffers> inBufs;
+  std::unique_ptr<tensors_data_info> inInfos;
+  std::unique_ptr<output_buffers> outBufs;
+  std::unique_ptr<tensors_data_info> outInfos;
+  TrixRequestInfo(RequestID _id, ModelID _mid)
+    : id(_id), modelId(_mid), inBufs(std::make_unique<input_buffers>()),
+      inInfos(std::make_unique<tensors_data_info>()), outBufs(std::make_unique<output_buffers>()),
+      outInfos(std::make_unique<tensors_data_info>())
+  {
+  }
+};
+
+/**
+ * @brief Trix device information
+ *
+ * @param handles The device handle list.
+ * @param ctxs The NpuContext list.
+ * @param models The model map.
+ * @param requests The request map.
+ */
+struct TrixDevice
+{
+  std::vector<Handle> handles;
+  std::vector<std::unique_ptr<NpuContext>> ctxs;
+  std::map<ModelID, std::unique_ptr<TrixModelInfo>> models;
+  std::map<RequestID, std::unique_ptr<TrixRequestInfo>> requests;
+};
+
+class TrixBackend : public Backend
+{
+public:
+  TrixBackend();
+  ~TrixBackend();
+
+  NpuStatus getVersion(std::string &version) override;
+  NpuStatus createContext(int deviceId, int priority, NpuContext **ctx) override;
+  NpuStatus destroyContext(NpuContext *ctx) override;
+  NpuStatus createBuffer(NpuContext *ctx, GenericBuffer *buffer) override;
+  NpuStatus destroyBuffer(NpuContext *ctx, GenericBuffer *buffer) override;
+  // TODO Support to register model from buffer
+  NpuStatus registerModel(NpuContext *ctx, const std::string &modelPath, ModelID *modelId) override;
+  NpuStatus unregisterModel(NpuContext *ctx, ModelID modelId) override;
+  NpuStatus createRequest(NpuContext *ctx, ModelID modelId, RequestID *requestId) override;
+  NpuStatus destroyRequest(NpuContext *ctx, RequestID requestId) override;
+  NpuStatus setRequestData(NpuContext *ctx, RequestID requestId, InputBuffers *inputBufs,
+                           TensorDataInfos *inputInfos, OutputBuffers *outputBufs,
+                           TensorDataInfos *outputInfos) override;
+  NpuStatus submitRequest(NpuContext *ctx, RequestID requestId) override;
+
+private:
+  dev_type _devType;
+  std::unique_ptr<TrixDevice> _dev;
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_BACKEND_TRIX_BACKEND_H__
diff --git a/runtime/service/npud/core/Backend.h b/runtime/service/npud/core/Backend.h
new file mode 100644 (file)
index 0000000..5bc13b6
--- /dev/null
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_BACKEND_H__
+#define __ONE_SERVICE_NPUD_CORE_BACKEND_H__
+
+#include "ir/Layout.h"
+#include "ir/DataType.h"
+
+#include <string>
+#include <vector>
+
+namespace npud
+{
+namespace core
+{
+
+#define NPU_TENSOR_MAX (16)
+
+/**
+ * @brief Npu model ID.
+ *
+ */
+using ModelID = uint32_t;
+
+/**
+ * @brief Npu request ID.
+ *
+ */
+using RequestID = uint32_t;
+
+/**
+ * @brief Npu buffer type
+ *
+ */
+enum BufferTypes
+{
+  NPU_BUFFER_MAPPED,   /**< buffer is a memory-mapped ptr */
+  NPU_BUFFER_DMABUF,   /**< buffer is a dmabuf fd, representing contiguous memory */
+  NPU_BUFFER_UNDEFINED /**< buffer type is undefined */
+};
+
+/**
+ * @brief Various kinds of buffer supported for input/output/model.
+ *
+ */
+struct GenericBuffer
+{
+  struct
+  {             /** NPU_BUFFER_MAPPED/DMABUF */
+    void *addr; /**< Mapped address of the buffer */
+    struct
+    {                  /** NPU_BUFFER_DMABUF only */
+      int dmabuf;      /**< The dma-buf fd handle of the memory allocated */
+      uint64_t offset; /**< Offset to be applied to the base memory address */
+    };
+  };
+  uint64_t size;    /**< The size of the buffer in bytes */
+  BufferTypes type; /**< Type of memory in this buffer */
+};
+
+/**
+ * @brief Npu generic buffer array.
+ *
+ */
+struct GenericBuffers
+{
+  uint32_t numBuffers;
+  GenericBuffer buffers[NPU_TENSOR_MAX];
+};
+
+/**
+ * @brief Npu input/output buffers are compatible with GenericBuffers.
+ *
+ */
+typedef GenericBuffers InputBuffers;
+typedef GenericBuffers OutputBuffers;
+
+/**
+ * @brief Npu tensor data info description.
+ *
+ */
+struct TensorDataInfo
+{
+  ir::Layout layout;
+  ir::DataType type;
+};
+
+/**
+ * @brief Npu tensor data info array.
+ *
+ */
+struct TensorDataInfos
+{
+  uint32_t numInfos;
+  TensorDataInfo infos[NPU_TENSOR_MAX];
+};
+
+/**
+ * @brief Npu error status.
+ *
+ */
+enum NpuStatus
+{
+  NPU_STATUS_SUCCESS = 0,
+  NPU_STATUS_ERROR_OPERATION_FAILED,
+  NPU_STATUS_ERROR_NOT_SUPPORTED,
+  NPU_STATUS_ERROR_INVALID_ARGUMENT,
+  NPU_STATUS_ERROR_INVALID_MODEL,
+  NPU_STATUS_ERROR_INVALID_DATA,
+};
+
+/**
+ * @brief Npu context definition
+ *
+ * @param models The model lists.
+ * @param requests The request lists.
+ * @param defaultCore The core number to be used by default.
+ */
+struct NpuContext
+{
+  std::vector<ModelID> models;
+  std::vector<RequestID> requests;
+  int defaultCore;
+};
+
+/**
+ * @brief Npu backend interface
+ *
+ * Backend module should implement this Backend interface.
+ * Npu daemon will load this class symbol at runtime.
+ */
+class Backend
+{
+public:
+  virtual ~Backend() = default;
+
+  virtual NpuStatus getVersion(std::string &version) = 0;
+  virtual NpuStatus createContext(int deviceId, int priority, NpuContext **ctx) = 0;
+  virtual NpuStatus destroyContext(NpuContext *ctx) = 0;
+  virtual NpuStatus createBuffer(NpuContext *ctx, GenericBuffer *buffer) = 0;
+  virtual NpuStatus destroyBuffer(NpuContext *ctx, GenericBuffer *buffer) = 0;
+  // TODO Support to register model from buffer
+  virtual NpuStatus registerModel(NpuContext *ctx, const std::string &modelPath,
+                                  ModelID *modelId) = 0;
+  virtual NpuStatus unregisterModel(NpuContext *ctx, ModelID modelId) = 0;
+  virtual NpuStatus createRequest(NpuContext *ctx, ModelID modelId, RequestID *requestId) = 0;
+  virtual NpuStatus destroyRequest(NpuContext *ctx, RequestID requestId) = 0;
+  virtual NpuStatus setRequestData(NpuContext *ctx, RequestID requestId, InputBuffers *inputBufs,
+                                   TensorDataInfos *inputInfos, OutputBuffers *outputBufs,
+                                   TensorDataInfos *outputInfos) = 0;
+  virtual NpuStatus submitRequest(NpuContext *ctx, RequestID requestId) = 0;
+};
+
+typedef Backend *(*NpuAlloc)();
+typedef void (*NpuDealloc)(Backend *);
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_BACKEND_H__
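Editorial note on Backend.h above: the NpuAlloc/NpuDealloc typedefs are the contract between the daemon and a backend shared object; the trix backend earlier in this change exports them as extern "C" allocate()/deallocate(). A hedged sketch of how a loader could bind them — the loader function itself is hypothetical, only the symbol names and types come from this change:

```cpp
#include <core/Backend.h> // path as included by TrixBackend.h
#include <dlfcn.h>
#include <stdexcept>

// Sketch only: open a backend library and resolve its allocate() entry point.
npud::core::Backend *loadBackend(const char *path, void **outHandle)
{
  void *handle = dlopen(path, RTLD_NOW | RTLD_LOCAL);
  if (handle == nullptr)
    throw std::runtime_error{dlerror()};

  auto alloc = reinterpret_cast<npud::core::NpuAlloc>(dlsym(handle, "allocate"));
  if (alloc == nullptr)
  {
    dlclose(handle);
    throw std::runtime_error{"Backend has no allocate() entry point"};
  }

  *outHandle = handle;
  return alloc(); // caller later resolves "deallocate" and dlclose()s the handle
}
```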
diff --git a/runtime/service/npud/core/CMakeLists.txt b/runtime/service/npud/core/CMakeLists.txt
new file mode 100644 (file)
index 0000000..31341c6
--- /dev/null
@@ -0,0 +1,29 @@
+nnfw_find_package(GLib2.0 REQUIRED)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+file(GLOB_RECURSE MAIN_SOURCE_FILE "main.cc")
+list(REMOVE_ITEM SOURCES ${MAIN_SOURCE_FILE})
+
+add_library(npud_core STATIC ${SOURCES})
+
+set_target_properties(npud_core PROPERTIES LINKER_LANGUAGE CXX)
+target_include_directories(npud_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+target_include_directories(npud_core PUBLIC ${GLIB2.0_INCLUDE_DIRS})
+target_include_directories(npud_core PUBLIC ${DBUS_INCLUDE_DIRS})
+target_link_libraries(npud_core PRIVATE nnfw_lib_misc)
+target_link_libraries(npud_core PRIVATE ${GLIB2.0_LIBRARIES})
+target_link_libraries(npud_core PRIVATE ${LIB_PTHREAD})
+target_link_libraries(npud_core PRIVATE dl)
+target_link_libraries(npud_core PRIVATE npud_dbus)
+
+if(ENVVAR_NPUD_CONFIG)
+  target_compile_definitions(npud_core PRIVATE ENVVAR_FOR_DEFAULT_CONFIG)
+endif(ENVVAR_NPUD_CONFIG)
+
+# npud executable
+add_executable(npud ${MAIN_SOURCE_FILE})
+
+set_target_properties(npud PROPERTIES LINKER_LANGUAGE CXX)
+target_link_libraries(npud PRIVATE npud_core)
+
+install(TARGETS npud DESTINATION bin)
diff --git a/runtime/service/npud/core/ContextManager.cc b/runtime/service/npud/core/ContextManager.cc
new file mode 100644 (file)
index 0000000..9ceb541
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ContextManager.h"
+
+#include <algorithm>
+#include <util/Logging.h>
+
+namespace npud
+{
+namespace core
+{
+
+ContextManager::ContextManager() noexcept {}
+
+ContextManager::~ContextManager() noexcept { _contexts.clear(); }
+
+void ContextManager::newContext(NpuContext *npuContext, ContextID *contextId)
+{
+  auto context = std::make_unique<Context>();
+  // TODO Consider the possibility of reusing the same address.
+  context->contextId = reinterpret_cast<ContextID>(context.get());
+  context->npuContext = npuContext;
+  *contextId = context->contextId;
+  _contexts.emplace_back(std::move(context));
+
+  this->listContexts();
+}
+
+void ContextManager::deleteContext(ContextID contextId)
+{
+  const auto iter =
+    std::remove_if(_contexts.begin(), _contexts.end(),
+                   [&](std::unique_ptr<Context> &c) { return c->contextId == contextId; });
+  if (iter == _contexts.end())
+  {
+    return;
+  }
+
+  _contexts.erase(iter, _contexts.end());
+
+  this->listContexts();
+}
+
+void ContextManager::listContexts()
+{
+#ifdef DEBUG
+  VERBOSE(ContextManager) << "Size: " << _contexts.size() << std::endl;
+  for (const auto &context : _contexts)
+  {
+    VERBOSE(ContextManager) << "==========================" << std::endl;
+    VERBOSE(ContextManager) << "contextId: " << context->contextId << std::endl;
+  }
+  VERBOSE(ContextManager) << "==========================" << std::endl;
+#endif
+}
+
+const std::vector<std::unique_ptr<Context>>::iterator
+ContextManager::getContext(ContextID contextId)
+{
+  const auto iter =
+    std::find_if(_contexts.begin(), _contexts.end(),
+                 [&](std::unique_ptr<Context> &c) { return c->contextId == contextId; });
+  return iter;
+}
+
+NpuContext *ContextManager::getNpuContext(ContextID contextId)
+{
+  const auto iter = getContext(contextId);
+  if (iter == _contexts.end())
+  {
+    return nullptr;
+  }
+
+  return iter->get()->npuContext;
+}
+
+} // namespace core
+} // namespace npud
diff --git a/runtime/service/npud/core/ContextManager.h b/runtime/service/npud/core/ContextManager.h
new file mode 100644 (file)
index 0000000..c548624
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_CONTEXT_MANAGER_H__
+#define __ONE_SERVICE_NPUD_CORE_CONTEXT_MANAGER_H__
+
+#include "Backend.h"
+
+#include <vector>
+#include <memory>
+
+namespace npud
+{
+namespace core
+{
+
+using ContextID = uint64_t;
+struct Context
+{
+  // TODO Describe the variables
+  ContextID contextId;
+  NpuContext *npuContext;
+};
+
+class ContextManager
+{
+public:
+  ContextManager() noexcept;
+  ~ContextManager() noexcept;
+
+  ContextManager(const ContextManager &) = delete;
+  ContextManager &operator=(const ContextManager &) = delete;
+
+  void newContext(NpuContext *npuContext, ContextID *contextId);
+  void deleteContext(ContextID contextId);
+  const std::vector<std::unique_ptr<Context>>::iterator getContext(ContextID contextId);
+  NpuContext *getNpuContext(ContextID contextId);
+
+private:
+  void listContexts(void);
+
+private:
+  std::vector<std::unique_ptr<Context>> _contexts;
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_CONTEXT_MANAGER_H__
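Editorial note on ContextManager above: it keys each entry by the Context object's own address (see the reinterpret_cast in newContext()), so the ContextID handed back over D-Bus is only valid until deleteContext() releases the entry. A small usage sketch under that assumption:

```cpp
// Usage sketch only; npuContext stands in for whatever DevManager/Backend provides.
npud::core::ContextManager manager;
npud::core::NpuContext *npuContext = nullptr; // e.g. filled by Backend::createContext()

npud::core::ContextID id = 0;
manager.newContext(npuContext, &id);   // id is the address of the new Context entry
// manager.getNpuContext(id) == npuContext while the entry is alive
manager.deleteContext(id);             // entry removed; id must not be reused afterwards
// manager.getNpuContext(id) now returns nullptr
```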
diff --git a/runtime/service/npud/core/Core.cc b/runtime/service/npud/core/Core.cc
new file mode 100644 (file)
index 0000000..6d010ee
--- /dev/null
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Core.h"
+#include "util/Logging.h"
+
+namespace npud
+{
+namespace core
+{
+
+Core::Core() noexcept
+  : _devManager(std::make_unique<DevManager>()), _contextManager(std::make_unique<ContextManager>())
+{
+}
+
+void Core::init() { _devManager->loadModules(); }
+
+void Core::deinit() { _devManager->releaseModules(); }
+
+int Core::getAvailableDeviceList(std::vector<std::string> &list) const { return 0; }
+
+int Core::createContext(int deviceId, int priority, ContextID *contextId) const
+{
+  VERBOSE(Core) << "createContext with " << deviceId << ", " << priority << std::endl;
+  NpuContext *npuContext;
+  int ret = _devManager->createContext(deviceId, priority, &npuContext);
+  if (ret != NPU_STATUS_SUCCESS)
+  {
+    VERBOSE(Core) << "Fail to create dev context" << std::endl;
+    // TODO Define CoreStatus
+    return 1;
+  }
+
+  ContextID _contextId;
+  _contextManager->newContext(npuContext, &_contextId);
+  *contextId = _contextId;
+  return 0;
+}
+
+int Core::destroyContext(ContextID contextId) const
+{
+  VERBOSE(Core) << "destroyContext with " << contextId << std::endl;
+  NpuContext *npuContext = _contextManager->getNpuContext(contextId);
+  if (!npuContext)
+  {
+    VERBOSE(Core) << "Invalid context id" << std::endl;
+    // TODO Define CoreStatus
+    return 1;
+  }
+
+  int ret = _devManager->destroyContext(npuContext);
+  if (ret != NPU_STATUS_SUCCESS)
+  {
+    VERBOSE(Core) << "Failed to destroy npu context: " << ret << std::endl;
+    return 1;
+  }
+
+  _contextManager->deleteContext(contextId);
+  return 0;
+}
+
+int Core::createNetwork(ContextID contextId, const std::string &modelPath, ModelID *modelId) const
+{
+  VERBOSE(Core) << "createNetwork with " << contextId << ", " << modelPath << std::endl;
+  NpuContext *npuContext = _contextManager->getNpuContext(contextId);
+  if (!npuContext)
+  {
+    VERBOSE(Core) << "Invalid context id" << std::endl;
+    // TODO Define CoreStatus
+    return 1;
+  }
+
+  ModelID id;
+  int ret = _devManager->registerModel(npuContext, modelPath, &id);
+  if (ret != NPU_STATUS_SUCCESS)
+  {
+    VERBOSE(Core) << "Failed to register model: " << ret << std::endl;
+    return 1;
+  }
+
+  *modelId = id;
+  return 0;
+}
+
+int Core::destroyNetwork(ContextID contextId, ModelID modelId) const
+{
+  VERBOSE(Core) << "destroyNetwork with " << contextId << std::endl;
+  NpuContext *npuContext = _contextManager->getNpuContext(contextId);
+  if (!npuContext)
+  {
+    VERBOSE(Core) << "Invalid context id" << std::endl;
+    // TODO Define CoreStatus
+    return 1;
+  }
+
+  int ret = _devManager->unregisterModel(npuContext, modelId);
+  if (ret != NPU_STATUS_SUCCESS)
+  {
+    VERBOSE(Core) << "Failed to unregister model: " << ret << std::endl;
+    // TODO Define CoreStatus
+    return 1;
+  }
+
+  return 0;
+}
+
+int Core::createRequest(ContextID contextId, ModelID modelId, RequestID *requestId) const
+{
+  VERBOSE(Core) << "createRequest with " << contextId << ", " << modelId << std::endl;
+  NpuContext *npuContext = _contextManager->getNpuContext(contextId);
+  if (!npuContext)
+  {
+    VERBOSE(Core) << "Invalid context id" << std::endl;
+    // TODO Define CoreStatus
+    return 1;
+  }
+
+  RequestID id;
+  int ret = _devManager->createRequest(npuContext, modelId, &id);
+  if (ret != NPU_STATUS_SUCCESS)
+  {
+    VERBOSE(Core) << "Failed to create request of model: " << ret << std::endl;
+    // TODO Define CoreStatus
+    return 1;
+  }
+
+  *requestId = id;
+  return 0;
+}
+
+int Core::destroyRequest(ContextID contextId, RequestID requestId) const
+{
+  VERBOSE(Core) << "destroyRequest with " << contextId << ", " << requestId << std::endl;
+  NpuContext *npuContext = _contextManager->getNpuContext(contextId);
+  if (!npuContext)
+  {
+    VERBOSE(Core) << "Invalid context id" << std::endl;
+    // TODO Define CoreStatus
+    return 1;
+  }
+
+  int ret = _devManager->destroyRequest(npuContext, requestId);
+  if (ret != NPU_STATUS_SUCCESS)
+  {
+    VERBOSE(Core) << "Failed to destroy request: " << ret << std::endl;
+    // TODO Define CoreStatus
+    return 1;
+  }
+
+  return 0;
+}
+
+} // namespace core
+} // namespace npud
diff --git a/runtime/service/npud/core/Core.h b/runtime/service/npud/core/Core.h
new file mode 100644 (file)
index 0000000..d702fcb
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_CORE_H__
+#define __ONE_SERVICE_NPUD_CORE_CORE_H__
+
+#include "DevManager.h"
+#include "ContextManager.h"
+
+#include <vector>
+#include <string>
+
+namespace npud
+{
+namespace core
+{
+
+// TODO Define error status
+
+class Core
+{
+public:
+  Core() noexcept;
+  ~Core() noexcept = default;
+
+  Core(const Core &) = delete;
+  Core &operator=(const Core &) = delete;
+
+  void init();
+  void deinit();
+
+  int getAvailableDeviceList(std::vector<std::string> &list) const;
+  int createContext(int deviceId, int priority, ContextID *contextId) const;
+  int destroyContext(ContextID contextId) const;
+  int createNetwork(ContextID contextId, const std::string &modelPath, ModelID *modelId) const;
+  int destroyNetwork(ContextID contextId, ModelID modelId) const;
+  int createRequest(ContextID contextId, ModelID modelId, RequestID *requestId) const;
+  int destroyRequest(ContextID contextId, RequestID requestId) const;
+
+private:
+  std::unique_ptr<DevManager> _devManager;
+  std::unique_ptr<ContextManager> _contextManager;
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_CORE_H__
diff --git a/runtime/service/npud/core/DBus.cc b/runtime/service/npud/core/DBus.cc
new file mode 100644 (file)
index 0000000..074ef50
--- /dev/null
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DBus.h"
+#include "Server.h"
+
+#include <atomic>
+#include <util/Logging.h>
+
+namespace npud
+{
+namespace core
+{
+
+std::atomic_bool DBus::_isReady(false);
+
+DBus::DBus() noexcept
+{
+  VERBOSE(DBus) << "Starting dbus service" << std::endl;
+
+  _gdbus_id = g_bus_own_name(G_BUS_TYPE_SYSTEM, "org.tizen.npud", G_BUS_NAME_OWNER_FLAGS_NONE,
+                             (GBusAcquiredCallback)on_bus_acquired,
+                             (GBusNameAcquiredCallback)on_name_acquired,
+                             (GBusNameLostCallback)on_name_lost, NULL, NULL);
+}
+
+DBus::~DBus() noexcept
+{
+  VERBOSE(DBus) << "Stop dbus service" << std::endl;
+
+  g_bus_unown_name(_gdbus_id);
+}
+
+void DBus::on_bus_acquired(GDBusConnection *conn, const gchar *name, gpointer user_data)
+{
+  VERBOSE(DBus) << name << " on bus acquired" << std::endl;
+
+  GError *error = NULL;
+  NpudCore *core = npud_core_skeleton_new();
+  NpudCoreIface *iface = NPUD_CORE_GET_IFACE(core);
+
+  iface->handle_device_get_available_list = &on_handle_device_get_available_list;
+  iface->handle_context_create = &on_handle_context_create;
+  iface->handle_context_destroy = &on_handle_context_destroy;
+  iface->handle_buffers_create = &on_handle_buffers_create;
+  iface->handle_buffers_destroy = &on_handle_buffers_destroy;
+  iface->handle_network_create = &on_handle_network_create;
+  iface->handle_network_destroy = &on_handle_network_destroy;
+  iface->handle_request_create = &on_handle_request_create;
+  iface->handle_request_destroy = &on_handle_request_destroy;
+  iface->handle_request_set_data = &on_handle_request_set_data;
+  iface->handle_execute_run = &on_handle_execute_run;
+
+  if (!g_dbus_interface_skeleton_export(G_DBUS_INTERFACE_SKELETON(core), conn, "/org/tizen/npud",
+                                        &error))
+  {
+    VERBOSE(DBus) << "Failed to export skeleton, Server will stop." << std::endl;
+    Server::instance().stop();
+  }
+
+  _isReady.exchange(true);
+}
+
+void DBus::on_name_acquired(GDBusConnection *conn, const gchar *name, gpointer user_data)
+{
+  VERBOSE(DBus) << name << " on name acquired" << std::endl;
+}
+
+void DBus::on_name_lost(GDBusConnection *conn, const gchar *name, gpointer user_data)
+{
+  VERBOSE(DBus) << name << " on name lost, Server will stop." << std::endl;
+  Server::instance().stop();
+}
+
+gboolean DBus::on_handle_device_get_available_list(NpudCore *object,
+                                                   GDBusMethodInvocation *invocation)
+{
+  VERBOSE(DBus) << __FUNCTION__ << std::endl;
+  std::vector<std::string> list;
+  int error = Server::instance().core().getAvailableDeviceList(list);
+  // TODO Implement variant outputs
+  npud_core_complete_device_get_available_list(object, invocation, error);
+  return TRUE;
+}
+
+gboolean DBus::on_handle_context_create(NpudCore *object, GDBusMethodInvocation *invocation,
+                                        gint arg_device_id, gint arg_priority)
+{
+  VERBOSE(DBus) << "on_handle_context_create with " << arg_device_id << ", " << arg_priority
+                << std::endl;
+
+  guint64 out_ctx = 0;
+  int ret = Server::instance().core().createContext(arg_device_id, arg_priority, &out_ctx);
+  npud_core_complete_context_create(object, invocation, out_ctx, ret);
+  return TRUE;
+}
+
+gboolean DBus::on_handle_context_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+                                         guint64 arg_ctx)
+{
+  VERBOSE(DBus) << "on_handle_context_destroy with " << arg_ctx << std::endl;
+  int ret = Server::instance().core().destroyContext(arg_ctx);
+  npud_core_complete_context_destroy(object, invocation, ret);
+  return TRUE;
+}
+
+gboolean DBus::on_handle_buffers_create(NpudCore *object, GDBusMethodInvocation *invocation,
+                                        guint64 arg_ctx, GVariant *arg_buffers)
+{
+  VERBOSE(DBus) << "on_handle_buffers_create with " << arg_ctx << std::endl;
+  GenericBuffers bufs;
+  GVariantIter *iter = NULL;
+  gint32 type;
+  guint64 addr;
+  guint32 size;
+  int index = 0;
+  g_variant_get(arg_buffers, "a(itu)", &iter);
+  while (iter != NULL && g_variant_iter_loop(iter, "(itu)", &type, &addr, &size))
+  {
+    VERBOSE(DBus) << "in [" << index << "] Type: " << type << ", Addr: " << addr
+                  << ", Size: " << size << std::endl;
+    bufs.buffers[index].type = static_cast<BufferTypes>(type);
+    bufs.buffers[index].addr = reinterpret_cast<void *>(addr);
+    bufs.buffers[index].size = size;
+    index++;
+  }
+  bufs.numBuffers = index;
+  g_variant_iter_free(iter);
+
+  // TODO Invoke Core function.
+  int ret = -1;
+
+  GVariantBuilder *builder = g_variant_builder_new(G_VARIANT_TYPE("a(itu)"));
+
+  // TODO Enable the code below once the core function can update the ret value
+  // if (ret == 0)
+  // {
+  //   for (auto i = 0; i < bufs.numBuffers; ++i)
+  //   {
+  //     VERBOSE(DBus) << "out [" << index << "] Type: " << bufs.buffers[i].type
+  //                   << ", Addr: " << bufs.buffers[i].addr << ", Size: " << bufs.buffers[i].size
+  //                   << std::endl;
+  //     g_variant_builder_add(builder, "(itu)", bufs.buffers[i].type, bufs.buffers[i].addr,
+  //                           bufs.buffers[i].size);
+  //   }
+  // }
+  npud_core_complete_buffers_create(object, invocation, g_variant_builder_end(builder), ret);
+  return TRUE;
+}
+
+gboolean DBus::on_handle_buffers_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+                                         guint64 arg_ctx, GVariant *arg_buffers)
+{
+  VERBOSE(DBus) << "on_handle_buffers_destroy with " << arg_ctx << std::endl;
+  GenericBuffers bufs;
+  GVariantIter *iter = NULL;
+  gint32 type;
+  guint64 addr;
+  guint32 size;
+  int index = 0;
+  g_variant_get(arg_buffers, "a(itu)", &iter);
+  while (iter != NULL && g_variant_iter_loop(iter, "(itu)", &type, &addr, &size))
+  {
+    VERBOSE(DBus) << "[" << index << "] Type: " << type << ", Addr: " << (void *)addr
+                  << ", Size: " << size << std::endl;
+    bufs.buffers[index].type = static_cast<BufferTypes>(type);
+    bufs.buffers[index].addr = reinterpret_cast<void *>(addr);
+    bufs.buffers[index].size = size;
+    index++;
+  }
+  bufs.numBuffers = index;
+  g_variant_iter_free(iter);
+  // TODO Invoke Core function.
+  int ret = -1;
+  npud_core_complete_buffers_destroy(object, invocation, ret);
+  return TRUE;
+}
+
+gboolean DBus::on_handle_network_create(NpudCore *object, GDBusMethodInvocation *invocation,
+                                        guint64 arg_ctx, const gchar *arg_model_path)
+{
+  VERBOSE(DBus) << "on_handle_network_create with " << arg_ctx << ", " << arg_model_path
+                << std::endl;
+  ModelID modelId = 0;
+  int ret = Server::instance().core().createNetwork(arg_ctx, arg_model_path, &modelId);
+  npud_core_complete_network_create(object, invocation, guint(modelId), ret);
+  return TRUE;
+}
+
+gboolean DBus::on_handle_network_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+                                         guint64 arg_ctx, guint arg_nw_handle)
+{
+  VERBOSE(DBus) << "on_handle_network_destroy with " << arg_ctx << ", " << arg_nw_handle
+                << std::endl;
+  int ret = Server::instance().core().destroyNetwork(arg_ctx, arg_nw_handle);
+  npud_core_complete_network_destroy(object, invocation, ret);
+  return TRUE;
+}
+
+gboolean DBus::on_handle_request_create(NpudCore *object, GDBusMethodInvocation *invocation,
+                                        guint64 arg_ctx, guint arg_nw_handle)
+{
+  VERBOSE(DBus) << "on_handle_request_create with " << arg_ctx << ", " << arg_nw_handle
+                << std::endl;
+  RequestID requestID = 0;
+  int ret = Server::instance().core().createRequest(arg_ctx, arg_nw_handle, &requestID);
+  npud_core_complete_request_create(object, invocation, guint(requestID), ret);
+  return TRUE;
+}
+
+gboolean DBus::on_handle_request_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+                                         guint64 arg_ctx, guint arg_rq_handle)
+{
+  VERBOSE(DBus) << "on_handle_request_destroy with " << arg_ctx << ", " << arg_rq_handle
+                << std::endl;
+  int ret = Server::instance().core().destroyRequest(arg_ctx, arg_rq_handle);
+  npud_core_complete_request_destroy(object, invocation, ret);
+  return TRUE;
+}
+
+gboolean DBus::on_handle_request_set_data(NpudCore *object, GDBusMethodInvocation *invocation,
+                                          guint64 arg_ctx, guint arg_rq_handle,
+                                          GVariant *arg_input_buffers, GVariant *arg_output_buffers)
+{
+  VERBOSE(DBus) << "on_handle_request_set_data with " << arg_ctx << ", " << arg_rq_handle
+                << std::endl;
+  GVariantIter *iter = NULL;
+  InputBuffers inBufs;
+  OutputBuffers outBufs;
+  gint32 type;
+  guint64 addr;
+  guint32 size;
+  int index = 0;
+
+  // inBufs
+  g_variant_get(arg_input_buffers, "a(itu)", &iter);
+  index = 0;
+  while (iter != NULL && g_variant_iter_loop(iter, "(itu)", &type, &addr, &size))
+  {
+    VERBOSE(DBus) << "in [" << index << "] Type: " << type << ", Addr: " << (void *)addr
+                  << ", Size: " << size << std::endl;
+    if (type == 0) // NPU_BUFFER_MAPPED
+    {
+      inBufs.buffers[index].addr = reinterpret_cast<void *>(addr);
+    }
+    else if (type == 1) // NPU_BUFFER_DMABUF
+    {
+      // TODO Support dma buffer
+      VERBOSE(DBus) << "[NYI] NPU_BUFFER_DMABUF" << std::endl;
+      continue;
+    }
+    else
+    {
+      VERBOSE(DBus) << "Wrong buffer type. Ignored." << std::endl;
+      continue;
+    }
+    inBufs.buffers[index].size = size;
+    inBufs.buffers[index].type = static_cast<BufferTypes>(type);
+    index++;
+  }
+  inBufs.numBuffers = index;
+  g_variant_iter_free(iter);
+
+  // outBufs
+  g_variant_get(arg_output_buffers, "a(itu)", &iter);
+  index = 0;
+  while (iter != NULL && g_variant_iter_loop(iter, "(itu)", &type, &addr, &size))
+  {
+    VERBOSE(DBus) << "out [" << index << "] Type: " << type << ", Addr: " << (void *)addr
+                  << ", Size: " << size << std::endl;
+    if (type == 0) // NPU_BUFFER_MAPPED
+    {
+      outBufs.buffers[index].addr = reinterpret_cast<void *>(addr);
+    }
+    else if (type == 1) // NPU_BUFFER_DMABUF
+    {
+      // TODO Support dma buffer
+      VERBOSE(DBus) << "[NYI] NPU_BUFFER_DMABUF" << std::endl;
+      continue;
+    }
+    else
+    {
+      VERBOSE(DBus) << "Wrong buffer type. Ignored." << std::endl;
+      continue;
+    }
+    outBufs.buffers[index].size = size;
+    outBufs.buffers[index].type = static_cast<BufferTypes>(type);
+    index++;
+  }
+  outBufs.numBuffers = index;
+  g_variant_iter_free(iter);
+
+  // TODO Invoke Core function.
+  int ret = -1;
+  npud_core_complete_request_set_data(object, invocation, ret);
+  return TRUE;
+}
+
+gboolean DBus::on_handle_execute_run(NpudCore *object, GDBusMethodInvocation *invocation,
+                                     guint64 arg_ctx, guint arg_rq_handle)
+{
+  VERBOSE(DBus) << "on_handle_execute_run with " << arg_ctx << ", " << arg_rq_handle << std::endl;
+  // TODO Invoke Core function.
+  int ret = -1;
+  npud_core_complete_execute_run(object, invocation, ret);
+  return TRUE;
+}
+
+} // namespace core
+} // namespace npud
diff --git a/runtime/service/npud/core/DBus.h b/runtime/service/npud/core/DBus.h
new file mode 100644 (file)
index 0000000..8840f9c
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_DBUS_H__
+#define __ONE_SERVICE_NPUD_CORE_DBUS_H__
+
+#include <dbus-core.h>
+#include <gio/gio.h>
+#include <memory>
+#include <atomic>
+
+namespace npud
+{
+namespace core
+{
+
+class DBus
+{
+public:
+  DBus() noexcept;
+  ~DBus() noexcept;
+
+  DBus(const DBus &) = delete;
+  DBus &operator=(const DBus &) = delete;
+
+  bool isReady() { return _isReady.load(); }
+
+  static void on_bus_acquired(GDBusConnection *conn, const gchar *name, gpointer user_data);
+  static void on_name_acquired(GDBusConnection *conn, const gchar *name, gpointer user_data);
+  static void on_name_lost(GDBusConnection *conn, const gchar *name, gpointer user_data);
+
+  static gboolean on_handle_device_get_available_list(NpudCore *object,
+                                                      GDBusMethodInvocation *invocation);
+  static gboolean on_handle_context_create(NpudCore *object, GDBusMethodInvocation *invocation,
+                                           gint arg_device_id, gint arg_priority);
+  static gboolean on_handle_context_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+                                            guint64 arg_ctx);
+  static gboolean on_handle_buffers_create(NpudCore *object, GDBusMethodInvocation *invocation,
+                                           guint64 arg_ctx, GVariant *arg_buffers);
+  static gboolean on_handle_buffers_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+                                            guint64 arg_ctx, GVariant *arg_buffers);
+  static gboolean on_handle_network_create(NpudCore *object, GDBusMethodInvocation *invocation,
+                                           guint64 arg_ctx, const gchar *arg_model_path);
+  static gboolean on_handle_network_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+                                            guint64 arg_ctx, guint arg_nw_handle);
+  static gboolean on_handle_request_create(NpudCore *object, GDBusMethodInvocation *invocation,
+                                           guint64 arg_ctx, guint arg_nw_handle);
+  static gboolean on_handle_request_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+                                            guint64 arg_ctx, guint arg_rq_handle);
+  static gboolean on_handle_request_set_data(NpudCore *object, GDBusMethodInvocation *invocation,
+                                             guint64 arg_ctx, guint arg_rq_handle,
+                                             GVariant *arg_input_buffers,
+                                             GVariant *arg_output_buffers);
+  static gboolean on_handle_execute_run(NpudCore *object, GDBusMethodInvocation *invocation,
+                                        guint64 arg_ctx, guint arg_rq_handle);
+
+private:
+  guint _gdbus_id;
+  static std::atomic_bool _isReady;
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_DBUS_H__
diff --git a/runtime/service/npud/core/DevManager.cc b/runtime/service/npud/core/DevManager.cc
new file mode 100644 (file)
index 0000000..0765eec
--- /dev/null
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DevManager.h"
+#include "util/Logging.h"
+
+#include <dirent.h>
+
+namespace npud
+{
+namespace core
+{
+
+DevManager::DevManager()
+{
+  auto env = util::getConfigString(util::config::DEVICE_MODULE_PATH);
+  _module_dir = std::move(env);
+}
+
+void DevManager::loadModules()
+{
+  VERBOSE(DevManager) << "load modules from " << _module_dir << std::endl;
+
+  releaseModules();
+
+  DIR *dir;
+  struct dirent *entry;
+
+  // NOTE
+  // opendir and readdir return NULL(0) when an error occurs,
+  // so NULL is used here instead of nullptr.
+  dir = opendir(_module_dir.c_str());
+  if (dir == NULL)
+  {
+    VERBOSE(DevManager) << "Fail to open module directory" << std::endl;
+    return;
+  }
+
+  while ((entry = readdir(dir)) != NULL)
+  {
+    std::string modulePath(entry->d_name);
+    if (modulePath.find("npud_backend") == std::string::npos)
+    {
+      continue;
+    }
+
+    DynamicLoader *loader = nullptr;
+    try
+    {
+      loader = new DynamicLoader(modulePath.c_str());
+    }
+    catch (const std::exception &e)
+    {
+      VERBOSE(DevManager) << e.what() << std::endl;
+      continue;
+    }
+
+    std::unique_ptr<Device> dev = std::make_unique<Device>();
+    dev->modulePath = std::move(modulePath);
+    dev->loader = std::unique_ptr<DynamicLoader>(loader);
+
+    _dev = std::move(dev);
+    break;
+  }
+
+  closedir(dir);
+}
+
+void DevManager::releaseModules()
+{
+  if (_dev)
+  {
+    _dev.reset();
+  }
+}
+
+std::shared_ptr<Backend> DevManager::getBackend()
+{
+  if (!_dev)
+  {
+    throw std::runtime_error("No backend device.");
+  }
+  return _dev->loader->getInstance();
+}
+
+int DevManager::createContext(int deviceId, int priority, NpuContext **npuContext)
+{
+  try
+  {
+    return getBackend()->createContext(deviceId, priority, npuContext);
+  }
+  catch (const std::exception &e)
+  {
+    VERBOSE(DevManager) << e.what() << std::endl;
+    return NPU_STATUS_ERROR_OPERATION_FAILED;
+  }
+}
+
+int DevManager::destroyContext(NpuContext *npuContext)
+{
+  try
+  {
+    return getBackend()->destroyContext(npuContext);
+  }
+  catch (const std::exception &e)
+  {
+    VERBOSE(DevManager) << e.what() << std::endl;
+    return NPU_STATUS_ERROR_OPERATION_FAILED;
+  }
+}
+
+int DevManager::registerModel(NpuContext *npuContext, const std::string &modelPath,
+                              ModelID *modelId)
+{
+  try
+  {
+    return getBackend()->registerModel(npuContext, modelPath, modelId);
+  }
+  catch (const std::exception &e)
+  {
+    VERBOSE(DevManager) << e.what() << std::endl;
+    return NPU_STATUS_ERROR_OPERATION_FAILED;
+  }
+}
+
+int DevManager::unregisterModel(NpuContext *npuContext, ModelID modelId)
+{
+  try
+  {
+    return getBackend()->unregisterModel(npuContext, modelId);
+  }
+  catch (const std::exception &e)
+  {
+    VERBOSE(DevManager) << e.what() << std::endl;
+    return NPU_STATUS_ERROR_OPERATION_FAILED;
+  }
+}
+
+int DevManager::createRequest(NpuContext *npuContext, ModelID modelId, RequestID *requestId)
+{
+  try
+  {
+    return getBackend()->createRequest(npuContext, modelId, requestId);
+  }
+  catch (const std::exception &e)
+  {
+    VERBOSE(DevManager) << e.what() << std::endl;
+    return NPU_STATUS_ERROR_OPERATION_FAILED;
+  }
+}
+
+int DevManager::destroyRequest(NpuContext *npuContext, RequestID requestId)
+{
+  try
+  {
+    return getBackend()->destroyRequest(npuContext, requestId);
+  }
+  catch (const std::exception &e)
+  {
+    VERBOSE(DevManager) << e.what() << std::endl;
+    return NPU_STATUS_ERROR_OPERATION_FAILED;
+  }
+}
+
+} // namespace core
+} // namespace npud
diff --git a/runtime/service/npud/core/DevManager.h b/runtime/service/npud/core/DevManager.h
new file mode 100644 (file)
index 0000000..7fbfe40
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_DEV_MANAGER_H__
+#define __ONE_SERVICE_NPUD_CORE_DEV_MANAGER_H__
+
+#include "DynamicLoader.h"
+
+#include <memory>
+
+namespace npud
+{
+namespace core
+{
+
+struct Device
+{
+  std::string modulePath;
+  std::unique_ptr<DynamicLoader> loader;
+};
+
+class DevManager
+{
+public:
+  DevManager();
+  ~DevManager() = default;
+
+  DevManager(const DevManager &) = delete;
+  DevManager &operator=(const DevManager &) = delete;
+
+  void loadModules();
+  void releaseModules();
+  std::shared_ptr<Backend> getBackend();
+
+  int createContext(int deviceId, int priority, NpuContext **npuContext);
+  int destroyContext(NpuContext *npuContext);
+  int registerModel(NpuContext *npuContext, const std::string &modelPath, ModelID *modelId);
+  int unregisterModel(NpuContext *npuContext, ModelID modelId);
+  int createRequest(NpuContext *npuContext, ModelID modelId, RequestID *requestId);
+  int destroyRequest(NpuContext *npuContext, RequestID requestId);
+
+private:
+  std::unique_ptr<Device> _dev;
+  std::string _module_dir;
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_DEV_MANAGER_H__
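
Editor's note: DevManager wraps at most one dynamically loaded backend module and forwards every operation to it, converting backend exceptions into error codes. A minimal usage sketch, assuming the owner is the Core object (not part of this excerpt) and that a zero return value means success, as elsewhere in the daemon:

  DevManager devManager;
  devManager.loadModules(); // picks the first "npud_backend" module found under DEVICE_MODULE_PATH

  NpuContext *context = nullptr;
  // deviceId and priority values are illustrative only
  if (devManager.createContext(0, 0, &context) != 0)
  {
    // no backend module was loaded, or the backend call failed
    // (NPU_STATUS_ERROR_OPERATION_FAILED)
  }
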
diff --git a/runtime/service/npud/core/DynamicLoader.cc b/runtime/service/npud/core/DynamicLoader.cc
new file mode 100644 (file)
index 0000000..6be2ff9
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DynamicLoader.h"
+
+#include "util/Logging.h"
+
+namespace npud
+{
+namespace core
+{
+
+DynamicLoader::DynamicLoader(const char *file, int flags)
+  : _handle(nullptr), _filepath(file), _allocSymbol("allocate"), _deallocSymbol("deallocate")
+{
+  if (!(_handle = dlopen(_filepath.c_str(), flags)))
+  {
+    VERBOSE(DynamicLoader) << "Fail to load " << _filepath << " module: " << dlerror() << std::endl;
+    throw std::runtime_error("Fail to load " + _filepath + " module");
+  }
+
+  NpuAlloc alloc;
+  NpuDealloc dealloc;
+
+  alloc = reinterpret_cast<NpuAlloc>(dlsym(_handle, _allocSymbol.c_str()));
+  dealloc = reinterpret_cast<NpuDealloc>(dlsym(_handle, _deallocSymbol.c_str()));
+  if (!alloc || !dealloc)
+  {
+    VERBOSE(DynamicLoader) << "Fail to load " << _filepath << " symbol: " << dlerror() << std::endl;
+    dlclose(_handle);
+    throw std::runtime_error("Fail to load " + _filepath + " module");
+  }
+
+  _backend = std::shared_ptr<Backend>(alloc(), [dealloc](Backend *b) { dealloc(b); });
+}
+
+DynamicLoader::~DynamicLoader()
+{
+  // NOTE
+  // The _backend shared_ptr must be explicitly deleted before
+  // the dynamic library handle is released.
+  _backend.reset();
+  dlclose(_handle);
+}
+
+std::shared_ptr<Backend> DynamicLoader::getInstance() { return _backend; }
+
+} // namespace core
+} // namespace npud
diff --git a/runtime/service/npud/core/DynamicLoader.h b/runtime/service/npud/core/DynamicLoader.h
new file mode 100644 (file)
index 0000000..c2f036a
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_DYNAMIC_LOADER_H__
+#define __ONE_SERVICE_NPUD_CORE_DYNAMIC_LOADER_H__
+
+#include "Backend.h"
+
+#include <dlfcn.h>
+#include <string>
+#include <memory>
+
+namespace npud
+{
+namespace core
+{
+
+using DLHandle = void *;
+
+class DynamicLoader
+{
+public:
+  DynamicLoader(const char *file, int flags = RTLD_LAZY);
+  ~DynamicLoader();
+
+  DynamicLoader(const DynamicLoader &) = delete;
+
+  std::shared_ptr<Backend> getInstance();
+
+private:
+  DLHandle _handle;
+  std::string _filepath;
+  std::string _allocSymbol;
+  std::string _deallocSymbol;
+  std::shared_ptr<Backend> _backend;
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_DYNAMIC_LOADER_H__
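
Editor's note: DynamicLoader dlopen()s one backend module, resolves its allocate/deallocate entry points with dlsym, and keeps the resulting Backend in a shared_ptr whose deleter hands the object back to the module before dlclose() runs. A hedged sketch of what such a module is expected to export; FooBackend is a hypothetical implementation of the Backend interface declared in Backend.h, which is not shown in this patch:

  // Hypothetical backend module source, e.g. npud_backend_foo.cc.
  // The symbol names must match DynamicLoader's _allocSymbol / _deallocSymbol.
  extern "C" Backend *allocate() { return new FooBackend; }
  extern "C" void deallocate(Backend *backend) { delete backend; }
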
index 5b15388..751849e 100644 (file)
@@ -15,9 +15,9 @@
  */
 
 #include "Server.h"
+#include "util/Logging.h"
 
 #include <thread>
-#include <util/Logging.h>
 
 namespace npud
 {
@@ -27,19 +27,39 @@ namespace core
 std::atomic_bool Server::_isRunning(false);
 
 Server::Server() noexcept
-  : _mainloop(g_main_loop_new(NULL, FALSE), g_main_loop_unref), _signal(std::make_unique<Signal>())
+  : _mainloop(g_main_loop_new(NULL, FALSE), g_main_loop_unref), _signal(std::make_unique<Signal>()),
+    _core(std::make_unique<Core>()), _dbus(std::make_unique<DBus>())
 {
 }
 
+bool Server::isServiceReady()
+{
+  if (!_isRunning.load())
+  {
+    VERBOSE(Server) << "Server is not started." << std::endl;
+    return false;
+  }
+
+  if (!_dbus->isReady())
+  {
+    VERBOSE(Server) << "DBus service is not ready." << std::endl;
+    return false;
+  }
+
+  return true;
+}
+
 void Server::run(void)
 {
   VERBOSE(Server) << "Starting Server\n";
 
   if (_isRunning.exchange(true))
   {
-    throw std::runtime_error("Mainloop is already running.");
+    return;
   }
 
+  _core->init();
+
   g_main_loop_run(_mainloop.get());
 }
 
@@ -49,7 +69,7 @@ void Server::stop(void)
 
   if (!_isRunning.load())
   {
-    throw std::runtime_error("Mainloop is not running");
+    return;
   }
 
   while (!g_main_loop_is_running(_mainloop.get()))
@@ -57,6 +77,8 @@ void Server::stop(void)
     std::this_thread::yield();
   }
 
+  _core->deinit();
+
   g_main_loop_quit(_mainloop.get());
   _isRunning = false;
 }
index e2f37f8..09f115c 100644 (file)
@@ -18,6 +18,8 @@
 #define __ONE_SERVICE_NPUD_CORE_SERVER_H__
 
 #include "Signal.h"
+#include "Core.h"
+#include "DBus.h"
 
 #include <glib.h>
 #include <memory>
@@ -31,15 +33,23 @@ namespace core
 class Server
 {
 public:
+  Server(const Server &) = delete;
+  Server &operator=(const Server &) = delete;
+
   void run(void);
   void stop(void);
 
+  bool isRunning() { return _isRunning.load(); }
+  bool isServiceReady();
+
   static Server &instance(void)
   {
     static Server server;
     return server;
   }
 
+  const Core &core(void) { return *_core.get(); }
+
 private:
   Server() noexcept;
 
@@ -47,6 +57,8 @@ private:
 
   std::unique_ptr<GMainLoop, void (*)(GMainLoop *)> _mainloop;
   std::unique_ptr<Signal> _signal;
+  std::unique_ptr<Core> _core;
+  std::unique_ptr<DBus> _dbus;
 };
 
 } // namespace core
index 085535a..4b2cc0b 100644 (file)
@@ -15,9 +15,8 @@
  */
 
 #include "Signal.h"
-
 #include "Server.h"
-#include <util/Logging.h>
+#include "util/Logging.h"
 
 #include <csignal>
 
@@ -32,14 +31,12 @@ void Signal::init(void)
 {
   // NOTE Types of signals
   // SIGTERM: termination request, sent to the program
-  // SIGSEGV: invalid memory access (segmentation fault)
   // SIGINT:  external interrupt, usually initiated by the user
   // SIGILL:   invalid program image, such as invalid instruction
   // SIGABRT:  abnormal termination condition, as is e.g. initiated by std::abort()
   // SIGFPE:   erroneous arithmetic operation such as divide by zero
   // from https://en.cppreference.com/w/cpp/utility/program/SIG_types
   std::signal(SIGTERM, handleSignal);
-  std::signal(SIGSEGV, handleSignal);
   std::signal(SIGINT, handleSignal);
   std::signal(SIGILL, handleSignal);
   std::signal(SIGABRT, handleSignal);
diff --git a/runtime/service/npud/core/ir/DataType.h b/runtime/service/npud/core/ir/DataType.h
new file mode 100644 (file)
index 0000000..923088e
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_IR_DATATYPE_H__
+#define __ONE_SERVICE_NPUD_CORE_IR_DATATYPE_H__
+
+#include <cstdlib>
+
+namespace npud
+{
+namespace core
+{
+namespace ir
+{
+
+enum class DataType
+{
+  INT8 = 0,
+  UINT8,
+  QUANT_UINT8_ASYMM,
+  INT16,
+  UINT16,
+  QUANT_INT16_SYMM,
+  INT32,
+  UINT32,
+  FLOAT32,
+  INT64,
+  UINT64,
+  FLOAT64,
+};
+
+} // namespace ir
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_IR_DATATYPE_H__
diff --git a/runtime/service/npud/core/ir/Layout.h b/runtime/service/npud/core/ir/Layout.h
new file mode 100644 (file)
index 0000000..a28e69a
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_IR_LAYOUT_H__
+#define __ONE_SERVICE_NPUD_CORE_IR_LAYOUT_H__
+
+#include <functional>
+#include <stdexcept>
+#include <string>
+
+namespace npud
+{
+namespace core
+{
+namespace ir
+{
+
+enum class Layout
+{
+  UNKNOWN = 0,
+  NHWC,
+  NCHW
+};
+
+} // namespace ir
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_IR_LAYOUT_H__
index bd885b2..642201c 100644 (file)
@@ -15,8 +15,7 @@
  */
 
 #include "Server.h"
-
-#include <util/Logging.h>
+#include "util/Logging.h"
 
 using namespace npud;
 
diff --git a/runtime/service/npud/core/util/Config.lst b/runtime/service/npud/core/util/Config.lst
new file mode 100644 (file)
index 0000000..2fb9993
--- /dev/null
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CONFIG
+#error  Define CONFIG before including this file
+#endif
+
+//     Name                    | Type         | Default
+CONFIG(NPUD_LOG_ENABLE         , bool         , "0")
+CONFIG(DEVICE_MODULE_PATH      , std::string  , "/usr/lib/npud/devices")
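
Editor's note: Config.lst is an X-macro list: it refuses to be included unless CONFIG is already defined, so each consumer defines CONFIG to expand the rows into whatever it needs. A hedged sketch of one typical expansion (the daemon's real consumer in util is not part of this excerpt):

  // Expand each row into an enumerator; the Type and Default columns are ignored here.
  #define CONFIG(Name, Type, Default) Name,
  enum ConfigKey
  {
  #include "Config.lst"
  };
  #undef CONFIG
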
diff --git a/runtime/service/npud/org.tizen.npud.conf b/runtime/service/npud/org.tizen.npud.conf
new file mode 100644 (file)
index 0000000..025ca67
--- /dev/null
@@ -0,0 +1,9 @@
+<!DOCTYPE node PUBLIC "-//freedesktop//DTD D-BUS Object Introspection 1.0//EN"
+  "http://www.freedesktop.org/standards/dbus/1.0/introspect.dtd">
+<busconfig>
+  <policy context="default">
+    <allow own="org.tizen.npud"/>
+    <allow send_destination="org.tizen.npud"/>
+    <allow receive_sender="org.tizen.npud"/>
+  </policy>
+</busconfig>
diff --git a/runtime/service/npud/org.tizen.npud.xml b/runtime/service/npud/org.tizen.npud.xml
new file mode 100644 (file)
index 0000000..09b310a
--- /dev/null
@@ -0,0 +1,162 @@
+<!DOCTYPE node PUBLIC "-//freedesktop//DTD D-BUS Object Introspection 1.0//EN"
+  "http://www.freedesktop.org/standards/dbus/1.0/introspect.dtd">
+<node name="/">
+  <!-- org.tizen.npud.core:
+       @short_description: Npud interface
+
+       The interface used to run AI models on npu devices.
+  -->
+  <interface name="org.tizen.npud.core">
+    <!--
+      device_get_available_list:
+      @error: The error status of the function.
+
+      Get the list of all available npu devices.
+    -->
+    <method name="device_get_available_list">
+           <arg name="error" type="i" direction="out"  />
+    </method>
+    <!--
+      context_create:
+      @device_id: The device number to use.
+      @priority: The device priority.
+      @ctx: The Context handle.
+      @error: The error status of the function.
+
+      Create context.
+    -->
+    <method name="context_create">
+      <arg name="device_id" type="i" direction="in" />
+      <arg name="priority" type="i" direction="in" />
+      <arg name="ctx" type="t" direction="out" />
+      <arg name="error" type="i" direction="out"  />
+    </method>
+    <!--
+      context_destroy:
+      @ctx: The Context handle to destroy.
+      @error: The error status of the function.
+
+      Destroy context.
+    -->
+    <method name="context_destroy">
+           <arg name="ctx" type="t" direction="in"  />
+           <arg name="error" type="i" direction="out"  />
+    </method>
+    <!--
+      buffers_create:
+      @ctx: The Context handle.
+      @buffers: The array of buffer structures. (i:type, t:address, u:size)
+      @out_buffers: The array of buffer structures containing the created buffer addresses.
+      @error: The error status of the function.
+
+      Create buffer array.
+    -->
+    <method name="buffers_create">
+      <arg name="ctx" type="t" direction="in" />
+      <arg name="buffers" type="a(itu)" direction="in" />
+      <arg name="out_buffers" type="a(itu)" direction="out" />
+      <arg name="error" type="i" direction="out" />
+    </method>
+    <!--
+      buffers_destroy:
+      @ctx: The Context handle.
+      @buffers: The array of buffer structures. (i:type, t:address, u:size)
+      @error: The error status of the function.
+
+      Destroy buffer array.
+    -->
+    <method name="buffers_destroy">
+      <arg name="ctx" type="t" direction="in" />
+      <arg name="buffers" type="a(itu)" direction="in" />
+      <arg name="error" type="i" direction="out" />
+    </method>
+    <!--
+      network_create:
+      @ctx: The context handle.
+      @model_path: The model path to run.
+      @nw_handle: The Network handle.
+      @error: The error status of the function.
+
+      Create network.
+
+      TODO Support file descriptor input
+    -->
+    <method name="network_create">
+      <arg name="ctx" type="t" direction="in" />
+      <arg name="model_path" type="s" direction="in"  />
+      <arg name="nw_handle" type="u" direction="out"  />
+      <arg name="error" type="i" direction="out"  />
+    </method>
+    <!--
+      network_destroy:
+      @ctx: The context handle.
+      @nw_handle: The Network handle.
+      @error: The error status of the function.
+
+      Destroy network.
+    -->
+    <method name="network_destroy">
+      <arg name="ctx" type="t" direction="in" />
+      <arg name="nw_handle" type="u" direction="in" />
+      <arg name="error" type="i" direction="out"  />
+    </method>
+    <!--
+      request_create:
+      @ctx: The context handle.
+      @nw_handle: The Network handle.
+      @rq_handle: The Request handle.
+      @error: The error status of the function.
+
+      Create request.
+    -->
+    <method name="request_create">
+      <arg name="ctx" type="t" direction="in" />
+      <arg name="nw_handle" type="u" direction="in" />
+      <arg name="rq_handle" type="u" direction="out" />
+      <arg name="error" type="i" direction="out" />
+    </method>
+    <!--
+      request_destroy:
+      @ctx: The context handle.
+      @rq_handle: The Request handle.
+      @error: The error status of the function.
+
+      Destroy request.
+    -->
+    <method name="request_destroy">
+      <arg name="ctx" type="t" direction="in" />
+      <arg name="rq_handle" type="u" direction="in" />
+      <arg name="error" type="i" direction="out" />
+    </method>
+    <!--
+      request_set_data:
+      @ctx: The context handle.
+      @rq_handle: The Request handle.
+      @input_buffers: The input buffer data.
+      @output_buffers: The output buffer data.
+      @error: The error status of the function.
+
+      Set request data.
+    -->
+    <method name="request_set_data">
+      <arg name="ctx" type="t" direction="in" />
+      <arg name="rq_handle" type="u" direction="in" />
+      <arg name="input_buffers" type="a(itu)" direction="in" />
+      <arg name="output_buffers" type="a(itu)" direction="in" />
+      <arg name="error" type="i" direction="out" />
+    </method>
+    <!--
+      execute_run:
+      @ctx: The context handle.
+      @rq_handle: The Request handle.
+      @error: The error status of the function.
+
+      Execute run.
+    -->
+    <method name="execute_run">
+           <arg name="ctx" type="t" direction="in"  />
+           <arg name="rq_handle" type="u" direction="in"  />
+           <arg name="error" type="i" direction="out"  />
+    </method>
+  </interface>
+</node>
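
Editor's note: this introspection XML presumably serves as the gdbus-codegen input that produces the dbus-core.h header used throughout this patch: the NpudCore skeleton the daemon exports in DBus.cc and the proxy/call helpers the tests below rely on. A hedged client-side sketch against that generated proxy, following the same pattern as the DBus tests:

  GError *error = NULL;
  gint out_error = -1;
  NpudCore *proxy = npud_core_proxy_new_for_bus_sync(G_BUS_TYPE_SYSTEM, G_DBUS_PROXY_FLAGS_NONE,
                                                     "org.tizen.npud", "/org/tizen/npud", NULL, &error);
  if (proxy != NULL)
    npud_core_call_device_get_available_list_sync(proxy, &out_error, NULL, &error);
  // out_error carries the daemon-side status; error reports D-Bus level failures.
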
diff --git a/runtime/service/npud/tests/CMakeLists.txt b/runtime/service/npud/tests/CMakeLists.txt
new file mode 100644 (file)
index 0000000..b97823b
--- /dev/null
@@ -0,0 +1,17 @@
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+file(GLOB_RECURSE TESTS "*.cc")
+
+add_executable(npud_gtest ${TESTS})
+
+set_target_properties(npud_gtest PROPERTIES LINKER_LANGUAGE CXX)
+target_include_directories(npud_gtest PUBLIC ${NPUD_INCLUDE_DIRS})
+target_include_directories(npud_gtest PUBLIC ${GLIB2.0_INCLUDE_DIRS})
+target_link_libraries(npud_gtest PRIVATE ${GLIB2.0_LIBRARIES})
+target_link_libraries(npud_gtest PRIVATE ${LIB_PTHREAD})
+target_link_libraries(npud_gtest PRIVATE npud_core)
+target_link_libraries(npud_gtest PRIVATE gtest_main dl)
+
+install(TARGETS npud_gtest DESTINATION npud-gtest)
diff --git a/runtime/service/npud/tests/core/DBus.test.cc b/runtime/service/npud/tests/core/DBus.test.cc
new file mode 100644 (file)
index 0000000..1c52a23
--- /dev/null
@@ -0,0 +1,608 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <core/Server.h>
+#include <gtest/gtest.h>
+#include <thread>
+#include <gio/gio.h>
+#include <dbus-core.h>
+#include <iostream>
+
+namespace
+{
+using namespace npud;
+using namespace core;
+
+//
+// DBusTest setup/teardown
+//
+class DBusTest : public ::testing::Test
+{
+protected:
+  static void runTask()
+  {
+    auto &server = Server::instance();
+    server.run();
+  }
+
+  void SetUp() override
+  {
+    std::thread child = std::thread(runTask);
+    child.detach();
+    auto &server = Server::instance();
+    while (server.isServiceReady() != true)
+    {
+    }
+  }
+
+  void TearDown() override
+  {
+    auto &server = Server::instance();
+    if (server.isRunning())
+    {
+      server.stop();
+    }
+  }
+
+  NpudCore *getProxy()
+  {
+    GError *error = nullptr;
+    NpudCore *proxy = nullptr;
+    proxy = npud_core_proxy_new_for_bus_sync(G_BUS_TYPE_SYSTEM, G_DBUS_PROXY_FLAGS_NONE,
+                                             "org.tizen.npud", "/org/tizen/npud", NULL, &error);
+    if (error)
+    {
+      g_error_free(error);
+    }
+    return proxy;
+  }
+
+  const std::string &getModel()
+  {
+    if (model.empty())
+    {
+      const char *model_path = std::getenv("GTEST_MODEL_PATH");
+      model = model_path ? model_path + std::string("/mv1.q8/mv1.q8.tvn") : std::string();
+    }
+    if (access(model.c_str(), F_OK) != 0)
+    {
+      model.clear();
+    }
+    return model;
+  }
+
+private:
+  std::string model;
+};
+
+//
+// DBusTest
+//
+TEST_F(DBusTest, get_proxy)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+}
+
+TEST_F(DBusTest, device_get_available_list)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  GError *error = NULL;
+  gint out_error = -1;
+  npud_core_call_device_get_available_list_sync(proxy, &out_error, NULL, &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_EQ(out_error, 0);
+}
+
+TEST_F(DBusTest, context_create)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  GError *error = NULL;
+  gint out_error = -1;
+  gint arg_device_id = 0;
+  gint arg_priority = 0;
+  guint64 out_ctx;
+  npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_EQ(out_error, 0);
+}
+
+TEST_F(DBusTest, context_destroy)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  GError *error = NULL;
+  gint out_error = -1;
+  gint arg_device_id = 0;
+  gint arg_priority = 0;
+  guint64 out_ctx = 0;
+  npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_EQ(out_error, 0);
+
+  npud_core_call_context_destroy_sync(proxy, out_ctx, &out_error, NULL, &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_EQ(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_context_destroy_invalid_ctx)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  GError *error = NULL;
+  gint out_error = -1;
+  guint64 out_ctx = 0;
+  npud_core_call_context_destroy_sync(proxy, out_ctx, &out_error, NULL, &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_NE(out_error, 0);
+}
+
+TEST_F(DBusTest, network_create)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  GError *error = NULL;
+  gint out_error = -1;
+  gint arg_device_id = 0;
+  gint arg_priority = 0;
+  guint64 out_ctx = 0;
+  npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+    error = NULL;
+  }
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  const gchar *model_path = this->getModel().c_str();
+  guint out_nw_handle = 0;
+  npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_EQ(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_network_create_invalid_ctx)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  GError *error = NULL;
+  gint out_error = -1;
+  guint64 out_ctx = -1;
+  const gchar *model_path = this->getModel().c_str();
+  guint out_nw_handle = 0;
+  npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_NE(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_network_create_invalid_model)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  GError *error = NULL;
+  gint out_error = -1;
+  gint arg_device_id = 0;
+  gint arg_priority = 0;
+  guint64 out_ctx = 0;
+  npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+    error = NULL;
+  }
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  // Invalid model
+  const gchar *model_path = "invalid.tvn";
+  guint out_nw_handle = 0;
+  npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_NE(out_error, 0);
+}
+
+TEST_F(DBusTest, network_destroy)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  GError *error = NULL;
+  gint out_error = -1;
+  gint arg_device_id = 0;
+  gint arg_priority = 0;
+  guint64 out_ctx = 0;
+  npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+    error = NULL;
+  }
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  const gchar *model_path = this->getModel().c_str();
+  guint out_nw_handle = 0;
+  npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  npud_core_call_network_destroy_sync(proxy, out_ctx, out_nw_handle, &out_error, NULL, &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_EQ(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_network_destroy_invalid_ctx)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  GError *error = NULL;
+  gint out_error = -1;
+  gint arg_device_id = 0;
+  gint arg_priority = 0;
+  guint64 out_ctx = 0;
+  npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+    error = NULL;
+  }
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  const gchar *model_path = this->getModel().c_str();
+  guint out_nw_handle = 0;
+  npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  // Invalid ctx
+  out_ctx = -1;
+  npud_core_call_network_destroy_sync(proxy, out_ctx, out_nw_handle, &out_error, NULL, &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_NE(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_network_destroy_invalid_nw_handle)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  GError *error = NULL;
+  gint out_error = -1;
+  gint arg_device_id = 0;
+  gint arg_priority = 0;
+  guint64 out_ctx = 0;
+  npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+    error = NULL;
+  }
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  guint out_nw_handle = -1;
+  npud_core_call_network_destroy_sync(proxy, out_ctx, out_nw_handle, &out_error, NULL, &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_NE(out_error, 0);
+}
+
+TEST_F(DBusTest, request_create)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  GError *error = NULL;
+  gint out_error = -1;
+  gint arg_device_id = 0;
+  gint arg_priority = 0;
+  guint64 out_ctx = 0;
+  npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+    error = NULL;
+  }
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  const gchar *model_path = this->getModel().c_str();
+  guint out_nw_handle = 0;
+  npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+    error = NULL;
+  }
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  guint out_rq_handle = 0;
+  npud_core_call_request_create_sync(proxy, out_ctx, out_nw_handle, &out_rq_handle, &out_error,
+                                     NULL, &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_EQ(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_request_create_invalid_ctx)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  GError *error = NULL;
+  gint out_error = -1;
+  gint arg_device_id = 0;
+  gint arg_priority = 0;
+  guint64 out_ctx = 0;
+  npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+    error = NULL;
+  }
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  const gchar *model_path = this->getModel().c_str();
+  guint out_nw_handle = 0;
+  npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+    error = NULL;
+  }
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  guint out_rq_handle = 0;
+  npud_core_call_request_create_sync(proxy, 0, out_nw_handle, &out_rq_handle, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_NE(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_request_create_invalid_nw)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  GError *error = NULL;
+  gint out_error = -1;
+  gint arg_device_id = 0;
+  gint arg_priority = 0;
+  guint64 out_ctx = 0;
+  npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+    error = NULL;
+  }
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  guint out_rq_handle = 0;
+  npud_core_call_request_create_sync(proxy, out_ctx, 0, &out_rq_handle, &out_error, NULL, &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_NE(out_error, 0);
+}
+
+TEST_F(DBusTest, request_destroy)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  GError *error = NULL;
+  gint out_error = -1;
+  gint arg_device_id = 0;
+  gint arg_priority = 0;
+  guint64 out_ctx = 0;
+  npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+    error = NULL;
+  }
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  const gchar *model_path = this->getModel().c_str();
+  guint out_nw_handle = 0;
+  npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+    error = NULL;
+  }
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  guint out_rq_handle = 0;
+  npud_core_call_request_create_sync(proxy, out_ctx, out_nw_handle, &out_rq_handle, &out_error,
+                                     NULL, &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  npud_core_call_request_destroy_sync(proxy, out_ctx, out_rq_handle, &out_error, NULL, &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_EQ(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_request_destroy_invalid_ctx)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  GError *error = NULL;
+  gint out_error = -1;
+  gint arg_device_id = 0;
+  gint arg_priority = 0;
+  guint64 out_ctx = 0;
+  npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+    error = NULL;
+  }
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  const gchar *model_path = this->getModel().c_str();
+  guint out_nw_handle = 0;
+  npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+    error = NULL;
+  }
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  guint out_rq_handle = 0;
+  npud_core_call_request_create_sync(proxy, out_ctx, out_nw_handle, &out_rq_handle, &out_error,
+                                     NULL, &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  npud_core_call_request_destroy_sync(proxy, 0, out_rq_handle, &out_error, NULL, &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_NE(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_request_destroy_invalid_rq)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  GError *error = NULL;
+  gint out_error = -1;
+  gint arg_device_id = 0;
+  gint arg_priority = 0;
+  guint64 out_ctx = 0;
+  npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+                                     &error);
+  if (error)
+  {
+    g_error_free(error);
+    error = NULL;
+  }
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  npud_core_call_request_destroy_sync(proxy, out_ctx, 0, &out_error, NULL, &error);
+  if (error)
+  {
+    g_error_free(error);
+  }
+  ASSERT_NE(out_error, 0);
+}
+
+} // unnamed namespace
diff --git a/runtime/service/npud/tests/core/Server.test.cc b/runtime/service/npud/tests/core/Server.test.cc
new file mode 100644 (file)
index 0000000..f24a4e9
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/Server.h"
+
+#include <gtest/gtest.h>
+#include <thread>
+
+namespace
+{
+using namespace npud;
+using namespace core;
+
+//
+// ServerTest setup/teardown
+//
+class ServerTest : public ::testing::Test
+{
+protected:
+  static void runTask()
+  {
+    auto &server = Server::instance();
+    server.run();
+  }
+
+  void SetUp() override
+  {
+    std::thread child = std::thread(runTask);
+    child.detach();
+    auto &server = Server::instance();
+    while (server.isRunning() != true)
+    {
+    }
+  }
+
+  void TearDown() override
+  {
+    auto &server = Server::instance();
+    if (server.isRunning())
+    {
+      server.stop();
+    }
+  }
+};
+
+//
+// ServerTest
+//
+TEST_F(ServerTest, run)
+{
+  auto &server = Server::instance();
+  ASSERT_EQ(server.isRunning(), true);
+}
+
+TEST_F(ServerTest, stop)
+{
+  auto &server = Server::instance();
+  server.stop();
+  ASSERT_EQ(server.isRunning(), false);
+}
+
+} // unnamed namespace
diff --git a/runtime/service/npud/tests/core/Signal.test.cc b/runtime/service/npud/tests/core/Signal.test.cc
new file mode 100644 (file)
index 0000000..459e7d4
--- /dev/null
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/Server.h"
+#include "core/Signal.h"
+
+#include <gtest/gtest.h>
+#include <thread>
+#include <csignal>
+
+namespace
+{
+using namespace npud;
+using namespace core;
+
+//
+// SignalTest setup/teardown
+//
+class SignalTest : public ::testing::Test
+{
+protected:
+  static void runTask()
+  {
+    auto &server = Server::instance();
+    server.run();
+  }
+
+  void SetUp() override
+  {
+    std::thread child = std::thread(runTask);
+    child.detach();
+    auto &server = Server::instance();
+    while (server.isRunning() != true)
+    {
+    }
+  }
+
+  void TearDown() override
+  {
+    auto &server = Server::instance();
+    if (server.isRunning())
+    {
+      server.stop();
+    }
+  }
+};
+
+//
+// SignalTest
+//
+TEST_F(SignalTest, raise_SIGTERM)
+{
+  auto &server = Server::instance();
+  ASSERT_EQ(server.isRunning(), true);
+  std::raise(SIGTERM);
+  ASSERT_EQ(server.isRunning(), false);
+}
+
+TEST_F(SignalTest, raise_SIGINT)
+{
+  auto &server = Server::instance();
+  ASSERT_EQ(server.isRunning(), true);
+  std::raise(SIGINT);
+  ASSERT_EQ(server.isRunning(), false);
+}
+
+TEST_F(SignalTest, raise_SIGILL)
+{
+  auto &server = Server::instance();
+  ASSERT_EQ(server.isRunning(), true);
+  std::raise(SIGILL);
+  ASSERT_EQ(server.isRunning(), false);
+}
+
+TEST_F(SignalTest, raise_SIGABRT)
+{
+  auto &server = Server::instance();
+  ASSERT_EQ(server.isRunning(), true);
+  std::raise(SIGABRT);
+  ASSERT_EQ(server.isRunning(), false);
+}
+
+TEST_F(SignalTest, raise_SIGFPE)
+{
+  auto &server = Server::instance();
+  ASSERT_EQ(server.isRunning(), true);
+  std::raise(SIGFPE);
+  ASSERT_EQ(server.isRunning(), false);
+}
+
+} // unnamed namespace
diff --git a/runtime/service/npud/util/Config.lst b/runtime/service/npud/util/Config.lst
deleted file mode 100644 (file)
index d45b373..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef CONFIG
-#error  Define CONFIG before including this file
-#endif
-
-//     Name                    | Type         | Default
-CONFIG(NPUD_LOG_ENABLE         , bool         , "0")
index c1fa308..80c3cce 100644 (file)
@@ -7,13 +7,10 @@ if (NOT BUILD_ONERT)
   return()
 endif(NOT BUILD_ONERT)
 
-if (ANDROID_BOOST_ROOT)
-  set(BOOST_ROOT ${ANDROID_BOOST_ROOT})
-endif (ANDROID_BOOST_ROOT)
-
-nnfw_find_package(Boost REQUIRED)
 nnfw_find_package(GTest)
 
+# NNAPI gtest requires c++17
+set(CMAKE_CXX_STANDARD 17)
 
 set(GENERATED_CPPS "${CMAKE_CURRENT_SOURCE_DIR}/src/generated/all_generated_V1_2_cts_tests.cpp"
                    "${CMAKE_CURRENT_SOURCE_DIR}/src/generated/all_generated_V1_1_cts_tests.cpp"
@@ -51,7 +48,6 @@ endif(GENERATE_RUNTIME_NNAPI_TESTS)
 set(RUNTIME_NNAPI_TEST_SRC_INC ${CMAKE_CURRENT_SOURCE_DIR}/include
                                ${CMAKE_CURRENT_SOURCE_DIR}/src)
 target_include_directories(${RUNTIME_NNAPI_TEST} PRIVATE ${RUNTIME_NNAPI_TEST_SRC_INC})
-target_include_directories(${RUNTIME_NNAPI_TEST} PRIVATE ${Boost_INCLUDE_DIRS})
 
 # Define NNTEST_ONLY_PUBLIC_API to avoid android dependency
 target_compile_definitions(${RUNTIME_NNAPI_TEST} PRIVATE NNTEST_ONLY_PUBLIC_API)
@@ -60,17 +56,15 @@ target_link_libraries(${RUNTIME_NNAPI_TEST} nnfw_lib_nnapi)
 target_link_libraries(${RUNTIME_NNAPI_TEST} gtest gmock)
 target_link_libraries(${RUNTIME_NNAPI_TEST} ${LIB_PTHREAD} dl)
 
-install(TARGETS ${RUNTIME_NNAPI_TEST} DESTINATION unittest)
+install(TARGETS ${RUNTIME_NNAPI_TEST} DESTINATION nnapi-gtest)
 
 # Default test backend: cpu
 set(SKIPLIST_FILE_NAME ${RUNTIME_NNAPI_TEST}.skip.${TARGET_PLATFORM}.cpu)
 install(FILES ${SKIPLIST_FILE_NAME}
-        DESTINATION unittest
+        DESTINATION nnapi-gtest
         RENAME ${RUNTIME_NNAPI_TEST}.skip
         OPTIONAL)
 
 # Install skiplist file for target as backup
 FILE(GLOB SKIPLIST_TARGET ${CMAKE_CURRENT_SOURCE_DIR}/${RUNTIME_NNAPI_TEST}.skip.${TARGET_PLATFORM}*)
-FILE(GLOB SKIPLIST_NOARCH ${CMAKE_CURRENT_SOURCE_DIR}/${RUNTIME_NNAPI_TEST}.skip.noarch.*)
-list(APPEND SKIPLIST_ALL_RUNTIME ${SKIPLIST_TARGET} ${SKIPLIST_NOARCH})
-install(FILES ${SKIPLIST_ALL_RUNTIME} DESTINATION unittest OPTIONAL)
+install(FILES ${SKIPLIST_TARGET} DESTINATION nnapi-gtest OPTIONAL)
index af19008..724d4cd 100644 (file)
@@ -27,9 +27,7 @@
 #include "NeuralNetworksExShim.h"
 
 #include <math.h>
-// Fix for onert: use boost::optional instead of std::optional
-// TODO in onert: introduce and use internal optional library
-#include <boost/optional.hpp>
+#include <optional>
 #include <string>
 #include <vector>
 
@@ -104,10 +102,7 @@ struct SymmPerChannelQuantParams {
 struct OperandType {
     ANeuralNetworksOperandType operandType;
     std::vector<uint32_t> dimensions;
-    // Fix for onert:
-    //  Use boost::optional instead of std::optional
-    //  Default value: std::nullopt -> boost::none
-    boost::optional<SymmPerChannelQuantParams> channelQuant;
+    std::optional<SymmPerChannelQuantParams> channelQuant;
 
     OperandType(const OperandType& other)
         : operandType(other.operandType),
@@ -127,7 +122,7 @@ struct OperandType {
     }
 
     OperandType(Type type, std::vector<uint32_t> d, float scale = 0.0f, int32_t zeroPoint = 0)
-        : dimensions(std::move(d)), channelQuant(boost::none) {
+        : dimensions(std::move(d)), channelQuant(std::nullopt) {
         operandType = {
                 .type = static_cast<int32_t>(type),
                 .dimensionCount = static_cast<uint32_t>(dimensions.size()),
diff --git a/tests/nnapi/nnapi_gtest.skip.noarch.interp b/tests/nnapi/nnapi_gtest.skip.noarch.interp
deleted file mode 100644 (file)
index e0ed8d7..0000000
+++ /dev/null
@@ -1,743 +0,0 @@
-GeneratedTests.abs_
-GeneratedTests.abs_1D_float_nnfw
-GeneratedTests.abs_2D_float_nnfw
-GeneratedTests.abs_3D_float_nnfw
-GeneratedTests.abs_4D_float_nnfw
-GeneratedTests.abs_dynamic_nnfw
-GeneratedTests.add_broadcast_quant8
-GeneratedTests.add_dynamic_nnfw
-GeneratedTests.add_quant8
-GeneratedTests.argmax_1
-GeneratedTests.argmax_1_quant8
-GeneratedTests.argmax_2
-GeneratedTests.argmax_2_quant8
-GeneratedTests.argmax_3
-GeneratedTests.argmax_3_axis_as_input_nnfw
-GeneratedTests.argmax_3_axis_as_input_nnfw_quant8
-GeneratedTests.argmax_3_quant8
-GeneratedTests.argmax_dynamic_nnfw
-GeneratedTests.argmax_float_1_nnfw
-GeneratedTests.argmax_float_2_nnfw
-GeneratedTests.argmax_int32_nnfw
-GeneratedTests.argmax_neg_axis_float_nnfw
-GeneratedTests.argmax_neg_axis_int32_nnfw
-GeneratedTests.argmax_quant8_neg_axis_nnfw
-GeneratedTests.argmax_quant8_nnfw
-GeneratedTests.argmin_1
-GeneratedTests.argmin_1_quant8
-GeneratedTests.argmin_2
-GeneratedTests.argmin_2_quant8
-GeneratedTests.argmin_3
-GeneratedTests.argmin_3_quant8
-GeneratedTests.avg_pool_quant8_1
-GeneratedTests.avg_pool_quant8_2
-GeneratedTests.avg_pool_quant8_3
-GeneratedTests.avg_pool_quant8_4
-GeneratedTests.avg_pool_quant8_5
-GeneratedTests.batch_matmul_ex_dynamic_nnfw
-GeneratedTests.batch_matmul_ex_float_adj_x
-GeneratedTests.batch_matmul_ex_float_adj_y
-GeneratedTests.batch_matmul_ex_float_batch2
-GeneratedTests.batch_matmul_ex_float_broadcast
-GeneratedTests.batch_matmul_ex_float_broadcast2_adj_xy
-GeneratedTests.batch_matmul_ex_float_broadcast_adj_x
-GeneratedTests.batch_matmul_ex_float_simple
-GeneratedTests.batch_to_space
-GeneratedTests.batch_to_space_float_1
-GeneratedTests.batch_to_space_quant8_1
-GeneratedTests.broadcast_to_ex_1D_nnfw
-GeneratedTests.broadcast_to_ex_2D_nnfw
-GeneratedTests.broadcast_to_ex_dynamic_2D_nnfw
-GeneratedTests.broadcast_to_ex_dynamic_3D_nnfw
-GeneratedTests.cast_dynamic_float32_to_int32_nnfw
-GeneratedTests.cast_float16_to_float16
-GeneratedTests.cast_float16_to_float32
-GeneratedTests.cast_float16_to_float32_relaxed
-GeneratedTests.cast_float16_to_int32
-GeneratedTests.cast_float16_to_quant8
-GeneratedTests.cast_float16_to_quant8_overflow
-GeneratedTests.cast_float32_to_float16
-GeneratedTests.cast_float32_to_float16_relaxed
-GeneratedTests.cast_float32_to_float32
-GeneratedTests.cast_float32_to_float32_relaxed
-GeneratedTests.cast_float32_to_int32
-GeneratedTests.cast_float32_to_int32_nnfw
-GeneratedTests.cast_float32_to_int32_relaxed
-GeneratedTests.cast_float32_to_quant8
-GeneratedTests.cast_float32_to_quant8_overflow
-GeneratedTests.cast_float32_to_quant8_overflow_relaxed
-GeneratedTests.cast_float32_to_quant8_relaxed
-GeneratedTests.cast_int32_to_float16
-GeneratedTests.cast_int32_to_float32
-GeneratedTests.cast_int32_to_float32_nnfw
-GeneratedTests.cast_int32_to_float32_relaxed
-GeneratedTests.cast_int32_to_int32
-GeneratedTests.cast_int32_to_quant8
-GeneratedTests.cast_int32_to_quant8_overflow
-GeneratedTests.cast_quant8_to_float16
-GeneratedTests.cast_quant8_to_float32
-GeneratedTests.cast_quant8_to_float32_relaxed
-GeneratedTests.cast_quant8_to_int32
-GeneratedTests.cast_quant8_to_quant8
-GeneratedTests.concat_dynamic_nnfw
-GeneratedTests.concat_quant8_1
-GeneratedTests.concat_quant8_2
-GeneratedTests.concat_quant8_3
-GeneratedTests.conv_dynamic_nnfw
-GeneratedTests.conv_quant8
-GeneratedTests.conv_quant8_2
-GeneratedTests.conv_quant8_channels
-GeneratedTests.conv_quant8_channels_weights_as_inputs
-GeneratedTests.conv_quant8_large
-GeneratedTests.conv_quant8_large_weights_as_inputs
-GeneratedTests.conv_quant8_overflow
-GeneratedTests.conv_quant8_overflow_weights_as_inputs
-GeneratedTests.conv_quant8_weights_as_inputs
-GeneratedTests.conv2d_dilation_nnfw
-GeneratedTests.conv2d_dilation_nnfw_quant8
-GeneratedTests.conv2d_dilation_nnfw_weight_as_input
-GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
-GeneratedTests.conv2d_dilation_nnfw_2
-GeneratedTests.conv2d_dilation_nnfw_quant8_2
-GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
-GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
-GeneratedTests.cos_ex_1D_float_nnfw
-GeneratedTests.cos_ex_4D_float_nnfw
-GeneratedTests.cos_ex_dynamic_nnfw
-GeneratedTests.depth_to_space_float_1
-GeneratedTests.depth_to_space_float_2
-GeneratedTests.depth_to_space_float_3
-GeneratedTests.depth_to_space_quant8_1
-GeneratedTests.depth_to_space_quant8_2
-GeneratedTests.depthwise_conv2d_quant8
-GeneratedTests.depthwise_conv2d_quant8_2
-GeneratedTests.depthwise_conv2d_quant8_large
-GeneratedTests.depthwise_conv2d_quant8_large_weights_as_inputs
-GeneratedTests.depthwise_conv2d_quant8_weights_as_inputs
-GeneratedTests.dequantize
-GeneratedTests.dequantize_v1_2_1d_quant8_asymm
-GeneratedTests.dequantize_v1_2_2d_quant8_asymm
-GeneratedTests.dequantize_v1_2_3d_quant8_symm
-GeneratedTests.dequantize_v1_2_4d_quant8_symm
-GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim
-GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim
-GeneratedTests.dequantize_v1_2
-GeneratedTests.dequantize_v1_2_zero_sized
-GeneratedTests.dequantize_v1_2_zero_sized_float16
-GeneratedTests.div_
-GeneratedTests.div_broadcast_float
-GeneratedTests.div_broadcast_float_4D_2D_nnfw
-GeneratedTests.div_dynamic_nnfw
-GeneratedTests.einsum_ex_float_matmul_2x2_2
-GeneratedTests.einsum_ex_float_matmul_3x2_3
-GeneratedTests.einsum_ex_float_matmul_3x3_4
-GeneratedTests.einsum_ex_float_matmul_4x4_4
-GeneratedTests.einsum_ex_float_matmul_4x4_4_2
-GeneratedTests.embedding_lookup
-GeneratedTests.embedding_lookup_2d_nnfw
-GeneratedTests.embedding_lookup_4d_nnfw
-GeneratedTests.equal_1D_float_nnfw
-GeneratedTests.equal_4D_float_nnfw
-GeneratedTests.equal_boolean
-GeneratedTests.equal_broadcast
-GeneratedTests.equal_broadcast_4D_2D_float_nnfw
-GeneratedTests.equal_broadcast_float_nnfw
-GeneratedTests.equal_broadcast_quant8_nnfw
-GeneratedTests.equal_dynamic_float_nnfw
-GeneratedTests.equal_quant8_nnfw
-GeneratedTests.equal_quantized_different_scale
-GeneratedTests.equal_quantized_different_zero_point
-GeneratedTests.equal_quantized_overflow_first_input_if_requantized
-GeneratedTests.equal_quantized_overflow_second_input_if_requantized
-GeneratedTests.equal_simple
-GeneratedTests.exp_
-GeneratedTests.exp_1D_float_nnfw
-GeneratedTests.exp_2D_float_nnfw
-GeneratedTests.exp_3D_float_nnfw
-GeneratedTests.exp_4D_float_nnfw
-GeneratedTests.exp_dynamic_nnfw
-GeneratedTests.expand_dims
-GeneratedTests.expand_dims_2
-GeneratedTests.expand_dims_3
-GeneratedTests.expand_dims_4
-GeneratedTests.expand_dims_dynamic_nnfw_1
-GeneratedTests.expand_dims_dynamic_nnfw_2
-GeneratedTests.expand_dims_int32
-GeneratedTests.expand_dims_int32_2
-GeneratedTests.expand_dims_int32_3
-GeneratedTests.expand_dims_int32_4
-GeneratedTests.expand_dims_quant8
-GeneratedTests.expand_dims_quant8_2
-GeneratedTests.expand_dims_quant8_3
-GeneratedTests.expand_dims_quant8_4
-GeneratedTests.fill_ex_1D_float
-GeneratedTests.fill_ex_4D_float
-GeneratedTests.fill_ex_dynamic_nnfw
-GeneratedTests.floor_
-GeneratedTests.fully_connected_dynamic_nnfw
-GeneratedTests.fully_connected_hybrid_1_nnfw
-GeneratedTests.fully_connected_hybrid_2_nnfw
-GeneratedTests.fully_connected_quant8
-GeneratedTests.fully_connected_quant8_2
-GeneratedTests.fully_connected_quant8_large
-GeneratedTests.fully_connected_quant8_large_weights_as_inputs
-GeneratedTests.fully_connected_quant8_weights_as_inputs
-GeneratedTests.fusedbatchnorm_ex_dynamic_nnfw
-GeneratedTests.fusedbatchnorm_ex_float_fusedbatchnorm_1141
-GeneratedTests.gather_dynamic_nnfw
-GeneratedTests.gather_float16
-GeneratedTests.gather_float16_2
-GeneratedTests.gather_float16_3
-GeneratedTests.gather_float16_4
-GeneratedTests.gather_float16_5
-GeneratedTests.gather_float16_6
-GeneratedTests.gather_float16_7
-GeneratedTests.gather_float16_8
-GeneratedTests.greater_broadcast_quant8_nnfw
-GeneratedTests.greater_dynamic_float_nnfw
-GeneratedTests.greater_equal_boolean
-GeneratedTests.greater_equal_broadcast
-GeneratedTests.greater_equal_broadcast_quant8_nnfw
-GeneratedTests.greater_equal_dynamic_float_nnfw
-GeneratedTests.greater_equal_nnfw
-GeneratedTests.greater_equal_quant8_nnfw
-GeneratedTests.greater_equal_quantized_different_scale
-GeneratedTests.greater_equal_quantized_different_zero_point
-GeneratedTests.greater_equal_quantized_overflow_first_input_if_requantized
-GeneratedTests.greater_equal_quantized_overflow_second_input_if_requantized
-GeneratedTests.greater_equal_simple
-GeneratedTests.greater_quant8_nnfw
-GeneratedTests.hashtable_lookup_float
-GeneratedTests.hashtable_lookup_float_4D_nnfw
-GeneratedTests.hashtable_lookup_quant8
-GeneratedTests.l2_normalization
-GeneratedTests.l2_normalization_2
-GeneratedTests.l2_normalization_large
-GeneratedTests.l2_normalization_quant8_nnfw
-GeneratedTests.l2_pool_float
-GeneratedTests.l2_pool_float_2
-GeneratedTests.l2_pool_float_large
-GeneratedTests.less_boolean
-GeneratedTests.less_broadcast
-GeneratedTests.less_broadcast_quant8_nnfw
-GeneratedTests.less_dynamic_float_nnfw
-GeneratedTests.less_equal_broadcast_quant8_nnfw
-GeneratedTests.less_equal_dynamic_float_nnfw
-GeneratedTests.less_equal_quant8_nnfw
-GeneratedTests.less_nnfw
-GeneratedTests.less_quant8_nnfw
-GeneratedTests.less_quantized_different_scale
-GeneratedTests.less_quantized_different_zero_point
-GeneratedTests.less_quantized_overflow_first_input_if_requantized
-GeneratedTests.less_quantized_overflow_second_input_if_requantized
-GeneratedTests.less_simple
-GeneratedTests.local_response_norm_float_1
-GeneratedTests.local_response_norm_float_2
-GeneratedTests.local_response_norm_float_3
-GeneratedTests.local_response_norm_float_4
-GeneratedTests.log_4D_float_nnfw
-GeneratedTests.log_dynamic_nnfw
-GeneratedTests.log_softmax_nnfw
-GeneratedTests.log_softmax_nnfw_2
-GeneratedTests.log_softmax_nnfw_3
-GeneratedTests.log_softmax_nnfw_4
-GeneratedTests.log_softmax_nnfw_5
-GeneratedTests.log_softmax_nnfw_quant8
-GeneratedTests.logical_and_1D_nnfw
-GeneratedTests.logical_and_2D_nnfw
-GeneratedTests.logical_and_3D_nnfw
-GeneratedTests.logical_and_4D_nnfw
-GeneratedTests.logical_and_broadcast
-GeneratedTests.logical_and_broadcast_4D_2D_nnfw
-GeneratedTests.logical_and_broadcast_nnfw
-GeneratedTests.logical_and_simple
-GeneratedTests.logical_not
-GeneratedTests.logical_not_1D_nnfw
-GeneratedTests.logical_not_4D_nnfw
-GeneratedTests.logical_not_dynamic_nnfw
-GeneratedTests.logical_or_1D_nnfw
-GeneratedTests.logical_or_2D_nnfw
-GeneratedTests.logical_or_3D_nnfw
-GeneratedTests.logical_or_4D_nnfw
-GeneratedTests.logical_or_broadcast
-GeneratedTests.logical_or_broadcast_4D_2D_nnfw
-GeneratedTests.logical_or_broadcast_nnfw
-GeneratedTests.logical_or_dynamic_nnfw
-GeneratedTests.logical_or_simple
-GeneratedTests.logistic_dynamic_nnfw
-GeneratedTests.logistic_quant8_1
-GeneratedTests.logistic_quant8_2
-GeneratedTests.lsh_projection
-GeneratedTests.lsh_projection_2
-GeneratedTests.lsh_projection_weights_as_inputs
-GeneratedTests.lstm
-GeneratedTests.lstm2
-GeneratedTests.lstm2_state
-GeneratedTests.lstm2_state2
-GeneratedTests.lstm3
-GeneratedTests.lstm3_state
-GeneratedTests.lstm3_state2
-GeneratedTests.lstm3_state3
-GeneratedTests.lstm_state
-GeneratedTests.lstm_state2
-GeneratedTests.matrix_band_part_ex_4D_float
-GeneratedTests.matrix_band_part_ex_dynamic_nnfw
-GeneratedTests.max_pool_quant8_1
-GeneratedTests.max_pool_quant8_2
-GeneratedTests.max_pool_quant8_3
-GeneratedTests.max_pool_quant8_4
-GeneratedTests.maximum_broadcast
-GeneratedTests.maximum_broadcast_quant8
-GeneratedTests.maximum_dynamic_nnfw
-GeneratedTests.maximum_overflow
-GeneratedTests.maximum_quant8_nnfw
-GeneratedTests.maximum_simple
-GeneratedTests.maximum_simple_quant8
-GeneratedTests.mean
-GeneratedTests.mean_4D_float_reducing_C_nnfw
-GeneratedTests.mean_4D_float_reducing_HW_nnfw
-GeneratedTests.mean_axis01_1_nnfw
-GeneratedTests.mean_axis01_2_nnfw
-GeneratedTests.mean_float_1
-GeneratedTests.mean_float_2
-GeneratedTests.mean_quant8_1
-GeneratedTests.mean_quant8_2
-GeneratedTests.minimum_broadcast
-GeneratedTests.minimum_broadcast_quant8
-GeneratedTests.minimum_dynamic_nnfw
-GeneratedTests.minimum_overflow
-GeneratedTests.minimum_quant8_nnfw
-GeneratedTests.minimum_simple
-GeneratedTests.minimum_simple_quant8
-GeneratedTests.minimum_int32
-GeneratedTests.mul_broadcast_quant8
-GeneratedTests.mul_dynamic_nnfw
-GeneratedTests.mul_quant8
-GeneratedTests.neg
-GeneratedTests.neg_1D_float_nnfw
-GeneratedTests.neg_2D_float_nnfw
-GeneratedTests.neg_3D_float_nnfw
-GeneratedTests.neg_3D_int_nnfw
-GeneratedTests.neg_4D_float_nnfw
-GeneratedTests.neg_4D_int_nnfw
-GeneratedTests.neg_dynamic_nnfw
-GeneratedTests.not_equal_boolean
-GeneratedTests.not_equal_broadcast
-GeneratedTests.not_equal_broadcast_4D_2D_float_nnfw
-GeneratedTests.not_equal_broadcast_float_nnfw
-GeneratedTests.not_equal_broadcast_quant8_nnfw
-GeneratedTests.not_equal_dynamic_float_nnfw
-GeneratedTests.not_equal_float_nnfw
-GeneratedTests.not_equal_quant8_nnfw
-GeneratedTests.not_equal_quantized_different_scale
-GeneratedTests.not_equal_quantized_different_zero_point
-GeneratedTests.not_equal_quantized_overflow_first_input_if_requantized
-GeneratedTests.not_equal_quantized_overflow_second_input_if_requantized
-GeneratedTests.not_equal_simple
-GeneratedTests.one_hot_ex_dynamic_nnfw
-GeneratedTests.one_hot_ex_float_1_nnfw
-GeneratedTests.one_hot_ex_float_2_nnfw
-GeneratedTests.one_hot_ex_float_off_value_constant_zero_nnfw
-GeneratedTests.pack_ex_2D_float_1
-GeneratedTests.pack_ex_2D_float_2
-GeneratedTests.pack_ex_2D_int_1
-GeneratedTests.pack_ex_2D_int_2
-GeneratedTests.pack_ex_dynamic_nnfw
-GeneratedTests.pad_dynamic_nnfw
-GeneratedTests.pad_quant8_nnfw
-GeneratedTests.pad_v2_1_float
-GeneratedTests.pad_v2_1_quant8
-GeneratedTests.pad_v2_all_dims
-GeneratedTests.pad_v2_all_dims_quant8
-GeneratedTests.pad_v2_low_rank
-GeneratedTests.pad_v2_low_rank_quant8
-GeneratedTests.pow_2D_float_nnfw
-GeneratedTests.pow_broadcast_float_nnfw
-GeneratedTests.pow_broadcast_float_nnfw_2
-GeneratedTests.pow_broadcast_float_nnfw_3
-GeneratedTests.pow_dynamic_nnfw
-GeneratedTests.prelu
-GeneratedTests.prelu_broadcast_float_1_nnfw
-GeneratedTests.prelu_broadcast_quant8_1_nnfw
-GeneratedTests.prelu_float_1_nnfw
-GeneratedTests.prelu_quant8
-GeneratedTests.prelu_quant8_1_nnfw
-GeneratedTests.prelu_quant8_2
-GeneratedTests.prelu_quant8_3
-GeneratedTests.prelu_quant8_4
-GeneratedTests.prelu_weight_as_input
-GeneratedTests.prelu_weight_as_input_quant8
-GeneratedTests.prelu_weight_as_input_quant8_2
-GeneratedTests.prelu_weight_as_input_quant8_3
-GeneratedTests.prelu_weight_as_input_quant8_4
-GeneratedTests.quantize_quant8
-GeneratedTests.quantize_quant8_2
-GeneratedTests.quantize_quant8_3
-GeneratedTests.quantize_quant8_4
-GeneratedTests.quantize_quant8_5
-GeneratedTests.quantize_quant8_6
-GeneratedTests.quantize_quant8_7
-GeneratedTests.quantize_quant8_8
-GeneratedTests.quantize_zero_sized
-GeneratedTests.range_ex_float_1
-GeneratedTests.range_ex_float_1_all_constant_inputs
-GeneratedTests.range_ex_float_1_dynamic_nnfw
-GeneratedTests.range_ex_float_2
-GeneratedTests.range_ex_float_2_dynamic_nnfw
-GeneratedTests.reduce_all
-GeneratedTests.reduce_all_2
-GeneratedTests.reduce_all_2D_nnfw
-GeneratedTests.reduce_all_3
-GeneratedTests.reduce_all_4D_nnfw
-GeneratedTests.reduce_all_dynamic_nnfw
-GeneratedTests.reduce_any
-GeneratedTests.reduce_any_2
-GeneratedTests.reduce_any_2D_nnfw
-GeneratedTests.reduce_any_3
-GeneratedTests.reduce_any_4D_nnfw
-GeneratedTests.reduce_max
-GeneratedTests.reduce_max_2
-GeneratedTests.reduce_max_2D_float_nnfw
-GeneratedTests.reduce_max_2D_int32_nnfw
-GeneratedTests.reduce_max_3
-GeneratedTests.reduce_max_4
-GeneratedTests.reduce_max_4D_float_reducing_C_nnfw
-GeneratedTests.reduce_max_4D_float_reducing_HW_nnfw
-GeneratedTests.reduce_max_float_1_nnfw
-GeneratedTests.reduce_max_float_2_nnfw
-GeneratedTests.reduce_max_float_nnfw
-GeneratedTests.reduce_max_quant8
-GeneratedTests.reduce_max_quant8_1_nnfw
-GeneratedTests.reduce_max_quant8_2
-GeneratedTests.reduce_max_quant8_2_nnfw
-GeneratedTests.reduce_max_quant8_3
-GeneratedTests.reduce_max_quant8_4
-GeneratedTests.reduce_mean_dynamic_1_nnfw
-GeneratedTests.reduce_mean_dynamic_2_nnfw
-GeneratedTests.reduce_min
-GeneratedTests.reduce_min_2
-GeneratedTests.reduce_min_3
-GeneratedTests.reduce_min_4
-GeneratedTests.reduce_min_dynamic_nnfw
-GeneratedTests.reduce_min_float_1_nnfw
-GeneratedTests.reduce_min_float_2_nnfw
-GeneratedTests.reduce_min_float_nnfw
-GeneratedTests.reduce_min_quant8
-GeneratedTests.reduce_min_quant8_2
-GeneratedTests.reduce_min_quant8_3
-GeneratedTests.reduce_min_quant8_4
-GeneratedTests.reduce_prod
-GeneratedTests.reduce_prod_2
-GeneratedTests.reduce_prod_2D_float_nnfw
-GeneratedTests.reduce_prod_3
-GeneratedTests.reduce_prod_4
-GeneratedTests.reduce_prod_4D_float_nnfw
-GeneratedTests.reduce_prod_4D_float_reducing_C_nnfw
-GeneratedTests.reduce_prod_4D_float_reducing_HW_nnfw
-GeneratedTests.reduce_prod_dynamic_1_nnfw
-GeneratedTests.reduce_prod_dynamic_2_nnfw
-GeneratedTests.reduce_sum
-GeneratedTests.reduce_sum_2
-GeneratedTests.reduce_sum_2D_float_nnfw
-GeneratedTests.reduce_sum_3
-GeneratedTests.reduce_sum_4
-GeneratedTests.reduce_sum_4D_float_nnfw
-GeneratedTests.reduce_sum_4D_float_reducing_C_nnfw
-GeneratedTests.reduce_sum_4D_float_reducing_HW_nnfw
-GeneratedTests.reduce_sum_dynamic_1_nnfw
-GeneratedTests.reduce_sum_dynamic_2_nnfw
-GeneratedTests.relu1_quant8_1
-GeneratedTests.relu1_quant8_2
-GeneratedTests.relu6_quant8_1
-GeneratedTests.relu6_quant8_2
-GeneratedTests.relu_quant8_1
-GeneratedTests.relu_quant8_2
-GeneratedTests.reshape_dynamic_nnfw
-GeneratedTests.resize_bilinear
-GeneratedTests.resize_bilinear_2
-GeneratedTests.resize_bilinear_quant8_nnfw
-GeneratedTests.resize_nearest_neighbor_shape_nhwc
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8
-GeneratedTests.resize_nearest_neighbor_shape_nchw
-GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8
-GeneratedTests.resize_nearest_neighbor_scale_nhwc
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8
-GeneratedTests.resize_nearest_neighbor_scale_nchw
-GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_2
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_2
-GeneratedTests.resize_nearest_neighbor_shape_nchw_2
-GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_2
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_2
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_2
-GeneratedTests.resize_nearest_neighbor_scale_nchw_2
-GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_2
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_3
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_3
-GeneratedTests.resize_nearest_neighbor_shape_nchw_3
-GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_3
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_3
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_3
-GeneratedTests.resize_nearest_neighbor_scale_nchw_3
-GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_3
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_4
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_4
-GeneratedTests.resize_nearest_neighbor_shape_nchw_4
-GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_4
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_4
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_4
-GeneratedTests.resize_nearest_neighbor_scale_nchw_4
-GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_4
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_5
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_5
-GeneratedTests.resize_nearest_neighbor_shape_nchw_5
-GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_5
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_5
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_5
-GeneratedTests.resize_nearest_neighbor_scale_nchw_5
-GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_5
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_6
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_6
-GeneratedTests.resize_nearest_neighbor_shape_nchw_6
-GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_6
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_6
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_6
-GeneratedTests.resize_nearest_neighbor_scale_nchw_6
-GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_6
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_7
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_7
-GeneratedTests.resize_nearest_neighbor_shape_nchw_7
-GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_7
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_7
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_7
-GeneratedTests.resize_nearest_neighbor_scale_nchw_7
-GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_7
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_8
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_8
-GeneratedTests.resize_nearest_neighbor_shape_nchw_8
-GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_8
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_8
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_8
-GeneratedTests.resize_nearest_neighbor_scale_nchw_8
-GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_8
-GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc
-GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8
-GeneratedTests.resize_nearest_neighbor_zero_sized_nchw
-GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8
-GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_2
-GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8_2
-GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_2
-GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8_2
-GeneratedTests.reverse_ex_1d
-GeneratedTests.reverse_ex_3d
-GeneratedTests.reverse_ex_dynamic_1D
-GeneratedTests.reverse_ex_dynamic_3D
-GeneratedTests.rnn
-GeneratedTests.rnn_state
-GeneratedTests.round_ex_1D_float
-GeneratedTests.round_ex_4D_float
-GeneratedTests.round_ex_dynamic_nnfw
-GeneratedTests.rsqrt
-GeneratedTests.rsqrt_1D_float_nnfw
-GeneratedTests.rsqrt_2D_float_nnfw
-GeneratedTests.rsqrt_3D_float_nnfw
-GeneratedTests.rsqrt_4D_float_nnfw
-GeneratedTests.rsqrt_dynamic_nnfw
-GeneratedTests.select_v1_2_five_dim
-GeneratedTests.select_v1_2_five_dim_quant8
-GeneratedTests.select_v1_2_one_dim
-GeneratedTests.select_v1_2_one_dim_quant8
-GeneratedTests.select_v1_2_two_dim
-GeneratedTests.select_v1_2_two_dim_quant8
-GeneratedTests.select_v2_ex_broadcast_1d_single_value
-GeneratedTests.select_v2_ex_broadcast_2d_one
-GeneratedTests.select_v2_ex_broadcast_2d_two
-GeneratedTests.select_v2_ex_broadcast_2d_two_dynamic_nnfw
-GeneratedTests.select_v2_ex_broadcast_less_4d
-GeneratedTests.select_v2_ex_float
-GeneratedTests.shape_ex_dynamic_nnfw
-GeneratedTests.sin_1D_float_nnfw
-GeneratedTests.sin_4D_float_nnfw
-GeneratedTests.sin_dynamic_nnfw
-GeneratedTests.slice
-GeneratedTests.slice_2
-GeneratedTests.slice_3
-GeneratedTests.slice_4
-GeneratedTests.slice_5
-GeneratedTests.slice_6
-GeneratedTests.slice_7
-GeneratedTests.slice_8
-GeneratedTests.slice_dynamic_nnfw
-GeneratedTests.slice_zero_sized
-GeneratedTests.slice_zero_sized_quant8
-GeneratedTests.softmax_dynamic_nnfw
-GeneratedTests.softmax_quant8_1
-GeneratedTests.softmax_quant8_2
-GeneratedTests.space_to_batch
-GeneratedTests.space_to_batch_dynamic_float_nnfw
-GeneratedTests.space_to_batch_float_1
-GeneratedTests.space_to_batch_float_1_nnfw
-GeneratedTests.space_to_batch_float_2
-GeneratedTests.space_to_batch_float_3
-GeneratedTests.space_to_batch_quant8_1
-GeneratedTests.space_to_batch_quant8_1_nnfw
-GeneratedTests.space_to_batch_quant8_2
-GeneratedTests.space_to_batch_quant8_2_nnfw
-GeneratedTests.space_to_batch_quant8_3
-GeneratedTests.space_to_depth_float_1
-GeneratedTests.space_to_depth_float_2
-GeneratedTests.space_to_depth_float_3
-GeneratedTests.space_to_depth_quant8_1
-GeneratedTests.space_to_depth_quant8_2
-GeneratedTests.split_1D_float_nnfw
-GeneratedTests.split_1D_int32_nnfw
-GeneratedTests.split_4D_float_1_nnfw
-GeneratedTests.split_4D_float_2_nnfw
-GeneratedTests.split_4D_float_3_nnfw
-GeneratedTests.split_4D_int32_1_nnfw
-GeneratedTests.split_4D_int32_2_nnfw
-GeneratedTests.split_4D_int32_3_nnfw
-GeneratedTests.split_4D_int32_4_nnfw
-GeneratedTests.split_4D_int32_5_nnfw
-GeneratedTests.split_4D_quant8_nnfw
-GeneratedTests.split_dynamic_float_nnfw
-GeneratedTests.split_float_1
-GeneratedTests.split_float_2
-GeneratedTests.split_float_3
-GeneratedTests.split_float_4
-GeneratedTests.split_float_5
-GeneratedTests.split_float_5_axis_as_input_nnfw
-GeneratedTests.split_int32_1
-GeneratedTests.split_int32_1_relaxed
-GeneratedTests.split_int32_2
-GeneratedTests.split_int32_2_relaxed
-GeneratedTests.split_int32_3
-GeneratedTests.split_int32_3_relaxed
-GeneratedTests.split_int32_4
-GeneratedTests.split_int32_4_relaxed
-GeneratedTests.split_quant8_1
-GeneratedTests.split_quant8_1_relaxed
-GeneratedTests.split_quant8_2
-GeneratedTests.split_quant8_2_relaxed
-GeneratedTests.split_quant8_3
-GeneratedTests.split_quant8_4
-GeneratedTests.split_v_ex_1D_float_1_nnfw
-GeneratedTests.split_v_ex_1D_float_2_nnfw
-GeneratedTests.split_v_ex_1D_int32_nnfw
-GeneratedTests.split_v_ex_4D_float_1_nnfw
-GeneratedTests.split_v_ex_4D_float_2_nnfw
-GeneratedTests.split_v_ex_4D_float_3_nnfw
-GeneratedTests.split_v_ex_4D_float_4_nnfw
-GeneratedTests.split_v_ex_4D_int32_1_nnfw
-GeneratedTests.split_v_ex_4D_int32_2_nnfw
-GeneratedTests.split_v_ex_4D_int32_3_nnfw
-GeneratedTests.split_v_ex_4D_int32_4_nnfw
-GeneratedTests.sqrt_
-GeneratedTests.sqrt_1D_float_nnfw
-GeneratedTests.sqrt_2D_float_nnfw
-GeneratedTests.sqrt_3D_float_nnfw
-GeneratedTests.sqrt_4D_float_nnfw
-GeneratedTests.squared_difference_ex_1D_float
-GeneratedTests.squared_difference_ex_2D_float
-GeneratedTests.squared_difference_ex_3D_float
-GeneratedTests.squared_difference_ex_4D_float
-GeneratedTests.squared_difference_ex_broadcast_4D_2D_float
-GeneratedTests.squared_difference_ex_broadcast_float
-GeneratedTests.squared_difference_ex_dynamic_nnfw
-GeneratedTests.squeeze
-GeneratedTests.squeeze_2D_float_1_nnfw
-GeneratedTests.squeeze_dynamic_float_nnfw
-GeneratedTests.squeeze_float_1
-GeneratedTests.squeeze_float_1_relaxed
-GeneratedTests.squeeze_quant8_1
-GeneratedTests.squeeze_relaxed
-GeneratedTests.stateless_random_uniform_ex_nnfw
-GeneratedTests.strided_slice
-GeneratedTests.strided_slice_dynamic_nnfw
-GeneratedTests.strided_slice_float_1
-GeneratedTests.strided_slice_float_10
-GeneratedTests.strided_slice_float_11
-GeneratedTests.strided_slice_float_2
-GeneratedTests.strided_slice_float_3
-GeneratedTests.strided_slice_float_4
-GeneratedTests.strided_slice_float_5
-GeneratedTests.strided_slice_float_6
-GeneratedTests.strided_slice_float_7
-GeneratedTests.strided_slice_float_8
-GeneratedTests.strided_slice_float_9
-GeneratedTests.strided_slice_qaunt8_10
-GeneratedTests.strided_slice_qaunt8_11
-GeneratedTests.strided_slice_quant8_1
-GeneratedTests.strided_slice_quant8_2
-GeneratedTests.strided_slice_quant8_3
-GeneratedTests.strided_slice_quant8_4
-GeneratedTests.strided_slice_quant8_5
-GeneratedTests.strided_slice_quant8_6
-GeneratedTests.strided_slice_quant8_7
-GeneratedTests.strided_slice_quant8_8
-GeneratedTests.strided_slice_quant8_9
-GeneratedTests.sub_dynamic_nnfw
-GeneratedTests.sub_v1_2_broadcast_quant8
-GeneratedTests.sub_v1_2_quant8
-GeneratedTests.sub_v1_2_zero_sized
-GeneratedTests.sub_v1_2_zero_sized_quant8
-GeneratedTests.svdf
-GeneratedTests.svdf2
-GeneratedTests.svdf_bias_present
-GeneratedTests.svdf_state
-GeneratedTests.tanh_v1_2
-GeneratedTests.tanh_v1_2_2
-GeneratedTests.tanh_v1_2_zero_sized
-GeneratedTests.tanh_v1_2_zero_sized_quant8
-GeneratedTests.tanh_v1_dynamic_nnfw
-GeneratedTests.tile_1
-GeneratedTests.tile_1_dynamic_float32_nnfw
-GeneratedTests.tile_1_float16
-GeneratedTests.tile_1_quant8
-GeneratedTests.tile_2
-GeneratedTests.tile_2_dynamic_float32_nnfw
-GeneratedTests.tile_2_float16
-GeneratedTests.tile_2_int32
-GeneratedTests.tile_2_quant8
-GeneratedTests.tile_3
-GeneratedTests.tile_3_dynamic_float32_nnfw
-GeneratedTests.tile_3_float16
-GeneratedTests.tile_3_int32
-GeneratedTests.tile_3_quant8
-GeneratedTests.topk_v2
-GeneratedTests.topk_v2_1D_float_nnfw
-GeneratedTests.topk_v2_1D_int32_nnfw
-GeneratedTests.topk_v2_1D_quant8_nnfw
-GeneratedTests.topk_v2_2
-GeneratedTests.topk_v2_2D_float_nnfw
-GeneratedTests.topk_v2_2D_int32_nnfw
-GeneratedTests.topk_v2_2D_quant8_nnfw
-GeneratedTests.topk_v2_3
-GeneratedTests.topk_v2_4
-GeneratedTests.topk_v2_5
-GeneratedTests.topk_v2_6
-GeneratedTests.transpose
-GeneratedTests.transpose_2D_nnfw
-GeneratedTests.transpose_3D_nnfw
-GeneratedTests.transpose_dynamic_nnfw
-GeneratedTests.transpose_float_1
-GeneratedTests.transpose_float_1_perms_as_input_nnfw
-GeneratedTests.transpose_quant8_1
-GeneratedTests.transpose_v1_2
-GeneratedTests.transpose_v1_2_quant8
-GeneratedTests.transpose_v1_2_zero_sized
-GeneratedTests.transpose_v1_2_zero_sized_quant8
-GeneratedTests.unidirectional_sequence_lstm_1step
-GeneratedTests.unidirectional_sequence_lstm_batch_major_norm_peephole_projection
-GeneratedTests.unidirectional_sequence_lstm_batch_major_peephole_projection_bias
-GeneratedTests.unidirectional_sequence_lstm_dynamic_nnfw
-GeneratedTests.unidirectional_sequence_lstm_layer_norm_cifg_peephole
-GeneratedTests.unidirectional_sequence_lstm_norm_peephole_projection
-GeneratedTests.unpack_ex_3D_float_1
-GeneratedTests.unpack_ex_3D_float_2
-GeneratedTests.unpack_ex_3D_int_1
-GeneratedTests.unpack_ex_3D_int_2
-GeneratedTests.unpack_ex_dynamic_nnfw
-GeneratedTests.zeros_like_ex_2D_float
-GeneratedTests.zeros_like_ex_4D_int32
-GeneratedTests.zeros_like_ex_dynamic_float32
index 022f3fa..10ce8a6 100644 (file)
@@ -27,9 +27,7 @@
 //#include "NeuralNetworksWrapperExtensions.h"
 
 #include <math.h>
-// Fix for onert: use boost::optional instead of std::optional
-// TODO in onert: introduce and use internal optional library
-#include <boost/optional.hpp>
+#include <optional>
 #include <string>
 #include <vector>
 
index 2ea2ce8..93cc980 100644 (file)
@@ -38,11 +38,11 @@ target_link_libraries(${RUNTIME_NNFW_API_TEST} gtest gmock)
 target_link_libraries(${RUNTIME_NNFW_API_TEST} ${LIB_PTHREAD} dl)
 target_link_libraries(${RUNTIME_NNFW_API_TEST} circle_schema)
 
-install(TARGETS ${RUNTIME_NNFW_API_TEST} DESTINATION unittest_standalone)
+install(TARGETS ${RUNTIME_NNFW_API_TEST} DESTINATION unittest)
 
 # Install nnpackage test model (add)
 set(NNPACKAGE_MODEL_DIR ${NNAS_PROJECT_SOURCE_DIR}/nnpackage/examples/v1.0.0/add)
-set(NNPACKAGE_INSTALL_TARGET unittest_standalone/nnfw_api_gtest_models)
+set(NNPACKAGE_INSTALL_TARGET unittest/nnfw_api_gtest_models)
 
 install(DIRECTORY ${NNPACKAGE_MODEL_DIR} DESTINATION ${NNPACKAGE_INSTALL_TARGET}/add)
 
@@ -64,7 +64,5 @@ install(DIRECTORY ${NNPACKAGE_MODEL_DIR} DESTINATION ${NNPACKAGE_INSTALL_TARGET}
 
 # Install nnpackage test model (mobilenet)
 set(NNPACKAGE_MODEL_DIR ${NNAS_PROJECT_SOURCE_DIR}/runtime/contrib/TFLiteSharp/TFLiteTestApp/res/)
-set(NNPACKAGE_INSTALL_TARGET unittest_standalone/nnfw_api_gtest_models)
-
 install(DIRECTORY ${NNPACKAGE_MODEL_DIR} DESTINATION ${NNPACKAGE_INSTALL_TARGET}/mobilenet_v1_1.0_224)
 
index 4c48236..0b77593 100644 (file)
@@ -206,8 +206,6 @@ TEST_F(ValidationTestAddModelLoaded, debug_set_config)
   NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "USE_SCHEDULER", "1"));
   NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "PROFILING_MODE", "0"));
   NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "PROFILING_MODE", "1"));
-  NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "DISABLE_COMPILE", "0"));
-  NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "DISABLE_COMPILE", "1"));
   SUCCEED();
 }
 
diff --git a/tests/nnfw_api/src/ValidationTestPipelineSession.test.cc b/tests/nnfw_api/src/ValidationTestPipelineSession.test.cc
deleted file mode 100644 (file)
index 1d92095..0000000
+++ /dev/null
@@ -1,541 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "fixtures.h"
-#include "common.h"
-#include <fstream>
-#include <stdio.h>
-#include <json/json.h>
-#include <thread>
-
-void build_partition_map()
-{
-  Json::Value root;
-  Json::Value graphs(Json::arrayValue);
-  int num = 31;
-
-  for (int i = 0; i < num; i++)
-  {
-    if (i < 7)
-      graphs.append(Json::Value(0));
-    else
-      graphs.append(Json::Value(1));
-  }
-
-  root["partition_map"] = graphs;
-  root["num_partitions"] = 2;
-
-  Json::StyledWriter sw;
-  std::string jsonString = sw.write(root);
-
-  FILE *pFile = NULL;
-
-  pFile = fopen("./partition_map.json", "wt");
-  fwrite(jsonString.c_str(), jsonString.length(), 1, pFile);
-  fclose(pFile);
-}
-
-TEST_F(ValidationTestPipelineSession, create_pipeline_001)
-{
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-  SUCCEED();
-}
-
-TEST_F(ValidationTestPipelineSession, pipeline_session_test_model)
-{
-  std::vector<void *> dummy1;
-  std::vector<uint32_t> dummy2;
-
-  build_partition_map();
-
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
-  NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
-  NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
-  remove("./partition_map.json");
-  SUCCEED();
-}
-
-TEST_F(ValidationTestPipelineSession, prepare_pipeline_001)
-{
-  std::ifstream readFile("./partition_map.json");
-
-  if (readFile.good())
-  {
-    remove("./partition_map.json");
-  }
-
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  ASSERT_EQ(nnfw_prepare_pipeline(_session, "./partition_map.json"), NNFW_STATUS_ERROR);
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-}
-
-TEST_F(ValidationTestPipelineSession, prepare_pipeline_002)
-{
-  std::vector<void *> dummy1;
-  std::vector<uint32_t> dummy2;
-
-  build_partition_map();
-
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
-  NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
-  remove("./partition_map.json");
-
-  SUCCEED();
-}
-
-TEST_F(ValidationTestPipelineSession, input_tensorinfo_pipeline)
-{
-  std::vector<void *> dummy1;
-  std::vector<uint32_t> dummy2;
-  nnfw_tensorinfo t_input;
-
-  build_partition_map();
-
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
-  NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_session, 0, &t_input));
-
-  NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
-  remove("./partition_map.json");
-
-  SUCCEED();
-}
-
-TEST_F(ValidationTestPipelineSession, output_tensorinfo_pipeline)
-{
-  std::vector<void *> dummy1;
-  std::vector<uint32_t> dummy2;
-  nnfw_tensorinfo t_output;
-
-  build_partition_map();
-
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
-  NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &t_output));
-
-  NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
-  remove("./partition_map.json");
-
-  SUCCEED();
-}
-
-TEST_F(ValidationTestPipelineSession, input_size_pipeline)
-{
-  std::vector<void *> dummy1;
-  std::vector<uint32_t> dummy2;
-  uint32_t input_num = -1;
-
-  build_partition_map();
-
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
-  NNFW_ENSURE_SUCCESS(nnfw_input_size(_session, &input_num));
-
-  NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
-  ASSERT_EQ(input_num, 1);
-
-  remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, output_size_pipeline)
-{
-  std::vector<void *> dummy1;
-  std::vector<uint32_t> dummy2;
-  uint32_t output_num = -1;
-
-  build_partition_map();
-
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
-  NNFW_ENSURE_SUCCESS(nnfw_output_size(_session, &output_num));
-
-  NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
-  ASSERT_EQ(output_num, 1);
-  remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, set_input_tensorinfo_pipeline)
-{
-  std::vector<void *> dummy1;
-  std::vector<uint32_t> dummy2;
-  nnfw_tensorinfo t_input_original;
-  nnfw_tensorinfo t_input_after;
-  nnfw_tensorinfo t_input = {NNFW_TYPE_TENSOR_FLOAT32, 4, {1, 224, 224, 3}};
-
-  build_partition_map();
-
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
-  NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_session, 0, &t_input_original));
-  NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &t_input));
-  NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_session, 0, &t_input_after));
-
-  NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
-  ASSERT_TRUE(tensorInfoEqual(t_input_original, t_input_after));
-
-  remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, input_output_tensorindex)
-{
-  std::vector<void *> dummy1;
-  std::vector<uint32_t> dummy2;
-
-  build_partition_map();
-
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
-  uint32_t input_index = 100;
-  NNFW_ENSURE_SUCCESS(nnfw_input_tensorindex(_session, "input", &input_index));
-  ASSERT_EQ(input_index, 0);
-
-  uint32_t output_index = 100;
-  NNFW_ENSURE_SUCCESS(
-    nnfw_output_tensorindex(_session, "MobilenetV1/Predictions/Reshape_1", &output_index));
-  ASSERT_EQ(output_index, 0);
-
-  NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
-  remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, neg_create_pipeline_001)
-{
-  ASSERT_EQ(nnfw_create_session(nullptr), NNFW_STATUS_UNEXPECTED_NULL);
-}
-
-TEST_F(ValidationTestPipelineSession, neg_pipeline_session_model_load)
-{
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  ASSERT_EQ(nnfw_load_model_from_modelfile(
-              nullptr, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()),
-            NNFW_STATUS_UNEXPECTED_NULL);
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-}
-
-TEST_F(ValidationTestPipelineSession, neg_prepare_pipeline_001)
-{
-  ASSERT_EQ(nnfw_prepare_pipeline(nullptr, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
-}
-
-TEST_F(ValidationTestPipelineSession, neg_set_in_pipeline)
-{
-  std::vector<void *> dummy1;
-  std::vector<uint32_t> dummy2;
-  float input_buf[1 * 224 * 224 * 3];
-
-  build_partition_map();
-
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
-  ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input_buf, sizeof(input_buf)),
-            NNFW_STATUS_ERROR);
-
-  NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
-  remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, neg_set_out_pipeline)
-{
-  std::vector<void *> dummy1;
-  std::vector<uint32_t> dummy2;
-  float output_buf[1 * 1001];
-
-  build_partition_map();
-
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
-  ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, output_buf, sizeof(output_buf)),
-            NNFW_STATUS_ERROR);
-
-  NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
-  remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, neg_input_tensorinfo_pipeline_001)
-{
-  std::vector<void *> dummy1;
-  std::vector<uint32_t> dummy2;
-  nnfw_tensorinfo t_input;
-
-  build_partition_map();
-
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
-  ASSERT_EQ(nnfw_input_tensorinfo(nullptr, 0, &t_input), NNFW_STATUS_UNEXPECTED_NULL);
-
-  NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
-  remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, neg_input_tensorinfo_pipeline_002)
-{
-  std::vector<void *> dummy1;
-  std::vector<uint32_t> dummy2;
-  nnfw_tensorinfo t_input;
-
-  build_partition_map();
-
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
-  ASSERT_EQ(nnfw_input_tensorinfo(_session, 1, &t_input), NNFW_STATUS_ERROR);
-
-  NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
-  remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, neg_output_tensorinfo_pipeline_001)
-{
-  std::vector<void *> dummy1;
-  std::vector<uint32_t> dummy2;
-  nnfw_tensorinfo t_output;
-
-  build_partition_map();
-
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
-  ASSERT_EQ(nnfw_output_tensorinfo(nullptr, 0, &t_output), NNFW_STATUS_UNEXPECTED_NULL);
-
-  NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
-  remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, neg_output_tensorinfo_pipeline_002)
-{
-  std::vector<void *> dummy1;
-  std::vector<uint32_t> dummy2;
-  nnfw_tensorinfo t_output;
-
-  build_partition_map();
-
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
-  ASSERT_EQ(nnfw_output_tensorinfo(_session, 1, &t_output), NNFW_STATUS_ERROR);
-
-  NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
-  remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, neg_input_output_size_pipeline)
-{
-  std::vector<void *> dummy1;
-  std::vector<uint32_t> dummy2;
-  uint32_t input_num = -1;
-  uint32_t output_num = -1;
-
-  build_partition_map();
-
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
-  ASSERT_EQ(nnfw_input_size(nullptr, &input_num), NNFW_STATUS_UNEXPECTED_NULL);
-  ASSERT_EQ(input_num, -1);
-  ASSERT_EQ(nnfw_output_size(nullptr, &output_num), NNFW_STATUS_UNEXPECTED_NULL);
-  ASSERT_EQ(output_num, -1);
-
-  NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
-  remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, neg_set_input_tensorinfo_pipeline)
-{
-  std::vector<void *> dummy1;
-  std::vector<uint32_t> dummy2;
-  nnfw_tensorinfo t_input = {NNFW_TYPE_TENSOR_FLOAT32, 4, {1, 224, 224, 3}};
-
-  build_partition_map();
-
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
-  ASSERT_EQ(nnfw_set_input_tensorinfo(nullptr, 0, &t_input), NNFW_STATUS_UNEXPECTED_NULL);
-
-  NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
-  remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, neg_input_output_tensorindex)
-{
-  std::vector<void *> dummy1;
-  std::vector<uint32_t> dummy2;
-
-  build_partition_map();
-
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
-  uint32_t input_index = 100;
-  ASSERT_EQ(nnfw_input_tensorindex(_session, "input1", &input_index), NNFW_STATUS_ERROR);
-  ASSERT_EQ(input_index, 100);
-
-  uint32_t output_index = 100;
-  ASSERT_EQ(nnfw_output_tensorindex(_session, "MobilenetV1/Predictions/Reshape_2", &output_index),
-            NNFW_STATUS_ERROR);
-  ASSERT_EQ(output_index, 100);
-
-  NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
-  remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, neg_run_pipeline)
-{
-  build_partition_map();
-
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
-  NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
-  auto producer = [this]() {
-    std::vector<void *> inputs;
-    std::vector<uint32_t> lengths;
-    inputs.clear();
-    lengths.clear();
-    NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, (void *)&inputs, (void *)&lengths));
-  };
-
-  auto consumer = [this]() {
-    std::vector<void *> outputs;
-    ASSERT_EQ(nnfw_pop_pipeline_output(_session, (void *)&outputs), NNFW_STATUS_ERROR);
-  };
-
-  auto producer_thread = std::thread(producer);
-  auto consumer_thread = std::thread(consumer);
-
-  producer_thread.join();
-  consumer_thread.join();
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
-  remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, run_pipeline)
-{
-  build_partition_map();
-
-  NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-  NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
-    _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
-  NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
-  NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
-  auto producer = [this]() {
-    std::vector<void *> inputs;
-    std::vector<uint32_t> lengths;
-    inputs.clear();
-    lengths.clear();
-    NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, (void *)&inputs, (void *)&lengths));
-  };
-
-  auto producer_thread = std::thread(producer);
-
-  producer_thread.join();
-  NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
-  remove("./partition_map.json");
-
-  SUCCEED();
-}
index 7ab9861..e6baf26 100644 (file)
@@ -70,7 +70,7 @@ $ ./tests/scripts/test-driver.sh \
 - input.h5 (input data)
 - expected.h5 (expected output data)
 
-`nnpkg_test` uses `nnpackage_run` internally to run `nnpackage`.
+`nnpkg_test` uses `onert_run` internally to run `nnpackage`.
 
 Then, it compares through `difftool` (either `i5diff` or `h5diff`).
 
@@ -95,8 +95,8 @@ Options:
         (dumped files are always deleted on success) (default=0)
 
 Environment variables:
-   nnpackage_run    path to nnpackage_run (default=Product/out/bin/nnpackage_run)
-   difftool         path to i5diff or h5diff (default=h5diff)
+   onert_run    path to onert_run (default=Product/out/bin/onert_run)
+   difftool     path to i5diff or h5diff (default=h5diff)
 
 Examples:
     nnpkg_test.sh Add_000                => run ./Add_000 and check output
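
For reference, a hypothetical invocation that sets the environment variables
described above explicitly (the paths are only an illustration and assume the
default build layout):

    onert_run=Product/out/bin/onert_run difftool=h5diff nnpkg_test.sh Add_000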
index 1779411..923f90c 100644 (file)
@@ -92,7 +92,7 @@ $BRIDGE shell tar -zxf $TEST_ROOT/nnpkg.tar.gz -C $TEST_ROOT/nnpkg
 $BRIDGE shell rm $TEST_ROOT/nnpkg.tar.gz
 
 # 1. Run
-$BRIDGE shell LD_LIBRARY_PATH=$TEST_ROOT/Product/out/lib TRACE_FILEPATH=$TEST_ROOT/trace.json BACKENDS=$BACKENDS $TEST_ROOT/Product/out/bin/nnpackage_run --nnpackage $NNPKG_PATH_TARGET -r $NUM_RUNS
+$BRIDGE shell LD_LIBRARY_PATH=$TEST_ROOT/Product/out/lib TRACE_FILEPATH=$TEST_ROOT/trace.json BACKENDS=$BACKENDS $TEST_ROOT/Product/out/bin/onert_run --nnpackage $NNPKG_PATH_TARGET -r $NUM_RUNS
 
 # 2. Pull result file
 echo "Pulling data from target to trace.json"
diff --git a/tests/scripts/benchmark_nnapi.sh b/tests/scripts/benchmark_nnapi.sh
deleted file mode 100755 (executable)
index 6799923..0000000
+++ /dev/null
@@ -1,245 +0,0 @@
-#!/bin/bash
-#
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-
-source $MY_PATH/common.sh
-
-BENCHMARK_DRIVER_BIN=
-BENCHMARK_REPORT_DIR=
-BENCHMARK_MODELS_FILE=
-MODEL_TEST_ROOT_PATH=
-TEST_OP="false"
-BENCHMARK_MODEL_LIST="MODELS/inception_nonslim MODELS/inception_slim MODELS/mobilenet"
-BACKEND_LIST="acl_cl acl_neon cpu" #TODO: accept this list as argument
-EXECUTORS="Linear Parallel" #TODO: accept this list as argument
-
-function Usage()
-{
-    echo "Usage: ./$0 --reportdir=. --driverbin=Product/out/bin/tflite_run"
-}
-
-for i in "$@"
-do
-    case $i in
-        -h|--help|help)
-            Usage
-            exit 1
-            ;;
-        --test_op)
-            TEST_OP="true"
-            ;;
-        --driverbin=*)
-            BENCHMARK_DRIVER_BIN=${i#*=}
-            ;;
-        --reportdir=*)
-            BENCHMARK_REPORT_DIR=${i#*=}
-            BENCHMARK_MODELS_FILE=$BENCHMARK_REPORT_DIR/benchmark_models.txt
-            ;;
-        --modelfilepath=*)
-            TEST_LIST_PATH=${i#*=}
-            MODEL_TEST_ROOT_PATH=$TEST_LIST_PATH/tests
-            ;;
-    esac
-    shift
-done
-
-function get_benchmark_op_list()
-{
-    local TEST_DIRS="$@"
-    local TESTS_TO_RUN=""
-
-    if [[ $# -eq 0 ]]; then
-        TEST_DIRS="."
-    fi
-
-    shift $#
-
-    pushd $MODEL_TEST_ROOT_PATH > /dev/null
-    for DIR in $TEST_DIRS; do
-        if [ -d "$DIR" ]; then
-            TESTS_FOUND=$(find "$DIR" -type f -name 'config.sh' -exec dirname {} \;| sed 's|^./||' | grep -v '^MODELS/' | sort)
-            TESTS_TO_RUN="$TESTS_TO_RUN $TESTS_FOUND"
-        fi
-    done
-    popd > /dev/null
-
-    BENCHMARK_MODEL_LIST=$(echo "${TESTS_TO_RUN}")
-}
-
-function profile_for_he_shed()
-{
-
-    local REPORT_MODEL_DIR=$1
-    local RUN_TEST_SH=$2
-    local BENCHMARK_DRIVER_BIN=$3
-    local MODEL=$4
-    local PROFILING_RUN_CNT=$5
-
-    export USE_SCHEDULER=1
-    export PROFILING_MODE=1
-    export EXECUTOR="Dataflow"
-    export ONERT_LOG_ENABLE=1
-
-    rm "exec_time.json" 2>/dev/null
-    for ((j = 1 ; j <= $PROFILING_RUN_CNT ; j++)); do
-        # Save the verbose log of each run
-        LOG_FILE=$REPORT_MODEL_DIR/tflite_profiling_$j.txt
-
-        print_with_dots "Profiling run #$j out of $PROFILING_RUN_CNT"
-
-        $RUN_TEST_SH --driverbin=$BENCHMARK_DRIVER_BIN $MODEL > $LOG_FILE 2>&1
-        RET=$?
-        if [[ $RET -ne 0 ]]; then
-            echo "Profiling $MODEL aborted in run#$j... exit code: $RET"
-            exit $RET
-        fi
-        echo "finished"
-        # Save the exec_time.json of each run
-        cp "exec_time.json" $REPORT_MODEL_DIR/"exec_time_$j.json"
-    done
-    unset USE_SCHEDULER PROFILING_MODE EXECUTOR ONERT_LOG_ENABLE
-}
-
-function run_with_he_scheduler()
-{
-    local REPORT_MODEL_DIR=$1
-    local RUN_TEST_SH=$2
-    local BENCHMARK_DRIVER_BIN=$3
-    local MODEL=$4
-    local EXECUTOR=$5
-
-    LOG_FILE=$REPORT_MODEL_DIR/tflite_onert_with_he_scheduler_in_$EXECUTOR.txt
-    export EXECUTOR=$EXECUTOR
-    export GRAPH_DOT_DUMP=1
-    export USE_SCHEDULER=1
-    export ONERT_LOG_ENABLE=1
-
-    print_with_dots "TFLite onert $EXECUTOR with HEScheduler"
-
-    RESULT=$(get_result_of_benchmark_test $RUN_TEST_SH $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
-    echo "$RESULT ms"
-
-    mv "after_lower.dot" $REPORT_MODEL_DIR/"after_lower_$EXECUTOR.dot"
-    unset EXECUTOR GRAPH_DOT_DUMP USE_SCHEDULER ONERT_LOG_ENABLE
-}
-
-function run_onert_with_all_config()
-{
-    local MODEL=$1
-    local REPORT_MODEL_DIR=$2
-    local PAUSE_TIME_IN_SEC=$3
-    local BENCHMARK_DRIVER_BIN=$4
-    local EXECUTORS=$5
-    local BACKEND_LIST=$6
-
-    export USE_NNAPI=1
-
-    # Run profiler BACKEND_CNT+1 times: on each run of the first BACKEND_CNT runs it will
-    #     collect metrics for one unmeasured backend. On the last run metrics for data transfer
-    PROFILING_RUN_CNT=1
-    BACKENDS_TO_USE=
-    for backend in $BACKEND_LIST; do
-        BACKENDS_TO_USE+=$backend';'
-        ((++PROFILING_RUN_CNT))
-    done
-    export BACKENDS=$BACKENDS_TO_USE
-    if [ "$TEST_OP" == "false" ]; then
-        profile_for_he_shed $REPORT_MODEL_DIR $BENCHMARK_DRIVER_BIN $MODEL $PROFILING_RUN_CNT
-    fi
-
-    for executor in $EXECUTORS; do
-        export EXECUTOR=$executor
-        if [ "$TEST_OP" == "false" ]; then
-            run_with_he_scheduler $REPORT_MODEL_DIR $BENCHMARK_DRIVER_BIN $MODEL $executor
-        fi
-        for backend in $BACKEND_LIST; do
-            export OP_BACKEND_ALLOPS=$backend
-            run_benchmark_and_print "tflite_onert_"$executor"_executor_$backend" "TFLite onert $executor Executor $backend"\
-                                    $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN
-        done
-    done
-    unset USE_NNAPI EXECUTOR OP_BACKEND_ALLOPS BACKENDS
-}
-
-function run_benchmark_test()
-{
-    local LOG_FILE=
-    local RESULT_FILE=
-    local RESULT=
-    local REPORT_MODEL_DIR=
-
-    export COUNT=5
-    export ONERT_LOG_ENABLE=1
-    echo
-    echo "============================================"
-    echo
-    date +'%Y-%m-%d %H:%M:%S %s'
-    echo
-    local i=0
-    for MODEL in $BENCHMARK_MODEL_LIST; do
-
-        STATUS="enabled"
-        if [ "$TEST_OP" == "true" ]; then
-            source $MODEL_TEST_ROOT_PATH/$MODEL/config.sh
-        fi
-
-        # Skip 'disabled' tests
-        if [ $(tr '[:upper:]' '[:lower:]' <<< "$STATUS") == "disabled" ]; then
-            continue
-        fi
-
-        echo "Benchmark test with `basename $BENCHMARK_DRIVER_BIN` & `echo $MODEL`"
-        echo $MODEL >> $BENCHMARK_MODELS_FILE
-
-        REPORT_MODEL_DIR=$BENCHMARK_REPORT_DIR/$MODEL
-        mkdir -p $REPORT_MODEL_DIR
-
-        # TFLite+CPU
-        unset USE_NNAPI
-        run_benchmark_and_print "tflite_cpu" "TFLite CPU" $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN
-
-        # run onert
-        if [ "$TEST_OP" == "true" ]; then
-          # Operation test don't need to test each scheduler
-          run_onert_with_all_config $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN "Linear" "$BACKEND_LIST"
-        else
-          run_onert_with_all_config $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN "$EXECUTORS" "$BACKEND_LIST"
-        fi
-
-        if [[ $i -ne $(echo $BENCHMARK_MODEL_LIST | wc -w)-1 ]]; then
-            echo ""
-        fi
-        i=$((i+1))
-    done
-    echo "============================================"
-    unset COUNT
-}
-
-if [ ! -e "$BENCHMARK_REPORT_DIR" ]; then
-    mkdir -p $BENCHMARK_REPORT_DIR
-fi
-
-if [ "$TEST_OP" == "true" ]; then
-    get_benchmark_op_list
-fi
-
-rm -rf $BENCHMARK_MODELS_FILE
-
-echo ""
-# print the result AND append to log file
-run_benchmark_test 2>&1 | tee -a onert_benchmarks.txt
-echo ""
index 52043f4..a180cd9 100755 (executable)
@@ -1,66 +1,58 @@
 #!/bin/bash
 
-usage()
-{
-  echo "$0 <options>"
-  echo "Options"
-  echo "--nnpackage_run : specific nnpackage_run path"
-  echo "--tflite_run : specific tflite_run path"
-  echo "--dir : the dir path of models"
-  echo "--list : the model list"
-  echo "--out  : the file name of out results"
-  echo "--tv   : for tv"
-  exit 1
-}
+MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+source $MY_PATH/common.sh
 
-scripts_dir="$( cd "$( dirname "${BASH_SOURCE}" )" && pwd )"
-nnfw_dir="${scripts_dir}/../.."
-nnpackage_run="${nnfw_dir}/Product/out/bin/nnpackage_run"
-tflite_run="${nnfw_dir}/Product/out/bin/tflite_run"
+# Caution: DO NOT USE "pipefail"
+#          We should run tests for all nnpackages
+
+onert_run="$INSTALL_PATH/bin/onert_run"
+tflite_run="$INSTALL_PATH/bin/tflite_run"
 base_name="$(basename $0)"
 base_name="${base_name%.*}"
 outfile="${base_name}_result.txt"
 dir=""
-list="${scripts_dir}/list/${base_name}_model_list.txt"
+list="$INSTALL_PATH/test/list/benchmark_nnpkg_model_list.txt"
 tv_on="false"
 
+function usage()
+{
+  echo "Usage: ${BASH_SOURCE[0]} [OPTIONS]"
+  echo "Options"
+  echo "    --dir=PATH    : the dir path of models"
+  echo "    --list=FILE   : the model list (default: $list)"
+  echo "    --out=FILE    : the file name of out results (default: $outfile)"
+  echo "    --tv          : for tv"
+  echo "    --help        : display this help message and exit"
+  exit 1
+}
+
 for i in "$@"
 do
-case $i in
-  --nnpackage_run=*)
-    nnpackage_run="${i#*=}"
-    ;;
-  --tflite_run=*)
-    tflite_run="${i#*=}"
-    ;;
-  --out=*)
-    outfile="${i#*=}"
-    ;;
-  --dir=*)
-    dir="${i#*=}"
-    ;;
-  --list=*)
-    list="${i#*=}"
-    ;;
-  --tv)
-    tv_on="true"
-    ;;
-  *)
-    ;;
-esac
-shift
+  case $i in
+    --out=*)
+      outfile="${i#*=}"
+      ;;
+    --dir=*)
+      dir="${i#*=}"
+      ;;
+    --list=*)
+      list="${i#*=}"
+      ;;
+    --tv)
+      tv_on="true"
+      ;;
+    --help)
+      usage
+      exit 1
+      ;;
+    *)
+      ;;
+  esac
+  shift
 done
 
-if ! [ -f ${nnpackage_run} ]; then
-  echo "nnpackage_run file does not exists."
-  usage
-fi
-
-if ! [ -f ${tflite_run} ]; then
-  echo "tflite_run file does not exists."
-  usage
-fi
-
 if ! [ -f ${list} ]; then
   echo "model list file does not exists."
   usage
@@ -95,7 +87,7 @@ done
 for i in "${model_lists[@]}"; do
   echo "${i} result" | tee -a ${outfile}
 
-  CMD="${nnpackage_run} -r 10 -m 1 -p 1"
+  CMD="${onert_run} -r 10 -m 1 -p 1"
   if [ "$tv_on" == "true" ]; then
     CMD="${CMD} -g 1"
   fi
@@ -128,7 +120,7 @@ for i in "${model_lists[@]}"; do
 
   echo "" >> ${outfile}
 
-  TFLITE_CMD="LD_LIBRARY_PATH=./Product/out/lib THREAD=3 ${tflite_run} -r 10 -m 1 -p 1" 
+  TFLITE_CMD="THREAD=3 ${tflite_run} -r 10 -m 1 -p 1"
   if [ "$tv_on" == "true" ]; then
     TFLITE_CMD="${TFLITE_CMD} -g 1"
   fi
@@ -143,4 +135,4 @@ for i in "${model_lists[@]}"; do
   sleep 20 # for avoiding cpu overheated
 done # ${model_lists}
 
-${scripts_dir}/merge_result_of_benchmark_nnpkg.py -i . -o . -l ${list}
+python3 $MY_PATH/merge_result_of_benchmark_nnpkg.py -i . -o . -l ${list}
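
Given the options the rewritten usage() now advertises, a representative run might look as follows; the script name is inferred from the default list file, the paths are illustrative, and only the option names come from the patch.

    $ $INSTALL_PATH/test/benchmark_nnpkg.sh \
        --dir=$INSTALL_PATH/test/models/nnpkg \
        --list=$INSTALL_PATH/test/list/benchmark_nnpkg_model_list.txt \
        --out=benchmark_nnpkg_result.txt
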
diff --git a/tests/scripts/benchmark_ops.sh b/tests/scripts/benchmark_ops.sh
new file mode 100755 (executable)
index 0000000..b1b3f69
--- /dev/null
@@ -0,0 +1,183 @@
+#!/bin/bash
+#
+# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+source $MY_PATH/common.sh
+
+# Caution: DO NOT USE "pipefail"
+#          We should run tests for all operators
+
+ONERT_DRIVER_BIN=$INSTALL_PATH/bin/onert_run
+TFLITE_DRIVER_BIN=$INSTALL_PATH/bin/tflite_run
+REPORT_DIR=$ROOT_PATH/report
+BENCHMARK_REPORT_DIR=$REPORT_DIR/benchmark_op
+BENCHMARK_MODELS_FILE=$BENCHMARK_REPORT_DIR/benchmark_models.txt
+MODEL_TEST_ROOT_PATH=$INSTALL_PATH/test/models/tflite
+BENCHMARK_MODEL_LIST=
+BACKEND_LIST="acl_cl acl_neon cpu"
+TEST_DIRS="."
+
+function Usage()
+{
+    echo "Usage: ${BASH_SOURCE[0]} [OPTIONS]"
+    echo ""
+    echo "Options:"
+    echo "      --backends=STRING       Backends to test. (default='$BACKEND_LIST')"
+    echo "      --list=FILE             List file to test. Test all if list option is not passed"
+}
+
+for i in "$@"
+do
+    case $i in
+        -h|--help|help)
+            Usage
+            exit 1
+            ;;
+        --list=*)
+            TEST_LIST_PATH=${i#*=}
+            TEST_DIRS=$(grep -v '#' $TEST_LIST_PATH | tr '\n' ' ' )
+            ;;
+        --backends=*)
+            BACKEND_LIST=${i#*=}
+            ;;
+    esac
+    shift
+done
+
+function get_benchmark_op_list()
+{
+    local TESTS_TO_RUN=""
+
+    pushd $MODEL_TEST_ROOT_PATH > /dev/null
+    for DIR in $TEST_DIRS; do
+        if [ -d "$DIR" ]; then
+            TESTS_FOUND=$(find "$DIR" -type f -name 'config.sh' -exec dirname {} \;| sed 's|^./||' | grep -v '^MODELS/' | sort)
+            TESTS_TO_RUN="$TESTS_TO_RUN $TESTS_FOUND"
+        fi
+    done
+    popd > /dev/null
+
+    BENCHMARK_MODEL_LIST=$(echo "${TESTS_TO_RUN}")
+}
+
+function run_benchmark_and_print()
+{
+
+    local WRITE_FILE_NAME=$1
+    local MSG=$2
+    local MODEL=$3
+    local REPORT_MODEL_DIR=$4
+    local DRIVER_BIN=$5
+
+    LOG_FILE=$REPORT_MODEL_DIR/$WRITE_FILE_NAME.txt
+    RESULT_FILE=$REPORT_MODEL_DIR/$WRITE_FILE_NAME.result
+    print_with_dots $MSG
+    RESULT=$(get_result_of_benchmark_test $DRIVER_BIN $MODEL $LOG_FILE)
+    echo "$RESULT ms"
+    echo "$MSG $RESULT" > $RESULT_FILE
+}
+
+function run_onert_with_all_config()
+{
+    local MODEL=$1
+    local REPORT_MODEL_DIR=$2
+    local BENCHMARK_DRIVER_BIN=$3
+
+    # Run profiler BACKEND_CNT+1 times: on each run of the first BACKEND_CNT runs it will
+    #     collect metrics for one unmeasured backend. On the last run metrics for data transfer
+    PROFILING_RUN_CNT=1
+    BACKENDS_TO_USE=
+    for backend in $BACKEND_LIST; do
+        BACKENDS_TO_USE+=$backend';'
+        ((++PROFILING_RUN_CNT))
+    done
+    export BACKENDS=$BACKENDS_TO_USE
+    export EXECUTOR="Linear"
+    for backend in $BACKEND_LIST; do
+        export OP_BACKEND_ALLOPS=$backend
+        run_benchmark_and_print "onert_$backend" "ONERT-${backend^^}"\
+                                $MODEL $REPORT_MODEL_DIR $BENCHMARK_DRIVER_BIN
+    done
+    unset EXECUTOR OP_BACKEND_ALLOPS BACKENDS
+}
+
+function run_benchmark_test()
+{
+    local LOG_FILE=
+    local RESULT_FILE=
+    local RESULT=
+    local REPORT_MODEL_DIR=
+
+    export COUNT=5
+    echo
+    echo "============================================"
+    echo
+    date +'%Y-%m-%d %H:%M:%S %s'
+    echo
+    local i=0
+    for MODEL in $BENCHMARK_MODEL_LIST; do
+
+        STATUS="enabled"
+        source $MODEL_TEST_ROOT_PATH/$MODEL/config.sh
+
+        # Skip 'disabled' tests
+        if [ $(tr '[:upper:]' '[:lower:]' <<< "$STATUS") == "disabled" ]; then
+            continue
+        fi
+
+        echo "Benchmark test `echo $MODEL`"
+        echo $MODEL >> $BENCHMARK_MODELS_FILE
+
+        REPORT_MODEL_DIR=$BENCHMARK_REPORT_DIR/$MODEL
+        mkdir -p $REPORT_MODEL_DIR
+
+        # TFLite+CPU
+        run_benchmark_and_print "tflite_cpu" "TFLite-CPU" $MODEL $REPORT_MODEL_DIR $TFLITE_DRIVER_BIN
+
+        # run onert
+        # Operation tests don't need to test each scheduler
+        run_onert_with_all_config $MODEL $REPORT_MODEL_DIR $ONERT_DRIVER_BIN
+
+        if [[ $i -ne $(echo $BENCHMARK_MODEL_LIST | wc -w)-1 ]]; then
+            echo ""
+        fi
+        i=$((i+1))
+    done
+    echo "============================================"
+    unset COUNT
+}
+
+if [ ! -e "$BENCHMARK_REPORT_DIR" ]; then
+    mkdir -p $BENCHMARK_REPORT_DIR
+fi
+
+get_benchmark_op_list
+
+rm -rf $BENCHMARK_MODELS_FILE
+
+# Model download server setting
+prepare_test_model
+
+echo ""
+# print the result AND append to log file
+run_benchmark_test 2>&1 | tee -a $REPORT_DIR/onert_benchmarks.txt
+echo ""
+
+# Make json file.
+# functions to fill json with benchmark results
+source $MY_PATH/print_to_json.sh
+print_to_json $BENCHMARK_REPORT_DIR $REPORT_DIR "benchmark_op_result.json"
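
A hedged example of driving the new per-operation benchmark; the server URL and list file below are placeholders, while --backends, --list, and the MODELFILE_SERVER variable come from the script above and from common.sh.

    $ MODELFILE_SERVER=http://<model-server>/models \
        ./tests/scripts/benchmark_ops.sh --backends=cpu --list=./op_list.txt
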
index ba71217..7f8e0f8 100644 (file)
@@ -9,7 +9,7 @@ command_exists() {
 progname=$(basename "${BASH_SOURCE[0]}")
 indir="."
 outdir="."
-nnpkg_run=${nnpkg_run:-"nnpackage_run"}
+nnpkg_run=${nnpkg_run:-"onert_run"}
 difftool=${difftool:-"h5diff"}
 delete_dumped_on_failure=0
 verbose_diff=0
@@ -31,8 +31,8 @@ usage() {
   echo "    -v   verbose result diff (default=$verbose_diff)"
   echo ""
   echo "Environment variables:"
-  echo "   nnpackage_run    path to nnpackage_run (default=nnpackage_run)"
-  echo "   difftool         path to i5diff or h5diff (default=h5diff)"
+  echo "   onert_run    path to onert_run (default=onert_run)"
+  echo "   difftool     path to i5diff or h5diff (default=h5diff)"
   echo ""
   echo "Examples:"
   echo "    $0 $progname Add_000                => run $indir/Add_000 and check output"
index 7c65254..0a418dc 100644 (file)
@@ -14,9 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-COMMAND_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-INSTALL_DIR="$(dirname $(dirname $COMMAND_DIR))"
-
+MODEL_ROOT_DIR=$DRIVER_PATH/models
 MD5_CHECK="on"
 
 function Usage()
@@ -25,6 +23,7 @@ function Usage()
     echo ""
     echo "Options:"
     echo "          --ignoremd5     Ignore MD5 check when download model files"
+    echo "          --cachedir=PATH Set downloaded resouces cache directory (default: $CACHE_PATH)"
     echo "      -h, --help          Display this help message and exit"
 }
 
@@ -38,6 +37,9 @@ do
         --ignoremd5)
             MD5_CHECK="off"
             ;;
+        --cachedir=*)
+            CACHE_PATH=${i#*=}
+            ;;
         *)
             echo "Unknown option: $i"
             exit 1
@@ -46,6 +48,107 @@ do
     shift
 done
 
+function find_tests()
+{
+    local TEST_DIRS="$@"
+    local TESTS_TO_DOWNLOAD=""
+
+    if [[ $# -eq 0 ]]; then
+        TEST_DIRS="."
+    fi
+
+    shift $#
+
+    pushd $MODEL_ROOT_DIR > /dev/null
+    for DIR in $TEST_DIRS; do
+        if [ -d "$DIR" ]; then
+            TESTS_FOUND=$(find "$DIR" -type f -name 'config.sh' -exec dirname {} \;| sed 's|^./||' | sort)
+            TESTS_TO_DOWNLOAD="$TESTS_TO_DOWNLOAD $TESTS_FOUND"
+        else
+            echo "Test $DIR was not found. This test is not added." 1>&2
+        fi
+    done
+    popd > /dev/null
+
+    echo $TESTS_TO_DOWNLOAD
+}
+
+function need_download()
+{
+    LOCAL_PATH=$1
+    REMOTE_URL=$2
+    if [ ! -e $LOCAL_PATH ]; then
+        return 0;
+    fi
+    # Ignore checking md5 in cache
+    # TODO Use "--md5" option only and remove IGNORE_MD5 environment variable
+    if [ ! -z $IGNORE_MD5 ] && [ "$IGNORE_MD5" == "1" ]; then
+        return 1
+    fi
+    if [ "$MD5_CHECK" = "off" ]; then
+        return 1
+    fi
+
+    LOCAL_HASH=$(md5sum $LOCAL_PATH | awk '{ print $1 }')
+    REMOTE_HASH=$(curl --netrc-optional -kLsS $REMOTE_URL | md5sum  | awk '{ print $1 }')
+    # TODO Emit an error when Content-MD5 field was not found. (Server configuration issue)
+    if [ "$LOCAL_HASH" != "$REMOTE_HASH" ]; then
+        echo "Downloaded file is outdated or incomplete."
+        return 0
+    fi
+    return 1
+}
+
+function download_tests()
+{
+    SELECTED_TESTS=$@
+
+    echo ""
+    echo "Downloading tests:"
+    echo "======================"
+    for TEST_NAME in $SELECTED_TESTS; do
+        echo $TEST_NAME
+    done
+    echo "======================"
+
+    if [ ! -e $CACHE_PATH ]; then
+        mkdir -p $CACHE_PATH
+    fi
+
+    i=0
+    for TEST_NAME in $SELECTED_TESTS; do
+        # Test configure initialization
+        ((i++))
+        MODELFILE_URL_BASE=""
+        MODELFILE_NAME=""
+        source $MODEL_ROOT_DIR/$TEST_NAME/config.sh
+
+        MODELFILE=$CACHE_PATH/$MODELFILE_NAME
+        MODELFILE_URL="$MODELFILE_URL_BASE/$MODELFILE_NAME"
+        if [ -n  "$MODELFILE_SERVER" ]; then
+            MODELFILE_URL="$MODELFILE_SERVER/$MODELFILE_NAME"
+        fi
+
+        # Download model file
+        # Download unless we have it in cache (Also check md5sum)
+        if need_download "$MODELFILE" "$MODELFILE_URL"; then
+            echo ""
+            echo "Download test file for $TEST_NAME"
+            echo "======================"
+
+            rm -f $MODELFILE # Remove invalid file if exists
+            pushd $CACHE_PATH > /dev/null
+            echo "Download $MODELFILE_URL"
+            curl --netrc-optional -kLOsS $MODELFILE_URL
+            if [ "${MODELFILE_NAME##*.}" == "zip" ]; then
+                unzip -o $MODELFILE_NAME -d ${MODELFILE_NAME%.zip}
+            fi
+            popd > /dev/null
+        fi
+
+    done
+}
+
 # Check MODELFILE_SERVER
 if [[ -z "$MODELFILE_SERVER" ]]; then
     echo "Fail to download models: Please set MODELFILE_SERVER to download model"
@@ -53,4 +156,6 @@ if [[ -z "$MODELFILE_SERVER" ]]; then
 fi
 echo "Download from $MODELFILE_SERVER"
 
-$INSTALL_DIR/test/models/run_test.sh --download=on --run=off --md5=$MD5_CHECK
+# Download tflite model
+TESTS_TO_DOWNLOAD=$(find_tests tflite)
+download_tests $TESTS_TO_DOWNLOAD
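
As rewritten, the prepare-model command downloads and caches models itself instead of delegating to run_test.sh; a hedged invocation through the onert-test entry point (the cache path mirrors common.sh, the server URL is a placeholder):

    $ MODELFILE_SERVER=http://<model-server>/models \
        $INSTALL_PATH/test/onert-test prepare-model --cachedir=$INSTALL_PATH/test/cache
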
index 135ebea..9eb66dc 100644 (file)
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-COMMAND_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-INSTALL_DIR="$(dirname $(dirname $COMMAND_DIR))"
 UNITTEST_REPORT_DIR=
-UNITTEST_TEST_DIR=$INSTALL_DIR/unittest
+UNITTEST_TEST_DIR=$INSTALL_PATH/unittest
 UNITTEST_RESULT=0
 UNITTEST_RUN_ALL=""
 
index fff1106..c621cc4 100644 (file)
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-COMMAND_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-INSTALL_DIR="$(dirname $(dirname $COMMAND_DIR))"
-
-MD5_CHECK="on"
-TFLITE_LOADER="loader"
 REPORT_DIR="report"
 TEST_LIST_FILE=
 
@@ -27,10 +22,9 @@ function Usage()
     echo "Usage: $0 $(basename ${BASH_SOURCE[0]}) [OPTIONS]"
     echo ""
     echo "Options:"
-    echo "      --ignoremd5             Ignore MD5 check when download model files"
-    echo "      --api=(nnapi|loader)    TFLite model file loading API (default=$TFLITE_LOADER)"
     echo "      --reportdir=PATH        Path to write report (default=$REPORT_DIR)"
     echo "      --list=FILE             List file to test. Test all if list option is not passed"
+    echo "      --cachedir=PATH         Set downloaded resouces cache directory (default: $CACHE_PATH)"
 }
 
 for i in "$@"
@@ -40,18 +34,15 @@ do
             Usage
             exit 1
             ;;
-        --ignoremd5)
-            MD5_CHECK="off"
-            ;;
-        --api=*)
-            TFLITE_LOADER=${i#*=}
-            ;;
         --reportdir=*)
             REPORT_DIR=${i#*=}
             ;;
         --list=*)
             TEST_LIST_FILE=${i#*=}
             ;;
+        --cachedir=*)
+            CACHE_PATH=${i#*=}
+            ;;
         *)
             echo "Unknown option: $i"
             exit 1
@@ -70,24 +61,13 @@ fi
 
 TEST_RESULT=0
 TAP_NAME=verification_test.tap
-TEST_NAME="Verification"
-TEST_DRIVER=
-
-if [[ $TFLITE_LOADER == "nnapi" ]]; then
-    TEST_NAME="NNAPI Verification"
-    TEST_DRIVER=nnapi_test
-elif [[ $TFLITE_LOADER == "loader" ]]; then
-    TEST_NAME="Loader Verification"
-    TEST_DRIVER=tflite_comparator
-else
-    Usage
-    exit 1
-fi
+TEST_NAME="Loader Verification"
+TEST_DRIVER=tflite_comparator
 
-$INSTALL_DIR/test/models/run_test.sh --driverbin=$TEST_DRIVER \
-    --download=off --run=on \
+$INSTALL_PATH/test/models/run_test.sh --driverbin=$TEST_DRIVER \
     --reportdir=$REPORT_DIR \
     --tapname=$TAP_NAME \
+    --cachedir=$CACHE_PATH \
     ${MODELLIST:-} > $REPORT_DIR/verification_test.log 2>&1
 TEST_RESULT=$?
 
index a2a261a..b0c44ae 100755 (executable)
 # limitations under the License.
 
 MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+ROOT_PATH="$(cd ${MY_PATH}/../../ && pwd)"
+
+# Install path on CI
+INSTALL_PATH=$ROOT_PATH/Product/out
+TEST_CACHE_PATH=$INSTALL_PATH/test/cache
+
+function prepare_test_model()
+{
+  # Model download server setting
+  if [[ -z "${MODELFILE_SERVER}" ]]; then
+    echo "Model file server is not set. Try to use default setting."
+  else
+    echo "Model Server: ${MODELFILE_SERVER}"
+  fi
+  $INSTALL_PATH/test/onert-test prepare-model --cachedir=$TEST_CACHE_PATH
+}
 
 function get_result_of_benchmark_test()
 {
@@ -23,7 +39,7 @@ function get_result_of_benchmark_test()
     local LOG_FILE=$3
 
     local RET=0
-    $MY_PATH/models/run_test.sh --driverbin="$DRIVER_BIN  -r 5 -w 3" $MODEL > $LOG_FILE 2>&1
+    $INSTALL_PATH/test/models/run_test.sh --driverbin="$DRIVER_BIN -r 5 -w 3" --cachedir=$TEST_CACHE_PATH $MODEL > $LOG_FILE 2>&1
     RET=$?
     if [[ $RET -ne 0 ]]; then
         echo "Testing $MODEL aborted... exit code: $RET"
@@ -34,15 +50,6 @@ function get_result_of_benchmark_test()
     echo "$RESULT"
 }
 
-function print_result_of_benchmark_test()
-{
-    local NAME=$1
-    local RESULT=$2
-    local RESULT_FILE=$3
-
-    echo "$NAME $RESULT" > $RESULT_FILE
-}
-
 function print_with_dots()
 {
     PRINT_WIDTH=45
@@ -52,23 +59,3 @@ function print_with_dots()
     printf '%s' "$MSG"
     printf '%*.*s ' 0 $padlength "$pad"
 }
-
-
-function run_benchmark_and_print()
-{
-    local WRITE_FILE_NAME=$1
-    local MSG=$2
-    local MODEL=$3
-    local REPORT_MODEL_DIR=$4
-    local PAUSE_TIME_IN_SEC=$5
-    local DRIVER_BIN=$6
-    local BENCHMARK_RUN_TEST_SH=$7
-
-    LOG_FILE=$REPORT_MODEL_DIR/$WRITE_FILE_NAME.txt
-    RESULT_FILE=$REPORT_MODEL_DIR/$WRITE_FILE_NAME.result
-    print_with_dots $MSG
-    RESULT=$(get_result_of_benchmark_test $DRIVER_BIN $MODEL $LOG_FILE)
-    echo "$RESULT ms"
-    print_result_of_benchmark_test "$MSG" "$RESULT" $RESULT_FILE
-    sleep $PAUSE_TIME_IN_SEC
-}
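
A small sketch of how the surviving common.sh helpers compose, roughly as benchmark_ops.sh uses them; the model name and report path are illustrative.

    prepare_test_model
    REPORT_MODEL_DIR=report/benchmark_op/add
    mkdir -p $REPORT_MODEL_DIR
    print_with_dots "ONERT-CPU"
    RESULT=$(get_result_of_benchmark_test $INSTALL_PATH/bin/onert_run add $REPORT_MODEL_DIR/onert_cpu.txt)
    echo "$RESULT ms"
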
diff --git a/tests/scripts/list/nnapi_test.aarch64.list b/tests/scripts/list/nnapi_test.aarch64.list
deleted file mode 100644 (file)
index c74e898..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-batch_to_space_nd2
-cast
-concat
-conv_2d
-depthwise_conv_2d
-div
-embedding_lookup
-exp
-floor
-fullyconnected
-gather
-hashtable_lookup
-l2_normalization
-l2_pool_2d
-max
-max_pool_2d
-mean
-min
-mul
-neg
-pack
-pad
-reduce_max
-reduce_mean
-reduce_sum/float
-relu
-relu6
-reshape
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_batch_nd2
-space_to_depth
-squeeze
-strided_slice
-sub
-tanh
-topk_v2
-transpose
-transpose_conv
diff --git a/tests/scripts/list/nnapi_test.armv7l.list b/tests/scripts/list/nnapi_test.armv7l.list
deleted file mode 100644 (file)
index c74e898..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-batch_to_space_nd2
-cast
-concat
-conv_2d
-depthwise_conv_2d
-div
-embedding_lookup
-exp
-floor
-fullyconnected
-gather
-hashtable_lookup
-l2_normalization
-l2_pool_2d
-max
-max_pool_2d
-mean
-min
-mul
-neg
-pack
-pad
-reduce_max
-reduce_mean
-reduce_sum/float
-relu
-relu6
-reshape
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_batch_nd2
-space_to_depth
-squeeze
-strided_slice
-sub
-tanh
-topk_v2
-transpose
-transpose_conv
diff --git a/tests/scripts/list/nnpkg_test_list.noarch.interp b/tests/scripts/list/nnpkg_test_list.noarch.interp
deleted file mode 100644 (file)
index cc4e034..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-Add_000.opt
-#ArgMax_000.opt
-#ArgMax_001.opt
-#ArgMax_002.opt
-#ArgMax_003.opt
-AveragePool2D_000.opt
-#AveragePool2D_U8_000.opt
-Concatenation_000.opt
-Conv2D_000.opt
-Conv2D_001.opt
-Conv2D_002.opt
-Conv2D_004.opt
-DepthwiseConv2D_000.opt
-DepthwiseConv2D_002.opt
-FullyConnected_000.opt
-FullyConnected_001.opt
-FullyConnected_003.opt
-#L2Normalize_U8_000.opt
-Logistic_000.opt
-#Logistic_U8_000.opt
-MaxPool2D_000.opt
-#Mean_000.opt
-#Mean_001.opt
-Mul_000.opt
-#Net_TConv_BN_000.opt
-#Net_UnpackAdd_001.opt
-Pad_000.opt
-Quantization_000.opt
-Reshape_000.opt
-Reshape_001.opt
-Reshape_002.opt
-Softmax_000.opt
-#SpaceToDepth_U8_000.opt
-#Split_000.opt
-#Tanh_U8_000.opt
-#TransposeConv_000.opt
-#TransposeConv_001.opt
-#Transpose_000.opt
-#Unpack_000.opt
-#Unpack_001.opt
-#Unpack_002.opt
-#Unpack_003.opt
index 24fa980..d856e97 100644 (file)
@@ -8,10 +8,14 @@ concat
 conv_2d
 depthwise_conv_2d
 div
+embedding_lookup
 exp
 floor
-fullyconnected
+fullyconnected/fc1
+fullyconnected/matmul2x2
+fullyconnected/weights_as_input
 gather
+hashtable_lookup
 l2_normalization
 max
 max_pool_2d
index 0d443a7..9ee9e6a 100644 (file)
@@ -7,8 +7,10 @@ concat
 conv_2d
 depthwise_conv_2d
 div
+embedding_lookup
 floor
 gather
+hashtable_lookup
 l2_normalization
 logistic
 max
index 24fa980..d856e97 100644 (file)
@@ -8,10 +8,14 @@ concat
 conv_2d
 depthwise_conv_2d
 div
+embedding_lookup
 exp
 floor
-fullyconnected
+fullyconnected/fc1
+fullyconnected/matmul2x2
+fullyconnected/weights_as_input
 gather
+hashtable_lookup
 l2_normalization
 max
 max_pool_2d
index 8cce41a..3c8166b 100644 (file)
@@ -7,9 +7,11 @@ concat
 conv_2d
 depthwise_conv_2d
 div
+embedding_lookup
 floor
 fullyconnected
 gather
+hashtable_lookup
 l2_normalization
 logistic
 max
index cf32977..aedab63 100644 (file)
@@ -1,6 +1,5 @@
 MODELS/inception_module
 MODELS/mobilenet
-MODELS/mobilenet_quant8
 abs
 add
 average_pool_2d
diff --git a/tests/scripts/list/tflite_comparator.noarch.interp.list b/tests/scripts/list/tflite_comparator.noarch.interp.list
deleted file mode 100644 (file)
index 3555ee2..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-concat
-conv_2d
-depthwise_conv_2d
-fullyconnected/fc1
-logistic
-max_pool_2d
-pad
-relu
-relu6
-reshape/reshape1
-softmax
-tanh
index 5750ec4..201a9de 100644 (file)
@@ -1,6 +1,5 @@
 MODELS/inception_module
 MODELS/mobilenet
-MODELS/mobilenet_quant8
 add
 average_pool_2d
 concat
index 195fd37..7e69df0 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 #
index 4c11a48..52ac8cd 100755 (executable)
@@ -29,52 +29,20 @@ function command_exists() {
 
 function Usage()
 {
-    echo "Usage: ./$0 --driverbin={such as tflite_run} {tests to test or empty for all of tests}"
-    echo "Usage: ./$0 --driverbin=Product/out/bin/tflite_run --reportdir=report --tapname=verification.tap avgpool1 avgpool2"
+    echo "Usage: ${BASH_SOURCE[0]} [OPTIONS] {tests to test or empty for all of tests}"
+    echo "(Ex) : ${BASH_SOURCE[0]} --driverbin=Product/out/bin/onert_run --reportdir=report --tapname=verification.tap avgpool1 avgpool2"
     echo ""
-    echo "--download            - (default=on) Download model files"
-    echo "--run                 - (default=on) Test model files"
-    echo "--driverbin           - (default=../../Product/out/bin/tflite_run) Runner for runnning model tests"
-    echo "--reportdir           - (default=report) Directory to place tap files"
-    echo "--tapname             - (default=framework_test.tap) File name to be written for tap"
-    echo "--md5                 - (default=on) MD5 check when download model files"
-    echo "--configdir           - (default=$TEST_ROOT_PATH) Config directory to download and test model"
-    echo "--cachedir            - (default=$CACHE_ROOT_PATH) Directory to download model"
+    echo "--driverbin           : Runner for runnning model tests"
+    echo "--reportdir           : (default=$REPORT_DIR) Directory to place tap files"
+    echo "--tapname             : (default=$TAP_NAME) File name to be written for tap"
+    echo "--configdir           : (default=$TEST_ROOT_PATH) Config directory to download and test model"
+    echo "--cachedir            : (default=$CACHE_ROOT_PATH) Directory to download model"
     echo ""
 }
 
-function need_download()
-{
-    LOCAL_PATH=$1
-    REMOTE_URL=$2
-    if [ ! -e $LOCAL_PATH ]; then
-        return 0;
-    fi
-    # Ignore checking md5 in cache
-    # TODO Use "--md5" option only and remove IGNORE_MD5 environment variable
-    if [ ! -z $IGNORE_MD5 ] && [ "$IGNORE_MD5" == "1" ]; then
-        return 1
-    fi
-    if [ "$MD5_CHECK" = "off" ]; then
-        return 1
-    fi
-
-    LOCAL_HASH=$(md5sum $LOCAL_PATH | awk '{ print $1 }')
-    REMOTE_HASH=$(curl -ss $REMOTE_URL | md5sum  | awk '{ print $1 }')
-    # TODO Emit an error when Content-MD5 field was not found. (Server configuration issue)
-    if [ "$LOCAL_HASH" != "$REMOTE_HASH" ]; then
-        echo "Downloaded file is outdated or incomplete."
-        return 0
-    fi
-    return 1
-}
-
-DRIVER_BIN=""
+DRIVER_BIN="$NNFW_HOME/Product/out/bin/onert_run"
 TAP_NAME="framework_test.tap"
 TEST_LIST=()
-DOWNLOAD_MODEL="on"
-RUN_TEST="on"
-MD5_CHECK="on"
 
 # Support environment variable setting for mirror server
 FIXED_MODELFILE_SERVER="${MODELFILE_SERVER:-}"
@@ -95,15 +63,6 @@ do
         --tapname=*)
             TAP_NAME=${i#*=}
             ;;
-        --download=*)
-            DOWNLOAD_MODEL=${i#*=}
-            ;;
-        --md5=*)
-            MD5_CHECK=${i#*=}
-            ;;
-        --run=*)
-            RUN_TEST=${i#*=}
-            ;;
         --configdir=*)
             TEST_ROOT_PATH=${i#*=}
             ;;
@@ -121,17 +80,13 @@ if [[ ${#TEST_LIST[@]} -eq 0 ]]; then
     RUN_DISABLED="false"
 fi
 
-if [ ! -n "$DRIVER_BIN" ]; then
-    DRIVER_BIN="$NNFW_HOME/Product/out/bin/tflite_run"
-fi
-
 if [ ! -d "$TEST_ROOT_PATH" ]; then
     echo "Cannot find config directory for test: please set proper configdir"
     exit 1
 fi
 
 # Check test driver setting
-if ! command_exists $DRIVER_BIN && [ "$RUN_TEST" = "on" ]; then
+if ! command_exists $DRIVER_BIN ; then
     echo "Cannot find test driver" $DRIVER_BIN ": please set proper DRIVER_BIN"
     exit 1
 fi
@@ -194,56 +149,6 @@ run_tests()
     return $TOTAL_RESULT
 }
 
-download_tests()
-{
-    SELECTED_TESTS=$@
-
-    echo ""
-    echo "Downloading tests:"
-    echo "======================"
-    for TEST_NAME in $SELECTED_TESTS; do
-        echo $TEST_NAME
-    done
-    echo "======================"
-
-    i=0
-    for TEST_NAME in $SELECTED_TESTS; do
-        # Test configure initialization
-        ((i++))
-        MODELFILE_URL_BASE=""
-        MODELFILE_NAME=""
-        source $TEST_ROOT_PATH/$TEST_NAME/config.sh
-
-        MODELFILE=$CACHE_ROOT_PATH/$MODELFILE_NAME
-        MODELFILE_URL="$MODELFILE_URL_BASE/$MODELFILE_NAME"
-        if [ -n  "$FIXED_MODELFILE_SERVER" ]; then
-            MODELFILE_URL="$FIXED_MODELFILE_SERVER/$MODELFILE_NAME"
-        fi
-
-        # Download model file
-        if [ ! -e $CACHE_ROOT_PATH ]; then
-            mkdir -p $CACHE_ROOT_PATH
-        fi
-
-        # Download unless we have it in cache (Also check md5sum)
-        if need_download "$MODELFILE" "$MODELFILE_URL"; then
-            echo ""
-            echo "Download test file for $TEST_NAME"
-            echo "======================"
-
-            rm -f $MODELFILE # Remove invalid file if exists
-            pushd $CACHE_ROOT_PATH
-            wget -nv $MODELFILE_URL
-            if [ "${MODELFILE_NAME##*.}" == "zip" ]; then
-                unzip -o $MODELFILE_NAME -d ${MODELFILE_NAME%.zip}
-            fi
-            popd
-        fi
-
-    done
-}
-
-
 find_tests()
 {
     local TEST_DIRS="$@"
@@ -271,12 +176,6 @@ find_tests()
 
 mkdir -p $REPORT_DIR
 TESTS_TO_RUN=$(find_tests ${TEST_LIST[@]})
+run_tests $TESTS_TO_RUN
 
-if [ "$DOWNLOAD_MODEL" = "on" ]; then
-    download_tests $TESTS_TO_RUN
-fi
-
-if [ "$RUN_TEST" = "on" ]; then
-    run_tests $TESTS_TO_RUN
-fi
 exit $?
index 99c107c..f5ed474 100644 (file)
 
 [[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
 
+# Common variables which are used on test commands
+# DRIVER_PATH: test driver and related resources folder
+# INSTALL_PATH: test package installed folder
+# CACHE_PATH: cache folder for test resource download
 DRIVER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 INSTALL_PATH="$(dirname $DRIVER_PATH)"
-COMMAND_PATH=$INSTALL_PATH/test/command
-BIN_PATH=$INSTALL_PATH/bin
+CACHE_PATH=$DRIVER_PATH/cache
 
-export PATH=$BIN_PATH:$PATH
+export PATH=$INSTALL_PATH/bin:$PATH
+
+COMMAND_PATH=$DRIVER_PATH/command
 
 function Usage()
 {
diff --git a/tests/scripts/test-driver.sh b/tests/scripts/test-driver.sh
deleted file mode 100755 (executable)
index aa97d95..0000000
+++ /dev/null
@@ -1,163 +0,0 @@
-#!/bin/bash
-#
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -e
-# NOTE: Supposed that this script would be executed with an artifact path.
-#       The artifact path has tests/(test suite) and Product/
-#       Reference this PR(https://github.sec.samsung.net/STAR/nnfw/pull/375).
-
-function Usage()
-{
-    echo "Usage: ./$0 --artifactpath=.    # run all tests"
-    echo "Usage: ./$0 --artifactpath=/home/dragon/nnfw --frameworktest --verification --benchmark    # run fw test & verfication and benchmark"
-    echo ""
-    echo "--artifactpath            - (default={test-driver.sh's path}/../../) it should contain tests/ and Product/"
-    echo ""
-    echo "Following options are needed when you want to tests of specific types. If you don't pass any one, unittest and verification will be run"
-    echo "--frameworktest           - (default=off) run framework test"
-    echo "--verification            - (default=on) run verification"
-    echo "--frameworktest_list_file - filepath of model list for test"
-    echo ""
-    echo "Following option is only needed when you want to test benchmark."
-    echo "--benchmark_onert_op     - (default=off) run benchmark per operation on onert"
-    echo ""
-    echo "etc."
-    echo "--framework_driverbin     - (default=../../Product/out/bin/tflite_run) runner for runnning framework tests"
-    echo "--verification_driverbin  - (default=../../Product/out/bin/nnapi_test) runner for runnning verification tests"
-    echo ""
-    echo "--reportdir               - (default=\$ARTIFACT_PATH/report) directory to save report"
-    echo ""
-}
-
-TEST_DRIVER_DIR="$( cd "$( dirname "${BASH_SOURCE}" )" && pwd )"
-ARTIFACT_PATH="$TEST_DRIVER_DIR/../../"
-FRAMEWORK_DRIVER_BIN=""
-VERIFICATION_DRIVER_BIN=""
-ALLTEST_ON="true"
-FRAMEWORKTEST_ON="false"
-VERIFICATION_ON="false"
-BENCHMARK_ONERT_OP_ON="false"
-REPORT_DIR=""
-
-for i in "$@"
-do
-    case $i in
-        -h|--help|help)
-            Usage
-            exit 1
-            ;;
-        --artifactpath=*)
-            ARTIFACT_PATH=${i#*=}
-            ;;
-        --framework_driverbin=*)
-            FRAMEWORK_DRIVER_BIN=${i#*=}
-            ;;
-        --verification_driverbin=*)
-            VERIFICATION_DRIVER_BIN=${i#*=}
-            ;;
-        --frameworktest)
-            ALLTEST_ON="false"
-            FRAMEWORKTEST_ON="true"
-            ;;
-        --frameworktest_list_file=*)
-            FRAMEWORKTEST_LIST_FILE=$PWD/${i#*=}
-            if [ ! -e "$FRAMEWORKTEST_LIST_FILE" ]; then
-                echo "Pass on with proper frameworktest_list_file"
-                exit 1
-            fi
-            ;;
-        --verification)
-            ALLTEST_ON="false"
-            VERIFICATION_ON="true"
-            ;;
-        --benchmark_onert_op)
-            ALLTEST_ON="false"
-            BENCHMARK_ONERT_OP_ON="true"
-            ;;
-        --reportdir=*)
-            REPORT_DIR=${i#*=}
-            ;;
-        *)
-            # Be careful that others params are handled as $ARTIFACT_PATH
-            ARTIFACT_PATH="$i"
-            ;;
-    esac
-    shift
-done
-
-ARTIFACT_PATH="$(readlink -f $ARTIFACT_PATH)"
-
-if [ -z "$UNIT_TEST_DIR" ]; then
-    UNIT_TEST_DIR=$ARTIFACT_PATH/Product/out/unittest
-fi
-
-if [ -z "$REPORT_DIR" ]; then
-    REPORT_DIR=$ARTIFACT_PATH/report
-fi
-
-source $TEST_DRIVER_DIR/common.sh
-
-# Run tflite_run with various tflite models
-if [ "$FRAMEWORKTEST_ON" == "true" ]; then
-    if [ -z "$FRAMEWORK_DRIVER_BIN" ]; then
-        FRAMEWORK_DRIVER_BIN=$ARTIFACT_PATH/Product/out/bin/tflite_run
-    fi
-
-    $TEST_DRIVER_DIR/test_framework.sh \
-        --driverbin=$FRAMEWORK_DRIVER_BIN \
-        --reportdir=$REPORT_DIR \
-        --tapname=framework_test.tap \
-        --logname=framework_test.log \
-        --testname="Frameworktest" \
-        --frameworktest_list_file=${FRAMEWORKTEST_LIST_FILE:-}
-fi
-
-# Run nnapi_test with various tflite models
-if [ "$ALLTEST_ON" == "true" ] || [ "$VERIFICATION_ON" == "true" ]; then
-    if [ -z "$VERIFICATION_DRIVER_BIN" ]; then
-        VERIFICATION_DRIVER_BIN=$ARTIFACT_PATH/Product/out/bin/nnapi_test
-    fi
-
-    # verification uses the same script as frameworktest does
-    $TEST_DRIVER_DIR/test_framework.sh \
-        --driverbin=$VERIFICATION_DRIVER_BIN \
-        --reportdir=$REPORT_DIR \
-        --tapname=verification_test.tap \
-        --logname=verification_test.log \
-        --testname="Verification" \
-        --frameworktest_list_file=${FRAMEWORKTEST_LIST_FILE:-}
-fi
-
-if [ "$BENCHMARK_ONERT_OP_ON" == "true" ]; then
-    DRIVER_BIN=$ARTIFACT_PATH/Product/out/bin/tflite_run
-
-    $TEST_DRIVER_DIR/benchmark_nnapi.sh \
-        --test_op \
-        --driverbin=$DRIVER_BIN \
-        --reportdir=$REPORT_DIR/benchmark_op \
-        --modelfilepath=$ARTIFACT_PATH/tests/scripts/models
-fi
-
-# Make json file. Actually, this process is only needed on CI. That's why it is in test-driver.sh.
-if [ "$BENCHMARK_ONERT_OP_ON" == "true" ]; then
-    # functions to fill json with benchmark results
-    source $ARTIFACT_PATH/tests/scripts/print_to_json.sh
-    if [ "$BENCHMARK_ONERT_OP_ON" == "true" ]; then
-        print_to_json $REPORT_DIR/benchmark_op $REPORT_DIR "benchmark_op_result.json"
-    else
-        print_to_json $REPORT_DIR/benchmark $REPORT_DIR "benchmark_result.json"
-    fi
-fi
diff --git a/tests/scripts/test_framework.sh b/tests/scripts/test_framework.sh
deleted file mode 100755 (executable)
index 6bf9c89..0000000
+++ /dev/null
@@ -1,104 +0,0 @@
-#!/bin/bash
-#
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-
-FWTEST_DRIVER_BIN=
-FWTEST_REPORT_DIR=
-FWTEST_TAP_NAME=
-FWTEST_LOG_NAME=
-FWTEST_TEST_NAME=
-
-function Usage()
-{
-    echo "Usage Example:"
-    echo "./$0 \\"
-    echo "  --driverbin=Product/out/bin/tflite_run \\  # Test driver path"
-    echo "  --frameworktest_list_file=tests/scripts/list/frameworktest_list.armv7l.cpu.txt \\"
-    echo "  --reportdir=report \\            # Directory for the report files will be saved"
-    echo "  --tapname=framework_test.tap \\  # Tap file name"
-    echo "  --logname=framework_test.log \\  # Log file name"
-    echo "  --testname=Frameworktest         # Name of the test just a label of tests"
-
-    exit 1
-}
-
-for i in "$@"
-do
-    case $i in
-        -h|--help|help)
-            Usage
-            ;;
-        --driverbin=*)
-            FWTEST_DRIVER_BIN=${i#*=}
-            ;;
-        --reportdir=*)
-            FWTEST_REPORT_DIR=${i#*=}
-            ;;
-        --tapname=*)
-            FWTEST_TAP_NAME=${i#*=}
-            ;;
-        --logname=*)
-            FWTEST_LOG_NAME=${i#*=}
-            ;;
-        --testname=*)
-            FWTEST_TEST_NAME=${i#*=}
-            ;;
-        --frameworktest_list_file=*)
-            FRAMEWORKTEST_LIST_FILE=${i#*=}
-            ;;
-    esac
-    shift
-done
-
-[ ! -z "$FWTEST_DRIVER_BIN" ] || Usage
-[ ! -z "$FWTEST_REPORT_DIR" ] || Usage
-[ ! -z "$FWTEST_TAP_NAME" ] || Usage
-[ ! -z "$FWTEST_LOG_NAME" ] || Usage
-[ ! -z "$FWTEST_TEST_NAME" ] || Usage
-
-if [ ! -e "$FWTEST_REPORT_DIR" ]; then
-    mkdir -p $FWTEST_REPORT_DIR
-fi
-
-echo ""
-echo "============================================"
-echo "$FWTEST_TEST_NAME with $(basename $FWTEST_DRIVER_BIN) ..."
-
-if [ ! -z "$FRAMEWORKTEST_LIST_FILE" ]; then
-    MODELLIST=$(cat "${FRAMEWORKTEST_LIST_FILE}")
-fi
-
-$MY_PATH/models/run_test.sh --driverbin=$FWTEST_DRIVER_BIN \
-    --reportdir=$FWTEST_REPORT_DIR \
-    --tapname=$FWTEST_TAP_NAME \
-    ${MODELLIST:-} \
-    > $FWTEST_REPORT_DIR/$FWTEST_LOG_NAME 2>&1
-FWTEST_RESULT=$?
-if [[ $FWTEST_RESULT -ne 0 ]]; then
-    echo ""
-    cat $FWTEST_REPORT_DIR/$FWTEST_TAP_NAME
-    echo ""
-    echo "$FWTEST_TEST_NAME failed... exit code: $FWTEST_RESULT"
-    echo "============================================"
-    echo ""
-    exit $FWTEST_RESULT
-fi
-
-echo ""
-cat $FWTEST_REPORT_DIR/$FWTEST_TAP_NAME
-echo "============================================"
-echo ""
index 639cf3f..6ee2890 100755 (executable)
@@ -1,5 +1,7 @@
 #!/bin/bash
 
+set -eo pipefail
+
 MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 source $MY_PATH/common.sh
 
@@ -7,11 +9,10 @@ BACKEND_CNT=3
 # Run profiler BACKEND_CNT+1 times: on each run of the first BACKEND_CNT runs it will
 #     collect metrics for one unmeasured backend. On the last run metrics for data transfer
 PROFILING_RUN_CNT=$((BACKEND_CNT+1))
-TEST_DRIVER_DIR="$( cd "$( dirname "${BASH_SOURCE}" )" && pwd )"
-ARTIFACT_PATH="$TEST_DRIVER_DIR/../.."
-BENCHMARK_DRIVER_BIN=$ARTIFACT_PATH/Product/out/bin/tflite_run
+ARTIFACT_PATH="$MY_PATH/../.."
+BENCHMARK_DRIVER_BIN=$INSTALL_PATH/bin/onert_run
 REPORT_DIR=$ARTIFACT_PATH/report
-RUN_TEST_SH=$ARTIFACT_PATH/tests/scripts/models/run_test.sh
+RUN_TEST_SH=$INSTALL_PATH/test/models/run_test.sh
 BENCHMARK_MODEL_LIST="MODELS/inception_nonslim MODELS/inception_slim MODELS/mobilenet"
 
 if [ ! -e "$RUN_TEST_SH" ]; then
@@ -19,10 +20,18 @@ if [ ! -e "$RUN_TEST_SH" ]; then
     exit 1
 fi
 
-
 BENCHMARK_REPORT_DIR=$REPORT_DIR/benchmark
 BENCHMARK_MODELS_FILE=$BENCHMARK_REPORT_DIR/benchmark_models.txt
 
+# Cleanup report files
+rm -rf $BENCHMARK_REPORT_DIR
+rm -f $BENCHMARK_MODELS_FILE
+mkdir -p $BENCHMARK_REPORT_DIR
+touch $BENCHMARK_MODELS_FILE
+
+# Prepare models
+prepare_test_model
+
 function run_without_sched()
 {
     local RESULT_SCH_INT=$1
@@ -39,7 +48,7 @@ function run_without_sched()
 
     RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
 
-    printf -v RESULT_INT '%d' $RESULT 2>/dev/null
+    printf -v RESULT_INT '%.0f' $RESULT
     PERCENTAGE=$((100-RESULT_SCH_INT*100/RESULT_INT))
     echo "$RESULT ms. Parallel scheduler is $PERCENTAGE% faster"
 }
@@ -53,11 +62,10 @@ function run_benchmark_test()
     export COUNT=5
     echo "============================================"
     local i=0
-    export USE_NNAPI=1
     export BACKENDS="acl_cl;acl_neon;cpu"
     # Remove metrics so that profiler can get metrics for operations
     #      with input&output sizes the same as the model
-    rm "exec_time.json" 2>/dev/null
+    rm -f "exec_time.json" 2>/dev/null
     for MODEL in $BENCHMARK_MODEL_LIST; do
 
         echo "Benchmark test with `basename $BENCHMARK_DRIVER_BIN` & `echo $MODEL`"
@@ -79,7 +87,7 @@ function run_benchmark_test()
 
             print_with_dots "Profiling run #$j out of $PROFILING_RUN_CNT"
 
-            $RUN_TEST_SH --driverbin=$BENCHMARK_DRIVER_BIN $MODEL > $LOG_FILE 2>&1
+            RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
             RET=$?
             if [[ $RET -ne 0 ]]; then
                 echo "Profiling $MODEL aborted in run#$j... exit code: $RET"
@@ -91,7 +99,6 @@ function run_benchmark_test()
         done
         unset ONERT_LOG_ENABLE
 
-
 ##################################################################################
         # Turn off profiling
 ##################################################################################
@@ -108,7 +115,7 @@ function run_benchmark_test()
         RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
         echo "$RESULT ms"
 
-        printf -v RESULT_SCH_INT '%d' $RESULT 2>/dev/null
+        printf -v RESULT_SCH_INT '%.0f' $RESULT
 
         mv "after_lower_subg-0.dot" $REPORT_MODEL_DIR/"after_lower_subg-0_parallel.dot"
 
@@ -122,7 +129,7 @@ function run_benchmark_test()
 
         RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
 
-        printf -v RESULT_INT '%d' $RESULT 2>/dev/null
+        printf -v RESULT_INT '%.0f' $RESULT
         PERCENTAGE=$((100-RESULT_SCH_INT*100/RESULT_INT))
         echo "$RESULT ms. Parallel scheduler is $PERCENTAGE% faster"
 
@@ -178,7 +185,6 @@ function run_benchmark_test()
     unset BACKENDS
     echo "============================================"
     unset COUNT
-    unset USE_NNAPI
 }
 
 echo ""
diff --git a/tests/tools/nnapi_test/CMakeLists.txt b/tests/tools/nnapi_test/CMakeLists.txt
deleted file mode 100644 (file)
index eac649b..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-if(NOT BUILD_NNAPI_TEST)
-  return()
-endif(NOT BUILD_NNAPI_TEST)
-
-list(APPEND SOURCES "src/nnapi_test.cc")
-list(APPEND SOURCES "src/args.cc")
-
-nnfw_find_package(Boost REQUIRED program_options)
-
-add_executable(nnapi_test ${SOURCES})
-target_include_directories(nnapi_test PRIVATE ${Boost_INCLUDE_DIRS})
-target_link_libraries(nnapi_test nnfw_lib_tflite)
-target_link_libraries(nnapi_test ${Boost_PROGRAM_OPTIONS_LIBRARY})
-install(TARGETS nnapi_test DESTINATION bin)
diff --git a/tests/tools/nnapi_test/src/args.cc b/tests/tools/nnapi_test/src/args.cc
deleted file mode 100644 (file)
index 420e092..0000000
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "args.h"
-
-#include <iostream>
-
-namespace nnapi_test
-{
-
-Args::Args(const int argc, char **argv)
-{
-  Initialize();
-  try
-  {
-    Parse(argc, argv);
-  }
-  catch (const std::exception &e)
-  {
-    std::cerr << "The argments that cannot be parsed: " << e.what() << '\n';
-    print(argv);
-    exit(255);
-  }
-}
-
-void Args::print(char **argv)
-{
-  std::cout << "nnapi_test\n\n";
-  std::cout << "Usage: " << argv[0] << " <.tflite> [<options>]\n\n";
-  std::cout << _options;
-  std::cout << "\n";
-}
-
-void Args::Initialize(void)
-{
-  // General options
-  po::options_description general("General options", 100);
-
-  // clang-format off
-  general.add_options()
-    ("help,h", "Print available options")
-    ("tflite", po::value<std::string>()->required())
-    ("seed", po::value<int>()->default_value(0), "The seed of random inputs")
-    ("num_runs", po::value<int>()->default_value(2), "The number of runs")
-    ;
-  // clang-format on
-
-  _options.add(general);
-  _positional.add("tflite", 1);
-  _positional.add("seed", 2);
-}
-
-void Args::Parse(const int argc, char **argv)
-{
-  po::variables_map vm;
-  po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
-            vm);
-
-  if (vm.count("help"))
-  {
-    print(argv);
-
-    exit(0);
-  }
-
-  po::notify(vm);
-  if (vm.count("tflite"))
-  {
-    _tflite_filename = vm["tflite"].as<std::string>();
-
-    if (_tflite_filename.empty())
-    {
-      std::cerr << "Please specify tflite file.\n";
-      print(argv);
-      exit(255);
-    }
-    else
-    {
-      if (access(_tflite_filename.c_str(), F_OK) == -1)
-      {
-        std::cerr << "tflite file not found: " << _tflite_filename << "\n";
-        exit(255);
-      }
-    }
-  }
-
-  if (vm.count("seed"))
-  {
-    _seed = vm["seed"].as<int>();
-  }
-
-  if (vm.count("num_runs"))
-  {
-    _num_runs = vm["num_runs"].as<int>();
-    if (_num_runs < 0)
-    {
-      std::cerr << "num_runs value must be greater than 0.\n";
-      exit(255);
-    }
-  }
-}
-
-} // end of namespace nnapi_test
diff --git a/tests/tools/nnapi_test/src/args.h b/tests/tools/nnapi_test/src/args.h
deleted file mode 100644 (file)
index 486fbef..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNAPI_TEST_ARGS_H__
-#define __NNAPI_TEST_ARGS_H__
-
-#include <boost/program_options.hpp>
-#include <string>
-
-namespace po = boost::program_options;
-
-namespace nnapi_test
-{
-
-class Args
-{
-public:
-  Args(const int argc, char **argv);
-  void print(char **argv);
-
-  const std::string &getTfliteFilename(void) const { return _tflite_filename; }
-  const int getSeed(void) const { return _seed; }
-  const int getNumRuns(void) const { return _num_runs; }
-
-private:
-  void Initialize();
-  void Parse(const int argc, char **argv);
-
-private:
-  po::positional_options_description _positional;
-  po::options_description _options;
-
-  std::string _tflite_filename;
-  int _seed;
-  int _num_runs;
-};
-
-} // end of namespace nnapi_test
-
-#endif // __NNAPI_TEST_ARGS_H__
diff --git a/tests/tools/nnapi_test/src/nnapi_test.cc b/tests/tools/nnapi_test/src/nnapi_test.cc
deleted file mode 100644 (file)
index 314f162..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tensorflow/lite/model.h"
-
-#include "tflite/interp/FlatBufferBuilder.h"
-#include "tflite/RandomTestRunner.h"
-
-#include <iostream>
-#include <stdexcept>
-
-#include "args.h"
-
-using namespace tflite;
-using namespace nnfw::tflite;
-using namespace nnapi_test;
-
-int main(const int argc, char **argv)
-{
-  Args args(argc, argv);
-
-  const auto filename = args.getTfliteFilename();
-
-  StderrReporter error_reporter;
-
-  auto model = FlatBufferModel::BuildFromFile(filename.c_str(), &error_reporter);
-
-  if (model == nullptr)
-  {
-    // error_reporter must have shown the error message already
-    return 1;
-  }
-
-  const nnfw::tflite::FlatBufferBuilder builder(*model);
-
-  try
-  {
-    const auto seed = static_cast<uint32_t>(args.getSeed());
-    auto runner = nnfw::tflite::RandomTestRunner::make(seed);
-    const auto num_runs = static_cast<size_t>(args.getNumRuns());
-    runner.compile(builder);
-    return runner.run(num_runs);
-  }
-  catch (const std::exception &e)
-  {
-    std::cerr << e.what() << std::endl;
-    return 1;
-  }
-}
diff --git a/tests/tools/nnpackage_run/CMakeLists.txt b/tests/tools/nnpackage_run/CMakeLists.txt
deleted file mode 100644 (file)
index 49070d3..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-if(NOT BUILD_NNPACKAGE_RUN)
-  return()
-endif(NOT BUILD_NNPACKAGE_RUN)
-
-if(NOT BUILD_ONERT)
-  return()
-endif(NOT BUILD_ONERT)
-
-list(APPEND NNPACKAGE_RUN_SRCS "src/nnpackage_run.cc")
-list(APPEND NNPACKAGE_RUN_SRCS "src/args.cc")
-list(APPEND NNPACKAGE_RUN_SRCS "src/nnfw_util.cc")
-list(APPEND NNPACKAGE_RUN_SRCS "src/randomgen.cc")
-list(APPEND NNPACKAGE_RUN_SRCS "src/rawformatter.cc")
-
-nnfw_find_package(Boost REQUIRED program_options)
-nnfw_find_package(Ruy QUIET)
-nnfw_find_package(HDF5 QUIET)
-
-if (HDF5_FOUND)
-  list(APPEND NNPACKAGE_RUN_SRCS "src/h5formatter.cc")
-endif()
-
-add_executable(nnpackage_run ${NNPACKAGE_RUN_SRCS})
-
-if (HDF5_FOUND)
-  target_compile_definitions(nnpackage_run PRIVATE ONERT_HAVE_HDF5=1)
-  target_include_directories(nnpackage_run PRIVATE ${HDF5_INCLUDE_DIRS})
-  target_link_libraries(nnpackage_run ${HDF5_CXX_LIBRARIES})
-else()
-  message(WARNING "HDF5 NOT found. Install libhdf5-dev or set EXT_HDF5_DIR to support load/dump in nnpackage_run.")
-endif(HDF5_FOUND)
-
-target_include_directories(nnpackage_run PRIVATE src)
-target_include_directories(nnpackage_run PRIVATE ${Boost_INCLUDE_DIRS})
-
-target_link_libraries(nnpackage_run nnfw_lib_tflite jsoncpp)
-target_link_libraries(nnpackage_run nnfw-dev)
-target_link_libraries(nnpackage_run ${Boost_PROGRAM_OPTIONS_LIBRARY})
-target_link_libraries(nnpackage_run nnfw_lib_benchmark)
-if(Ruy_FOUND AND PROFILE_RUY)
-  target_link_libraries(nnpackage_run ruy_instrumentation)
-  target_link_libraries(nnpackage_run ruy_profiler)
-endif(Ruy_FOUND AND PROFILE_RUY)
-
-install(TARGETS nnpackage_run DESTINATION bin)
diff --git a/tests/tools/nnpackage_run/README.md b/tests/tools/nnpackage_run/README.md
deleted file mode 100644 (file)
index 898cc84..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-# nnpackage_run
-
-`nnpackage_run` is a tool to run `nnpackage`.
-
-It takes `nnpackage` as input. It uses **runtime API** internally.
-
-## Usage
-
-### Simple run
-
-This will run with random input data
-
-```
-$ ./nnpackage_run path_to_nnpackage_directory
-```
-
-Output would look like:
-
-```
-nnfw_prepare takes 425.235 ms
-nnfw_run     takes 2.525 ms
-```
diff --git a/tests/tools/nnpackage_run/src/allocation.h b/tests/tools/nnpackage_run/src/allocation.h
deleted file mode 100644 (file)
index 20e21eb..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_ALLOCATION_H__
-#define __NNPACKAGE_RUN_ALLOCATION_H__
-
-#include <cstdlib>
-#include <cstdint>
-
-namespace nnpkg_run
-{
-class Allocation
-{
-public:
-  Allocation() : data_(nullptr) {}
-  ~Allocation() { free(data_); }
-  void *data() const { return data_; }
-  void *alloc(uint64_t sz) { return data_ = malloc(sz); }
-
-private:
-  void *data_;
-};
-} // namespace nnpkg_run
-
-#endif // __NNPACKAGE_RUN_ALLOCATION_H__
diff --git a/tests/tools/nnpackage_run/src/args.cc b/tests/tools/nnpackage_run/src/args.cc
deleted file mode 100644 (file)
index ba034ce..0000000
+++ /dev/null
@@ -1,318 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "args.h"
-
-#include <functional>
-#include <iostream>
-#include <json/json.h>
-
-namespace
-{
-
-// This function parses a json object and returns as a vector of integers
-// For example,
-// [0, [1, 2, 3, 4], 3, 40, 4, []] in JSON
-// is converted to:
-// {
-//  0 -> [1, 2, 3, 4]
-//  3 -> 40
-//  4 -> []
-// } in std::unordered_map. Note that the value type is still Json::Value.
-std::unordered_map<uint32_t, Json::Value> argArrayToMap(const Json::Value &jsonval)
-{
-  if (!jsonval.isArray() || (jsonval.size() % 2 != 0))
-  {
-    std::cerr << "JSON argument must be an even-sized array in JSON\n";
-    exit(1);
-  }
-
-  std::unordered_map<uint32_t, Json::Value> ret;
-  for (uint32_t i = 0; i < jsonval.size(); i += 2)
-  {
-    if (!jsonval[i].isUInt())
-    {
-      std::cerr << "Key values(values in even indices) must be unsigned integers\n";
-      exit(1);
-    }
-    uint32_t key = jsonval[i].asUInt();
-    Json::Value val = jsonval[i + 1];
-    ret[key] = jsonval[i + 1];
-  }
-  return ret;
-}
-
-// param shape_str is a form of, e.g., "[1, [2, 3], 3, []]" or "h5"
-void handleShapeJsonParam(nnpkg_run::TensorShapeMap &shape_map, const std::string &shape_str)
-{
-  Json::Value root;
-  Json::Reader reader;
-  if (!reader.parse(shape_str, root, false))
-  {
-    std::cerr << "Invalid JSON format for output_sizes \"" << shape_str << "\"\n";
-    exit(1);
-  }
-
-  auto arg_map = argArrayToMap(root);
-  for (auto &pair : arg_map)
-  {
-    uint32_t key = pair.first;
-    Json::Value &shape_json = pair.second;
-    if (!shape_json.isArray())
-    {
-      std::cerr << "All the values must be list: " << shape_str << "\n";
-      exit(1);
-    }
-
-    std::vector<int> shape;
-    for (auto &dim_json : shape_json)
-    {
-      if (!dim_json.isUInt())
-      {
-        std::cerr << "All the dims should be dim >= 0: " << shape_str << "\n";
-        exit(1);
-      }
-
-      shape.emplace_back(dim_json.asUInt64());
-    }
-
-    shape_map[key] = shape;
-  }
-}
-
-} // namespace
-
-namespace nnpkg_run
-{
-
-Args::Args(const int argc, char **argv)
-{
-  Initialize();
-  Parse(argc, argv);
-}
-
-void Args::Initialize(void)
-{
-  auto process_nnpackage = [&](const std::string &package_filename) {
-    _package_filename = package_filename;
-
-    std::cerr << "Package Filename " << _package_filename << std::endl;
-    if (_package_filename.empty())
-    {
-      // TODO Print usage instead of the below message
-      std::cerr << "Please specify nnpackage file. Run with `--help` for usage."
-                << "\n";
-
-      exit(1);
-    }
-    else
-    {
-      if (access(_package_filename.c_str(), F_OK) == -1)
-      {
-        std::cerr << "nnpackage not found: " << _package_filename << "\n";
-      }
-    }
-  };
-
-  auto process_output_sizes = [&](const std::string &output_sizes_json_str) {
-    Json::Value root;
-    Json::Reader reader;
-    if (!reader.parse(output_sizes_json_str, root, false))
-    {
-      std::cerr << "Invalid JSON format for output_sizes \"" << output_sizes_json_str << "\"\n";
-      exit(1);
-    }
-
-    auto arg_map = argArrayToMap(root);
-    for (auto &pair : arg_map)
-    {
-      uint32_t key = pair.first;
-      Json::Value &val_json = pair.second;
-      if (!val_json.isUInt())
-      {
-        std::cerr << "All the values in `output_sizes` must be unsigned integers\n";
-        exit(1);
-      }
-      uint32_t val = val_json.asUInt();
-      _output_sizes[key] = val;
-    }
-  };
-
-  auto process_shape_prepare = [&](const std::string &shape_str) {
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
-    if (shape_str == "H5" || shape_str == "h5")
-    {
-      _when_to_use_h5_shape = WhenToUseH5Shape::PREPARE;
-      return;
-    }
-#endif
-    try
-    {
-      handleShapeJsonParam(_shape_prepare, shape_str);
-    }
-    catch (const std::exception &e)
-    {
-      std::cerr << "error with '--shape_prepare' option: " << shape_str << std::endl;
-      exit(1);
-    }
-  };
-
-  auto process_shape_run = [&](const std::string &shape_str) {
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
-    if (shape_str == "H5" || shape_str == "h5")
-    {
-      _when_to_use_h5_shape = WhenToUseH5Shape::RUN;
-      return;
-    }
-#endif
-    try
-    {
-      handleShapeJsonParam(_shape_run, shape_str);
-    }
-    catch (const std::exception &e)
-    {
-      std::cerr << "error with '--shape_run' option: " << shape_str << std::endl;
-      exit(1);
-    }
-  };
-
-  // General options
-  po::options_description general("General options", 100);
-
-  // clang-format off
-  general.add_options()
-    ("help,h", "Print available options")
-    ("version", "Print version and exit immediately")
-    ("nnpackage", po::value<std::string>()->required()->notifier(process_nnpackage))
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
-    ("dump,d", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_filename = v; }), "Output filename")
-    ("load,l", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _load_filename = v; }), "Input filename")
-#endif
-    ("dump:raw", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_raw_filename = v; }), "Raw Output filename")
-    ("load:raw", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _load_raw_filename = v; }), "Raw Input filename")
-    ("output_sizes", po::value<std::string>()->notifier(process_output_sizes),
-        "The output buffer size in JSON 1D array\n"
-        "If not given, the model's output sizes are used\n"
-        "e.g. '[0, 40, 2, 80]' to set 0th tensor to 40 and 2nd tensor to 80.\n")
-    ("num_runs,r", po::value<int>()->default_value(1)->notifier([&](const auto &v) { _num_runs = v; }), "The number of runs")
-    ("warmup_runs,w", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _warmup_runs = v; }), "The number of warmup runs")
-    ("run_delay,t", po::value<int>()->default_value(-1)->notifier([&](const auto &v) { _run_delay = v; }), "Delay time(us) between runs (as default no delay")
-    ("gpumem_poll,g", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _gpumem_poll = v; }), "Check gpu memory polling separately")
-    ("mem_poll,m", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _mem_poll = v; }), "Check memory polling")
-    ("write_report,p", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _write_report = v; }),
-         "Write report\n"
-         "{exec}-{nnpkg}-{backend}.csv will be generated.\n"
-         "e.g. nnpackage_run-UNIT_Add_000-acl_cl.csv.\n"
-         "{nnpkg} name may be changed to realpath if you use symbolic-link.")
-    ("shape_prepare", po::value<std::string>()->default_value("[]")->notifier(process_shape_prepare),
-         "Please refer to the description of 'shape_run'")
-    ("shape_run", po::value<std::string>()->default_value("[]")->notifier(process_shape_run),
-         "'--shape_prepare: set shape of tensors before compilation (before calling nnfw_prepare()).\n"
-         "'--shape_run: set shape of tensors before running (before calling nnfw_run()).\n"
-         "Allowed value:.\n"
-         "'[0, [1, 2], 2, []]': set 0th tensor to [1, 2] and 2nd tensor to [] (scalar).\n"
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
-         "'h5': read shape(s) from H5 input file. '--load' should also be provided.\n"
-         "if '--load' option is provided but '--shape_prepare' or '--shape_run' is not provided,\n"
-         "'--shape_run h5' will be used by default.\n"
-#endif
-         "For detailed description, please consutl the description of nnfw_set_input_tensorinfo()\n"
-         )
-    ("verbose_level,v", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _verbose_level = v; }),
-         "Verbose level\n"
-         "0: prints the only result. Messages btw run don't print\n"
-         "1: prints result and message btw run\n"
-         "2: prints all of messages to print\n")
-    ;
-  // clang-format on
-
-  _options.add(general);
-  _positional.add("nnpackage", 1);
-}
-
-void Args::Parse(const int argc, char **argv)
-{
-  po::variables_map vm;
-  po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
-            vm);
-
-  {
-    auto conflicting_options = [&](const std::string &o1, const std::string &o2) {
-      if ((vm.count(o1) && !vm[o1].defaulted()) && (vm.count(o2) && !vm[o2].defaulted()))
-      {
-        throw boost::program_options::error(std::string("Two options '") + o1 + "' and '" + o2 +
-                                            "' cannot be given at once.");
-      }
-    };
-
-    // Calling, e.g., "nnpackage_run .. --shape_prepare .. --shape_run .." should theoretically
-    // work, but allowing both options together on the command line makes the usage and
-    // implementation of nnpackage_run too complicated. Therefore, let's not allow those options together.
-    conflicting_options("shape_prepare", "shape_run");
-  }
-
-  if (vm.count("help"))
-  {
-    std::cout << "nnpackage_run\n\n";
-    std::cout << "Usage: " << argv[0] << " path to nnpackage root directory [<options>]\n\n";
-    std::cout << _options;
-    std::cout << "\n";
-
-    exit(0);
-  }
-
-  if (vm.count("version"))
-  {
-    _print_version = true;
-    return;
-  }
-
-  try
-  {
-    po::notify(vm);
-  }
-  catch (const std::bad_cast &e)
-  {
-    std::cerr << "Bad cast error - " << e.what() << '\n';
-    exit(1);
-  }
-
-  // This must be run after `notify` as `_warmup_runs` must have been processed before.
-  if (vm.count("mem_poll"))
-  {
-    // Instead of EXECUTE to avoid overhead, memory polling runs on WARMUP
-    if (_mem_poll && _warmup_runs == 0)
-    {
-      _warmup_runs = 1;
-    }
-  }
-}
-
-bool Args::shapeParamProvided()
-{
-  bool provided = false;
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
-  // "--shape_run h5" or "--shape_prepare h5" was provided
-  provided = (getWhenToUseH5Shape() != WhenToUseH5Shape::NOT_PROVIDED);
-#endif
-  // specific shape was provided
-  // e.g., "--shape_run '[0, [10, 1]]'" or "--shape_prepare '[0, [10, 1]]'"
-  provided |= (!getShapeMapForPrepare().empty()) || (!getShapeMapForRun().empty());
-
-  return provided;
-}
-
-} // end of namespace nnpkg_run
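Note (not part of the patch): the shape string handled by `handleShapeJsonParam` above is an even-sized JSON array alternating tensor indices and shape lists. A minimal stand-alone sketch using the same jsoncpp calls, assuming jsoncpp is available on the build, is:

```
#include <iostream>
#include <string>
#include <json/json.h>

int main()
{
  // Same format as accepted by --shape_prepare / --shape_run:
  // tensor 0 gets shape [1, 2], tensor 2 becomes a scalar ([]).
  const std::string shape_str = "[0, [1, 2], 2, []]";

  Json::Value root;
  Json::Reader reader;
  if (!reader.parse(shape_str, root, false))
    return 1;

  // Walk the (index, shape) pairs, as argArrayToMap does.
  for (Json::ArrayIndex i = 0; i + 1 < root.size(); i += 2)
    std::cout << "tensor " << root[i].asUInt() << " -> rank " << root[i + 1].size() << std::endl;

  return 0;
}
```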
diff --git a/tests/tools/nnpackage_run/src/args.h b/tests/tools/nnpackage_run/src/args.h
deleted file mode 100644 (file)
index 82015af..0000000
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_ARGS_H__
-#define __NNPACKAGE_RUN_ARGS_H__
-
-#include <string>
-#include <unordered_map>
-#include <vector>
-#include <boost/program_options.hpp>
-
-#include "types.h"
-
-namespace po = boost::program_options;
-
-namespace nnpkg_run
-{
-
-using TensorShapeMap = std::unordered_map<uint32_t, TensorShape>;
-
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
-enum class WhenToUseH5Shape
-{
-  NOT_PROVIDED, // Param not provided
-  PREPARE, // read shapes in h5 file and set them as inputs' shape before calling nnfw_prepare()
-  RUN,     // read shapes in h5 file and set them as inputs' shape before calling nnfw_run()
-};
-#endif
-
-class Args
-{
-public:
-  Args(const int argc, char **argv);
-  void print(void);
-
-  const std::string &getPackageFilename(void) const { return _package_filename; }
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
-  const std::string &getDumpFilename(void) const { return _dump_filename; }
-  const std::string &getLoadFilename(void) const { return _load_filename; }
-  WhenToUseH5Shape getWhenToUseH5Shape(void) const { return _when_to_use_h5_shape; }
-#endif
-  const std::string &getDumpRawFilename(void) const { return _dump_raw_filename; }
-  const std::string &getLoadRawFilename(void) const { return _load_raw_filename; }
-  const int getNumRuns(void) const { return _num_runs; }
-  const int getWarmupRuns(void) const { return _warmup_runs; }
-  const int getRunDelay(void) const { return _run_delay; }
-  std::unordered_map<uint32_t, uint32_t> getOutputSizes(void) const { return _output_sizes; }
-  const bool getGpuMemoryPoll(void) const { return _gpumem_poll; }
-  const bool getMemoryPoll(void) const { return _mem_poll; }
-  const bool getWriteReport(void) const { return _write_report; }
-  const bool printVersion(void) const { return _print_version; }
-  TensorShapeMap &getShapeMapForPrepare() { return _shape_prepare; }
-  TensorShapeMap &getShapeMapForRun() { return _shape_run; }
-  /// @brief Return true if "--shape_run" or "--shape_prepare" is provided
-  bool shapeParamProvided();
-  const int getVerboseLevel(void) const { return _verbose_level; }
-
-private:
-  void Initialize();
-  void Parse(const int argc, char **argv);
-
-private:
-  po::positional_options_description _positional;
-  po::options_description _options;
-
-  std::string _package_filename;
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
-  std::string _dump_filename;
-  std::string _load_filename;
-  WhenToUseH5Shape _when_to_use_h5_shape = WhenToUseH5Shape::NOT_PROVIDED;
-#endif
-  std::string _dump_raw_filename;
-  std::string _load_raw_filename;
-  TensorShapeMap _shape_prepare;
-  TensorShapeMap _shape_run;
-  int _num_runs;
-  int _warmup_runs;
-  int _run_delay;
-  std::unordered_map<uint32_t, uint32_t> _output_sizes;
-  bool _gpumem_poll;
-  bool _mem_poll;
-  bool _write_report;
-  bool _print_version = false;
-  int _verbose_level;
-};
-
-} // end of namespace nnpkg_run
-
-#endif // __NNPACKAGE_RUN_ARGS_H__
diff --git a/tests/tools/nnpackage_run/src/formatter.h b/tests/tools/nnpackage_run/src/formatter.h
deleted file mode 100644 (file)
index 0dca340..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_FORMATTER_H__
-#define __NNPACKAGE_RUN_FORMATTER_H__
-
-#include <string>
-#include <vector>
-
-#include "types.h"
-#include "allocation.h"
-
-struct nnfw_session;
-
-namespace nnpkg_run
-{
-class Formatter
-{
-public:
-  virtual ~Formatter() = default;
-  Formatter(nnfw_session *sess) : session_(sess) {}
-  virtual void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) = 0;
-  virtual void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) = 0;
-  virtual std::vector<TensorShape> readTensorShapes(const std::string &filename)
-  {
-    return std::vector<TensorShape>();
-  };
-
-protected:
-  nnfw_session *session_;
-};
-} // namespace nnpkg_run
-
-#endif // __NNPACKAGE_RUN_FORMATTER_H__
diff --git a/tests/tools/nnpackage_run/src/h5formatter.cc b/tests/tools/nnpackage_run/src/h5formatter.cc
deleted file mode 100644 (file)
index c0e670b..0000000
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "h5formatter.h"
-#include "nnfw.h"
-#include "nnfw_util.h"
-
-#include <iostream>
-#include <stdexcept>
-#include <H5Cpp.h>
-
-namespace
-{
-nnpkg_run::TensorShape getShape(H5::DataSet &data_set)
-{
-  std::vector<hsize_t> h5_shape; // hsize_t is unsigned long long
-  H5::DataSpace data_space = data_set.getSpace();
-  int rank = data_space.getSimpleExtentNdims();
-  h5_shape.resize(rank);
-
-  // read shape info from H5 file
-  data_space.getSimpleExtentDims(h5_shape.data(), NULL);
-
-  nnpkg_run::TensorShape shape;
-  for (auto dim : h5_shape)
-    shape.emplace_back(static_cast<int>(dim));
-
-  return shape;
-}
-} // namespace
-
-namespace nnpkg_run
-{
-static const char *h5_value_grpname = "value";
-
-std::vector<TensorShape> H5Formatter::readTensorShapes(const std::string &filename)
-{
-  uint32_t num_inputs;
-  NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
-  std::vector<TensorShape> tensor_shapes;
-
-  try
-  {
-    H5::Exception::dontPrint();
-
-    H5::H5File file(filename, H5F_ACC_RDONLY);
-    H5::Group value_group = file.openGroup(h5_value_grpname);
-
-    // Constraints: if there are n data set names, they should be unique and
-    //              one of [ "0", "1", .. , "n-1" ]
-    for (uint32_t i = 0; i < num_inputs; ++i)
-    {
-      H5::DataSet data_set = value_group.openDataSet(std::to_string(i));
-      H5::DataType type = data_set.getDataType();
-      auto shape = getShape(data_set);
-
-      tensor_shapes.emplace_back(shape);
-    }
-
-    return tensor_shapes;
-  }
-  catch (const H5::Exception &e)
-  {
-    H5::Exception::printErrorStack();
-    std::exit(-1);
-  }
-  catch (const std::exception &e)
-  {
-    std::cerr << e.what() << std::endl;
-    std::exit(-1);
-  }
-}
-
-void H5Formatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs)
-{
-  uint32_t num_inputs;
-  NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
-  try
-  {
-    // Turn off the automatic error printing.
-    H5::Exception::dontPrint();
-
-    H5::H5File file(filename, H5F_ACC_RDONLY);
-    H5::Group value_group = file.openGroup(h5_value_grpname);
-    for (uint32_t i = 0; i < num_inputs; ++i)
-    {
-      nnfw_tensorinfo ti;
-      NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
-
-      // TODO Add Assert(nnfw shape, h5 file shape size)
-
-      // allocate memory for data
-      auto bufsz = bufsize_for(&ti);
-      inputs[i].alloc(bufsz);
-
-      H5::DataSet data_set = value_group.openDataSet(std::to_string(i));
-      H5::DataType type = data_set.getDataType();
-      switch (ti.dtype)
-      {
-        case NNFW_TYPE_TENSOR_FLOAT32:
-          if (type == H5::PredType::IEEE_F32BE || type == H5::PredType::IEEE_F32LE)
-            data_set.read(inputs[i].data(), H5::PredType::NATIVE_FLOAT);
-          else
-            throw std::runtime_error("model input type is f32. But h5 data type is different.");
-          break;
-        case NNFW_TYPE_TENSOR_INT32:
-          if (type == H5::PredType::STD_I32BE || type == H5::PredType::STD_I32LE)
-            data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT32);
-          else
-            throw std::runtime_error("model input type is i32. But h5 data type is different.");
-          break;
-        case NNFW_TYPE_TENSOR_INT64:
-          if (type == H5::PredType::STD_I64BE || type == H5::PredType::STD_I64LE)
-            data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT64);
-          else
-            throw std::runtime_error("model input type is i64. But h5 data type is different.");
-          break;
-        case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
-        case NNFW_TYPE_TENSOR_BOOL:
-        case NNFW_TYPE_TENSOR_UINT8:
-          if (type == H5::PredType::STD_U8BE || type == H5::PredType::STD_U8LE)
-            data_set.read(inputs[i].data(), H5::PredType::NATIVE_UINT8);
-          else
-            throw std::runtime_error(
-              "model input type is qasymm8, bool or uint8. But h5 data type is different.");
-          break;
-        case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
-          if (type == H5::PredType::STD_I8BE || type == H5::PredType::STD_I8LE)
-            data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT8);
-          else
-            throw std::runtime_error("model input type is int8. But h5 data type is different.");
-          break;
-        case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
-          throw std::runtime_error("NYI for NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED type");
-        default:
-          throw std::runtime_error("nnpkg_run can load f32, i32, qasymm8, bool and uint8.");
-      }
-      NNPR_ENSURE_STATUS(nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), bufsz));
-      NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
-    }
-  }
-  catch (const H5::Exception &e)
-  {
-    H5::Exception::printErrorStack();
-    std::exit(-1);
-  }
-  catch (const std::exception &e)
-  {
-    std::cerr << e.what() << std::endl;
-    std::exit(-1);
-  }
-};
-
-void H5Formatter::dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs)
-{
-  uint32_t num_outputs;
-  NNPR_ENSURE_STATUS(nnfw_output_size(session_, &num_outputs));
-  try
-  {
-    // Turn off the automatic error printing.
-    H5::Exception::dontPrint();
-
-    H5::H5File file(filename, H5F_ACC_TRUNC);
-    H5::Group value_group = file.createGroup(h5_value_grpname);
-    for (uint32_t i = 0; i < num_outputs; i++)
-    {
-      nnfw_tensorinfo ti;
-      NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session_, i, &ti));
-      std::vector<hsize_t> dims(ti.rank);
-      for (uint32_t j = 0; j < ti.rank; ++j)
-      {
-        if (ti.dims[j] >= 0)
-          dims[j] = static_cast<hsize_t>(ti.dims[j]);
-        else
-        {
-          std::cerr << "Negative dimension in output tensor" << std::endl;
-          exit(-1);
-        }
-      }
-      H5::DataSpace data_space(ti.rank, dims.data());
-      switch (ti.dtype)
-      {
-        case NNFW_TYPE_TENSOR_FLOAT32:
-        {
-          H5::DataSet data_set =
-            value_group.createDataSet(std::to_string(i), H5::PredType::IEEE_F32BE, data_space);
-          data_set.write(outputs[i].data(), H5::PredType::NATIVE_FLOAT);
-          break;
-        }
-        case NNFW_TYPE_TENSOR_INT32:
-        {
-          H5::DataSet data_set =
-            value_group.createDataSet(std::to_string(i), H5::PredType::STD_I32LE, data_space);
-          data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT32);
-          break;
-        }
-        case NNFW_TYPE_TENSOR_INT64:
-        {
-          H5::DataSet data_set =
-            value_group.createDataSet(std::to_string(i), H5::PredType::STD_I64LE, data_space);
-          data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT64);
-          break;
-        }
-        case NNFW_TYPE_TENSOR_UINT8:
-        case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
-        {
-          H5::DataSet data_set =
-            value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8BE, data_space);
-          data_set.write(outputs[i].data(), H5::PredType::NATIVE_UINT8);
-          break;
-        }
-        case NNFW_TYPE_TENSOR_BOOL:
-        {
-          H5::DataSet data_set =
-            value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8LE, data_space);
-          data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
-          break;
-        }
-        case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
-        {
-          H5::DataSet data_set =
-            value_group.createDataSet(std::to_string(i), H5::PredType::STD_I8LE, data_space);
-          data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
-          break;
-        }
-        case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
-          throw std::runtime_error("NYI for NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED type");
-        default:
-          throw std::runtime_error("nnpkg_run can dump f32, i32, qasymm8, bool and uint8.");
-      }
-    }
-  }
-  catch (const H5::Exception &e)
-  {
-    H5::Exception::printErrorStack();
-    std::exit(-1);
-  }
-  catch (const std::runtime_error &e)
-  {
-    std::cerr << "Error during dumpOutputs on nnpackage_run : " << e.what() << std::endl;
-    std::exit(-1);
-  }
-};
-
-} // end of namespace nnpkg_run
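Note (not part of the patch): the HDF5 layout expected by `readTensorShapes`/`loadInputs` above is a group named "value" containing datasets "0" .. "n-1", one per model input. A minimal sketch that writes a compatible single-input file with the same HDF5 C++ API, using a made-up 2x2 float input, is:

```
#include <vector>
#include <H5Cpp.h>

int main()
{
  std::vector<float> input0 = {0.0f, 1.0f, 2.0f, 3.0f}; // made-up input values
  hsize_t dims[2] = {2, 2};                             // made-up input shape

  H5::H5File file("input.h5", H5F_ACC_TRUNC);
  H5::Group value_group = file.createGroup("value"); // group name read by the formatter
  H5::DataSpace space(2, dims);
  H5::DataSet data_set = value_group.createDataSet("0", H5::PredType::IEEE_F32LE, space);
  data_set.write(input0.data(), H5::PredType::NATIVE_FLOAT);
  return 0;
}
```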
diff --git a/tests/tools/nnpackage_run/src/h5formatter.h b/tests/tools/nnpackage_run/src/h5formatter.h
deleted file mode 100644 (file)
index ca2d5a5..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_H5FORMATTER_H__
-#define __NNPACKAGE_RUN_H5FORMATTER_H__
-
-#include "allocation.h"
-#include "formatter.h"
-#include "types.h"
-
-#include <string>
-#include <vector>
-
-struct nnfw_session;
-
-namespace nnpkg_run
-{
-class H5Formatter : public Formatter
-{
-public:
-  H5Formatter(nnfw_session *sess) : Formatter(sess) {}
-  std::vector<TensorShape> readTensorShapes(const std::string &filename) override;
-  void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) override;
-  void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) override;
-};
-} // namespace nnpkg_run
-
-#endif // __NNPACKAGE_RUN_H5FORMATTER_H__
diff --git a/tests/tools/nnpackage_run/src/nnfw_util.cc b/tests/tools/nnpackage_run/src/nnfw_util.cc
deleted file mode 100644 (file)
index da98da5..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cassert>
-#include <string>
-#include "nnfw.h"
-
-namespace nnpkg_run
-{
-uint64_t num_elems(const nnfw_tensorinfo *ti)
-{
-  uint64_t n = 1;
-  for (uint32_t i = 0; i < ti->rank; ++i)
-  {
-    assert(ti->dims[i] >= 0);
-    n *= ti->dims[i];
-  }
-  return n;
-}
-
-uint64_t bufsize_for(const nnfw_tensorinfo *ti)
-{
-  static int elmsize[] = {
-    sizeof(float),   /* NNFW_TYPE_TENSOR_FLOAT32 */
-    sizeof(int),     /* NNFW_TYPE_TENSOR_INT32 */
-    sizeof(uint8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM */
-    sizeof(bool),    /* NNFW_TYPE_TENSOR_BOOL = 3 */
-    sizeof(uint8_t), /* NNFW_TYPE_TENSOR_UINT8 = 4 */
-    sizeof(int64_t), /* NNFW_TYPE_TENSOR_INT64 = 5 */
-    sizeof(int8_t),  /* NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED = 6 */
-    sizeof(int16_t), /* NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED = 7 */
-  };
-  return elmsize[ti->dtype] * num_elems(ti);
-}
-
-} // namespace nnpkg_run
diff --git a/tests/tools/nnpackage_run/src/nnfw_util.h b/tests/tools/nnpackage_run/src/nnfw_util.h
deleted file mode 100644 (file)
index 6fe547e..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_NNFW_UTIL_H__
-#define __NNPACKAGE_RUN_NNFW_UTIL_H__
-
-#include "nnfw.h"
-
-#define NNPR_ENSURE_STATUS(a)        \
-  do                                 \
-  {                                  \
-    if ((a) != NNFW_STATUS_NO_ERROR) \
-    {                                \
-      exit(-1);                      \
-    }                                \
-  } while (0)
-
-namespace nnpkg_run
-{
-uint64_t num_elems(const nnfw_tensorinfo *ti);
-uint64_t bufsize_for(const nnfw_tensorinfo *ti);
-} // end of namespace nnpkg_run
-
-#endif // __NNPACKAGE_RUN_NNFW_UTIL_H__
diff --git a/tests/tools/nnpackage_run/src/nnpackage_run.cc b/tests/tools/nnpackage_run/src/nnpackage_run.cc
deleted file mode 100644 (file)
index 7a58053..0000000
+++ /dev/null
@@ -1,327 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "allocation.h"
-#include "args.h"
-#include "benchmark.h"
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
-#include "h5formatter.h"
-#endif
-#include "nnfw.h"
-#include "nnfw_util.h"
-#include "nnfw_internal.h"
-#include "randomgen.h"
-#include "rawformatter.h"
-#ifdef RUY_PROFILER
-#include "ruy/profiler/profiler.h"
-#endif
-
-#include <boost/program_options.hpp>
-#include <cassert>
-#include <chrono>
-#include <cstdlib>
-#include <iostream>
-#include <libgen.h>
-#include <stdexcept>
-#include <unordered_map>
-#include <vector>
-
-static const char *default_backend_cand = "cpu";
-
-void overwriteShapeMap(nnpkg_run::TensorShapeMap &shape_map,
-                       std::vector<nnpkg_run::TensorShape> shapes)
-{
-  for (uint32_t i = 0; i < shapes.size(); i++)
-    shape_map[i] = shapes[i];
-}
-
-int main(const int argc, char **argv)
-{
-  using namespace nnpkg_run;
-
-  try
-  {
-    Args args(argc, argv);
-    auto nnpackage_path = args.getPackageFilename();
-    if (args.printVersion())
-    {
-      uint32_t version;
-      NNPR_ENSURE_STATUS(nnfw_query_info_u32(NULL, NNFW_INFO_ID_VERSION, &version));
-      std::cout << "nnpkg_run (nnfw runtime: v" << (version >> 24) << "."
-                << ((version & 0x0000FF00) >> 8) << "." << (version & 0xFF) << ")" << std::endl;
-      exit(0);
-    }
-
-#ifdef RUY_PROFILER
-    ruy::profiler::ScopeProfile ruy_profile;
-#endif
-
-    // TODO Apply verbose level to phases
-    const int verbose = args.getVerboseLevel();
-    benchmark::Phases phases(
-      benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
-
-    nnfw_session *session = nullptr;
-    NNPR_ENSURE_STATUS(nnfw_create_session(&session));
-
-    // ModelLoad
-    phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
-      NNPR_ENSURE_STATUS(nnfw_load_model_from_file(session, nnpackage_path.c_str()));
-    });
-
-    char *available_backends = std::getenv("BACKENDS");
-    if (available_backends)
-      NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, available_backends));
-
-    uint32_t num_inputs;
-    NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));
-
-    // verify input and output
-
-    auto verifyInputTypes = [session]() {
-      uint32_t sz;
-      NNPR_ENSURE_STATUS(nnfw_input_size(session, &sz));
-      for (uint32_t i = 0; i < sz; ++i)
-      {
-        nnfw_tensorinfo ti;
-        NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
-
-        if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
-        {
-          std::cerr << "E: not supported input type" << std::endl;
-          exit(-1);
-        }
-      }
-    };
-
-    auto verifyOutputTypes = [session]() {
-      uint32_t sz;
-      NNPR_ENSURE_STATUS(nnfw_output_size(session, &sz));
-
-      for (uint32_t i = 0; i < sz; ++i)
-      {
-        nnfw_tensorinfo ti;
-        NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
-
-        if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
-        {
-          std::cerr << "E: not supported output type" << std::endl;
-          exit(-1);
-        }
-      }
-    };
-
-    auto setTensorInfo = [session](const TensorShapeMap &tensor_shape_map) {
-      for (auto tensor_shape : tensor_shape_map)
-      {
-        auto ind = tensor_shape.first;
-        auto &shape = tensor_shape.second;
-        nnfw_tensorinfo ti;
-        // to fill dtype
-        NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, ind, &ti));
-
-        bool set_input = false;
-        if (ti.rank != shape.size())
-        {
-          set_input = true;
-        }
-        else
-        {
-          for (int i = 0; i < ti.rank; i++)
-          {
-            if (ti.dims[i] != shape.at(i))
-            {
-              set_input = true;
-              break;
-            }
-          }
-        }
-        if (!set_input)
-          continue;
-
-        ti.rank = shape.size();
-        for (int i = 0; i < ti.rank; i++)
-          ti.dims[i] = shape.at(i);
-        NNPR_ENSURE_STATUS(nnfw_set_input_tensorinfo(session, ind, &ti));
-      }
-    };
-
-    verifyInputTypes();
-    verifyOutputTypes();
-
-// set input shape before compilation
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
-
-    auto fill_shape_from_h5 = [&session](const std::string &h5_file, TensorShapeMap &shape_map) {
-      assert(!h5_file.empty());
-      auto shapes = H5Formatter(session).readTensorShapes(h5_file);
-      overwriteShapeMap(shape_map, shapes);
-    };
-
-    if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::PREPARE)
-      fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForPrepare());
-#endif
-    setTensorInfo(args.getShapeMapForPrepare());
-
-    // prepare execution
-
-    // TODO When nnfw_{prepare|run} are failed, can't catch the time
-    phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
-      NNPR_ENSURE_STATUS(nnfw_prepare(session));
-    });
-
-// set input shape after compilation and before execution
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
-    if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::RUN ||
-        (!args.getLoadFilename().empty() && !args.shapeParamProvided()))
-      fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForRun());
-#endif
-    setTensorInfo(args.getShapeMapForRun());
-
-    // prepare input
-    std::vector<Allocation> inputs(num_inputs);
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
-    if (!args.getLoadFilename().empty())
-      H5Formatter(session).loadInputs(args.getLoadFilename(), inputs);
-    else if (!args.getLoadRawFilename().empty())
-      RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs);
-    else
-      RandomGenerator(session).generate(inputs);
-#else
-    if (!args.getLoadRawFilename().empty())
-      RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs);
-    else
-      RandomGenerator(session).generate(inputs);
-#endif
-
-    // prepare output
-    uint32_t num_outputs = 0;
-    NNPR_ENSURE_STATUS(nnfw_output_size(session, &num_outputs));
-    std::vector<Allocation> outputs(num_outputs);
-    auto output_sizes = args.getOutputSizes();
-    for (uint32_t i = 0; i < num_outputs; i++)
-    {
-      nnfw_tensorinfo ti;
-      uint64_t output_size_in_bytes = 0;
-      {
-        auto found = output_sizes.find(i);
-        if (found == output_sizes.end())
-        {
-          NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
-          output_size_in_bytes = bufsize_for(&ti);
-        }
-        else
-        {
-          output_size_in_bytes = found->second;
-        }
-      }
-      outputs[i].alloc(output_size_in_bytes);
-      NNPR_ENSURE_STATUS(
-        nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes));
-      NNPR_ENSURE_STATUS(nnfw_set_output_layout(session, i, NNFW_LAYOUT_CHANNELS_LAST));
-    }
-
-    // NOTE: Measuring memory inevitably adds overhead. Therefore, memory is measured only during
-    // the warmup phase.
-    if (verbose == 0)
-    {
-      phases.run(
-        "WARMUP",
-        [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
-        args.getWarmupRuns());
-      phases.run(
-        "EXECUTE",
-        [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
-        args.getNumRuns(), true);
-    }
-    else
-    {
-      phases.run(
-        "WARMUP",
-        [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
-        [&](const benchmark::Phase &phase, uint32_t nth) {
-          std::cout << "... "
-                    << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
-                    << std::endl;
-        },
-        args.getWarmupRuns());
-      phases.run(
-        "EXECUTE",
-        [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
-        [&](const benchmark::Phase &phase, uint32_t nth) {
-          std::cout << "... "
-                    << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
-                    << std::endl;
-        },
-        args.getNumRuns(), true);
-    }
-
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
-    // dump output tensors
-    if (!args.getDumpFilename().empty())
-      H5Formatter(session).dumpOutputs(args.getDumpFilename(), outputs);
-#endif
-    if (!args.getDumpRawFilename().empty())
-      RawFormatter(session).dumpOutputs(args.getDumpRawFilename(), outputs);
-
-    NNPR_ENSURE_STATUS(nnfw_close_session(session));
-
-    // TODO Apply verbose level to result
-
-    // prepare result
-    benchmark::Result result(phases);
-
-    // to stdout
-    benchmark::printResult(result);
-
-    // to csv
-    if (args.getWriteReport() == false)
-      return 0;
-
-    // prepare csv task
-    std::string exec_basename;
-    std::string nnpkg_basename;
-    std::string backend_name = (available_backends) ? available_backends : default_backend_cand;
-    {
-      char buf[PATH_MAX];
-      char *res = realpath(nnpackage_path.c_str(), buf);
-      if (res)
-      {
-        nnpkg_basename = basename(buf);
-      }
-      else
-      {
-        std::cerr << "E: during getting realpath from nnpackage_path." << std::endl;
-        exit(-1);
-      }
-      exec_basename = basename(argv[0]);
-    }
-
-    benchmark::writeResult(result, exec_basename, nnpkg_basename, backend_name);
-
-    return 0;
-  }
-  catch (boost::program_options::error &e)
-  {
-    std::cerr << "E: " << e.what() << std::endl;
-    exit(-1);
-  }
-  catch (std::runtime_error &e)
-  {
-    std::cerr << "E: Fail to run by runtime error:" << e.what() << std::endl;
-    exit(-1);
-  }
-}
diff --git a/tests/tools/nnpackage_run/src/randomgen.cc b/tests/tools/nnpackage_run/src/randomgen.cc
deleted file mode 100644 (file)
index 4789b6b..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "randomgen.h"
-#include "nnfw.h"
-#include "nnfw_util.h"
-#include "misc/RandomGenerator.h"
-
-#include <iostream>
-
-namespace nnpkg_run
-{
-
-template <class T> void randomData(nnfw::misc::RandomGenerator &randgen, void *data, uint64_t size)
-{
-  for (uint64_t i = 0; i < size; i++)
-    reinterpret_cast<T *>(data)[i] = randgen.generate<T>();
-}
-
-void RandomGenerator::generate(std::vector<Allocation> &inputs)
-{
-  // generate random data
-  const int seed = 1;
-  nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
-  for (uint32_t i = 0; i < inputs.size(); ++i)
-  {
-    nnfw_tensorinfo ti;
-    NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
-    auto input_size_in_bytes = bufsize_for(&ti);
-    inputs[i].alloc(input_size_in_bytes);
-    switch (ti.dtype)
-    {
-      case NNFW_TYPE_TENSOR_FLOAT32:
-        randomData<float>(randgen, inputs[i].data(), num_elems(&ti));
-        break;
-      case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
-        randomData<uint8_t>(randgen, inputs[i].data(), num_elems(&ti));
-        break;
-      case NNFW_TYPE_TENSOR_BOOL:
-        randomData<bool>(randgen, inputs[i].data(), num_elems(&ti));
-        break;
-      case NNFW_TYPE_TENSOR_UINT8:
-        randomData<uint8_t>(randgen, inputs[i].data(), num_elems(&ti));
-        break;
-      case NNFW_TYPE_TENSOR_INT32:
-        randomData<int32_t>(randgen, inputs[i].data(), num_elems(&ti));
-        break;
-      case NNFW_TYPE_TENSOR_INT64:
-        randomData<int64_t>(randgen, inputs[i].data(), num_elems(&ti));
-        break;
-      case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
-        randomData<int16_t>(randgen, inputs[i].data(), num_elems(&ti));
-        break;
-      default:
-        std::cerr << "Not supported input type" << std::endl;
-        std::exit(-1);
-    }
-    NNPR_ENSURE_STATUS(
-      nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), input_size_in_bytes));
-    NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
-  }
-};
-
-} // end of namespace nnpkg_run
diff --git a/tests/tools/nnpackage_run/src/randomgen.h b/tests/tools/nnpackage_run/src/randomgen.h
deleted file mode 100644 (file)
index 898df34..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_RANDOMGEN_H__
-#define __NNPACKAGE_RUN_RANDOMGEN_H__
-
-#include <string>
-#include <vector>
-
-#include "allocation.h"
-
-struct nnfw_session;
-
-namespace nnpkg_run
-{
-class RandomGenerator
-{
-public:
-  RandomGenerator(nnfw_session *sess) : session_(sess) {}
-  void generate(std::vector<Allocation> &inputs);
-
-private:
-  nnfw_session *session_;
-};
-} // namespace nnpkg_run
-
-#endif // __NNPACKAGE_RUN_RANDOMGEN_H__
diff --git a/tests/tools/nnpackage_run/src/rawformatter.cc b/tests/tools/nnpackage_run/src/rawformatter.cc
deleted file mode 100644 (file)
index e4b9774..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "rawformatter.h"
-#include "nnfw.h"
-#include "nnfw_util.h"
-
-#include <iostream>
-#include <fstream>
-#include <stdexcept>
-
-namespace nnpkg_run
-{
-void RawFormatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs)
-{
-  uint32_t num_inputs;
-  NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
-
-  // Support multiple inputs
-  // Option 1: Get comma-separated input file list like --load:raw a,b,c
-  // Option 2: Get prefix --load:raw in
-  //           Internally access in.0, in.1, in.2, ... in.{N-1} where N is determined by nnfw info
-  //           query api.
-  //
-  // Currently Option 2 is implemented.
-  try
-  {
-    for (uint32_t i = 0; i < num_inputs; ++i)
-    {
-      nnfw_tensorinfo ti;
-      NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
-
-      // allocate memory for data
-      auto bufsz = bufsize_for(&ti);
-      inputs[i].alloc(bufsz);
-
-      std::ifstream file(filename + "." + std::to_string(i), std::ios::ate | std::ios::binary);
-      auto filesz = file.tellg();
-      if (bufsz != filesz)
-      {
-        throw std::runtime_error("Input " + std::to_string(i) +
-                                 " size does not match: " + std::to_string(bufsz) +
-                                 " expected, but " + std::to_string(filesz) + " provided.");
-      }
-      file.seekg(0, std::ios::beg);
-      file.read(reinterpret_cast<char *>(inputs[i].data()), filesz);
-      file.close();
-
-      NNPR_ENSURE_STATUS(nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), bufsz));
-      NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
-    }
-  }
-  catch (const std::exception &e)
-  {
-    std::cerr << e.what() << std::endl;
-    std::exit(-1);
-  }
-};
-
-void RawFormatter::dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs)
-{
-  uint32_t num_outputs;
-  NNPR_ENSURE_STATUS(nnfw_output_size(session_, &num_outputs));
-  try
-  {
-    for (uint32_t i = 0; i < num_outputs; i++)
-    {
-      nnfw_tensorinfo ti;
-      NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session_, i, &ti));
-      auto bufsz = bufsize_for(&ti);
-
-      std::ofstream file(filename + "." + std::to_string(i), std::ios::out | std::ios::binary);
-      file.write(reinterpret_cast<const char *>(outputs[i].data()), bufsz);
-      file.close();
-      std::cerr << filename + "." + std::to_string(i) + " is generated.\n";
-    }
-  }
-  catch (const std::runtime_error &e)
-  {
-    std::cerr << "Error during dumpOutputs on nnpackage_run : " << e.what() << std::endl;
-    std::exit(-1);
-  }
-}
-} // end of namespace nnpkg_run
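Note (not part of the patch): `RawFormatter` above expects one raw binary file per input, named `<prefix>.0`, `<prefix>.1`, ..., each exactly `bufsize_for(tensorinfo)` bytes. A minimal sketch that writes such a file for a hypothetical float32 input of shape [1, 3] is:

```
#include <fstream>
#include <vector>

int main()
{
  std::vector<float> input0 = {0.5f, 1.5f, 2.5f}; // hypothetical [1, 3] float32 input

  // File size must match the model's input buffer size, or loadInputs rejects it.
  std::ofstream file("in.0", std::ios::out | std::ios::binary);
  file.write(reinterpret_cast<const char *>(input0.data()), input0.size() * sizeof(float));
  return 0;
}
```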
diff --git a/tests/tools/nnpackage_run/src/rawformatter.h b/tests/tools/nnpackage_run/src/rawformatter.h
deleted file mode 100644 (file)
index 8bfc354..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_RAWFORMATTER_H__
-#define __NNPACKAGE_RUN_RAWFORMATTER_H__
-
-#include "allocation.h"
-#include "formatter.h"
-#include "types.h"
-
-#include <string>
-#include <vector>
-
-struct nnfw_session;
-
-namespace nnpkg_run
-{
-class RawFormatter : public Formatter
-{
-public:
-  RawFormatter(nnfw_session *sess) : Formatter(sess) {}
-  void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) override;
-  void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) override;
-};
-} // namespace nnpkg_run
-
-#endif // __NNPACKAGE_RUN_RAWFORMATTER_H__
diff --git a/tests/tools/nnpackage_run/src/types.h b/tests/tools/nnpackage_run/src/types.h
deleted file mode 100644 (file)
index 93a7ab2..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_TYPES_H__
-#define __NNPACKAGE_RUN_TYPES_H__
-
-namespace nnpkg_run
-{
-
-using TensorShape = std::vector<int>;
-
-} // end of namespace nnpkg_run
-
-#endif // __NNPACKAGE_RUN_TYPES_H__
diff --git a/tests/tools/onert_run/CMakeLists.txt b/tests/tools/onert_run/CMakeLists.txt
new file mode 100644 (file)
index 0000000..1d536dd
--- /dev/null
@@ -0,0 +1,45 @@
+if(NOT BUILD_ONERT_RUN)
+  return()
+endif(NOT BUILD_ONERT_RUN)
+
+if(NOT BUILD_ONERT)
+  return()
+endif(NOT BUILD_ONERT)
+
+list(APPEND ONERT_RUN_SRCS "src/onert_run.cc")
+list(APPEND ONERT_RUN_SRCS "src/args.cc")
+list(APPEND ONERT_RUN_SRCS "src/nnfw_util.cc")
+list(APPEND ONERT_RUN_SRCS "src/randomgen.cc")
+list(APPEND ONERT_RUN_SRCS "src/rawformatter.cc")
+
+nnfw_find_package(Boost REQUIRED program_options)
+nnfw_find_package(Ruy QUIET)
+nnfw_find_package(HDF5 QUIET)
+
+if (HDF5_FOUND)
+  list(APPEND ONERT_RUN_SRCS "src/h5formatter.cc")
+endif()
+
+add_executable(onert_run ${ONERT_RUN_SRCS})
+
+if (HDF5_FOUND)
+  target_compile_definitions(onert_run PRIVATE ONERT_HAVE_HDF5=1)
+  target_include_directories(onert_run PRIVATE ${HDF5_INCLUDE_DIRS})
+  target_link_libraries(onert_run ${HDF5_CXX_LIBRARIES})
+else()
+  message(WARNING "HDF5 NOT found. Install libhdf5-dev or set EXT_HDF5_DIR to support load/dump in onert_run.")
+endif(HDF5_FOUND)
+
+target_include_directories(onert_run PRIVATE src)
+target_include_directories(onert_run PRIVATE ${Boost_INCLUDE_DIRS})
+
+target_link_libraries(onert_run nnfw_lib_tflite jsoncpp)
+target_link_libraries(onert_run nnfw-dev)
+target_link_libraries(onert_run ${Boost_PROGRAM_OPTIONS_LIBRARY})
+target_link_libraries(onert_run nnfw_lib_benchmark)
+if(Ruy_FOUND AND PROFILE_RUY)
+  target_link_libraries(onert_run ruy_instrumentation)
+  target_link_libraries(onert_run ruy_profiler)
+endif(Ruy_FOUND AND PROFILE_RUY)
+
+install(TARGETS onert_run DESTINATION bin)
diff --git a/tests/tools/onert_run/README.md b/tests/tools/onert_run/README.md
new file mode 100644 (file)
index 0000000..9dc918e
--- /dev/null
@@ -0,0 +1,22 @@
+# onert_run
+
+`onert_run` is a tool to run `nnpackage`.
+
+It takes `nnpackage` as input. It uses **runtime API** internally.
+
+## Usage
+
+### Simple run
+
+This will run with random input data
+
+```
+$ ./onert_run path_to_nnpackage_directory
+```
+
+Output would look like:
+
+```
+nnfw_prepare takes 425.235 ms
+nnfw_run     takes 2.525 ms
+```
diff --git a/tests/tools/onert_run/src/allocation.h b/tests/tools/onert_run/src/allocation.h
new file mode 100644 (file)
index 0000000..798bf9d
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_ALLOCATION_H__
+#define __ONERT_RUN_ALLOCATION_H__
+
+#include <cstdlib>
+#include <cstdint>
+
+namespace onert_run
+{
+class Allocation
+{
+public:
+  Allocation() : data_(nullptr) {}
+  ~Allocation() { free(data_); }
+  void *data() const { return data_; }
+  void *alloc(uint64_t sz) { return data_ = malloc(sz); }
+
+private:
+  void *data_;
+};
+} // namespace onert_run
+
+#endif // __ONERT_RUN_ALLOCATION_H__
diff --git a/tests/tools/onert_run/src/args.cc b/tests/tools/onert_run/src/args.cc
new file mode 100644 (file)
index 0000000..1e9d1aa
--- /dev/null
@@ -0,0 +1,389 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "args.h"
+
+#include <functional>
+#include <iostream>
+#include <sys/stat.h>
+#include <json/json.h>
+
+namespace
+{
+
+// This function parses a JSON array and returns it as an unordered_map.
+// For example,
+// [0, [1, 2, 3, 4], 3, 40, 4, []] in JSON
+// is converted to:
+// {
+//  0 -> [1, 2, 3, 4]
+//  3 -> 40
+//  4 -> []
+// } in std::unordered_map. Note that the value type is still Json::Value.
+std::unordered_map<uint32_t, Json::Value> argArrayToMap(const Json::Value &jsonval)
+{
+  if (!jsonval.isArray() || (jsonval.size() % 2 != 0))
+  {
+    std::cerr << "JSON argument must be an even-sized array in JSON\n";
+    exit(1);
+  }
+
+  std::unordered_map<uint32_t, Json::Value> ret;
+  for (uint32_t i = 0; i < jsonval.size(); i += 2)
+  {
+    if (!jsonval[i].isUInt())
+    {
+      std::cerr << "Key values(values in even indices) must be unsigned integers\n";
+      exit(1);
+    }
+    uint32_t key = jsonval[i].asUInt();
+    Json::Value val = jsonval[i + 1];
+    ret[key] = val;
+  }
+  return ret;
+}
+
+// param shape_str takes a form such as "[1, [2, 3], 3, []]" or "h5"
+void handleShapeJsonParam(onert_run::TensorShapeMap &shape_map, const std::string &shape_str)
+{
+  Json::Value root;
+  Json::Reader reader;
+  if (!reader.parse(shape_str, root, false))
+  {
+    std::cerr << "Invalid JSON format for output_sizes \"" << shape_str << "\"\n";
+    exit(1);
+  }
+
+  auto arg_map = argArrayToMap(root);
+  for (auto &pair : arg_map)
+  {
+    uint32_t key = pair.first;
+    Json::Value &shape_json = pair.second;
+    if (!shape_json.isArray())
+    {
+      std::cerr << "All the values must be list: " << shape_str << "\n";
+      exit(1);
+    }
+
+    std::vector<int> shape;
+    for (auto &dim_json : shape_json)
+    {
+      if (!dim_json.isUInt())
+      {
+        std::cerr << "All the dims should be dim >= 0: " << shape_str << "\n";
+        exit(1);
+      }
+
+      shape.emplace_back(dim_json.asUInt64());
+    }
+
+    shape_map[key] = shape;
+  }
+}
+
+void checkModelfile(const std::string &model_filename)
+{
+  if (model_filename.empty())
+  {
+    // TODO Print usage instead of the below message
+    std::cerr << "Please specify model file. Run with `--help` for usage."
+              << "\n";
+
+    exit(1);
+  }
+  else
+  {
+    if (access(model_filename.c_str(), F_OK) == -1)
+    {
+      std::cerr << "Model file not found: " << model_filename << "\n";
+      exit(1);
+    }
+  }
+}
+
+void checkPackage(const std::string &package_filename)
+{
+  if (package_filename.empty())
+  {
+    // TODO Print usage instead of the below message
+    std::cerr << "Please specify nnpackage file. Run with `--help` for usage."
+              << "\n";
+
+    exit(1);
+  }
+  else
+  {
+    if (access(package_filename.c_str(), F_OK) == -1)
+    {
+      std::cerr << "nnpackage not found: " << package_filename << "\n";
+      exit(1);
+    }
+  }
+}
+
+} // namespace
+
+namespace onert_run
+{
+
+Args::Args(const int argc, char **argv)
+{
+  Initialize();
+  Parse(argc, argv);
+}
+
+void Args::Initialize(void)
+{
+  auto process_nnpackage = [&](const std::string &package_filename) {
+    _package_filename = package_filename;
+
+    std::cerr << "Package Filename " << _package_filename << std::endl;
+    checkPackage(package_filename);
+  };
+
+  auto process_modelfile = [&](const std::string &model_filename) {
+    _model_filename = model_filename;
+
+    std::cerr << "Model Filename " << _model_filename << std::endl;
+    checkModelfile(model_filename);
+
+    _use_single_model = true;
+  };
+
+  auto process_path = [&](const std::string &path) {
+    struct stat sb;
+    if (stat(path.c_str(), &sb) == 0)
+    {
+      if (sb.st_mode & S_IFDIR)
+      {
+        _package_filename = path;
+        checkPackage(path);
+        std::cerr << "Package Filename " << path << std::endl;
+      }
+      else
+      {
+        _model_filename = path;
+        checkModelfile(path);
+        std::cerr << "Model Filename " << path << std::endl;
+        _use_single_model = true;
+      }
+    }
+    else
+    {
+      std::cerr << "Cannot find: " << path << "\n";
+      exit(1);
+    }
+  };
+
+  auto process_output_sizes = [&](const std::string &output_sizes_json_str) {
+    Json::Value root;
+    Json::Reader reader;
+    if (!reader.parse(output_sizes_json_str, root, false))
+    {
+      std::cerr << "Invalid JSON format for output_sizes \"" << output_sizes_json_str << "\"\n";
+      exit(1);
+    }
+
+    auto arg_map = argArrayToMap(root);
+    for (auto &pair : arg_map)
+    {
+      uint32_t key = pair.first;
+      Json::Value &val_json = pair.second;
+      if (!val_json.isUInt())
+      {
+        std::cerr << "All the values in `output_sizes` must be unsigned integers\n";
+        exit(1);
+      }
+      uint32_t val = val_json.asUInt();
+      _output_sizes[key] = val;
+    }
+  };
+
+  auto process_shape_prepare = [&](const std::string &shape_str) {
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+    if (shape_str == "H5" || shape_str == "h5")
+    {
+      _when_to_use_h5_shape = WhenToUseH5Shape::PREPARE;
+      return;
+    }
+#endif
+    try
+    {
+      handleShapeJsonParam(_shape_prepare, shape_str);
+    }
+    catch (const std::exception &e)
+    {
+      std::cerr << "error with '--shape_prepare' option: " << shape_str << std::endl;
+      exit(1);
+    }
+  };
+
+  auto process_shape_run = [&](const std::string &shape_str) {
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+    if (shape_str == "H5" || shape_str == "h5")
+    {
+      _when_to_use_h5_shape = WhenToUseH5Shape::RUN;
+      return;
+    }
+#endif
+    try
+    {
+      handleShapeJsonParam(_shape_run, shape_str);
+    }
+    catch (const std::exception &e)
+    {
+      std::cerr << "error with '--shape_run' option: " << shape_str << std::endl;
+      exit(1);
+    }
+  };
+
+  // General options
+  po::options_description general("General options", 100);
+
+  // clang-format off
+  general.add_options()
+    ("help,h", "Print available options")
+    ("version", "Print version and exit immediately")
+    ("nnpackage", po::value<std::string>()->notifier(process_nnpackage), "NN Package file(directory) name")
+    ("modelfile", po::value<std::string>()->notifier(process_modelfile), "NN Model filename")
+    ("path", po::value<std::string>()->notifier(process_path), "NN Package or NN Modelfile path")
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+    ("dump,d", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_filename = v; }), "Output filename")
+    ("load,l", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _load_filename = v; }), "Input filename")
+#endif
+    ("dump:raw", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_raw_filename = v; }), "Raw Output filename")
+    ("load:raw", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _load_raw_filename = v; }), "Raw Input filename")
+    ("output_sizes", po::value<std::string>()->notifier(process_output_sizes),
+        "The output buffer size in JSON 1D array\n"
+        "If not given, the model's output sizes are used\n"
+        "e.g. '[0, 40, 2, 80]' to set 0th tensor to 40 and 2nd tensor to 80.\n")
+    ("num_runs,r", po::value<int>()->default_value(1)->notifier([&](const auto &v) { _num_runs = v; }), "The number of runs")
+    ("warmup_runs,w", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _warmup_runs = v; }), "The number of warmup runs")
+    ("run_delay,t", po::value<int>()->default_value(-1)->notifier([&](const auto &v) { _run_delay = v; }), "Delay time(us) between runs (as default no delay")
+    ("gpumem_poll,g", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _gpumem_poll = v; }), "Check gpu memory polling separately")
+    ("mem_poll,m", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _mem_poll = v; }), "Check memory polling")
+    ("write_report,p", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _write_report = v; }),
+         "Write report\n"
+         "{exec}-{nnpkg|modelfile}-{backend}.csv will be generated.\n"
+         "e.g. onert_run-UNIT_Add_000-acl_cl.csv.\n"
+         "{nnpkg|modelfile} name may be changed to realpath if you use symbolic-link.")
+    ("shape_prepare", po::value<std::string>()->default_value("[]")->notifier(process_shape_prepare),
+         "Please refer to the description of 'shape_run'")
+    ("shape_run", po::value<std::string>()->default_value("[]")->notifier(process_shape_run),
+         "'--shape_prepare: set shape of tensors before compilation (before calling nnfw_prepare()).\n"
+         "'--shape_run: set shape of tensors before running (before calling nnfw_run()).\n"
+         "Allowed value:.\n"
+         "'[0, [1, 2], 2, []]': set 0th tensor to [1, 2] and 2nd tensor to [] (scalar).\n"
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+         "'h5': read shape(s) from H5 input file. '--load' should also be provided.\n"
+         "if '--load' option is provided but '--shape_prepare' or '--shape_run' is not provided,\n"
+         "'--shape_run h5' will be used by default.\n"
+#endif
+         "For detailed description, please consutl the description of nnfw_set_input_tensorinfo()\n"
+         )
+    ("verbose_level,v", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _verbose_level = v; }),
+         "Verbose level\n"
+         "0: prints the only result. Messages btw run don't print\n"
+         "1: prints result and message btw run\n"
+         "2: prints all of messages to print\n")
+    ;
+  // clang-format on
+
+  _options.add(general);
+  _positional.add("path", -1);
+}
+
+void Args::Parse(const int argc, char **argv)
+{
+  po::variables_map vm;
+  po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
+            vm);
+
+  if (vm.count("help"))
+  {
+    std::cout << "onert_run\n\n";
+    std::cout << "Usage: " << argv[0] << " path to nnpackage root directory [<options>]\n\n";
+    std::cout << _options;
+    std::cout << "\n";
+
+    exit(0);
+  }
+
+  if (vm.count("version"))
+  {
+    _print_version = true;
+    return;
+  }
+
+  {
+    auto conflicting_options = [&](const std::string &o1, const std::string &o2) {
+      if ((vm.count(o1) && !vm[o1].defaulted()) && (vm.count(o2) && !vm[o2].defaulted()))
+      {
+        throw boost::program_options::error(std::string("Two options '") + o1 + "' and '" + o2 +
+                                            "' cannot be given at once.");
+      }
+    };
+
+    // Calling, e.g., "onert_run .. --shape_prepare .. --shape_run .." should theoretically
+    // work, but allowing both options together on the command line makes the usage and
+    // implementation of onert_run too complicated. Therefore, let's not allow those options together.
+    conflicting_options("shape_prepare", "shape_run");
+
+    // Cannot use both single model file and nnpackage at once
+    conflicting_options("modelfile", "nnpackage");
+
+    // Require modelfile, nnpackage, or path
+    if (!vm.count("modelfile") && !vm.count("nnpackage") && !vm.count("path"))
+      throw boost::program_options::error(
+        std::string("Require one of options modelfile, nnpackage, or path."));
+  }
+
+  try
+  {
+    po::notify(vm);
+  }
+  catch (const std::bad_cast &e)
+  {
+    std::cerr << "Bad cast error - " << e.what() << '\n';
+    exit(1);
+  }
+
+  // This must be run after `notify` as `_warmup_runs` must have been processed before.
+  if (vm.count("mem_poll"))
+  {
+    // To avoid overhead, memory polling runs during WARMUP instead of EXECUTE
+    if (_mem_poll && _warmup_runs == 0)
+    {
+      _warmup_runs = 1;
+    }
+  }
+}
+
+bool Args::shapeParamProvided()
+{
+  bool provided = false;
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+  // "--shape_run h5" or "--shape_prepare h5" was provided
+  provided = (getWhenToUseH5Shape() != WhenToUseH5Shape::NOT_PROVIDED);
+#endif
+  // specific shape was provided
+  // e.g., "--shape_run '[0, [10, 1]]'" or "--shape_prepare '[0, [10, 1]]'"
+  provided |= (!getShapeMapForPrepare().empty()) || (!getShapeMapForRun().empty());
+
+  return provided;
+}
+
+} // end of namespace onert_run
diff --git a/tests/tools/onert_run/src/args.h b/tests/tools/onert_run/src/args.h
new file mode 100644 (file)
index 0000000..e35a761
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_ARGS_H__
+#define __ONERT_RUN_ARGS_H__
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include <boost/program_options.hpp>
+
+#include "types.h"
+
+namespace po = boost::program_options;
+
+namespace onert_run
+{
+
+using TensorShapeMap = std::unordered_map<uint32_t, TensorShape>;
+
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+enum class WhenToUseH5Shape
+{
+  NOT_PROVIDED, // Param not provided
+  PREPARE, // read shapes in h5 file and set them as inputs' shape before calling nnfw_prepare()
+  RUN,     // read shapes in h5 file and set them as inputs' shape before calling nnfw_run()
+};
+#endif
+
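+// Parses onert_run command-line options (via boost::program_options) and exposes them through getters.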
+class Args
+{
+public:
+  Args(const int argc, char **argv);
+  void print(void);
+
+  const std::string &getPackageFilename(void) const { return _package_filename; }
+  const std::string &getModelFilename(void) const { return _model_filename; }
+  const bool useSingleModel(void) const { return _use_single_model; }
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+  const std::string &getDumpFilename(void) const { return _dump_filename; }
+  const std::string &getLoadFilename(void) const { return _load_filename; }
+  WhenToUseH5Shape getWhenToUseH5Shape(void) const { return _when_to_use_h5_shape; }
+#endif
+  const std::string &getDumpRawFilename(void) const { return _dump_raw_filename; }
+  const std::string &getLoadRawFilename(void) const { return _load_raw_filename; }
+  const int getNumRuns(void) const { return _num_runs; }
+  const int getWarmupRuns(void) const { return _warmup_runs; }
+  const int getRunDelay(void) const { return _run_delay; }
+  std::unordered_map<uint32_t, uint32_t> getOutputSizes(void) const { return _output_sizes; }
+  const bool getGpuMemoryPoll(void) const { return _gpumem_poll; }
+  const bool getMemoryPoll(void) const { return _mem_poll; }
+  const bool getWriteReport(void) const { return _write_report; }
+  const bool printVersion(void) const { return _print_version; }
+  TensorShapeMap &getShapeMapForPrepare() { return _shape_prepare; }
+  TensorShapeMap &getShapeMapForRun() { return _shape_run; }
+  /// @brief Return true if "--shape_run" or "--shape_prepare" is provided
+  bool shapeParamProvided();
+  const int getVerboseLevel(void) const { return _verbose_level; }
+
+private:
+  void Initialize();
+  void Parse(const int argc, char **argv);
+
+private:
+  po::positional_options_description _positional;
+  po::options_description _options;
+
+  std::string _package_filename;
+  std::string _model_filename;
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+  std::string _dump_filename;
+  std::string _load_filename;
+  WhenToUseH5Shape _when_to_use_h5_shape = WhenToUseH5Shape::NOT_PROVIDED;
+#endif
+  std::string _dump_raw_filename;
+  std::string _load_raw_filename;
+  TensorShapeMap _shape_prepare;
+  TensorShapeMap _shape_run;
+  int _num_runs;
+  int _warmup_runs;
+  int _run_delay;
+  std::unordered_map<uint32_t, uint32_t> _output_sizes;
+  bool _gpumem_poll;
+  bool _mem_poll;
+  bool _write_report;
+  bool _print_version = false;
+  int _verbose_level;
+  bool _use_single_model = false;
+};
+
+} // end of namespace onert_run
+
+#endif // __ONERT_RUN_ARGS_H__
diff --git a/tests/tools/onert_run/src/formatter.h b/tests/tools/onert_run/src/formatter.h
new file mode 100644 (file)
index 0000000..5b73d23
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_FORMATTER_H__
+#define __ONERT_RUN_FORMATTER_H__
+
+#include <string>
+#include <vector>
+
+#include "types.h"
+#include "allocation.h"
+
+struct nnfw_session;
+
+namespace onert_run
+{
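+// Interface for loading input tensors from a file and dumping output tensors to a file for a given nnfw session.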
+class Formatter
+{
+public:
+  virtual ~Formatter() = default;
+  Formatter(nnfw_session *sess) : session_(sess) {}
+  virtual void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) = 0;
+  virtual void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) = 0;
+  virtual std::vector<TensorShape> readTensorShapes(const std::string &filename)
+  {
+    return std::vector<TensorShape>();
+  };
+
+protected:
+  nnfw_session *session_;
+};
+} // namespace onert_run
+
+#endif // __ONERT_RUN_FORMATTER_H__
diff --git a/tests/tools/onert_run/src/h5formatter.cc b/tests/tools/onert_run/src/h5formatter.cc
new file mode 100644 (file)
index 0000000..5ea6e4c
--- /dev/null
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "h5formatter.h"
+#include "nnfw.h"
+#include "nnfw_util.h"
+
+#include <iostream>
+#include <stdexcept>
+#include <H5Cpp.h>
+
+namespace
+{
+onert_run::TensorShape getShape(H5::DataSet &data_set)
+{
+  std::vector<hsize_t> h5_shape; // hsize_t is unsigned long long
+  H5::DataSpace data_space = data_set.getSpace();
+  int rank = data_space.getSimpleExtentNdims();
+  h5_shape.resize(rank);
+
+  // read shape info from H5 file
+  data_space.getSimpleExtentDims(h5_shape.data(), NULL);
+
+  onert_run::TensorShape shape;
+  for (auto dim : h5_shape)
+    shape.emplace_back(static_cast<int>(dim));
+
+  return shape;
+}
+} // namespace
+
+namespace onert_run
+{
+static const char *h5_value_grpname = "value";
+
+std::vector<TensorShape> H5Formatter::readTensorShapes(const std::string &filename)
+{
+  uint32_t num_inputs;
+  NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
+  std::vector<TensorShape> tensor_shapes;
+
+  try
+  {
+    H5::Exception::dontPrint();
+
+    H5::H5File file(filename, H5F_ACC_RDONLY);
+    H5::Group value_group = file.openGroup(h5_value_grpname);
+
+    // Constraints: if there are n data set names, they should be unique and
+    //              one of [ "0", "1", .. , "n-1" ]
+    for (uint32_t i = 0; i < num_inputs; ++i)
+    {
+      H5::DataSet data_set = value_group.openDataSet(std::to_string(i));
+      H5::DataType type = data_set.getDataType();
+      auto shape = getShape(data_set);
+
+      tensor_shapes.emplace_back(shape);
+    }
+
+    return tensor_shapes;
+  }
+  catch (const H5::Exception &e)
+  {
+    H5::Exception::printErrorStack();
+    std::exit(-1);
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << e.what() << std::endl;
+    std::exit(-1);
+  }
+}
+
+void H5Formatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs)
+{
+  uint32_t num_inputs;
+  NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
+  try
+  {
+    // Turn off the automatic error printing.
+    H5::Exception::dontPrint();
+
+    H5::H5File file(filename, H5F_ACC_RDONLY);
+    H5::Group value_group = file.openGroup(h5_value_grpname);
+    for (uint32_t i = 0; i < num_inputs; ++i)
+    {
+      nnfw_tensorinfo ti;
+      NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
+
+      // TODO Add Assert(nnfw shape, h5 file shape size)
+
+      // allocate memory for data
+      auto bufsz = bufsize_for(&ti);
+      inputs[i].alloc(bufsz);
+
+      H5::DataSet data_set = value_group.openDataSet(std::to_string(i));
+      H5::DataType type = data_set.getDataType();
+      switch (ti.dtype)
+      {
+        case NNFW_TYPE_TENSOR_FLOAT32:
+          if (type == H5::PredType::IEEE_F32BE || type == H5::PredType::IEEE_F32LE)
+            data_set.read(inputs[i].data(), H5::PredType::NATIVE_FLOAT);
+          else
+            throw std::runtime_error("model input type is f32. But h5 data type is different.");
+          break;
+        case NNFW_TYPE_TENSOR_INT32:
+          if (type == H5::PredType::STD_I32BE || type == H5::PredType::STD_I32LE)
+            data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT32);
+          else
+            throw std::runtime_error("model input type is i32. But h5 data type is different.");
+          break;
+        case NNFW_TYPE_TENSOR_INT64:
+          if (type == H5::PredType::STD_I64BE || type == H5::PredType::STD_I64LE)
+            data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT64);
+          else
+            throw std::runtime_error("model input type is i64. But h5 data type is different.");
+          break;
+        case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+        case NNFW_TYPE_TENSOR_BOOL:
+        case NNFW_TYPE_TENSOR_UINT8:
+          if (type == H5::PredType::STD_U8BE || type == H5::PredType::STD_U8LE)
+            data_set.read(inputs[i].data(), H5::PredType::NATIVE_UINT8);
+          else
+            throw std::runtime_error(
+              "model input type is qasymm8, bool or uint8. But h5 data type is different.");
+          break;
+        case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+          if (type == H5::PredType::STD_I8BE || type == H5::PredType::STD_I8LE)
+            data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT8);
+          else
+            throw std::runtime_error("model input type is int8. But h5 data type is different.");
+          break;
+        case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+          throw std::runtime_error("NYI for NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED type");
+        default:
+          throw std::runtime_error("onert_run can load f32, i32, qasymm8, bool and uint8.");
+      }
+      NNPR_ENSURE_STATUS(nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), bufsz));
+      NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
+    }
+  }
+  catch (const H5::Exception &e)
+  {
+    H5::Exception::printErrorStack();
+    std::exit(-1);
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << e.what() << std::endl;
+    std::exit(-1);
+  }
+};
+
+void H5Formatter::dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs)
+{
+  uint32_t num_outputs;
+  NNPR_ENSURE_STATUS(nnfw_output_size(session_, &num_outputs));
+  try
+  {
+    // Turn off the automatic error printing.
+    H5::Exception::dontPrint();
+
+    H5::H5File file(filename, H5F_ACC_TRUNC);
+    H5::Group value_group = file.createGroup(h5_value_grpname);
+    for (uint32_t i = 0; i < num_outputs; i++)
+    {
+      nnfw_tensorinfo ti;
+      NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session_, i, &ti));
+      std::vector<hsize_t> dims(ti.rank);
+      for (uint32_t j = 0; j < ti.rank; ++j)
+      {
+        if (ti.dims[j] >= 0)
+          dims[j] = static_cast<hsize_t>(ti.dims[j]);
+        else
+        {
+          std::cerr << "Negative dimension in output tensor" << std::endl;
+          exit(-1);
+        }
+      }
+      H5::DataSpace data_space(ti.rank, dims.data());
+      switch (ti.dtype)
+      {
+        case NNFW_TYPE_TENSOR_FLOAT32:
+        {
+          H5::DataSet data_set =
+            value_group.createDataSet(std::to_string(i), H5::PredType::IEEE_F32BE, data_space);
+          data_set.write(outputs[i].data(), H5::PredType::NATIVE_FLOAT);
+          break;
+        }
+        case NNFW_TYPE_TENSOR_INT32:
+        {
+          H5::DataSet data_set =
+            value_group.createDataSet(std::to_string(i), H5::PredType::STD_I32LE, data_space);
+          data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT32);
+          break;
+        }
+        case NNFW_TYPE_TENSOR_INT64:
+        {
+          H5::DataSet data_set =
+            value_group.createDataSet(std::to_string(i), H5::PredType::STD_I64LE, data_space);
+          data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT64);
+          break;
+        }
+        case NNFW_TYPE_TENSOR_UINT8:
+        case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+        {
+          H5::DataSet data_set =
+            value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8BE, data_space);
+          data_set.write(outputs[i].data(), H5::PredType::NATIVE_UINT8);
+          break;
+        }
+        case NNFW_TYPE_TENSOR_BOOL:
+        {
+          H5::DataSet data_set =
+            value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8LE, data_space);
+          data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
+          break;
+        }
+        case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+        {
+          H5::DataSet data_set =
+            value_group.createDataSet(std::to_string(i), H5::PredType::STD_I8LE, data_space);
+          data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
+          break;
+        }
+        case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+          throw std::runtime_error("NYI for NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED type");
+        default:
+          throw std::runtime_error("onert_run can dump f32, i32, qasymm8, bool and uint8.");
+      }
+    }
+  }
+  catch (const H5::Exception &e)
+  {
+    H5::Exception::printErrorStack();
+    std::exit(-1);
+  }
+  catch (const std::runtime_error &e)
+  {
+    std::cerr << "Error during dumpOutputs on onert_run : " << e.what() << std::endl;
+    std::exit(-1);
+  }
+};
+
+} // end of namespace onert_run
diff --git a/tests/tools/onert_run/src/h5formatter.h b/tests/tools/onert_run/src/h5formatter.h
new file mode 100644 (file)
index 0000000..7ebb33f
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_H5FORMATTER_H__
+#define __ONERT_RUN_H5FORMATTER_H__
+
+#include "allocation.h"
+#include "formatter.h"
+#include "types.h"
+
+#include <string>
+#include <vector>
+
+struct nnfw_session;
+
+namespace onert_run
+{
+class H5Formatter : public Formatter
+{
+public:
+  H5Formatter(nnfw_session *sess) : Formatter(sess) {}
+  std::vector<TensorShape> readTensorShapes(const std::string &filename) override;
+  void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) override;
+  void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) override;
+};
+} // namespace onert_run
+
+#endif // __ONERT_RUN_H5FORMATTER_H__
diff --git a/tests/tools/onert_run/src/nnfw_util.cc b/tests/tools/onert_run/src/nnfw_util.cc
new file mode 100644 (file)
index 0000000..0a21395
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cassert>
+#include <string>
+#include "nnfw.h"
+
+namespace onert_run
+{
+uint64_t num_elems(const nnfw_tensorinfo *ti)
+{
+  uint64_t n = 1;
+  for (uint32_t i = 0; i < ti->rank; ++i)
+  {
+    assert(ti->dims[i] >= 0);
+    n *= ti->dims[i];
+  }
+  return n;
+}
+
+uint64_t bufsize_for(const nnfw_tensorinfo *ti)
+{
+  static int elmsize[] = {
+    sizeof(float),   /* NNFW_TYPE_TENSOR_FLOAT32 */
+    sizeof(int),     /* NNFW_TYPE_TENSOR_INT32 */
+    sizeof(uint8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM */
+    sizeof(bool),    /* NNFW_TYPE_TENSOR_BOOL = 3 */
+    sizeof(uint8_t), /* NNFW_TYPE_TENSOR_UINT8 = 4 */
+    sizeof(int64_t), /* NNFW_TYPE_TENSOR_INT64 = 5 */
+    sizeof(int8_t),  /* NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED = 6 */
+    sizeof(int16_t), /* NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED = 7 */
+  };
+  return elmsize[ti->dtype] * num_elems(ti);
+}
+
+} // namespace onert_run
diff --git a/tests/tools/onert_run/src/nnfw_util.h b/tests/tools/onert_run/src/nnfw_util.h
new file mode 100644 (file)
index 0000000..1fcdfdf
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_NNFW_UTIL_H__
+#define __ONERT_RUN_NNFW_UTIL_H__
+
+#include "nnfw.h"
+
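+// Exit the process immediately if an nnfw API call does not return NNFW_STATUS_NO_ERROR.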
+#define NNPR_ENSURE_STATUS(a)        \
+  do                                 \
+  {                                  \
+    if ((a) != NNFW_STATUS_NO_ERROR) \
+    {                                \
+      exit(-1);                      \
+    }                                \
+  } while (0)
+
+namespace onert_run
+{
+uint64_t num_elems(const nnfw_tensorinfo *ti);
+uint64_t bufsize_for(const nnfw_tensorinfo *ti);
+} // end of namespace onert_run
+
+#endif // __ONERT_RUN_NNFW_UTIL_H__
diff --git a/tests/tools/onert_run/src/onert_run.cc b/tests/tools/onert_run/src/onert_run.cc
new file mode 100644 (file)
index 0000000..5acb2bb
--- /dev/null
@@ -0,0 +1,331 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "allocation.h"
+#include "args.h"
+#include "benchmark.h"
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+#include "h5formatter.h"
+#endif
+#include "nnfw.h"
+#include "nnfw_util.h"
+#include "nnfw_internal.h"
+#include "randomgen.h"
+#include "rawformatter.h"
+#ifdef RUY_PROFILER
+#include "ruy/profiler/profiler.h"
+#endif
+
+#include <boost/program_options.hpp>
+#include <cassert>
+#include <chrono>
+#include <cstdlib>
+#include <iostream>
+#include <libgen.h>
+#include <stdexcept>
+#include <unordered_map>
+#include <vector>
+
+static const char *default_backend_cand = "cpu";
+
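+// Overwrite shape_map entries 0..N-1 with the given shapes (e.g., shapes read from an H5 file).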
+void overwriteShapeMap(onert_run::TensorShapeMap &shape_map,
+                       std::vector<onert_run::TensorShape> shapes)
+{
+  for (uint32_t i = 0; i < shapes.size(); i++)
+    shape_map[i] = shapes[i];
+}
+
+int main(const int argc, char **argv)
+{
+  using namespace onert_run;
+
+  try
+  {
+    Args args(argc, argv);
+    if (args.printVersion())
+    {
+      uint32_t version;
+      NNPR_ENSURE_STATUS(nnfw_query_info_u32(NULL, NNFW_INFO_ID_VERSION, &version));
+      std::cout << "onert_run (nnfw runtime: v" << (version >> 24) << "."
+                << ((version & 0x0000FF00) >> 8) << "." << (version & 0xFF) << ")" << std::endl;
+      exit(0);
+    }
+
+#ifdef RUY_PROFILER
+    ruy::profiler::ScopeProfile ruy_profile;
+#endif
+
+    // TODO Apply verbose level to phases
+    const int verbose = args.getVerboseLevel();
+    benchmark::Phases phases(
+      benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
+
+    nnfw_session *session = nullptr;
+    NNPR_ENSURE_STATUS(nnfw_create_session(&session));
+
+    // ModelLoad
+    phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
+      if (args.useSingleModel())
+        NNPR_ENSURE_STATUS(
+          nnfw_load_model_from_modelfile(session, args.getModelFilename().c_str()));
+      else
+        NNPR_ENSURE_STATUS(nnfw_load_model_from_file(session, args.getPackageFilename().c_str()));
+    });
+
+    char *available_backends = std::getenv("BACKENDS");
+    if (available_backends)
+      NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, available_backends));
+
+    uint32_t num_inputs;
+    NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));
+
+    // verify input and output
+
+    auto verifyInputTypes = [session]() {
+      uint32_t sz;
+      NNPR_ENSURE_STATUS(nnfw_input_size(session, &sz));
+      for (uint32_t i = 0; i < sz; ++i)
+      {
+        nnfw_tensorinfo ti;
+        NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
+
+        if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
+        {
+          std::cerr << "E: not supported input type" << std::endl;
+          exit(-1);
+        }
+      }
+    };
+
+    auto verifyOutputTypes = [session]() {
+      uint32_t sz;
+      NNPR_ENSURE_STATUS(nnfw_output_size(session, &sz));
+
+      for (uint32_t i = 0; i < sz; ++i)
+      {
+        nnfw_tensorinfo ti;
+        NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
+
+        if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
+        {
+          std::cerr << "E: not supported output type" << std::endl;
+          exit(-1);
+        }
+      }
+    };
+
+    auto setTensorInfo = [session](const TensorShapeMap &tensor_shape_map) {
+      for (auto tensor_shape : tensor_shape_map)
+      {
+        auto ind = tensor_shape.first;
+        auto &shape = tensor_shape.second;
+        nnfw_tensorinfo ti;
+        // to fill dtype
+        NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, ind, &ti));
+
+        bool set_input = false;
+        if (ti.rank != shape.size())
+        {
+          set_input = true;
+        }
+        else
+        {
+          for (int i = 0; i < ti.rank; i++)
+          {
+            if (ti.dims[i] != shape.at(i))
+            {
+              set_input = true;
+              break;
+            }
+          }
+        }
+        if (!set_input)
+          continue;
+
+        ti.rank = shape.size();
+        for (int i = 0; i < ti.rank; i++)
+          ti.dims[i] = shape.at(i);
+        NNPR_ENSURE_STATUS(nnfw_set_input_tensorinfo(session, ind, &ti));
+      }
+    };
+
+    verifyInputTypes();
+    verifyOutputTypes();
+
+// set input shape before compilation
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+
+    auto fill_shape_from_h5 = [&session](const std::string &h5_file, TensorShapeMap &shape_map) {
+      assert(!h5_file.empty());
+      auto shapes = H5Formatter(session).readTensorShapes(h5_file);
+      overwriteShapeMap(shape_map, shapes);
+    };
+
+    if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::PREPARE)
+      fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForPrepare());
+#endif
+    setTensorInfo(args.getShapeMapForPrepare());
+
+    // prepare execution
+
+    // TODO When nnfw_{prepare|run} are failed, can't catch the time
+    phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
+      NNPR_ENSURE_STATUS(nnfw_prepare(session));
+    });
+
+// set input shape after compilation and before execution
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+    if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::RUN ||
+        (!args.getLoadFilename().empty() && !args.shapeParamProvided()))
+      fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForRun());
+#endif
+    setTensorInfo(args.getShapeMapForRun());
+
+    // prepare input
+    std::vector<Allocation> inputs(num_inputs);
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+    if (!args.getLoadFilename().empty())
+      H5Formatter(session).loadInputs(args.getLoadFilename(), inputs);
+    else if (!args.getLoadRawFilename().empty())
+      RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs);
+    else
+      RandomGenerator(session).generate(inputs);
+#else
+    if (!args.getLoadRawFilename().empty())
+      RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs);
+    else
+      RandomGenerator(session).generate(inputs);
+#endif
+
+    // prepare output
+    uint32_t num_outputs = 0;
+    NNPR_ENSURE_STATUS(nnfw_output_size(session, &num_outputs));
+    std::vector<Allocation> outputs(num_outputs);
+    auto output_sizes = args.getOutputSizes();
+    for (uint32_t i = 0; i < num_outputs; i++)
+    {
+      nnfw_tensorinfo ti;
+      // Query tensor info unconditionally so that ti.dtype used below is always initialized
+      NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
+      uint64_t output_size_in_bytes = 0;
+      {
+        auto found = output_sizes.find(i);
+        if (found == output_sizes.end())
+        {
+          output_size_in_bytes = bufsize_for(&ti);
+        }
+        else
+        {
+          output_size_in_bytes = found->second;
+        }
+      }
+      outputs[i].alloc(output_size_in_bytes);
+      NNPR_ENSURE_STATUS(
+        nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes));
+      NNPR_ENSURE_STATUS(nnfw_set_output_layout(session, i, NNFW_LAYOUT_CHANNELS_LAST));
+    }
+
+    // NOTE: Measuring memory inevitably adds overhead. Therefore, memory is measured only
+    // during warmup.
+    if (verbose == 0)
+    {
+      phases.run(
+        "WARMUP",
+        [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
+        args.getWarmupRuns());
+      phases.run(
+        "EXECUTE",
+        [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
+        args.getNumRuns(), true);
+    }
+    else
+    {
+      phases.run(
+        "WARMUP",
+        [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
+        [&](const benchmark::Phase &phase, uint32_t nth) {
+          std::cout << "... "
+                    << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
+                    << std::endl;
+        },
+        args.getWarmupRuns());
+      phases.run(
+        "EXECUTE",
+        [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
+        [&](const benchmark::Phase &phase, uint32_t nth) {
+          std::cout << "... "
+                    << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
+                    << std::endl;
+        },
+        args.getNumRuns(), true);
+    }
+
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+    // dump output tensors
+    if (!args.getDumpFilename().empty())
+      H5Formatter(session).dumpOutputs(args.getDumpFilename(), outputs);
+#endif
+    if (!args.getDumpRawFilename().empty())
+      RawFormatter(session).dumpOutputs(args.getDumpRawFilename(), outputs);
+
+    NNPR_ENSURE_STATUS(nnfw_close_session(session));
+
+    // TODO Apply verbose level to result
+
+    // prepare result
+    benchmark::Result result(phases);
+
+    // to stdout
+    benchmark::printResult(result);
+
+    // to csv
+    if (args.getWriteReport() == false)
+      return 0;
+
+    // prepare csv task
+    std::string exec_basename;
+    std::string nnpkg_basename;
+    std::string backend_name = (available_backends) ? available_backends : default_backend_cand;
+    {
+      char buf[PATH_MAX];
+      char *res = args.useSingleModel() ? realpath(args.getModelFilename().c_str(), buf)
+                                        : realpath(args.getPackageFilename().c_str(), buf);
+      if (res)
+      {
+        nnpkg_basename = basename(buf);
+      }
+      else
+      {
+        std::cerr << "E: during getting realpath from nnpackage or model path." << std::endl;
+        exit(-1);
+      }
+      exec_basename = basename(argv[0]);
+    }
+
+    benchmark::writeResult(result, exec_basename, nnpkg_basename, backend_name);
+
+    return 0;
+  }
+  catch (boost::program_options::error &e)
+  {
+    std::cerr << "E: " << e.what() << std::endl;
+    exit(-1);
+  }
+  catch (std::runtime_error &e)
+  {
+    std::cerr << "E: Fail to run by runtime error:" << e.what() << std::endl;
+    exit(-1);
+  }
+}
diff --git a/tests/tools/onert_run/src/randomgen.cc b/tests/tools/onert_run/src/randomgen.cc
new file mode 100644 (file)
index 0000000..1a8a504
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "randomgen.h"
+#include "nnfw.h"
+#include "nnfw_util.h"
+#include "misc/RandomGenerator.h"
+
+#include <iostream>
+
+namespace onert_run
+{
+
+template <class T> void randomData(nnfw::misc::RandomGenerator &randgen, void *data, uint64_t size)
+{
+  for (uint64_t i = 0; i < size; i++)
+    reinterpret_cast<T *>(data)[i] = randgen.generate<T>();
+}
+
+void RandomGenerator::generate(std::vector<Allocation> &inputs)
+{
+  // generate random data
+  const int seed = 1;
+  nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
+  for (uint32_t i = 0; i < inputs.size(); ++i)
+  {
+    nnfw_tensorinfo ti;
+    NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
+    auto input_size_in_bytes = bufsize_for(&ti);
+    inputs[i].alloc(input_size_in_bytes);
+    switch (ti.dtype)
+    {
+      case NNFW_TYPE_TENSOR_FLOAT32:
+        randomData<float>(randgen, inputs[i].data(), num_elems(&ti));
+        break;
+      case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+        randomData<uint8_t>(randgen, inputs[i].data(), num_elems(&ti));
+        break;
+      case NNFW_TYPE_TENSOR_BOOL:
+        randomData<bool>(randgen, inputs[i].data(), num_elems(&ti));
+        break;
+      case NNFW_TYPE_TENSOR_UINT8:
+        randomData<uint8_t>(randgen, inputs[i].data(), num_elems(&ti));
+        break;
+      case NNFW_TYPE_TENSOR_INT32:
+        randomData<int32_t>(randgen, inputs[i].data(), num_elems(&ti));
+        break;
+      case NNFW_TYPE_TENSOR_INT64:
+        randomData<int64_t>(randgen, inputs[i].data(), num_elems(&ti));
+        break;
+      case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+        randomData<int16_t>(randgen, inputs[i].data(), num_elems(&ti));
+        break;
+      default:
+        std::cerr << "Not supported input type" << std::endl;
+        std::exit(-1);
+    }
+    NNPR_ENSURE_STATUS(
+      nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), input_size_in_bytes));
+    NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
+  }
+};
+
+} // end of namespace onert_run
diff --git a/tests/tools/onert_run/src/randomgen.h b/tests/tools/onert_run/src/randomgen.h
new file mode 100644 (file)
index 0000000..58afb41
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_RANDOMGEN_H__
+#define __ONERT_RUN_RANDOMGEN_H__
+
+#include <string>
+#include <vector>
+
+#include "allocation.h"
+
+struct nnfw_session;
+
+namespace onert_run
+{
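+// Fills each input Allocation with random data and binds it to the session as an input.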
+class RandomGenerator
+{
+public:
+  RandomGenerator(nnfw_session *sess) : session_(sess) {}
+  void generate(std::vector<Allocation> &inputs);
+
+private:
+  nnfw_session *session_;
+};
+} // namespace onert_run
+
+#endif // __ONERT_RUN_RANDOMGEN_H__
diff --git a/tests/tools/onert_run/src/rawformatter.cc b/tests/tools/onert_run/src/rawformatter.cc
new file mode 100644 (file)
index 0000000..7cfab99
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rawformatter.h"
+#include "nnfw.h"
+#include "nnfw_util.h"
+
+#include <iostream>
+#include <fstream>
+#include <stdexcept>
+
+namespace onert_run
+{
+void RawFormatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs)
+{
+  uint32_t num_inputs;
+  NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
+
+  // Support multiple inputs
+  // Option 1: Get comma-separated input file list like --load:raw a,b,c
+  // Option 2: Get prefix --load:raw in
+  //           Internally access in.0, in.1, in.2, ... in.{N-1} where N is determined by nnfw info
+  //           query api.
+  //
+  // Currently Option 2 is implemented.
+  try
+  {
+    for (uint32_t i = 0; i < num_inputs; ++i)
+    {
+      nnfw_tensorinfo ti;
+      NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
+
+      // allocate memory for data
+      auto bufsz = bufsize_for(&ti);
+      inputs[i].alloc(bufsz);
+
+      std::ifstream file(filename + "." + std::to_string(i), std::ios::ate | std::ios::binary);
+      auto filesz = file.tellg();
+      if (bufsz != filesz)
+      {
+        throw std::runtime_error("Input " + std::to_string(i) +
+                                 " size does not match: " + std::to_string(bufsz) +
+                                 " expected, but " + std::to_string(filesz) + " provided.");
+      }
+      file.seekg(0, std::ios::beg);
+      file.read(reinterpret_cast<char *>(inputs[i].data()), filesz);
+      file.close();
+
+      NNPR_ENSURE_STATUS(nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), bufsz));
+      NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
+    }
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << e.what() << std::endl;
+    std::exit(-1);
+  }
+};
+
+void RawFormatter::dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs)
+{
+  uint32_t num_outputs;
+  NNPR_ENSURE_STATUS(nnfw_output_size(session_, &num_outputs));
+  try
+  {
+    for (uint32_t i = 0; i < num_outputs; i++)
+    {
+      nnfw_tensorinfo ti;
+      NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session_, i, &ti));
+      auto bufsz = bufsize_for(&ti);
+
+      std::ofstream file(filename + "." + std::to_string(i), std::ios::out | std::ios::binary);
+      file.write(reinterpret_cast<const char *>(outputs[i].data()), bufsz);
+      file.close();
+      std::cerr << filename + "." + std::to_string(i) + " is generated.\n";
+    }
+  }
+  catch (const std::runtime_error &e)
+  {
+    std::cerr << "Error during dumpOutputs on onert_run : " << e.what() << std::endl;
+    std::exit(-1);
+  }
+}
+} // end of namespace onert_run
diff --git a/tests/tools/onert_run/src/rawformatter.h b/tests/tools/onert_run/src/rawformatter.h
new file mode 100644 (file)
index 0000000..b6eaab6
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_RAWFORMATTER_H__
+#define __ONERT_RUN_RAWFORMATTER_H__
+
+#include "allocation.h"
+#include "formatter.h"
+#include "types.h"
+
+#include <string>
+#include <vector>
+
+struct nnfw_session;
+
+namespace onert_run
+{
+class RawFormatter : public Formatter
+{
+public:
+  RawFormatter(nnfw_session *sess) : Formatter(sess) {}
+  void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) override;
+  void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) override;
+};
+} // namespace onert_run
+
+#endif // __ONERT_RUN_RAWFORMATTER_H__
diff --git a/tests/tools/onert_run/src/types.h b/tests/tools/onert_run/src/types.h
new file mode 100644 (file)
index 0000000..563c5e4
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_TYPES_H__
+#define __ONERT_RUN_TYPES_H__
+
+#include <vector>
+
+namespace onert_run
+{
+
+using TensorShape = std::vector<int>;
+
+} // end of namespace onert_run
+
+#endif // __ONERT_RUN_TYPES_H__
diff --git a/tests/tools/tflite_benchmark_model/CMakeLists.txt b/tests/tools/tflite_benchmark_model/CMakeLists.txt
deleted file mode 100644 (file)
index 017e1da..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-if (NOT BUILD_TFLITE_BENCHMARK_MODEL)
-  return()
-endif(NOT BUILD_TFLITE_BENCHMARK_MODEL)
-
-nnfw_find_package(TensorFlowLite EXACT 1.13.1 REQUIRED)
-
-# TODO Remove this target_compile_definitions command, and just check its presence.
-#      This change is prerequisites on pre-built tensorflow-lite package support
-target_compile_definitions(tensorflow-lite PUBLIC "TFLITE_PROFILING_ENABLED")
-
-file(GLOB_RECURSE SOURCES "*.cc")
-
-nnas_find_package(TensorFlowSource EXACT 1.13.1 REQUIRED)
-set(TENSORFLOW_LITE_BASE "${TensorFlowSource_DIR}/tensorflow/lite")
-list(APPEND SOURCES "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_main.cc"
-                    "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_model.cc"
-                    "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_params.cc"
-                    "${TENSORFLOW_LITE_BASE}/tools/benchmark/command_line_flags.cc")
-
-add_executable(tflite_benchmark_model ${SOURCES})
-target_compile_definitions(tflite_benchmark_model PUBLIC "TFLITE_PROFILING_ENABLED")
-target_link_libraries(tflite_benchmark_model nnfw_lib_misc nnfw_lib_tflite nnfw_lib_profiling)
-target_link_libraries(tflite_benchmark_model tensorflow-lite ${LIB_PTHREAD} dl)
-install(TARGETS tflite_benchmark_model DESTINATION bin)
diff --git a/tests/tools/tflite_benchmark_model/README.md b/tests/tools/tflite_benchmark_model/README.md
deleted file mode 100644 (file)
index a71a2fa..0000000
+++ /dev/null
@@ -1,197 +0,0 @@
-# TFLite Model Benchmark Tool
-
-## Description
-
-A simple C++ binary to benchmark a TFLite model and its individual operators,
-both on desktop machines and on Android. The binary takes a TFLite model,
-generates random inputs and then repeatedly runs the model for specified number
-of runs. Aggregrate latency statistics are reported after running the benchmark.
-
-The instructions below are for running the binary on Desktop and Android,
-for iOS please use the
-[iOS benchmark app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/benchmark/ios).
-
-## Parameters
-
-The binary takes the following required parameters:
-
-*   `graph`: `string` \
-    The path to the TFLite model file.
-
-and the following optional parameters:
-
-*   `num_threads`: `int` (default=1) \
-    The number of threads to use for running TFLite interpreter.
-*   `warmup_runs`: `int` (default=1) \
-    The number of warmup runs to do before starting the benchmark.
-*   `num_runs`: `int` (default=50) \
-    The number of runs. Increase this to reduce variance.
-*   `run_delay`: `float` (default=-1.0) \
-    The delay in seconds between subsequent benchmark runs. Non-positive values
-    mean use no delay.
-*   `use_nnapi`: `bool` (default=false) \
-    Whether to use [Android NNAPI](https://developer.android.com/ndk/guides/neuralnetworks/).
-    This API is available on recent Android devices.
-
-## To build/install/run
-
-### On Android:
-
-(0) Refer to https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android to edit the `WORKSPACE` to configure the android NDK/SDK.
-
-(1) Build for your specific platform, e.g.:
-
-```
-bazel build -c opt \
-  --config=android_arm \
-  --cxxopt='--std=c++11' \
-  tensorflow/lite/tools/benchmark:benchmark_model
-```
-
-(2) Connect your phone. Push the binary to your phone with adb push
-     (make the directory if required):
-
-```
-adb push bazel-bin/tensorflow/lite/tools/benchmark/benchmark_model /data/local/tmp
-```
-
-(3) Make the binary executable.
-
-```
-adb shell chmod +x /data/local/tmp/benchmark_model
-```
-
-(4) Push the compute graph that you need to test. For example:
-
-```
-adb push mobilenet_quant_v1_224.tflite /data/local/tmp
-```
-
-(5) Run the benchmark. For example:
-
-```
-adb shell /data/local/tmp/benchmark_model \
-  --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \
-  --num_threads=4
-```
-
-### On desktop:
-
-(1) Build the binary
-
-```
-bazel build -c opt tensorflow/lite/tools/benchmark:benchmark_model
-```
-
-(2) Run on your compute graph, similar to the Android case but without the need for adb shell.
-For example:
-
-```
-bazel-bin/tensorflow/lite/tools/benchmark/benchmark_model \
-  --graph=mobilenet_quant_v1_224.tflite \
-  --num_threads=4
-```
-
-The MobileNet graph used as an example here may be downloaded from [here](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip).
-
-
-## Reducing variance between runs on Android
-
-Most modern Android phones use the [ARM big.LITTLE](https://en.wikipedia.org/wiki/ARM_big.LITTLE)
-architecture, where some cores are faster but more power-hungry than others.
-When running benchmarks on these phones, there can be significant variance
-between different runs of the benchmark. One way to reduce this variance is to
-set the [CPU affinity](https://en.wikipedia.org/wiki/Processor_affinity)
-before running the benchmark. On Android this can be done using the `taskset`
-command.
-For example, to run the benchmark on the big cores of a Pixel 2 with a single
-thread, use the following command:
-
-```
-adb shell taskset f0 /data/local/tmp/benchmark_model \
-  --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \
-  --num_threads=1
-```
-
-where `f0` is the affinity mask for the big cores on a Pixel 2.
-Note: The affinity mask varies with the device.
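-
-For reference, `f0` is hexadecimal for the bit pattern `11110000`, i.e. CPUs
-4-7. As a rough, device-dependent heuristic, the big cores can usually be
-identified by comparing the maximum frequency reported for each core, for
-example:
-
-```
-adb shell 'cat /sys/devices/system/cpu/cpu*/cpufreq/cpuinfo_max_freq'
-```
-
-The cores reporting the highest maximum frequency are typically the big cores;
-set the corresponding bits in the `taskset` mask.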
-
-## Profiling model operators
-The benchmark binary can also profile operators and report the execution time of each one.
-To do this, compile the binary with profiling support enabled: pass **--copt=-DTFLITE_PROFILING_ENABLED**
-when building the benchmark.
-For example, to compile with profiling support on Android, add this flag to the previous command:
-
-```
-bazel build -c opt \
-  --config=android_arm \
-  --cxxopt='--std=c++11' \
-  --copt=-DTFLITE_PROFILING_ENABLED \
-  tensorflow/lite/tools/benchmark:benchmark_model
-```
-This compiles TFLite with profiling enabled; you can then run the benchmark binary as before. The binary will produce detailed statistics for each operation, similar to those shown below:
-
-```
-
-============================== Run Order ==============================
-                    [node type]          [start]         [first]        [avg ms]            [%]          [cdf%]          [mem KB]      [times called]  [Name]
-                        CONV_2D            0.000           4.269           4.269         0.107%          0.107%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_0/Relu6]
-              DEPTHWISE_CONV_2D            4.270           2.150           2.150         0.054%          0.161%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_1_depthwise/Relu6]
-                        CONV_2D            6.421           6.107           6.107         0.153%          0.314%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6]
-              DEPTHWISE_CONV_2D           12.528           1.366           1.366         0.034%          0.348%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_2_depthwise/Relu6]
-                        CONV_2D           13.895           4.195           4.195         0.105%          0.454%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_2_pointwise/Relu6]
-              DEPTHWISE_CONV_2D           18.091           1.260           1.260         0.032%          0.485%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_3_depthwise/Relu6]
-                        CONV_2D           19.352           6.652           6.652         0.167%          0.652%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6]
-              DEPTHWISE_CONV_2D           26.005           0.698           0.698         0.018%          0.670%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_4_depthwise/Relu6]
-                        CONV_2D           26.703           3.344           3.344         0.084%          0.754%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_4_pointwise/Relu6]
-              DEPTHWISE_CONV_2D           30.047           0.646           0.646         0.016%          0.770%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_5_depthwise/Relu6]
-                        CONV_2D           30.694           5.800           5.800         0.145%          0.915%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6]
-              DEPTHWISE_CONV_2D           36.495           0.331           0.331         0.008%          0.924%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_6_depthwise/Relu6]
-                        CONV_2D           36.826           2.838           2.838         0.071%          0.995%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_6_pointwise/Relu6]
-              DEPTHWISE_CONV_2D           39.665           0.439           0.439         0.011%          1.006%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_7_depthwise/Relu6]
-                        CONV_2D           40.105           5.293           5.293         0.133%          1.139%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6]
-              DEPTHWISE_CONV_2D           45.399           0.352           0.352         0.009%          1.147%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_8_depthwise/Relu6]
-                        CONV_2D           45.752           5.322           5.322         0.133%          1.281%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6]
-              DEPTHWISE_CONV_2D           51.075           0.357           0.357         0.009%          1.290%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_9_depthwise/Relu6]
-                        CONV_2D           51.432           5.693           5.693         0.143%          1.433%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6]
-              DEPTHWISE_CONV_2D           57.126           0.366           0.366         0.009%          1.442%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_10_depthwise/Relu6]
-                        CONV_2D           57.493           5.472           5.472         0.137%          1.579%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6]
-              DEPTHWISE_CONV_2D           62.966           0.364           0.364         0.009%          1.588%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_11_depthwise/Relu6]
-                        CONV_2D           63.330           5.404           5.404         0.136%          1.724%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6]
-              DEPTHWISE_CONV_2D           68.735           0.155           0.155         0.004%          1.728%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_12_depthwise/Relu6]
-                        CONV_2D           68.891           2.970           2.970         0.074%          1.802%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_12_pointwise/Relu6]
-              DEPTHWISE_CONV_2D           71.862           0.206           0.206         0.005%          1.807%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_13_depthwise/Relu6]
-                        CONV_2D           72.069           5.888           5.888         0.148%          1.955%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6]
-                AVERAGE_POOL_2D           77.958           0.036           0.036         0.001%          1.956%             0.000              0       [MobilenetV1/Logits/AvgPool_1a/AvgPool]
-                        CONV_2D           77.994           1.445           1.445         0.036%          1.992%             0.000              0       [MobilenetV1/Logits/Conv2d_1c_1x1/BiasAdd]
-                        RESHAPE           79.440           0.002           0.002         0.000%          1.992%             0.000              0       [MobilenetV1/Predictions/Reshape]
-                        SOFTMAX           79.443           0.029           0.029         0.001%          1.993%             0.000              0       [MobilenetV1/Predictions/Softmax]
-
-============================== Top by Computation Time ==============================
-                    [node type]          [start]         [first]        [avg ms]            [%]          [cdf%]          [mem KB]      [times called]  [Name]
-                        CONV_2D           19.352           6.652           6.652         0.167%          0.167%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6]
-                        CONV_2D            6.421           6.107           6.107         0.153%          0.320%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6]
-                        CONV_2D           72.069           5.888           5.888         0.148%          0.468%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6]
-                        CONV_2D           30.694           5.800           5.800         0.145%          0.613%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6]
-                        CONV_2D           51.432           5.693           5.693         0.143%          0.756%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6]
-                        CONV_2D           57.493           5.472           5.472         0.137%          0.893%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6]
-                        CONV_2D           63.330           5.404           5.404         0.136%          1.029%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6]
-                        CONV_2D           45.752           5.322           5.322         0.133%          1.162%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6]
-                        CONV_2D           40.105           5.293           5.293         0.133%          1.295%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6]
-                        CONV_2D            0.000           4.269           4.269         0.107%          1.402%             0.000              0       [MobilenetV1/MobilenetV1/Conv2d_0/Relu6]
-
-Number of nodes executed: 31
-============================== Summary by node type ==============================
-                    [Node type]          [count]         [avg ms]          [avg %]         [cdf %]       [mem KB]      [times called]
-                        CONV_2D               15            1.406          89.270%         89.270%          0.000              0
-              DEPTHWISE_CONV_2D               13            0.169          10.730%        100.000%          0.000              0
-                        SOFTMAX                1            0.000           0.000%        100.000%          0.000              0
-                        RESHAPE                1            0.000           0.000%        100.000%          0.000              0
-                AVERAGE_POOL_2D                1            0.000           0.000%        100.000%          0.000              0
-
-Timings (microseconds): count=50 first=79449 curr=81350 min=77385 max=88213 avg=79732 std=1929
-Memory (bytes): count=0
-31 nodes observed
-
-
-Average inference timings in us: Warmup: 83235, Init: 38467, no stats: 79760.9
-```
diff --git a/tests/tools/tflite_benchmark_model/benchmark_tflite_model.cc b/tests/tools/tflite_benchmark_model/benchmark_tflite_model.cc
deleted file mode 100644 (file)
index 16e85fc..0000000
+++ /dev/null
@@ -1,419 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/lite/tools/benchmark/benchmark_tflite_model.h"
-
-#include <cstdarg>
-#include <cstdlib>
-#include <iostream>
-#include <memory>
-#include <string>
-#include <unordered_set>
-#include <vector>
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/op_resolver.h"
-#include "tensorflow/lite/string_util.h"
-#include "tensorflow/lite/tools/benchmark/logging.h"
-
-#ifdef GEMMLOWP_PROFILING
-#include "gemmlowp/profiling/profiler.h"
-#endif
-
-// For profiling nnapi_delegate
-#include "profiling/profiling.h"
-#include "tflite/ext/nnapi_delegate.h"
-
-namespace {
-  nnfw::tflite::NNAPIDelegate nnfw_delegate_;
-}
-
-#ifdef TFLITE_CUSTOM_OPS_HEADER
-void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);
-#endif
-
-namespace tflite {
-namespace benchmark {
-
-void ProfilingListener::SetInterpreter(tflite::Interpreter* interpreter) {
-  TFLITE_BENCHMARK_CHECK(interpreter);
-  interpreter_ = interpreter;
-  interpreter_->SetProfiler(&profiler_);
-}
-
-void ProfilingListener::OnSingleRunStart(RunType run_type) {
-  if (run_type == REGULAR) {
-    profiler_.Reset();
-    profiler_.StartProfiling();
-  }
-}
-
-void ProfilingListener::OnBenchmarkEnd(const BenchmarkResults& results) {
-  if (has_profiles_) {
-    TFLITE_LOG(INFO) << summarizer_.GetOutputString();
-  }
-}
-
-void ProfilingListener::OnSingleRunEnd() {
-  profiler_.StopProfiling();
-  auto profile_events = profiler_.GetProfileEvents();
-  has_profiles_ = !profile_events.empty();
-  summarizer_.ProcessProfiles(profile_events, *interpreter_);
-}
-
-void GemmlowpProfilingListener::OnBenchmarkStart(
-    const BenchmarkParams& params) {
-#ifdef GEMMLOWP_PROFILING
-  gemmlowp::RegisterCurrentThreadForProfiling();
-  gemmlowp::StartProfiling();
-#endif
-}
-
-void GemmlowpProfilingListener::OnBenchmarkEnd(
-    const BenchmarkResults& results) {
-#ifdef GEMMLOWP_PROFILING
-  gemmlowp::FinishProfiling();
-#endif
-}
-
-namespace {
-
-std::vector<std::string> Split(const std::string& str, const char delim) {
-  std::istringstream input(str);
-  std::vector<std::string> results;
-  std::string item;
-  while (std::getline(input, item, delim)) {
-    results.push_back(item);
-  }
-  return results;
-}
-
-template <typename T>
-bool SplitAndParse(const std::string& str, char delim, std::vector<T>* values) {
-  std::istringstream input(str);
-  bool first = true;
-  while (!input.eof()) {
-    if (!first) {
-      char c;
-      input >> c;
-      if (c != delim) {
-        return false;
-      }
-    } else {
-      first = false;
-    }
-    T val;
-    input >> val;
-    if (!input.eof() && !input.good()) {
-      return false;
-    }
-    values->push_back(val);
-  }
-  return true;
-}
-
-template <typename T>
-void FillRandomValue(T* ptr, const std::vector<int>& sizes,
-                     const std::function<T()>& random_func) {
-  int num_elements = 1;
-  for (int dim : sizes) {
-    num_elements *= dim;
-  }
-  for (int i = 0; i < num_elements; ++i) {
-    *ptr++ = random_func();
-  }
-}
-
-void FillRandomString(tflite::DynamicBuffer* buffer,
-                      const std::vector<int>& sizes,
-                      const std::function<string()>& random_func) {
-  int num_elements = 1;
-  for (int dim : sizes) {
-    num_elements *= dim;
-  }
-  for (int i = 0; i < num_elements; ++i) {
-    auto str = random_func();
-    buffer->AddString(str.data(), str.length());
-  }
-}
-
-bool PopulateInputLayerInfo(
-    const string& names_string, const string& shapes_string,
-    std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
-  std::vector<std::string> names = Split(names_string, ',');
-  std::vector<std::string> shapes = Split(shapes_string, ':');
-
-  if (names.size() != shapes.size()) {
-    TFLITE_LOG(ERROR) << "The number of items in"
-                      << " --input_layer_shape (" << shapes_string << ", with "
-                      << shapes.size() << " items)"
-                      << " must match the number of items in"
-                      << " --input_layer (" << names_string << ", with "
-                      << names.size() << " items)."
-                      << " For example --input_layer=input1,input2"
-                      << " --input_layer_shape=1,224,224,4:1,20";
-    return false;
-  }
-
-  for (int i = 0; i < names.size(); ++i) {
-    info->push_back(BenchmarkTfLiteModel::InputLayerInfo());
-    BenchmarkTfLiteModel::InputLayerInfo& input = info->back();
-
-    input.name = names[i];
-
-    TFLITE_BENCHMARK_CHECK(SplitAndParse(shapes[i], ',', &input.shape))
-        << "Incorrect size string specified: " << shapes[i];
-    for (int dim : input.shape) {
-      if (dim == -1) {
-        TFLITE_LOG(ERROR)
-            << "Any unknown sizes in the shapes (-1's) must be replaced"
-            << " with the size you want to benchmark with.";
-        return false;
-      }
-    }
-  }
-
-  return true;
-}
-
-std::vector<int> TfLiteIntArrayToVector(const TfLiteIntArray* int_array) {
-  std::vector<int> values;
-  values.reserve(int_array->size);
-  for (size_t i = 0; i < int_array->size; i++) {
-    values.push_back(int_array->data[i]);
-  }
-  return values;
-}
-
-}  // namespace
-
-BenchmarkParams BenchmarkTfLiteModel::DefaultParams() {
-  BenchmarkParams default_params = BenchmarkModel::DefaultParams();
-  default_params.AddParam("graph", BenchmarkParam::Create<std::string>(""));
-  default_params.AddParam("input_layer",
-                          BenchmarkParam::Create<std::string>(""));
-  default_params.AddParam("input_layer_shape",
-                          BenchmarkParam::Create<std::string>(""));
-  default_params.AddParam("use_nnapi", BenchmarkParam::Create<bool>(false));
-  return default_params;
-}
-
-BenchmarkTfLiteModel::BenchmarkTfLiteModel()
-    : BenchmarkTfLiteModel(DefaultParams()) {}
-
-BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params)
-    : BenchmarkModel(std::move(params)) {
-  AddListener(&profiling_listener_);
-  AddListener(&gemmlowp_profiling_listener_);
-}
-
-std::vector<Flag> BenchmarkTfLiteModel::GetFlags() {
-  std::vector<Flag> flags = BenchmarkTfLiteModel::BenchmarkModel::GetFlags();
-  std::vector<Flag> specific_flags = {
-      CreateFlag<std::string>("graph", &params_, "graph file name"),
-      CreateFlag<std::string>("input_layer", &params_, "input layer names"),
-      CreateFlag<std::string>("input_layer_shape", &params_,
-                              "input layer shape"),
-      CreateFlag<bool>("use_nnapi", &params_, "use nnapi api")};
-
-  flags.insert(flags.end(), specific_flags.begin(), specific_flags.end());
-  return flags;
-}
-
-void BenchmarkTfLiteModel::LogParams() {
-  BenchmarkModel::LogParams();
-  TFLITE_LOG(INFO) << "Graph: [" << params_.Get<std::string>("graph") << "]";
-  TFLITE_LOG(INFO) << "Input layers: ["
-                   << params_.Get<std::string>("input_layer") << "]";
-  TFLITE_LOG(INFO) << "Input shapes: ["
-                   << params_.Get<std::string>("input_layer_shape") << "]";
-  TFLITE_LOG(INFO) << "Use nnapi : [" << params_.Get<bool>("use_nnapi") << "]";
-}
-
-bool BenchmarkTfLiteModel::ValidateParams() {
-  if (params_.Get<std::string>("graph").empty()) {
-    TFLITE_LOG(ERROR)
-        << "Please specify the name of your TF Lite input file with --graph";
-    return false;
-  }
-  return PopulateInputLayerInfo(params_.Get<std::string>("input_layer"),
-                                params_.Get<std::string>("input_layer_shape"),
-                                &inputs);
-}
-
-uint64_t BenchmarkTfLiteModel::ComputeInputBytes() {
-  TFLITE_BENCHMARK_CHECK(interpreter);
-  uint64_t total_input_bytes = 0;
-  for (int input : interpreter->inputs()) {
-    auto* t = interpreter->tensor(input);
-    total_input_bytes += t->bytes;
-  }
-  return total_input_bytes;
-}
-
-void BenchmarkTfLiteModel::PrepareInputsAndOutputs() {
-  auto interpreter_inputs = interpreter->inputs();
-  // Set the values of the input tensors.
-  for (int j = 0; j < interpreter_inputs.size(); ++j) {
-    int i = interpreter_inputs[j];
-    TfLiteTensor* t = interpreter->tensor(i);
-    std::vector<int> sizes = TfLiteIntArrayToVector(t->dims);
-    // TODO(ahentz): below we ignore the 0-th dimension (number of batches).
-    if (t->type == kTfLiteFloat32) {
-      FillRandomValue<float>(
-          interpreter->typed_tensor<float>(i),
-          std::vector<int>(sizes.begin() + 1, sizes.end()),
-          []() { return static_cast<float>(rand()) / RAND_MAX - 0.5f; });
-    } else if (t->type == kTfLiteInt32) {
-      // TODO(yunluli): This is currently only used for handling embedding input
-      // for speech models. Generalize if necessary.
-      FillRandomValue<int32_t>(
-          interpreter->typed_tensor<int32_t>(i),
-          std::vector<int32_t>(sizes.begin() + 1, sizes.end()),
-          []() { return static_cast<int32_t>(rand()) % 100; });
-    } else if (t->type == kTfLiteUInt8) {
-      FillRandomValue<uint8_t>(
-          interpreter->typed_tensor<uint8_t>(i),
-          std::vector<int>(sizes.begin() + 1, sizes.end()),
-          []() { return static_cast<uint8_t>(rand()) % 255; });
-    } else if (t->type == kTfLiteInt8) {
-      FillRandomValue<int8_t>(
-          interpreter->typed_tensor<int8_t>(i),
-          std::vector<int>(sizes.begin() + 1, sizes.end()),
-          []() { return static_cast<int8_t>(rand()) % 255 - 127; });
-    } else if (t->type == kTfLiteString) {
-      tflite::DynamicBuffer buffer;
-      FillRandomString(&buffer, sizes, []() {
-        return "we're have some friends over saturday to hang out in the yard";
-      });
-      buffer.WriteToTensor(interpreter->tensor(i), /*new_shape=*/nullptr);
-    } else {
-      TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t->name
-                        << " of type " << t->type;
-    }
-  }
-}
-
-void BenchmarkTfLiteModel::Init() {
-  std::string graph = params_.Get<std::string>("graph");
-  model = tflite::FlatBufferModel::BuildFromFile(graph.c_str());
-  if (!model) {
-    TFLITE_LOG(FATAL) << "Failed to mmap model " << graph;
-  }
-  TFLITE_LOG(INFO) << "Loaded model " << graph;
-  model->error_reporter();
-  TFLITE_LOG(INFO) << "resolved reporter";
-
-#ifdef TFLITE_CUSTOM_OPS_HEADER
-  tflite::MutableOpResolver resolver;
-  RegisterSelectedOps(&resolver);
-#else
-  nnfw::tflite::BuiltinOpResolver resolver;
-#endif
-
-  tflite::InterpreterBuilder(*model, resolver)(&interpreter);
-  if (!interpreter) {
-    TFLITE_LOG(FATAL) << "Failed to construct interpreter";
-  }
-  profiling_listener_.SetInterpreter(interpreter.get());
-  ::profiling::Context::get().setProfiler(interpreter->GetProfiler());
-
-  auto enable_sync = std::getenv("PROFILING_OP_SYNC");
-  if (enable_sync && std::strtol(enable_sync, NULL, 0) != 0)
-  {
-    ::profiling::Context::get().setSync();
-  }
-
-  const int32_t num_threads = params_.Get<int32_t>("num_threads");
-
-  if (num_threads != -1) {
-    interpreter->SetNumThreads(num_threads);
-  }
-
-  bool use_nnapi = params_.Get<bool>("use_nnapi");
-
-  interpreter->UseNNAPI(use_nnapi);
-  if (use_nnapi) {
-    if (nnfw_delegate_.BuildGraph(&(interpreter.get()->primary_subgraph())) != kTfLiteOk) {
-      TFLITE_LOG(FATAL) << "Failed to BuildGraph!";
-    }
-  }
-  ApplyDelegates();
-
-  auto interpreter_inputs = interpreter->inputs();
-
-  if (!inputs.empty()) {
-    TFLITE_BENCHMARK_CHECK_EQ(inputs.size(), interpreter_inputs.size())
-        << "Inputs mismatch: Model inputs #:" << interpreter_inputs.size()
-        << " expected: " << inputs.size();
-  }
-
-  // TFLITE_BENCHMARK_CHECK that all names and types match
-  for (int j = 0; j < inputs.size(); ++j) {
-    const InputLayerInfo& input = inputs[j];
-    int i = interpreter_inputs[j];
-    TfLiteTensor* t = interpreter->tensor(i);
-    TFLITE_BENCHMARK_CHECK_EQ(t->name, input.name)
-        << "Tensor # " << i << " is named " << t->name << " but flags call it "
-        << input.name;
-  }
-
-  // Resize all non-string tensors.
-  for (int j = 0; j < inputs.size(); ++j) {
-    const InputLayerInfo& input = inputs[j];
-    int i = interpreter_inputs[j];
-    TfLiteTensor* t = interpreter->tensor(i);
-    if (t->type != kTfLiteString) {
-      interpreter->ResizeInputTensor(i, input.shape);
-    }
-  }
-
-  if (interpreter->AllocateTensors() != kTfLiteOk) {
-    TFLITE_LOG(FATAL) << "Failed to allocate tensors!";
-  }
-}
-
-void BenchmarkTfLiteModel::RunImpl() {
-  bool use_nnapi = params_.Get<bool>("use_nnapi");
-  if (use_nnapi) {
-    if (nnfw_delegate_.Invoke(&interpreter->primary_subgraph()) != kTfLiteOk) {
-      TFLITE_LOG(FATAL) << "Failed to invoke!";
-    }
-  } else {
-    if (interpreter->Invoke() != kTfLiteOk) {
-      TFLITE_LOG(FATAL) << "Failed to invoke!";
-    }
-  }
-}
-
-}  // namespace benchmark
-}  // namespace tflite
diff --git a/tests/tools/tflite_benchmark_model/profile_summarizer.cc b/tests/tools/tflite_benchmark_model/profile_summarizer.cc
deleted file mode 100644 (file)
index b547c70..0000000
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/lite/profiling/profile_summarizer.h"
-
-#include <sstream>
-
-#include "tensorflow/lite/schema/schema_generated.h"
-
-namespace tflite {
-namespace profiling {
-namespace {
-
-struct OperatorDetails {
-  std::string name;
-  std::vector<std::string> inputs;
-  std::vector<std::string> outputs;
-};
-
-std::string GetTensorName(const tflite::Interpreter& interpreter,
-                          int tensor_index) {
-  const auto tensor = interpreter.tensor(tensor_index);
-  if (tensor == nullptr || tensor->name == nullptr) {
-    return "Unknown";
-  }
-  return tensor->name;
-}
-std::vector<std::string> GetTensorNames(const tflite::Interpreter& interpreter,
-                                        const TfLiteIntArray* tensor_indices) {
-  std::vector<std::string> tensors;
-  tensors.reserve(tensor_indices->size);
-  for (int i = 0; i < tensor_indices->size; i++) {
-    tensors.push_back(GetTensorName(interpreter, tensor_indices->data[i]));
-  }
-  return tensors;
-}
-
-std::string ToString(const std::vector<std::string>& str_vector) {
-  std::stringstream stream;
-  stream << "[";
-  bool first = true;
-  for (const auto& s : str_vector) {
-    if (!first) {
-      stream << ", ";
-    } else {
-      first = false;
-    }
-    stream << s;
-  }
-  stream << "]";
-  return stream.str();
-}
-
-OperatorDetails GetOperatorDetails(const tflite::Interpreter& interpreter,
-                                   int node_index) {
-  auto node_reg = interpreter.node_and_registration(node_index);
-  auto inputs = node_reg->first.inputs;
-  auto outputs = node_reg->first.outputs;
-  int code = node_reg->second.builtin_code;
-  const char* op_name = nullptr;
-  if (code == tflite::BuiltinOperator_CUSTOM) {
-    const char* custom_name = node_reg->second.custom_name;
-    op_name = custom_name ? custom_name : "UnknownCustomOp";
-  } else {
-    op_name = tflite::EnumNamesBuiltinOperator()[code];
-  }
-  const char* profiling_string =
-      interpreter.OpProfilingString(node_reg->second, &node_reg->first);
-  OperatorDetails details;
-  details.name = op_name;
-  if (profiling_string) {
-    details.name += ":" + std::string(profiling_string);
-  }
-  details.inputs = GetTensorNames(interpreter, inputs);
-  details.outputs = GetTensorNames(interpreter, outputs);
-  return details;
-}
-
-tensorflow::StatSummarizerOptions GetProfileSummarizerOptions() {
-  auto options = tensorflow::StatSummarizerOptions();
-  options.show_summary = true;
-  options.show_memory = false;
-  return options;
-}
-
-}  // namespace
-
-ProfileSummarizer::ProfileSummarizer()
-    : stats_calculator_(
-          new ::tensorflow::StatsCalculator(GetProfileSummarizerOptions())) {}
-
-void ProfileSummarizer::ProcessProfiles(
-    const std::vector<const ProfileEvent*>& profile_stats,
-    const tflite::Interpreter& interpreter) {
-  std::vector<const ProfileEvent*> events;
-  std::copy_if(profile_stats.begin(), profile_stats.end(),
-               std::back_inserter(events), [](const ProfileEvent* e) {
-                 return e->event_type ==
-                            ProfileEvent::EventType::OPERATOR_INVOKE_EVENT &&
-                        e->end_timestamp_us >= e->begin_timestamp_us;
-               });
-  // Sort with begin_time.
-  std::sort(events.begin(), events.end(),
-            [](const ProfileEvent* const& a, const ProfileEvent* const& b) {
-              return a->begin_timestamp_us < b->begin_timestamp_us;
-            });
-  if (events.empty()) {
-    return;
-  }
-
-  int64_t base_start_us = events[0]->begin_timestamp_us;
-  int node_num = 0;
-  int64_t curr_total_us = 0;
-  int prev_op_idx = -1;
-  int child_op_no = 1;
-  for (auto event : events) {
-    auto op_details = GetOperatorDetails(interpreter, event->event_metadata);
-    bool from_same_op = (prev_op_idx == event->event_metadata);
-    child_op_no = from_same_op ? child_op_no + 1 : 1;
-    auto node_name = ToString(op_details.outputs) + "#" + std::to_string(child_op_no);
-    int64_t start_us = event->begin_timestamp_us - base_start_us;
-    int64_t node_exec_time =
-        event->end_timestamp_us - event->begin_timestamp_us;
-    stats_calculator_->AddNodeStats(node_name, op_details.name, node_num,
-                                    start_us, node_exec_time, 0 /*memory */);
-    curr_total_us += node_exec_time;
-    ++node_num;
-    prev_op_idx = event->event_metadata;
-  }
-  stats_calculator_->UpdateRunTotalUs(curr_total_us);
-}
-}  // namespace profiling
-}  // namespace tflite
diff --git a/tests/tools/tflite_benchmark_model/stats_calculator.cc b/tests/tools/tflite_benchmark_model/stats_calculator.cc
deleted file mode 100644 (file)
index 5786507..0000000
+++ /dev/null
@@ -1,317 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/util/stats_calculator.h"
-
-#include <iomanip>
-#include <map>
-#include <queue>
-#include <sstream>
-#include <string>
-#include <algorithm>
-
-namespace tensorflow {
-
-StatsCalculator::StatsCalculator(const StatSummarizerOptions& options)
-    : options_(options) {}
-
-std::string StatsCalculator::GetShortSummary() const {
-  std::stringstream stream;
-  stream << "Timings (microseconds): ";
-  run_total_us_.OutputToStream(&stream);
-  stream << std::endl;
-
-  stream << "Memory (bytes): ";
-  memory_.OutputToStream(&stream);
-  stream << std::endl;
-
-  stream << details_.size() << " nodes observed" << std::endl;
-  return stream.str();
-}
-
-std::ostream& InitField(std::ostream& stream, int width) {
-  stream << "\t" << std::right << std::setw(width) << std::fixed
-         << std::setprecision(3);
-  return stream;
-}
-
-std::string StatsCalculator::HeaderString(const std::string& title) const {
-  std::stringstream stream;
-
-  stream << "============================== " << title
-         << " ==============================" << std::endl;
-
-  InitField(stream, 24) << "[node type]";
-  InitField(stream, 9) << "[start]";
-  InitField(stream, 9) << "[first]";
-  InitField(stream, 9) << "[avg ms]";
-  InitField(stream, 8) << "[%]";
-  InitField(stream, 8) << "[cdf%]";
-  InitField(stream, 10) << "[mem KB]";
-  InitField(stream, 9) << "[times called]";
-  stream << "\t"
-         << "[Name]";
-  return stream.str();
-}
-
-std::string StatsCalculator::ColumnString(const Detail& detail,
-                                          const int64_t cumulative_stat_on_node,
-                                          const Stat<int64_t>& stat) const {
-  const double start_ms = detail.start_us.avg() / 1000.0;
-  const double first_time_ms = detail.rel_end_us.first() / 1000.0;
-  const double avg_time_ms = detail.rel_end_us.avg() / 1000.0;
-  const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum();
-  const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum();
-  const int64_t times_called = detail.times_called / num_runs();
-
-  std::stringstream stream;
-  InitField(stream, 24) << detail.type;
-  InitField(stream, 9) << start_ms;
-  InitField(stream, 9) << first_time_ms;
-  InitField(stream, 9) << avg_time_ms;
-  InitField(stream, 7) << percentage << "%";
-  InitField(stream, 7) << cdf_percentage << "%";
-  InitField(stream, 10) << detail.mem_used.newest() / 1000.0;
-  InitField(stream, 9) << times_called;
-  stream << "\t" << detail.name;
-
-  return stream.str();
-}
-
-void StatsCalculator::OrderNodesByMetric(
-    SortingMetric metric, std::vector<const Detail*>* details) const {
-  std::priority_queue<std::pair<std::string, const Detail*>> sorted_list;
-  const int num_nodes = details_.size();
-
-  for (const auto& det : details_) {
-    const Detail* detail = &(det.second);
-    std::stringstream stream;
-    stream << std::setw(20) << std::right << std::setprecision(10)
-           << std::fixed;
-
-    switch (metric) {
-      case BY_NAME:
-        stream << detail->name;
-        break;
-      case BY_RUN_ORDER:
-        stream << num_nodes - detail->run_order;
-        break;
-      case BY_TIME:
-        stream << detail->rel_end_us.avg();
-        break;
-      case BY_MEMORY:
-        stream << detail->mem_used.avg();
-        break;
-      case BY_TYPE:
-        stream << detail->type;
-        break;
-      default:
-        stream << "";
-        break;
-    }
-
-    sorted_list.emplace(stream.str(), detail);
-  }
-
-  while (!sorted_list.empty()) {
-    auto entry = sorted_list.top();
-    sorted_list.pop();
-    details->push_back(entry.second);
-  }
-}
-
-void StatsCalculator::ComputeStatsByType(
-    std::map<std::string, int64_t>* node_type_map_count,
-    std::map<std::string, int64_t>* node_type_map_time,
-    std::map<std::string, int64_t>* node_type_map_memory,
-    std::map<std::string, int64_t>* node_type_map_times_called,
-    int64_t* accumulated_us) const {
-  int64_t run_count = run_total_us_.count();
-
-  for (const auto& det : details_) {
-    const std::string node_name = det.first;
-    const Detail& detail = det.second;
-
-    int64_t curr_time_val =
-        static_cast<int64_t>(detail.rel_end_us.sum() / run_count);
-    *accumulated_us += curr_time_val;
-
-    int64_t curr_memory_val = detail.mem_used.newest();
-
-    const std::string& node_type = detail.type;
-
-    const std::string sharp1("#1");
-    bool first = std::mismatch(sharp1.rbegin(), sharp1.rend(), node_name.rbegin()).first == sharp1.rend();
-
-    if (first) {
-      (*node_type_map_count)[node_type] += 1;
-      (*node_type_map_times_called)[node_type] += detail.times_called / run_count;
-    }
-    (*node_type_map_time)[node_type] += curr_time_val;
-    (*node_type_map_memory)[node_type] += curr_memory_val;
-  }
-}
-
-std::string StatsCalculator::GetStatsByNodeType() const {
-  std::stringstream stream;
-
-  stream << "Number of nodes executed: " << details_.size() << std::endl;
-
-  stream << "============================== Summary by node type "
-            "=============================="
-         << std::endl;
-
-  std::map<std::string, int64_t> node_type_map_count;
-  std::map<std::string, int64_t> node_type_map_time;
-  std::map<std::string, int64_t> node_type_map_memory;
-  std::map<std::string, int64_t> node_type_map_times_called;
-  int64_t accumulated_us = 0;
-
-  ComputeStatsByType(&node_type_map_count, &node_type_map_time,
-                     &node_type_map_memory, &node_type_map_times_called,
-                     &accumulated_us);
-
-  // Sort them.
-  std::priority_queue<std::pair<int64_t, std::pair<std::string, int64_t>>>
-      timings;
-  for (const auto& node_type : node_type_map_time) {
-    const int64_t mem_used = node_type_map_memory[node_type.first];
-    timings.emplace(node_type.second,
-                    std::pair<std::string, int64_t>(node_type.first, mem_used));
-  }
-
-  InitField(stream, 24) << "[Node type]";
-  InitField(stream, 9) << "[count]";
-  InitField(stream, 10) << "[avg ms]";
-  InitField(stream, 11) << "[avg %]";
-  InitField(stream, 11) << "[cdf %]";
-  InitField(stream, 10) << "[mem KB]";
-  InitField(stream, 10) << "[times called]";
-  stream << std::endl;
-
-  float cdf = 0.0f;
-  while (!timings.empty()) {
-    auto entry = timings.top();
-    timings.pop();
-
-    const std::string node_type = entry.second.first;
-    const float memory = entry.second.second / 1000.0f;
-
-    const int64_t node_type_total_us = entry.first;
-    const float time_per_run_ms = node_type_total_us / 1000.0f;
-
-    const float percentage =
-        ((entry.first / static_cast<float>(accumulated_us)) * 100.0f);
-    cdf += percentage;
-
-    InitField(stream, 24) << node_type;
-    InitField(stream, 9) << node_type_map_count[node_type];
-    InitField(stream, 10) << time_per_run_ms;
-    InitField(stream, 10) << percentage << "%";
-    InitField(stream, 10) << cdf << "%";
-    InitField(stream, 10) << memory;
-    InitField(stream, 9) << node_type_map_times_called[node_type];
-    stream << std::endl;
-  }
-  stream << std::endl;
-  return stream.str();
-}
-
-std::string StatsCalculator::GetStatsByMetric(const std::string& title,
-                                              SortingMetric sorting_metric,
-                                              int num_stats) const {
-  std::vector<const Detail*> details;
-  OrderNodesByMetric(sorting_metric, &details);
-
-  double cumulative_stat_on_node = 0;
-
-  std::stringstream stream;
-  stream << HeaderString(title) << std::endl;
-  int stat_num = 0;
-  for (auto detail : details) {
-    ++stat_num;
-    if (num_stats > 0 && stat_num > num_stats) {
-      break;
-    }
-
-    // TODO(andrewharp): Make this keep track of the particular metric for cdf.
-    cumulative_stat_on_node += detail->rel_end_us.sum();
-    stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_)
-           << std::endl;
-  }
-  stream << std::endl;
-  return stream.str();
-}
-
-std::string StatsCalculator::GetOutputString() const {
-  std::stringstream stream;
-  if (options_.show_run_order) {
-    stream << GetStatsByMetric("Run Order", BY_RUN_ORDER,
-                               options_.run_order_limit);
-  }
-  if (options_.show_time) {
-    stream << GetStatsByMetric("Top by Computation Time", BY_TIME,
-                               options_.time_limit);
-  }
-  if (options_.show_memory) {
-    stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY,
-                               options_.memory_limit);
-  }
-  if (options_.show_type) {
-    stream << GetStatsByNodeType();
-  }
-  if (options_.show_summary) {
-    stream << GetShortSummary() << std::endl;
-  }
-  return stream.str();
-}
-
-void StatsCalculator::AddNodeStats(const std::string& name,
-                                   const std::string& type, int64_t run_order,
-                                   int64_t start_us, int64_t rel_end_us,
-                                   int64_t mem_used) {
-  Detail* detail = nullptr;
-  if (details_.find(name) == details_.end()) {
-    details_.insert({name, {}});
-    detail = &details_.at(name);
-    detail->type = type;
-    detail->name = name;
-    detail->run_order = run_order;
-  } else {
-    detail = &details_.at(name);
-  }
-  detail->start_us.UpdateStat(start_us);
-  detail->rel_end_us.UpdateStat(rel_end_us);
-  detail->mem_used.UpdateStat(mem_used);
-  detail->times_called++;
-}
-
-}  // namespace tensorflow
index b7422ed..383a4e4 100644 (file)
 
 #include <tflite/Assert.h>
 #include <tflite/InterpreterSession.h>
-#include <tflite/interp/FlatBufferBuilder.h>
 
-#include <iostream>
 #include <fstream>
+#include <iostream>
 #include <memory>
+#include <string>
 
 const int RUN_FAILED = 1;
 
-using namespace tflite;
 using namespace nnfw::tflite;
 
 const int FILE_ERROR = 2;
@@ -46,7 +45,7 @@ const int FILE_ERROR = 2;
   }
 
 // Read vector of floats from selected file
-void readData(const string &path, std::vector<uint8_t> &dest)
+void readData(const std::string &path, std::vector<uint8_t> &dest)
 {
   std::ifstream in(path);
   if (!in.good())
@@ -119,18 +118,22 @@ inline size_t sizeOfNnfwType(NNFW_TYPE type)
 }
 
 template <typename T>
-bool compareBuffersExact(const T *ref_buf, const std::vector<uint8_t> &act_buf, uint32_t index)
+bool isClose(const T *ref_buf, const std::vector<uint8_t> &act_buf, uint32_t index)
 {
+  // TODO better way for handling quant error?
+  auto tolerance = static_cast<uint64_t>(nnfw::misc::EnvVar("TOLERANCE").asInt(0));
   bool match = true;
+
   for (uint32_t e = 0; e < act_buf.size() / sizeof(T); e++)
   {
     T ref = ref_buf[e];
     T act = reinterpret_cast<const T *>(act_buf.data())[e];
+    uint64_t diff = static_cast<uint64_t>(((ref > act) ? (ref - act) : (act - ref)));
 
-    if (ref != act)
+    if (ref != act && diff > tolerance)
     {
       std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref
-                << ", act: " << act << std::endl;
+                << ", act: " << act << " (diff: " << diff << ")" << std::endl;
       match = false;
     }
   }
@@ -138,8 +141,34 @@ bool compareBuffersExact(const T *ref_buf, const std::vector<uint8_t> &act_buf,
   return match;
 }
 
-bool compareBuffersExactBool(const uint8_t *ref_buf, const std::vector<uint8_t> &act_buf,
-                             uint32_t index)
+template <>
+bool isClose<float>(const float *ref_buf, const std::vector<uint8_t> &act_buf, uint32_t index)
+{
+  uint32_t tolerance = nnfw::misc::EnvVar("TOLERANCE").asInt(1);
+  bool match = true;
+
+  for (uint32_t e = 0; e < act_buf.size() / sizeof(float); e++)
+  {
+    float ref = ref_buf[e];
+    float act = reinterpret_cast<const float *>(act_buf.data())[e];
+    float diff = std::fabs(ref - act);
+
+    bool match_elem = nnfw::misc::fp32::absolute_epsilon_equal(ref, act)
+                        ? true
+                        : nnfw::misc::fp32::epsilon_equal(ref, act, tolerance);
+
+    if (!match_elem)
+    {
+      std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref
+                << ", act: " << act << " (diff: " << diff << ")" << std::endl;
+      match = false;
+    }
+  }
+
+  return match;
+}
+
+bool exact(const uint8_t *ref_buf, const std::vector<uint8_t> &act_buf, uint32_t index)
 {
   bool match = true;
   for (uint32_t e = 0; e < act_buf.size() / sizeof(uint8_t); e++)
@@ -288,29 +317,18 @@ int main(const int argc, char **argv)
   // Compare with tflite
   std::cout << "[Comparison] Stage start!" << std::endl;
   // Read tflite model
-  StderrReporter error_reporter;
-  auto model = FlatBufferModel::BuildFromFile(tflite_file.c_str(), &error_reporter);
-  auto builder = FlatBufferBuilder(*model);
+  auto model = TfLiteModelCreateFromFile(tflite_file.c_str());
+  auto options = TfLiteInterpreterOptionsCreate();
+  TfLiteInterpreterOptionsSetNumThreads(options, nnfw::misc::EnvVar("THREAD").asInt(1));
+  auto interpreter = TfLiteInterpreterCreate(model, options);
 
-  std::unique_ptr<Interpreter> interpreter;
-  try
-  {
-    interpreter = builder.build();
-  }
-  catch (const std::exception &e)
-  {
-    std::cerr << e.what() << std::endl;
-    exit(FILE_ERROR);
-  }
-  interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(1));
-
-  auto sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get());
+  auto sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter);
   sess->prepare();
   // Set input and run
   for (uint32_t i = 0; i < num_inputs; i++)
   {
-    auto input_tensor = interpreter->tensor(interpreter->inputs().at(i));
-    memcpy(input_tensor->data.uint8, inputs[i].data(), inputs[i].size());
+    auto input_tensor = TfLiteInterpreterGetInputTensor(interpreter, i);
+    memcpy(TfLiteTensorData(input_tensor), inputs[i].data(), inputs[i].size());
   }
   if (!sess->run())
   {
@@ -319,10 +337,7 @@ int main(const int argc, char **argv)
   }
   std::cout << "[Comparison] TFLite run done!" << std::endl;
 
-  // Calculate max difference over all outputs
-  float max_float_difference = 0.0f;
   bool find_unmatched_output = false;
-  auto tolerance = nnfw::misc::EnvVar("TOLERANCE").asInt(1);
 
   for (uint32_t out_idx = 0; out_idx < num_outputs; out_idx++)
   {
@@ -331,41 +346,30 @@ int main(const int argc, char **argv)
 
     bool matched = true;
     // Check output tensor values
-
-    const auto &ref_output = interpreter->tensor(interpreter->outputs().at(out_idx))->data;
+    auto output_tensor = TfLiteInterpreterGetOutputTensor(interpreter, out_idx);
+    auto ref_output = TfLiteTensorData(output_tensor);
     const auto &output = outputs[out_idx];
 
     switch (ti.dtype)
     {
       case NNFW_TYPE_TENSOR_BOOL:
-        matched = compareBuffersExactBool(ref_output.uint8, output, out_idx);
+        matched = exact(reinterpret_cast<uint8_t *>(ref_output), output, out_idx);
         break;
       case NNFW_TYPE_TENSOR_UINT8:
       case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
-        matched = compareBuffersExact<uint8_t>(ref_output.uint8, output, out_idx);
+        matched = isClose<uint8_t>(reinterpret_cast<uint8_t *>(ref_output), output, out_idx);
         break;
       case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
-        matched = compareBuffersExact<int8_t>(ref_output.int8, output, out_idx);
+        matched = isClose<int8_t>(reinterpret_cast<int8_t *>(ref_output), output, out_idx);
         break;
       case NNFW_TYPE_TENSOR_INT32:
-        matched = compareBuffersExact<int32_t>(ref_output.i32, output, out_idx);
+        matched = isClose<int32_t>(reinterpret_cast<int32_t *>(ref_output), output, out_idx);
         break;
       case NNFW_TYPE_TENSOR_FLOAT32:
-        // TODO better way for handling FP error?
-        for (uint32_t e = 0; e < num_elems(&ti); e++)
-        {
-          float refval = ref_output.f[e];
-          float val = reinterpret_cast<const float *>(output.data())[e];
-          if (std::abs(refval - val) > max_float_difference)
-            max_float_difference = std::abs(refval - val);
-
-          matched = nnfw::misc::fp32::absolute_epsilon_equal(refval, val)
-                      ? true
-                      : nnfw::misc::fp32::epsilon_equal(refval, val, tolerance);
-        }
+        matched = isClose<float>(reinterpret_cast<float *>(ref_output), output, out_idx);
         break;
       case NNFW_TYPE_TENSOR_INT64:
-        matched = compareBuffersExact<int64_t>(ref_output.i64, output, out_idx);
+        matched = isClose<int64_t>(reinterpret_cast<int64_t *>(ref_output), output, out_idx);
         break;
       default:
         throw std::runtime_error{"Invalid tensor type"};
@@ -376,7 +380,6 @@ int main(const int argc, char **argv)
   }
 
   // Print results
-  std::cout << "[Comparison] Max float difference: " << max_float_difference << std::endl;
   int ret = 0;
   if (find_unmatched_output)
   {
index 3f30d3e..bbe1992 100644 (file)
@@ -32,4 +32,4 @@ add_executable(tflite_test src/tflite_test.cc)
 ## Link test executable against gtest & gtest_main
 target_link_libraries(tflite_test gtest gtest_main ${LIB_PTHREAD})
 ## install test binary for packaging
-install(TARGETS tflite_test DESTINATION unittest_standalone)
+install(TARGETS tflite_test DESTINATION unittest)
index 4ccd4e1..86d37de 100644 (file)
@@ -20,7 +20,7 @@
 #include <iostream>
 #include <cstring>
 
-#include "tensorflow/lite/interpreter.h"
+#include <tensorflow/lite/c/c_api.h>
 
 namespace TFLiteRun
 {
@@ -30,16 +30,31 @@ TensorDumper::TensorDumper()
   // DO NOTHING
 }
 
-void TensorDumper::addTensors(tflite::Interpreter &interpreter, const std::vector<int> &indices)
+void TensorDumper::addInputTensors(TfLiteInterpreter &interpreter)
 {
-  for (const auto &o : indices)
+  auto const input_count = TfLiteInterpreterGetInputTensorCount(&interpreter);
+  for (int32_t idx = 0; idx < input_count; idx++)
   {
-    const TfLiteTensor *tensor = interpreter.tensor(o);
-    int size = tensor->bytes;
+    const TfLiteTensor *tensor = TfLiteInterpreterGetInputTensor(&interpreter, idx);
+    auto size = TfLiteTensorByteSize(tensor);
     std::vector<char> buffer;
     buffer.resize(size);
-    memcpy(buffer.data(), tensor->data.raw, size);
-    _tensors.emplace_back(o, std::move(buffer));
+    memcpy(buffer.data(), TfLiteTensorData(tensor), size);
+    _input_tensors.emplace_back(idx, std::move(buffer));
+  }
+}
+
+void TensorDumper::addOutputTensors(TfLiteInterpreter &interpreter)
+{
+  auto const output_count = TfLiteInterpreterGetOutputTensorCount(&interpreter);
+  for (int32_t idx = 0; idx < output_count; idx++)
+  {
+    const TfLiteTensor *tensor = TfLiteInterpreterGetOutputTensor(&interpreter, idx);
+    auto size = TfLiteTensorByteSize(tensor);
+    std::vector<char> buffer;
+    buffer.resize(size);
+    memcpy(buffer.data(), TfLiteTensorData(tensor), size);
+    _output_tensors.emplace_back(idx, std::move(buffer));
   }
 }
 
@@ -49,17 +64,30 @@ void TensorDumper::dump(const std::string &filename) const
   std::ofstream file(filename, std::ios::out | std::ios::binary);
 
   // Write number of tensors
-  uint32_t num_tensors = static_cast<uint32_t>(_tensors.size());
+  uint32_t num_tensors =
+    static_cast<uint32_t>(_input_tensors.size()) + static_cast<uint32_t>(_output_tensors.size());
   file.write(reinterpret_cast<const char *>(&num_tensors), sizeof(num_tensors));
 
-  // Write tensor indices
-  for (const auto &t : _tensors)
+  // Write input tensor indices
+  for (const auto &t : _input_tensors)
   {
     file.write(reinterpret_cast<const char *>(&t._index), sizeof(int));
   }
 
-  // Write data
-  for (const auto &t : _tensors)
+  // Write output tensor indices
+  for (const auto &t : _output_tensors)
+  {
+    file.write(reinterpret_cast<const char *>(&t._index), sizeof(int));
+  }
+
+  // Write input data
+  for (const auto &t : _input_tensors)
+  {
+    file.write(t._data.data(), t._data.size());
+  }
+
+  // Write output data
+  for (const auto &t : _output_tensors)
   {
     file.write(t._data.data(), t._data.size());
   }
index 5fdcc54..5847c39 100644 (file)
@@ -17,6 +17,8 @@
 #ifndef __TFLITE_RUN_TENSOR_DUMPER_H__
 #define __TFLITE_RUN_TENSOR_DUMPER_H__
 
+#include <tensorflow/lite/c/c_api.h>
+
 #include <memory>
 #include <string>
 #include <vector>
@@ -42,11 +44,13 @@ private:
 
 public:
   TensorDumper();
-  void addTensors(tflite::Interpreter &interpreter, const std::vector<int> &indices);
+  void addInputTensors(TfLiteInterpreter &interpreter);
+  void addOutputTensors(TfLiteInterpreter &interpreter);
   void dump(const std::string &filename) const;
 
 private:
-  std::vector<Tensor> _tensors;
+  std::vector<Tensor> _input_tensors;
+  std::vector<Tensor> _output_tensors;
 };
 
 } // end of namespace TFLiteRun
index a1a9433..ebd6447 100644 (file)
@@ -18,6 +18,7 @@
 
 #include <assert.h>
 
+#include <cstring>
 #include <fstream>
 
 #include "misc/tensor/Shape.h"
@@ -25,7 +26,7 @@
 namespace TFLiteRun
 {
 
-TensorLoader::TensorLoader(tflite::Interpreter &interpreter)
+TensorLoader::TensorLoader(TfLiteInterpreter &interpreter)
   : _interpreter(interpreter), _raw_data(nullptr)
 {
 }
@@ -42,21 +43,20 @@ void TensorLoader::loadDumpedTensors(const std::string &filename)
 
   int tensor_indices_raw[num_tensors];
   file.read(reinterpret_cast<char *>(tensor_indices_raw), sizeof(tensor_indices_raw));
-  std::vector<int> tensor_indices(tensor_indices_raw, tensor_indices_raw + num_tensors);
 
   _raw_data = std::unique_ptr<float[]>(new float[file_size]);
   file.read(reinterpret_cast<char *>(_raw_data.get()), file_size);
   file.close();
 
-  size_t read_bytes = loadTensorsFromRawData(tensor_indices);
+  size_t read_bytes = loadInputTensorsFromRawData();
+  read_bytes += loadOutputTensorsFromRawData();
 
   // The file size and total output tensor size must match
   assert(file_size ==
          sizeof(num_tensors) + sizeof(tensor_indices_raw) + read_bytes * sizeof(float));
 }
 
-void TensorLoader::loadRawTensors(const std::string &filename,
-                                  const std::vector<int> &tensor_indices)
+void TensorLoader::loadRawInputTensors(const std::string &filename)
 {
   // TODO Handle file open/read error
   std::ifstream file(filename, std::ios::ate | std::ios::binary);
@@ -67,41 +67,74 @@ void TensorLoader::loadRawTensors(const std::string &filename,
   file.read(reinterpret_cast<char *>(_raw_data.get()), file_size);
   file.close();
 
-  size_t read_bytes = loadTensorsFromRawData(tensor_indices);
+  size_t read_bytes = loadInputTensorsFromRawData();
 
   // The file size and total output tensor size must match
   assert(file_size == read_bytes * sizeof(float));
 }
 
-size_t TensorLoader::loadTensorsFromRawData(const std::vector<int> &tensor_indices)
+size_t TensorLoader::loadInputTensorsFromRawData()
 {
   size_t offset = 0;
-  for (const auto &o : tensor_indices)
+  auto const input_count = TfLiteInterpreterGetInputTensorCount(&_interpreter);
+  for (auto idx = 0; idx < input_count; idx++)
   {
-    const TfLiteTensor *tensor = _interpreter.tensor(o);
+    const TfLiteTensor *tensor = TfLiteInterpreterGetInputTensor(&_interpreter, idx);
 
     // Convert tensor shape to `Shape` from `tensor->dims`
-    nnfw::misc::tensor::Shape shape(static_cast<size_t>(tensor->dims->size));
-    for (int d = 0; d < tensor->dims->size; d++)
+    nnfw::misc::tensor::Shape shape(TfLiteTensorNumDims(tensor));
+    for (int32_t d = 0; d < TfLiteTensorNumDims(tensor); d++)
     {
-      shape.dim(d) = tensor->dims->data[d];
+      shape.dim(d) = TfLiteTensorDim(tensor, d);
     }
 
     float *base = _raw_data.get() + offset;
 
-    assert(tensor->bytes % sizeof(float) == 0);
-    offset += (tensor->bytes / sizeof(float));
+    assert(TfLiteTensorByteSize(tensor) % sizeof(float) == 0);
+    offset += (TfLiteTensorByteSize(tensor) / sizeof(float));
 
-    _tensor_map.insert(std::make_pair(o, nnfw::tflite::TensorView<float>(shape, base)));
+    _input_tensor_map.emplace(idx, nnfw::tflite::TensorView<float>(shape, base));
+
+    memcpy(TfLiteTensorData(tensor), reinterpret_cast<const void *>(base),
+           TfLiteTensorByteSize(tensor));
+  }
+
+  return offset;
+}
+
+size_t TensorLoader::loadOutputTensorsFromRawData()
+{
+  size_t offset = 0;
+  auto const output_count = TfLiteInterpreterGetOutputTensorCount(&_interpreter);
+  for (auto idx = 0; idx < output_count; idx++)
+  {
+    const TfLiteTensor *tensor = TfLiteInterpreterGetOutputTensor(&_interpreter, idx);
+
+    // Convert tensor shape to `Shape` from `tensor->dims`
+    nnfw::misc::tensor::Shape shape(TfLiteTensorNumDims(tensor));
+    for (int32_t d = 0; d < TfLiteTensorNumDims(tensor); d++)
+    {
+      shape.dim(d) = TfLiteTensorDim(tensor, d);
+    }
+
+    float *base = _raw_data.get() + offset;
+
+    assert(TfLiteTensorByteSize(tensor) % sizeof(float) == 0);
+    offset += (TfLiteTensorByteSize(tensor) / sizeof(float));
+
+    _output_tensor_map.emplace(idx, nnfw::tflite::TensorView<float>(shape, base));
+
+    memcpy(TfLiteTensorData(tensor), reinterpret_cast<const void *>(base),
+           TfLiteTensorByteSize(tensor));
   }
 
   return offset;
 }
 
-const nnfw::tflite::TensorView<float> &TensorLoader::get(int tensor_idx) const
+const nnfw::tflite::TensorView<float> &TensorLoader::getOutput(int tensor_idx) const
 {
-  auto found = _tensor_map.find(tensor_idx);
-  assert(found != _tensor_map.end());
+  auto found = _output_tensor_map.find(tensor_idx);
+  assert(found != _output_tensor_map.end());
   return found->second;
 }
 
index ef51e0f..b9e6b72 100644 (file)
 #ifndef __TFLITE_RUN_TENSOR_LOADER_H__
 #define __TFLITE_RUN_TENSOR_LOADER_H__
 
+#include "tflite/TensorView.h"
+
 #include <sys/mman.h>
 
+#include <memory>
 #include <string>
 #include <unordered_map>
 
-#include "tflite/TensorView.h"
-
 namespace tflite
 {
 class Interpreter;
@@ -35,17 +36,18 @@ namespace TFLiteRun
 class TensorLoader
 {
 public:
-  TensorLoader(tflite::Interpreter &interpreter);
+  TensorLoader(TfLiteInterpreter &interpreter);
   void loadDumpedTensors(const std::string &filename);
-  void loadRawTensors(const std::string &filename, const std::vector<int> &tensor_indices);
-  const nnfw::tflite::TensorView<float> &get(int tensor_idx) const;
-  size_t getNums() const { return _tensor_map.size(); }
+  void loadRawInputTensors(const std::string &filename);
+  const nnfw::tflite::TensorView<float> &getOutput(int tensor_idx) const;
 
 private:
-  size_t loadTensorsFromRawData(const std::vector<int> &tensor_indices);
-  tflite::Interpreter &_interpreter;
+  size_t loadInputTensorsFromRawData();
+  size_t loadOutputTensorsFromRawData();
+  TfLiteInterpreter &_interpreter;
   std::unique_ptr<float[]> _raw_data;
-  std::unordered_map<int, nnfw::tflite::TensorView<float>> _tensor_map;
+  std::unordered_map<int, nnfw::tflite::TensorView<float>> _input_tensor_map;
+  std::unordered_map<int, nnfw::tflite::TensorView<float>> _output_tensor_map;
 };
 
 } // end of namespace TFLiteRun
index 55d2a1b..a1e3d2e 100644 (file)
@@ -14,9 +14,6 @@
  * limitations under the License.
  */
 
-#include "tensorflow/lite/kernels/register.h"
-#include "tensorflow/lite/model.h"
-
 #include "args.h"
 #include "tensor_dumper.h"
 #include "tensor_loader.h"
@@ -28,7 +25,6 @@
 #include "tflite/Session.h"
 #include "tflite/RandomInputInitializer.h"
 #include "tflite/InterpreterSession.h"
-#include "tflite/NNAPISession.h"
 #include "misc/tensor/IndexIterator.h"
 #include "misc/tensor/Object.h"
 #include "benchmark.h"
@@ -55,31 +51,10 @@ void print_max_idx(float *f, int size)
 
 static const char *default_backend_cand = "tflite_cpu";
 
-// Verifies whether the model is a flatbuffer file.
-class BMFlatBufferVerifier : public tflite::TfLiteVerifier
-{
-public:
-  bool Verify(const char *data, int length, tflite::ErrorReporter *reporter) override
-  {
-
-    flatbuffers::Verifier verifier(reinterpret_cast<const uint8_t *>(data), length);
-    if (!tflite::VerifyModelBuffer(verifier))
-    {
-      reporter->Report("The model is not a valid Flatbuffer file");
-      return false;
-    }
-    return true;
-  }
-};
-
 } // namespace
 
 int main(const int argc, char **argv)
 {
-  const bool use_nnapi = nnfw::misc::EnvVar("USE_NNAPI").asBool(false);
-
-  StderrReporter error_reporter;
-
   TFLiteRun::Args args(argc, argv);
 
   std::chrono::milliseconds t_model_load(0), t_prepare(0);
@@ -89,31 +64,12 @@ int main(const int argc, char **argv)
   benchmark::Phases phases(
     benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
 
-  std::unique_ptr<FlatBufferModel> model;
-  std::unique_ptr<Interpreter> interpreter;
-  std::unique_ptr<tflite::TfLiteVerifier> verifier{new BMFlatBufferVerifier};
+  TfLiteModel *model = nullptr;
 
   try
   {
     phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
-      if (args.getModelValidate())
-      {
-        model = FlatBufferModel::VerifyAndBuildFromFile(args.getTFLiteFilename().c_str(),
-                                                        verifier.get(), &error_reporter);
-      }
-      else
-      {
-        model = FlatBufferModel::BuildFromFile(args.getTFLiteFilename().c_str(), &error_reporter);
-      }
-      if (model == nullptr)
-      {
-        throw std::runtime_error{"Cannot create model"};
-      }
-
-      tflite::ops::builtin::BuiltinOpResolver resolver;
-      InterpreterBuilder builder(*model, resolver);
-      TFLITE_ENSURE(builder(&interpreter))
-      interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(1));
+      model = TfLiteModelCreateFromFile(args.getTFLiteFilename().c_str());
     });
   }
   catch (const std::exception &e)
@@ -122,17 +78,16 @@ int main(const int argc, char **argv)
     return 1;
   }
 
-  std::shared_ptr<nnfw::tflite::Session> sess;
-
-  if (use_nnapi)
-  {
-    sess = std::make_shared<nnfw::tflite::NNAPISession>(interpreter.get());
-  }
-  else
+  if (model == nullptr)
   {
-    sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get());
+    throw std::runtime_error{"Cannot create model"};
   }
 
+  auto options = TfLiteInterpreterOptionsCreate();
+  TfLiteInterpreterOptionsSetNumThreads(options, nnfw::misc::EnvVar("THREAD").asInt(1));
+
+  TfLiteInterpreter *interpreter = TfLiteInterpreterCreate(model, options);
+  auto sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter);
   try
   {
     phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) { sess->prepare(); });
@@ -145,27 +100,28 @@ int main(const int argc, char **argv)
 
   if (args.getInputShapes().size() != 0)
   {
-    const int dim_values = args.getInputShapes().size();
-    int offset = 0;
+    const auto dim_values = args.getInputShapes().size();
+    int32_t offset = 0;
 
-    for (const auto &id : interpreter->inputs())
+    auto const input_count = TfLiteInterpreterGetInputTensorCount(interpreter);
+    for (int32_t id = 0; id < input_count; id++)
     {
-      TfLiteTensor *tensor = interpreter->tensor(id);
+      TfLiteTensor *tensor = TfLiteInterpreterGetInputTensor(interpreter, id);
       std::vector<int32_t> new_dim;
-      new_dim.resize(tensor->dims->size);
+      new_dim.resize(TfLiteTensorNumDims(tensor));
 
-      for (uint32_t axis = 0; axis < tensor->dims->size; axis++, offset++)
+      for (int32_t axis = 0; axis < TfLiteTensorNumDims(tensor); axis++, offset++)
       {
         new_dim[axis] =
-          ((offset < dim_values) ? args.getInputShapes()[offset] : tensor->dims->data[axis]);
+          ((offset < dim_values) ? args.getInputShapes()[offset] : TfLiteTensorDim(tensor, axis));
       }
 
-      interpreter->ResizeInputTensor(id, new_dim);
+      TfLiteInterpreterResizeInputTensor(interpreter, id, new_dim.data(), new_dim.size());
 
       if (offset >= dim_values)
         break;
     }
-    interpreter->AllocateTensors();
+    TfLiteInterpreterAllocateTensors(interpreter);
   }
 
   TFLiteRun::TensorLoader tensor_loader(*interpreter);
@@ -176,21 +132,12 @@ int main(const int argc, char **argv)
   {
     if (!args.getInputFilename().empty())
     {
-      tensor_loader.loadRawTensors(args.getInputFilename(), interpreter->inputs());
+      tensor_loader.loadRawInputTensors(args.getInputFilename());
     }
     else
     {
       tensor_loader.loadDumpedTensors(args.getCompareFilename());
     }
-
-    for (const auto &o : interpreter->inputs())
-    {
-      const auto &tensor_view = tensor_loader.get(o);
-      TfLiteTensor *tensor = interpreter->tensor(o);
-
-      memcpy(reinterpret_cast<void *>(tensor->data.f),
-             reinterpret_cast<const void *>(tensor_view._base), tensor->bytes);
-    }
   }
   else
   {
@@ -198,17 +145,18 @@ int main(const int argc, char **argv)
     nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
 
     RandomInputInitializer initializer{randgen};
-    initializer.run(*(interpreter.get()));
+    initializer.run(*interpreter);
   }
 
   TFLiteRun::TensorDumper tensor_dumper;
   // Must be called before `interpreter->Invoke()`
-  tensor_dumper.addTensors(*interpreter, interpreter->inputs());
+  tensor_dumper.addInputTensors(*interpreter);
 
   std::cout << "input tensor indices = [";
-  for (const auto &o : interpreter->inputs())
+  auto const input_count = TfLiteInterpreterGetInputTensorCount(interpreter);
+  for (int32_t idx = 0; idx < input_count; idx++)
   {
-    std::cout << o << ",";
+    std::cout << idx << ",";
   }
   std::cout << "]" << std::endl;
 
@@ -243,14 +191,15 @@ int main(const int argc, char **argv)
   sess->teardown();
 
   // Must be called after `interpreter->Invoke()`
-  tensor_dumper.addTensors(*interpreter, interpreter->outputs());
+  tensor_dumper.addOutputTensors(*interpreter);
 
   std::cout << "output tensor indices = [";
-  for (const auto &o : interpreter->outputs())
+  auto const output_count = TfLiteInterpreterGetOutputTensorCount(interpreter);
+  for (int32_t idx = 0; idx < output_count; idx++)
   {
-    std::cout << o << "(";
-
-    print_max_idx(interpreter->tensor(o)->data.f, interpreter->tensor(o)->bytes / sizeof(float));
+    std::cout << idx << "(";
+
+    auto tensor = TfLiteInterpreterGetOutputTensor(interpreter, idx);
+    print_max_idx(reinterpret_cast<float *>(TfLiteTensorData(tensor)),
+                  TfLiteTensorByteSize(tensor) / sizeof(float));
 
     std::cout << "),";
   }
@@ -314,12 +263,13 @@ int main(const int argc, char **argv)
     TfLiteInterpMatchApp app(comparator);
     bool res = true;
 
-    for (const auto &o : interpreter->outputs())
+    for (int32_t idx = 0; idx < output_count; idx++)
     {
-      auto expected = tensor_loader.get(o);
-      auto obtained = nnfw::tflite::TensorView<float>::make(*interpreter, o);
+      auto expected = tensor_loader.getOutput(idx);
+      auto const tensor = TfLiteInterpreterGetOutputTensor(interpreter, idx);
+      auto obtained = nnfw::tflite::TensorView<float>::make(tensor);
 
-      res = res && app.compareSingleTensorView(expected, obtained, o);
+      res = res && app.compareSingleTensorView(expected, obtained, idx);
     }
 
     if (!res)
diff --git a/tests/tools/tflite_vanilla_run/CMakeLists.txt b/tests/tools/tflite_vanilla_run/CMakeLists.txt
deleted file mode 100644 (file)
index 115b2f3..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-if(NOT BUILD_TFLITE_VANILLA_RUN)
-  return()
-endif()
-
-if(NOT BUILD_TENSORFLOW_LITE_2_8_0)
-  set(BUILD_TENSORFLOW_LITE_2_8_0 ON)
-  set(BUILD_TENSORFLOWRUY ON)
-endif()
-
-nnfw_find_package(TensorFlowLite EXACT 2.8.0 REQUIRED)
-nnfw_find_package(Boost REQUIRED program_options)
-
-list(APPEND TFLITE_RUN_SRCS "src/tflite_vanilla_run.cc")
-list(APPEND TFLITE_RUN_SRCS "src/args.cc")
-
-add_executable(tflite_vanilla_run ${TFLITE_RUN_SRCS})
-target_include_directories(tflite_vanilla_run PRIVATE src)
-target_include_directories(tflite_vanilla_run PRIVATE ${Boost_INCLUDE_DIRS})
-
-target_link_libraries(tflite_vanilla_run tensorflow-lite-2.8.0 ${LIB_PTHREAD} dl)
-target_link_libraries(tflite_vanilla_run ${Boost_PROGRAM_OPTIONS_LIBRARY})
-target_link_libraries(tflite_vanilla_run nnfw_lib_benchmark nnfw_lib_misc)
-
-install(TARGETS tflite_vanilla_run DESTINATION bin)
diff --git a/tests/tools/tflite_vanilla_run/src/args.cc b/tests/tools/tflite_vanilla_run/src/args.cc
deleted file mode 100644 (file)
index dc9f250..0000000
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "args.h"
-
-#include <iostream>
-
-namespace TFLiteVanillaRun
-{
-
-Args::Args(const int argc, char **argv) noexcept
-{
-  try
-  {
-    Initialize();
-    Parse(argc, argv);
-  }
-  catch (const std::exception &e)
-  {
-    std::cerr << "error during paring args" << e.what() << '\n';
-    exit(1);
-  }
-}
-
-void Args::Initialize(void)
-{
-  try
-  {
-    // General options
-    po::options_description general("General options");
-
-    // clang-format off
-  general.add_options()
-    ("help,h", "Display available options")
-    ("input,i", po::value<std::string>()->default_value(""), "Input filename")
-    ("dump,d", po::value<std::string>()->default_value(""), "Output filename")
-    ("ishapes", po::value<std::vector<int>>()->multitoken(), "Input shapes")
-    ("compare,c", po::value<std::string>()->default_value(""), "filename to be compared with")
-    ("tflite", po::value<std::string>()->required())
-    ("num_runs,r", po::value<int>()->default_value(1), "The number of runs")
-    ("warmup_runs,w", po::value<int>()->default_value(0), "The number of warmup runs")
-    ("run_delay,t", po::value<int>()->default_value(-1), "Delay time(ms) between runs (as default no delay")
-    ("gpumem_poll,g", po::value<bool>()->default_value(false), "Check gpu memory polling separately")
-    ("mem_poll,m", po::value<bool>()->default_value(false), "Check memory polling")
-    ("write_report,p", po::value<bool>()->default_value(false), "Write report")
-    ("validate", po::value<bool>()->default_value(true), "Validate tflite model")
-    ("verbose_level,v", po::value<int>()->default_value(0), "Verbose level\n"
-         "0: prints the only result. Messages btw run don't print\n"
-         "1: prints result and message btw run\n"
-         "2: prints all of messages to print\n")
-    ;
-    // clang-format on
-
-    _options.add(general);
-    _positional.add("tflite", 1);
-  }
-  catch (const std::bad_cast &e)
-  {
-    std::cerr << "error by bad cast during initialization of boost::program_options" << e.what()
-              << '\n';
-    exit(1);
-  }
-}
-
-void Args::Parse(const int argc, char **argv)
-{
-  po::variables_map vm;
-  po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
-            vm);
-
-  {
-    auto conflicting_options = [&](const std::string &o1, const std::string &o2) {
-      if ((vm.count(o1) && !vm[o1].defaulted()) && (vm.count(o2) && !vm[o2].defaulted()))
-      {
-        throw boost::program_options::error(std::string("Two options '") + o1 + "' and '" + o2 +
-                                            "' cannot be given at once.");
-      }
-    };
-
-    conflicting_options("input", "compare");
-  }
-
-  if (vm.count("help"))
-  {
-    std::cout << "tflite_run\n\n";
-    std::cout << "Usage: " << argv[0] << " <.tflite> [<options>]\n\n";
-    std::cout << _options;
-    std::cout << "\n";
-
-    exit(0);
-  }
-
-  po::notify(vm);
-
-  if (vm.count("dump"))
-  {
-    _dump_filename = vm["dump"].as<std::string>();
-  }
-
-  if (vm.count("compare"))
-  {
-    _compare_filename = vm["compare"].as<std::string>();
-  }
-
-  if (vm.count("input"))
-  {
-    _input_filename = vm["input"].as<std::string>();
-
-    if (!_input_filename.empty())
-    {
-      if (access(_input_filename.c_str(), F_OK) == -1)
-      {
-        std::cerr << "input image file not found: " << _input_filename << "\n";
-      }
-    }
-  }
-
-  if (vm.count("ishapes"))
-  {
-    _input_shapes.resize(vm["ishapes"].as<std::vector<int>>().size());
-    for (auto i = 0; i < _input_shapes.size(); i++)
-    {
-      _input_shapes[i] = vm["ishapes"].as<std::vector<int>>()[i];
-    }
-  }
-
-  if (vm.count("tflite"))
-  {
-    _tflite_filename = vm["tflite"].as<std::string>();
-
-    if (_tflite_filename.empty())
-    {
-      // TODO Print usage instead of the below message
-      std::cerr << "Please specify tflite file. Run with `--help` for usage."
-                << "\n";
-
-      exit(1);
-    }
-    else
-    {
-      if (access(_tflite_filename.c_str(), F_OK) == -1)
-      {
-        std::cerr << "tflite file not found: " << _tflite_filename << "\n";
-        exit(1);
-      }
-    }
-  }
-
-  if (vm.count("num_runs"))
-  {
-    _num_runs = vm["num_runs"].as<int>();
-  }
-
-  if (vm.count("warmup_runs"))
-  {
-    _warmup_runs = vm["warmup_runs"].as<int>();
-  }
-
-  if (vm.count("run_delay"))
-  {
-    _run_delay = vm["run_delay"].as<int>();
-  }
-
-  if (vm.count("gpumem_poll"))
-  {
-    _gpumem_poll = vm["gpumem_poll"].as<bool>();
-  }
-
-  if (vm.count("mem_poll"))
-  {
-    _mem_poll = vm["mem_poll"].as<bool>();
-    // Instead of EXECUTE to avoid overhead, memory polling runs on WARMUP
-    if (_mem_poll && _warmup_runs == 0)
-    {
-      _warmup_runs = 1;
-    }
-  }
-
-  if (vm.count("write_report"))
-  {
-    _write_report = vm["write_report"].as<bool>();
-  }
-
-  if (vm.count("validate"))
-  {
-    _tflite_validate = vm["validate"].as<bool>();
-  }
-
-  if (vm.count("verbose_level"))
-  {
-    _verbose_level = vm["verbose_level"].as<int>();
-  }
-}
-
-} // end of namespace TFLiteVanillaRun
diff --git a/tests/tools/tflite_vanilla_run/src/args.h b/tests/tools/tflite_vanilla_run/src/args.h
deleted file mode 100644 (file)
index 3605b65..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __TFLITE_VANILLA_RUN_ARGS_H__
-#define __TFLITE_VANILLA_RUN_ARGS_H__
-
-#include <string>
-#include <boost/program_options.hpp>
-
-namespace po = boost::program_options;
-
-namespace TFLiteVanillaRun
-{
-
-class Args
-{
-public:
-  Args(const int argc, char **argv) noexcept;
-  void print(void);
-
-  const std::string &getTFLiteFilename(void) const { return _tflite_filename; }
-  const std::string &getDumpFilename(void) const { return _dump_filename; }
-  const std::string &getCompareFilename(void) const { return _compare_filename; }
-  const std::string &getInputFilename(void) const { return _input_filename; }
-  const std::vector<int> &getInputShapes(void) const { return _input_shapes; }
-  const int getNumRuns(void) const { return _num_runs; }
-  const int getWarmupRuns(void) const { return _warmup_runs; }
-  const int getRunDelay(void) const { return _run_delay; }
-  const bool getGpuMemoryPoll(void) const { return _gpumem_poll; }
-  const bool getMemoryPoll(void) const { return _mem_poll; }
-  const bool getWriteReport(void) const { return _write_report; }
-  const bool getModelValidate(void) const { return _tflite_validate; }
-  const int getVerboseLevel(void) const { return _verbose_level; }
-
-private:
-  void Initialize();
-  void Parse(const int argc, char **argv);
-
-private:
-  po::positional_options_description _positional;
-  po::options_description _options;
-
-  std::string _tflite_filename;
-  std::string _dump_filename;
-  std::string _compare_filename;
-  std::string _input_filename;
-  std::vector<int> _input_shapes;
-  int _num_runs;
-  int _warmup_runs;
-  int _run_delay;
-  bool _gpumem_poll;
-  bool _mem_poll;
-  bool _write_report;
-  bool _tflite_validate;
-  int _verbose_level;
-};
-
-} // end of namespace TFLiteVanillaRun
-
-#endif // __TFLITE_VANILLA_RUN_ARGS_H__
diff --git a/tests/tools/tflite_vanilla_run/src/tensor_view.h b/tests/tools/tflite_vanilla_run/src/tensor_view.h
deleted file mode 100644 (file)
index ca04a05..0000000
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file     TensorView.h
- * @brief    This file contains TensorView class
- * @ingroup  COM_AI_RUNTIME
- */
-
-#ifndef __TFLITE_VANILLA_RUN_TENSOR_VIEW_H__
-#define __TFLITE_VANILLA_RUN_TENSOR_VIEW_H__
-
-#include "tensorflow/lite/interpreter.h"
-
-#include "misc/tensor/Shape.h"
-#include "misc/tensor/Index.h"
-#include "misc/tensor/Reader.h"
-#include "misc/tensor/NonIncreasingStride.h"
-
-namespace TFLiteVanillaRun
-{
-
-/**
- * @brief Class to define TensorView which is inherited from nnfw::misc::tensor::Reader<T> class
- */
-template <typename T> class TensorView final : public nnfw::misc::tensor::Reader<T>
-{
-public:
-  /**
-   * @brief Construct a TensorView object with base and shape informations
-   * @param[in] shape The shape of a tensor
-   * @param[in] base The base address of a tensor
-   */
-  TensorView(const nnfw::misc::tensor::Shape &shape, T *base) : _shape{shape}, _base{base}
-  {
-    // Set 'stride'
-    _stride.init(_shape);
-  }
-
-public:
-  /**
-   * @brief Get shape of tensor
-   * @return Reference of shape
-   */
-  const nnfw::misc::tensor::Shape &shape(void) const { return _shape; }
-
-public:
-  /**
-   * @brief Get value of tensor index
-   * @param[in] index The tensor index
-   * @return The value at the index
-   */
-  T at(const nnfw::misc::tensor::Index &index) const override
-  {
-    const auto offset = _stride.offset(index);
-    return *(_base + offset);
-  }
-
-public:
-  /**
-   * @brief Get reference value of tensor index
-   * @param[in] index The tensor index
-   * @return The reference value at the index
-   */
-  T &at(const nnfw::misc::tensor::Index &index)
-  {
-    const auto offset = _stride.offset(index);
-    return *(_base + offset);
-  }
-
-private:
-  nnfw::misc::tensor::Shape _shape; /**< The tensor shape */
-
-public:
-  T *_base;                                        /**< The base address of tensor */
-  nnfw::misc::tensor::NonIncreasingStride _stride; /**< The NonIncreasingStride object */
-
-public:
-  // TODO Introduce Operand ID class
-  /**
-   * @brief Create TensorView object using given parameters
-   * @param[in] interp The TfLite interpreter
-   * @param[in] tensor_index The tensor index
-   * @return The new TensorView<T> object
-   */
-  static TensorView<T> make(::tflite::Interpreter &interp, int tensor_index)
-  {
-    auto tensor_ptr = interp.tensor(tensor_index);
-
-    // Set 'shape'
-    nnfw::misc::tensor::Shape shape(tensor_ptr->dims->size);
-
-    for (uint32_t axis = 0; axis < shape.rank(); ++axis)
-    {
-      shape.dim(axis) = tensor_ptr->dims->data[axis];
-    }
-
-    return TensorView<T>(shape, interp.typed_tensor<T>(tensor_index));
-  }
-};
-
-} // namespace TFLiteVanillaRun
-
-#endif // __TFLITE_VANILLA_RUN_TENSOR_VIEW_H__
diff --git a/tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc b/tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc
deleted file mode 100644 (file)
index 6194b45..0000000
+++ /dev/null
@@ -1,284 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/kernels/register.h"
-#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
-
-#include "args.h"
-#include "tensor_view.h"
-#include "misc/EnvVar.h"
-#include "misc/RandomGenerator.h"
-#include "misc/tensor/IndexIterator.h"
-#include "misc/tensor/Object.h"
-#include "benchmark.h"
-
-#include <iostream>
-#include <chrono>
-#include <algorithm>
-#include <vector>
-#include <memory>
-
-using namespace std::placeholders; // for _1, _2 ...
-
-#define TFLITE_ENSURE(exp)                                             \
-  {                                                                    \
-    const TfLiteStatus status = (exp);                                 \
-                                                                       \
-    if (status != kTfLiteOk)                                           \
-    {                                                                  \
-      std::ostringstream ss;                                           \
-      ss << #exp << " failed (" << __FILE__ << ":" << __LINE__ << ")"; \
-      throw std::runtime_error{ss.str()};                              \
-    }                                                                  \
-  }
-
-namespace
-{
-
-void print_max_idx(float *f, int size)
-{
-  float *p = std::max_element(f, f + size);
-  std::cout << "max:" << p - f;
-}
-
-static const char *default_backend_cand = "tflite_cpu";
-
-// Verifies whether the model is a flatbuffer file.
-class BMFlatBufferVerifier : public tflite::TfLiteVerifier
-{
-public:
-  bool Verify(const char *data, int length, tflite::ErrorReporter *reporter) override
-  {
-
-    flatbuffers::Verifier verifier(reinterpret_cast<const uint8_t *>(data), length);
-    if (!tflite::VerifyModelBuffer(verifier))
-    {
-      reporter->Report("The model is not a valid Flatbuffer file");
-      return false;
-    }
-    return true;
-  }
-};
-
-} // namespace
-
-int main(const int argc, char **argv)
-{
-  tflite::StderrReporter error_reporter;
-
-  TFLiteVanillaRun::Args args(argc, argv);
-
-  std::chrono::milliseconds t_model_load(0), t_prepare(0);
-
-  // TODO Apply verbose level to phases
-  const int verbose = args.getVerboseLevel();
-  benchmark::Phases phases(
-    benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
-
-  std::unique_ptr<tflite::FlatBufferModel> model;
-  std::unique_ptr<tflite::Interpreter> interpreter;
-  std::unique_ptr<tflite::TfLiteVerifier> verifier{new BMFlatBufferVerifier};
-
-  try
-  {
-    phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
-      if (args.getModelValidate())
-      {
-        model = tflite::FlatBufferModel::VerifyAndBuildFromFile(args.getTFLiteFilename().c_str(),
-                                                                verifier.get(), &error_reporter);
-      }
-      else
-      {
-        model =
-          tflite::FlatBufferModel::BuildFromFile(args.getTFLiteFilename().c_str(), &error_reporter);
-      }
-      if (model == nullptr)
-      {
-        throw std::runtime_error{"Cannot create model"};
-      }
-
-      // Use tflite's resolver, not onert's one
-      tflite::ops::builtin::BuiltinOpResolver resolver;
-      tflite::InterpreterBuilder builder(*model, resolver);
-      TFLITE_ENSURE(builder(&interpreter))
-      interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(-1));
-    });
-  }
-  catch (const std::exception &e)
-  {
-    std::cerr << e.what() << '\n';
-    return 1;
-  }
-
-  const bool use_nnapi = nnfw::misc::EnvVar("USE_NNAPI").asBool(false);
-
-  try
-  {
-    phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
-      if (use_nnapi)
-      {
-        // TFLite NNAPI is not worked yet
-        interpreter->ModifyGraphWithDelegate(tflite::NnApiDelegate());
-      }
-      interpreter->AllocateTensors();
-    });
-  }
-  catch (const std::exception &e)
-  {
-    std::cerr << e.what() << '\n';
-    return 1;
-  }
-
-  const int seed = 1; /* TODO Add an option for seed value */
-  nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
-
-  // No input specified. So we fill the input tensors with random values.
-  for (const auto &o : interpreter->inputs())
-  {
-    TfLiteTensor *tensor = interpreter->tensor(o);
-    if (tensor->type == kTfLiteInt32)
-    {
-      // Generate singed 32-bit integer (s32) input
-      auto tensor_view = TFLiteVanillaRun::TensorView<int32_t>::make(*interpreter, o);
-
-      int32_t value = 0;
-
-      nnfw::misc::tensor::iterate(tensor_view.shape())
-        << [&](const nnfw::misc::tensor::Index &ind) {
-             // TODO Generate random values
-             // Gather operation: index should be within input coverage.
-             tensor_view.at(ind) = value;
-             value++;
-           };
-    }
-    else if (tensor->type == kTfLiteUInt8)
-    {
-      // Generate unsigned 8-bit integer input
-      auto tensor_view = TFLiteVanillaRun::TensorView<uint8_t>::make(*interpreter, o);
-
-      uint8_t value = 0;
-
-      nnfw::misc::tensor::iterate(tensor_view.shape())
-        << [&](const nnfw::misc::tensor::Index &ind) {
-             // TODO Generate random values
-             tensor_view.at(ind) = value;
-             value = (value + 1) & 0xFF;
-           };
-    }
-    else if (tensor->type == kTfLiteBool)
-    {
-      // Generate bool input
-      auto tensor_view = TFLiteVanillaRun::TensorView<bool>::make(*interpreter, o);
-
-      auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
-        const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
-        &nnfw::misc::RandomGenerator::generate<bool>);
-      const nnfw::misc::tensor::Object<bool> data(tensor_view.shape(),
-                                                  std::bind(fp, randgen, _1, _2));
-
-      nnfw::misc::tensor::iterate(tensor_view.shape())
-        << [&](const nnfw::misc::tensor::Index &ind) {
-             const auto value = data.at(ind);
-             tensor_view.at(ind) = value;
-           };
-    }
-    else
-    {
-      assert(tensor->type == kTfLiteFloat32);
-
-      const float *end = reinterpret_cast<const float *>(tensor->data.raw_const + tensor->bytes);
-      for (float *ptr = tensor->data.f; ptr < end; ptr++)
-      {
-        *ptr = randgen.generate<float>();
-      }
-    }
-  }
-
-  std::cout << "input tensor indices = [";
-  for (const auto &o : interpreter->inputs())
-  {
-    std::cout << o << ",";
-  }
-  std::cout << "]" << std::endl;
-
-  // NOTE: Measuring memory can't avoid taking overhead. Therefore, memory will be measured on the
-  // only warmup.
-  if (verbose == 0)
-  {
-    phases.run(
-      "WARMUP", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); },
-      args.getWarmupRuns());
-    phases.run(
-      "EXECUTE", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); },
-      args.getNumRuns(), true);
-  }
-  else
-  {
-    phases.run(
-      "WARMUP", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); },
-      [&](const benchmark::Phase &phase, uint32_t nth) {
-        std::cout << "... "
-                  << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
-                  << std::endl;
-      },
-      args.getWarmupRuns());
-    phases.run(
-      "EXECUTE", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); },
-      [&](const benchmark::Phase &phase, uint32_t nth) {
-        std::cout << "... "
-                  << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" << std::endl;
-      },
-      args.getNumRuns(), true);
-  }
-
-  std::cout << "output tensor indices = [";
-  for (const auto &o : interpreter->outputs())
-  {
-    std::cout << o << "(";
-
-    print_max_idx(interpreter->tensor(o)->data.f, interpreter->tensor(o)->bytes / sizeof(float));
-
-    std::cout << "),";
-  }
-  std::cout << "]" << std::endl;
-
-  // TODO Apply verbose level to result
-
-  // prepare result
-  benchmark::Result result(phases);
-
-  // to stdout
-  benchmark::printResult(result);
-
-  if (args.getWriteReport())
-  {
-    // prepare csv task
-    std::string exec_basename;
-    std::string model_basename;
-    std::string backend_name = default_backend_cand;
-    {
-      std::vector<char> vpath(args.getTFLiteFilename().begin(), args.getTFLiteFilename().end() + 1);
-      model_basename = basename(vpath.data());
-      size_t lastindex = model_basename.find_last_of(".");
-      model_basename = model_basename.substr(0, lastindex);
-      exec_basename = basename(argv[0]);
-    }
-    benchmark::writeResult(result, exec_basename, model_basename, backend_name);
-  }
-
-  return 0;
-}
index 34fd49d..4c70547 100644 (file)
@@ -4,20 +4,56 @@
 
 It takes `modelfile` as input and generates `nnpackage`.
 
+## Prerequisite
+
+Python 3.5 or greater
+
 ## Usage
 
 ```
-Usage: model2nnpkg.sh [options] modelfile
-Convert modelfile to nnpackage.
+usage:  model2nnpkg.py [options]
+  Examples:
+      model2nnpkg.py -m add.tflite                           => create nnpkg "add" in current directory
+      model2nnpkg.py -o out -m add.tflite                    => create nnpkg "add" in out/
+      model2nnpkg.py -o out -p addpkg -m add.tflite          => create nnpkg "addpkg" in out/
+      model2nnpkg.py -c add.cfg -m add.tflite                => create nnpkg "add" with add.cfg
+      model2nnpkg.py -o out -p addpkg -m a1.tflite a2.tflite -i a1.json a2.json
+        => create nnpkg "addpkg" with models a1.tflite and a2.tflite in out/
+
+
+Convert model files (tflite, circle or tvn) to nnpkg.
+
+options:
+  -h, --help            show this help message and exit
+  -o output_directory, --outdir output_directory
+                        set nnpkg output directory
+  -p nnpkg_name, --nnpkg-name nnpkg_name
+                        set nnpkg output name (default=[1st modelfile name])
+  -c conf [conf ...], --config conf [conf ...]
+                        provide configuration files
+  -m model [model ...], --models model [model ...]
+                        provide model files
+  -i io_info [io_info ...], --io-info io_info [io_info ...]
+                        provide io info
+```
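+
+The layout of the io info file passed with `-i` is not documented above; the sketch below is an assumption inferred from how `model2nnpkg.py` parses it (the `org-model-io` / `new-model-io` sections with `org-indices` / `new-indices` lists). All index values are illustrative only.
+
+```
+# Hypothetical io info content, written as the Python dict model2nnpkg.py expects to load.
+# Inferred from _verify_args()/_generate_io_conn_info(); -1 appears to mean
+# "this original input/output is not exposed by this model".
+import json
+
+io_info = {
+    "org-model-io": {
+        "inputs": {"new-indices": [0, -1]},
+        "outputs": {"new-indices": [-1, 3]},
+    },
+    "new-model-io": {
+        "inputs": {"org-indices": [0], "new-indices": [0]},
+        "outputs": {"org-indices": [7], "new-indices": [3]},
+    },
+}
+
+with open("a1.json", "w") as f:
+    json.dump(io_info, f, indent=2)
+```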
+
+## Usage (To be deprecated)
+```
+Usage: model2nnpkg.sh [options]
+Convert modelfile (tflite, circle or tvn) to nnpackage.
 
 Options:
     -h   show this help
     -o   set nnpackage output directory (default=.)
-    -p   set nnpackage output name (default=[modelfile name])
+    -p   set nnpackage output name (default=[1st modelfile name])
+    -c   provide configuration files
+    -m   provide model files
 
 Examples:
-    model2nnpkg.sh add.tflite                  => create nnpackage 'add' in ./
-    model2nnpkg.sh -o out add.tflite           => create nnpackage 'add' in out/
-    model2nnpkg.sh -o out -p addpkg add.tflite => create nnpackage 'addpkg' in out/
+    model2nnpkg.sh -m add.tflite                                              => create nnpackage 'add' in ./
+    model2nnpkg.sh -o out -m add.tflite                                       => create nnpackage 'add' in out/
+    model2nnpkg.sh -o out -p addpkg -m add.tflite                             => create nnpackage 'addpkg' in out/
+    model2nnpkg.sh -c add.cfg -m add.tflite                                   => create nnpackage 'add' with add.cfg
+    model2nnpkg.sh -o out -p addpkg -m a1.tflite a2.tflite                   => create nnpackage 'addpkg' with models a1.tflite and a2.tflite in out/
 
 ```
diff --git a/tools/nnpackage_tool/model2nnpkg/model2nnpkg.py b/tools/nnpackage_tool/model2nnpkg/model2nnpkg.py
new file mode 100755 (executable)
index 0000000..0f38620
--- /dev/null
@@ -0,0 +1,287 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import json
+import os
+import shutil
+import sys
+
+
+def _is_json(myjson):
+    try:
+        json.load(myjson)
+    except ValueError as e:
+        return False
+    return True
+
+
+def _verify_args(args):
+    if args.config and len(args.config) != len(args.models):
+        raise Exception(
+            'error: when config files are provided, the number of config files must match the number of model files.\n'
+            +
+            "Please provide a config file for each model file, or do not provide any config file."
+        )
+
+    for i in range(len(args.models)):
+        model_path = args.models[i]
+        if not os.path.isfile(model_path):
+            raise Exception(f'error: {model_path} does not exist.')
+
+        modelfile = os.path.basename(model_path)
+        if len(modelfile.split('.')) == 1:
+            raise Exception(
+                f'error: modelfile {modelfile} does not have an extension.\n' +
+                "Please provide an extension so that the model type can be identified."
+            )
+
+        if args.config:
+            config_path = os.path.basename(args.config[i])
+            if not os.path.isfile(config_path):
+                raise Exception(f'error: {config_path} does not exist.')
+
+    # Check each json file
+    for io_info_path in (args.io_info or []):
+        with open(io_info_path, "r") as io_json:
+            if not _is_json(io_json):
+                raise Exception(
+                    f'error: io info file {io_info_path} is not a json file.\n' +
+                    "Please provide a json file so that the model inputs/outputs can be identified."
+                )
+
+    # Check size of indices of original model
+    size_inputs = 0
+    size_outputs = 0
+    for model_index, io_info_path in enumerate(args.io_info or []):
+        with open(io_info_path, "r") as io_json:
+            model_io = json.load(io_json)
+            if model_index == 0:
+                size_inputs = len(model_io["org-model-io"]["inputs"]["new-indices"])
+                size_outputs = len(model_io["org-model-io"]["outputs"]["new-indices"])
+            else:
+                if size_inputs != len(model_io["org-model-io"]["inputs"]["new-indices"]):
+                    raise Exception(
+                        'error: Invalid size of input indices.\n' +
+                        f"The size of the original model's inputs in io info file {io_info_path} differs from the previous files."
+                    )
+                if size_outputs != len(
+                        model_io["org-model-io"]["outputs"]["new-indices"]):
+                    raise Exception(
+                        'error: Invalid size of output indices.\n' +
+                        f"The size of the original model's outputs in io info file {io_info_path} differs from the previous files."
+                    )
+
+
+def _get_args():
+    parser = argparse.ArgumentParser(
+        description='Convert model files (tflite, circle or tvn) to nnpkg.',
+        usage=''' %(prog)s [options]
+  Examples:
+      %(prog)s -m add.tflite                           => create nnpkg "add" in current directory
+      %(prog)s -o out -m add.tflite                    => create nnpkg "add" in out/
+      %(prog)s -o out -p addpkg -m add.tflite          => create nnpkg "addpkg" in out/
+      %(prog)s -c add.cfg -m add.tflite                => create nnpkg "add" with add.cfg
+      %(prog)s -o out -p addpkg -m a1.tflite a2.tflite -i a1.json a2.json
+        => create nnpkg "addpkg" with models a1.tflite and a2.tflite in out/
+  ''')
+    parser.add_argument(
+        '-o',
+        '--outdir',
+        type=str,
+        default=os.getcwd(),
+        metavar='output_directory',
+        help='set nnpkg output directory')
+    parser.add_argument(
+        '-p',
+        '--nnpkg-name',
+        type=str,
+        metavar='nnpkg_name',
+        help='set nnpkg output name (default=[1st modelfile name])')
+    parser.add_argument(
+        '-c',
+        '--config',
+        type=str,
+        nargs='+',
+        default='',
+        metavar='conf',
+        help='provide configuration files')
+    parser.add_argument(
+        '-m',
+        '--models',
+        type=str,
+        nargs='+',
+        metavar='model',
+        help='provide model files')
+    parser.add_argument(
+        '-i', '--io-info', type=str, nargs='+', metavar='io_info', help='provide io info')
+
+    args = parser.parse_args()
+
+    _verify_args(args)
+
+    if not args.nnpkg_name:
+        first_model_name = os.path.basename(args.models[0]).rsplit('.', 1)[0]
+        args.nnpkg_name = first_model_name
+
+    args.prog = parser.prog
+
+    return args
+
+
+def _get_org_model_input_size(json_path):
+    with open(json_path, "r") as io_json:
+        model_io = json.load(io_json)
+        return len(model_io["org-model-io"]["inputs"]["new-indices"])
+
+
+def _get_org_model_output_size(json_path):
+    with open(json_path, "r") as io_json:
+        model_io = json.load(io_json)
+        return len(model_io["org-model-io"]["outputs"]["new-indices"])
+
+
+def _generate_io_conn_info(io_info_files):
+    ret = {}
+
+    if io_info_files is None:
+        return ret
+
+    pkg_inputs = list(range(_get_org_model_input_size(io_info_files[0])))
+    pkg_outputs = list(range(_get_org_model_output_size(io_info_files[0])))
+
+    org_model_io = []
+    new_model_io = {"inputs": [], "outputs": []}
+    for model_pos, io_info_path in enumerate(io_info_files):
+        with open(io_info_path, "r") as io_json:
+            model_io = json.load(io_json)
+
+            org_model_io.append(model_io["org-model-io"])
+            new_model_io["inputs"].append(model_io["new-model-io"]["inputs"])
+            new_model_io["outputs"].append(model_io["new-model-io"]["outputs"])
+
+    for model_pos in range(len(org_model_io)):
+        # Set pkg-inputs
+        for org_model_input_pos, new_input_index in enumerate(
+                org_model_io[model_pos]["inputs"]["new-indices"]):
+            if new_input_index != -1:
+                for new_model_input_pos, input_index in enumerate(
+                        new_model_io["inputs"][model_pos]["new-indices"]):
+                    if new_input_index == input_index:
+                        pkg_inputs[
+                            org_model_input_pos] = f'{model_pos}:0:{new_model_input_pos}'
+                        break
+                else:
+                    raise Exception(
+                        'error: Wrong io information.\n' +
+                        f"The input index {new_input_index} exists in org-model-io, but not in new-model-io.\n"
+                        + f"Please check {io_info_files[model_pos]}")
+
+        # Set pkg-outputs
+        for org_model_output_pos, new_output_index in enumerate(
+                org_model_io[model_pos]["outputs"]["new-indices"]):
+            if new_output_index != -1:
+                for new_model_output_pos, output_index in enumerate(
+                        new_model_io["outputs"][model_pos]["new-indices"]):
+                    if new_output_index == output_index:
+                        pkg_outputs[
+                            org_model_output_pos] = f'{model_pos}:0:{new_model_output_pos}'
+                        break
+                else:
+                    raise Exception(
+                        'error: Wrong io information.\n' +
+                        f"The output index {new_output_index} exists in org-model-io, but not in new-model-io.\n"
+                        + f"Please check {io_info_files[model_pos]}")
+
+    ret["pkg-inputs"] = pkg_inputs
+    ret["pkg-outputs"] = pkg_outputs
+
+    model_connect = {}
+    for input_model_pos, inputs in enumerate(new_model_io["inputs"]):
+        for output_model_pos, outputs in enumerate(new_model_io["outputs"]):
+            if input_model_pos == output_model_pos:
+                continue
+
+            for input_index_pos, org_input_index in enumerate(inputs["org-indices"]):
+                for output_index_pos, org_output_index in enumerate(
+                        outputs["org-indices"]):
+                    if org_input_index == org_output_index:
+                        edge_to = f'{input_model_pos}:0:{input_index_pos}'
+                        edge_from = f'{output_model_pos}:0:{output_index_pos}'
+
+                        if edge_from not in model_connect:
+                            model_connect[edge_from] = [edge_to]
+                        else:
+                            model_connect[edge_from].append(edge_to)
+
+    ret["model-connect"] = [{
+        "from": edge_from,
+        "to": edge_to
+    } for edge_from, edge_to in model_connect.items()]
+
+    return ret
+
+
+def _generate_manifest(args):
+    config_list = [""]
+    if args.config:
+        config_list = [os.path.basename(e) for e in args.config]
+    models_list = [os.path.basename(e) for e in args.models]
+    types_list = [os.path.basename(e).rsplit('.', 1)[1] for e in args.models]
+    io_conn_info = _generate_io_conn_info(args.io_info)
+
+    manifest = {}
+    manifest["major-version"] = "1"
+    manifest["minor-version"] = "2"
+    manifest["patch-version"] = "0"
+    manifest["configs"] = config_list
+    manifest["models"] = models_list
+    manifest["model-types"] = types_list
+    manifest = {**manifest, **io_conn_info}  # Requires python 3.5 or greater
+
+    return manifest
+
+
+def main():
+    try:
+        # parse arguments
+        args = _get_args()
+
+        print(f'{args.prog}: Generating nnpkg {args.nnpkg_name} in {args.outdir}')
+        # mkdir nnpkg directory
+        nnpkg_path = os.path.join(args.outdir, args.nnpkg_name)
+        os.makedirs(os.path.join(nnpkg_path, 'metadata'), exist_ok=True)
+
+        # dump manifest file
+        manifest = _generate_manifest(args)
+        manifest_path = os.path.join(nnpkg_path, 'metadata', 'MANIFEST')
+        with open(manifest_path, "w") as json_file:
+            json_file.write(f'{json.dumps(manifest, indent=2)}\n')
+
+        # copy models and configurations
+        for i in range(len(args.models)):
+            shutil.copy2(args.models[i], nnpkg_path)
+            if args.config:
+                shutil.copy2(args.config[i], os.path.join(nnpkg_path, 'metadata'))
+    except Exception as e:
+        print(e)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
index 5c7c35b..4625c49 100755 (executable)
@@ -47,7 +47,7 @@ while getopts "ho:p:c:m:" OPTION; do
     p) name=$OPTARG;;
     c)
       configs_src=($OPTARG)
-      until [[ $OPTIND -gt $# ]] || [[ $(eval "echo \${$OPTIND}") =~ ^-.* ]] || [ -z $(eval "echo \${$OPTIND}") ]; do
+      until [[ $OPTIND -gt $# ]] || [[ $(eval "echo \${$OPTIND}") =~ ^-.* ]] || [ -z "$(eval "echo \${$OPTIND}")" ]; do
         if [[ $OPTIND -eq $# ]] && [[ ${#models_src[@]} -eq 0 ]]; then
           # Backward compatibility (will be deprecated)
           # The last remain parameter is model if there is no option "-m"
@@ -60,7 +60,7 @@ while getopts "ho:p:c:m:" OPTION; do
       ;;
     m)
       models_src=($OPTARG)
-      until [[ $OPTIND -gt $# ]] || [[ $(eval "echo \${$OPTIND}") =~ ^-.* ]] || [ -z $(eval "echo \${$OPTIND}") ]; do
+      until [[ $OPTIND -gt $# ]] || [[ $(eval "echo \${$OPTIND}") =~ ^-.* ]] || [ -z "$(eval "echo \${$OPTIND}")" ]; do
         models_src+=($(eval "echo \${$OPTIND}"))
         OPTIND=$((OPTIND + 1))
       done
@@ -91,7 +91,7 @@ if [[ ${#configs_src[@]} -ne 0 ]] && [[ ${#configs_src[@]} -ne ${#models_src[@]}
 fi
 
 delim=""
-for modelpath in ${models_src[@]}
+for modelpath in "${models_src[@]}"
 do
   modelfile=$(basename "$modelpath")
 
@@ -101,8 +101,8 @@ do
     exit 1
   fi
 
-  if [ ! -e $modelpath ]; then
-    >&2 echo "error: "$modelpath" does not exist."
+  if [ ! -e "$modelpath" ]; then
+    >&2 echo "error: $modelpath does not exist."
     exit 1
   fi
 
@@ -112,12 +112,12 @@ do
 done
 
 delim=""
-for configpath in ${configs_src[@]}
+for configpath in "${configs_src[@]}"
 do
   configfile=$(basename "$configpath")
 
-  if [ ! -e $configpath ]; then
-    >&2 echo "error: "$configpath" does not exist."
+  if [ ! -e "$configpath" ]; then
+    >&2 echo "error: $configpath does not exist."
     exit 1
   fi
 
@@ -130,7 +130,7 @@ if [ -z "$name" ]; then
   name=${first_modelfile%.*}
 fi
 
-echo "$progname: Generating nnpackage "$name" in "$outdir""
+echo "$progname: Generating nnpackage $name in $outdir"
 mkdir -p "$outdir"/"$name"/metadata
 
 cat > "$outdir"/"$name"/metadata/MANIFEST <<-EOF
@@ -144,12 +144,12 @@ cat > "$outdir"/"$name"/metadata/MANIFEST <<-EOF
 }
 EOF
 
-for modelpath in ${models_src[@]}
+for modelpath in "${models_src[@]}"
 do
   cp "$modelpath" "$outdir"/"$name"
 done
 
-for configpath in ${configs_src[@]}
+for configpath in "${configs_src[@]}"
 do
   cp "$configpath" "$outdir/$name/metadata"
 done
index 9e5ae29..d35a381 100644 (file)
@@ -1,8 +1,12 @@
 # nncc-tc-to-nnpkg-tc
 
-`model2nnpkg` is a tool to convert model (either `tflite` or `circle`) to `nnpackage`.
+`nncc-tc-to-nnpkg-tc` is a tool to convert an nncc testcase to an nnpackage testcase.
 
-It takes `modelfile` as input and generates `nnpackage`.
+It takes `nncc-tc` as input and generates `nnpkg-tc`.
+
+## Prerequisite
+
+Python 3.5 or greater (model2nnpkg.py is used internally)
 
 ## Usage
 
index bbc5b3e..4edb4ce 100755 (executable)
@@ -4,7 +4,7 @@ set -eu
 
 progname=$(basename "${BASH_SOURCE[0]}")
 script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-model2nnpkg=${model2nnpkg:-"$script_dir"/../model2nnpkg/model2nnpkg.sh}
+model2nnpkg=${model2nnpkg:-"$script_dir"/../model2nnpkg/model2nnpkg.py}
 # Need to install nncc package & set path to tf2nnpkg
 tf2nnpkg=$(which tf2nnpkg)
 
@@ -78,7 +78,7 @@ if [[ "$model_type" == "pb" ]]; then
   $tf2nnpkg --info "$indir/$tcname".info --graphdef "$indir/$tcname"."$model_type" \
   "$tf_intf_version" -o "$outdir"
 else
-  $model2nnpkg -o "$outdir" "$indir/$tcname"."$model_type"
+  $model2nnpkg -o "$outdir" -m "$indir/$tcname"."$model_type"
 fi
 
 extensions="
diff --git a/tools/nnpackage_tool/qnf/qnf.md b/tools/nnpackage_tool/qnf/qnf.md
new file mode 100644 (file)
index 0000000..ee3a88d
--- /dev/null
@@ -0,0 +1,35 @@
+# qnf
+
+`qnf` is a tool to convert input/output data between quantized and float formats.
+
+It reads the quantization parameters from the input circle file.
+
+## Prerequisite
+
+$ pip install -r requirements.txt
+
+## Usage
+
+```
+$ ./qnf.py -h
+$ python tools/nnpackage_tool/qnf/qnf.py -h
+usage: qnf.py [-h] [-o OUT_PATH] [-q | -d] h5 circle
+
+positional arguments:
+  h5                    path to h5 file either input or output to model
+  circle                path to quantized circle model
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -o OUT_PATH, --output OUT_PATH
+                        output file
+  -q, --quantize        quantize f32 to q8u using circle input's qparam
+                        (default: false)
+  -d, --dequantize      dequantize q8u to f32 using circle output's qparam
+                        (default: false)
+
+Examples:
+  qnf.py -q input.h5 0c/0.circle    => generates quantized input as input_.h5
+  qnf.py -d output.h5 0c/0.circle   => generates dequantized output as output_.h5
+  qnf.py -o out/out.h5 -d output.h5 0c/0.circle   => generates dequantized output as out/out.h5
+```
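+
+For reference, the conversion applied here is plain affine quantization using the scale and zero-point stored in the circle file. Below is a minimal NumPy sketch of what `_quantize_input0` / `_dequantize_output0` in `qnf.py` compute; the scale and zero-point values are made up, the tool reads the real ones from the circle model.
+
+```
+import numpy as np
+
+scale, zero_point = 0.0039, 128  # illustrative only; qnf.py reads these from the circle file
+
+def quantize_f32_to_q8u(x):
+    # q = clip(round(x / scale + zero_point), 0, 255)
+    q = np.round(x / scale + zero_point)
+    return q.clip(np.iinfo(np.uint8).min, np.iinfo(np.uint8).max).astype(np.uint8)
+
+def dequantize_q8u_to_f32(q):
+    # x = (q - zero_point) * scale
+    return (q.astype(np.float32) - zero_point) * scale
+
+x = np.array([0.0, 0.25, 0.5], dtype=np.float32)
+print(dequantize_q8u_to_f32(quantize_f32_to_q8u(x)))
+```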
diff --git a/tools/nnpackage_tool/qnf/qnf.py b/tools/nnpackage_tool/qnf/qnf.py
new file mode 100644 (file)
index 0000000..0e02771
--- /dev/null
@@ -0,0 +1,147 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+from circle_schema import circle
+import h5py
+import numpy as np
+import os
+import sys
+
+h5dtypes = {
+    "float32": ">f4",
+    "uint8": "u1",
+    "int8": "i1",
+    "bool": "u1",
+    "int32": "int32",
+    "int64": "int64"
+}
+
+
+def quantize(h5_path, circle_path, h5_out_path):
+    with open(circle_path, 'rb') as f:
+        graph = circle.Model.GetRootAsModel(f.read(), 0).Subgraphs(0)
+    input_tensor = graph.Tensors(graph.Inputs(0))
+    input_names = [input_tensor.Name()]
+
+    with h5py.File(h5_path, 'r') as hf:
+        dset = hf['/value/0']
+        arr = np.array(dset)
+
+    if not np.issubdtype(arr.dtype,
+                         np.float32) or input_tensor.Type() != circle.TensorType.UINT8:
+        print("Not f32 to q8u")
+        sys.exit(-1)
+
+    # copied from python-tools/examples/pytorch_tutorial/main.py
+    dtype = 'uint8'
+
+    def _quantize_input0(data):
+        qparam = graph.Tensors(graph.Inputs(0)).Quantization()
+        rescaled_data = data / qparam.ScaleAsNumpy()[0] + qparam.ZeroPointAsNumpy()[0]
+        return np.round(rescaled_data).clip(np.iinfo(dtype).min,
+                                            np.iinfo(dtype).max).astype(dtype)
+
+    qarr = _quantize_input0(arr)
+
+    ensure_output_dir(h5_out_path)
+    with h5py.File(h5_out_path, 'w') as hf:
+        name_grp = hf.create_group("name")
+        val_grp = hf.create_group("value")
+        idx = 0
+        val_grp.create_dataset(str(idx), data=qarr, dtype=h5dtypes[dtype])
+        name_grp.attrs[str(idx)] = input_names[0]
+
+
+def dequantize(h5_path, circle_path, h5_out_path):
+    with open(circle_path, 'rb') as f:
+        graph = circle.Model.GetRootAsModel(f.read(), 0).Subgraphs(0)
+    output_tensor = graph.Tensors(graph.Outputs(0))
+    output_names = [output_tensor.Name()]
+
+    with h5py.File(h5_path, 'r') as hf:
+        dset = hf['/value/0']
+        arr = np.array(dset)
+    if not np.issubdtype(arr.dtype,
+                         np.uint8) or output_tensor.Type() != circle.TensorType.UINT8:
+        print("Not q8u to f32")
+        sys.exit(-1)
+
+    # copied from python-tools/examples/pytorch_tutorial/main.py
+    def _dequantize_output0(data):
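+        # q8u -> f32: x = (q - zero_point) * scale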
+        qparam = graph.Tensors(graph.Outputs(0)).Quantization()
+        return (data.astype(np.float32) -
+                qparam.ZeroPointAsNumpy()[0]) * qparam.ScaleAsNumpy()[0]
+
+    qarr = _dequantize_output0(arr)
+
+    ensure_output_dir(h5_out_path)
+    with h5py.File(h5_out_path, 'w') as hf:
+        name_grp = hf.create_group("name")
+        val_grp = hf.create_group("value")
+        idx = 0
+        val_grp.create_dataset(str(idx), data=qarr, dtype='>f4')
+        name_grp.attrs[str(idx)] = output_names[0]
+
+
+def makeArgParser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'h5', type=str, help='path to h5 file with model input or output data')
+    parser.add_argument('circle', type=str, help='path to quantized circle model')
+    parser.add_argument(
+        '-o', '--output', action='store', dest="out_path", help="output file")
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument(
+        '-q',
+        '--quantize',
+        action='store_true',
+        help="quantize f32 to q8u using circle input's qparam (default: false)")
+    group.add_argument(
+        '-d',
+        '--dequantize',
+        action='store_true',
+        help="dequantize q8u to f32 using circle output's qparam (default: false)")
+    return parser
+
+
+def parseArgs(parser):
+    args = parser.parse_args()
+    return args
+
+
+def ensure_output_dir(out_path):
+    if os.path.dirname(out_path):
+        os.makedirs(os.path.dirname(out_path), exist_ok=True)
+
+
+if __name__ == '__main__':
+    parser = makeArgParser()
+    args = parseArgs(parser)
+
+    h5_path, circle_path = args.h5, args.circle
+
+    if args.out_path:
+        out_path = args.out_path
+    else:
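+        # default output path: e.g. input.h5 -> input_.h5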
+        h5_name, ext = os.path.splitext(h5_path)
+        out_path = h5_name + '_' + ext
+
+    if args.quantize:
+        quantize(h5_path, circle_path, out_path)
+
+    if args.dequantize:
+        dequantize(h5_path, circle_path, out_path)
diff --git a/tools/nnpackage_tool/qnf/requirements.txt b/tools/nnpackage_tool/qnf/requirements.txt
new file mode 100644 (file)
index 0000000..b018b6e
--- /dev/null
@@ -0,0 +1,3 @@
+numpy>=1.18.5
+circle-schema>=0.4.0.dev0
+h5py>=2.10.0
index 56bff45..bc7b0b9 100755 (executable)
@@ -73,7 +73,7 @@ tools/nnpackage_tool/tflite2circle/tflitejson2circlejson.py $name.$suffix.json.f
 ${flatc} -o ./ -b ${circle_schema} $name.$suffix.json.fused.datalayout
 mv $name.$suffix.json.fused.circle $name.$suffix.circle
 tools/nnpackage_tool/gen_golden/gen_golden.py $name.$suffix.pb
-tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh -o ${outdir} $name.$suffix.circle
+tools/nnpackage_tool/model2nnpkg/model2nnpkg.py -o ${outdir} -m $name.$suffix.circle
 mkdir -p ${outdir}/$name.$suffix/metadata/tc
 mv {input,expected}.h5 ${outdir}/$name.$suffix/metadata/tc/
 mv $name.$suffix.{pb,tflite} ${outdir}/$name.$suffix/
index 9f28eba..f425266 100644 (file)
@@ -41,6 +41,6 @@ nnpkg-tcs/cast
         └── input.h5
 
 # @ target
-# run nnpkg with nnpackage_run and compare with h5diff
+# run nnpkg with onert_run and compare with h5diff
 $ onert/test/onert-test nnpkg-test -i nnpkg-tcs cast
 ```
index 8aea53c..1f0569b 100755 (executable)
@@ -49,7 +49,7 @@ tflite_basename=$(basename "$1")
 name=${tflite_basename%.*}
 
 tools/nnpackage_tool/gen_golden/gen_golden.py $1
-tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh -o ${outdir} $1
+tools/nnpackage_tool/model2nnpkg/model2nnpkg.py -o ${outdir} -m $1
 mkdir -p ${outdir}/$name/metadata/tc
 mv {input,expected}.h5 ${outdir}/$name/metadata/tc/
 cp $1 ${outdir}/$name/
index 85d999b..5a0e86e 100644 (file)
@@ -2,7 +2,7 @@ This folder contains the necessary scripts to perform a pareto front estimation
 
 The contents of the folder can be categorized into the following groups:
 
-- [Generator scripts to map decision variables to `nnpackage_run` parameters](#mapping-decision-to-parameters)
+- [Generator scripts to map decision variables to `onert_run` parameters](#mapping-decision-to-parameters)
 - [Estimator scripts to compute pareto front](#pareto-estimation)
 
 The following subsections describe the role of each script in detail.
@@ -10,7 +10,7 @@ The following subsections describe the role of each script in detail.
 ## Mapping Decision to Parameters
 The generator script `gen_oplist.py` is located under the `generator` folder and encodes large integer representations for `nnpackage` backend assignments. Effectively, it maps suitable backend assignments to integer values. For example, a graph with only three operations and two backends will have an integer representation in the range `(0, 7)`. Thus a value `0` might imply all operations run on `cpu`, while `7` might imply that all operations run on the `acl_cl` backend. As will be described below, the integer representation of `nnpackage` parameters serves as a convenient decision space for pareto estimation.
 
-Setting up parameters for `nnpackage_run` requires a knowledge of model-specific operations. To this end, the `gen_oplist.py` script generates for each model, a `oplist` of unique operations. If an exhaustive mapping of backends to operation sequences is preferred, then `gen_oplist.py` also generates a so-called `opmap` list for uniquely observed `<operation name, data size>` pairs. 
+Setting up parameters for `onert_run` requires knowledge of model-specific operations. To this end, the `gen_oplist.py` script generates, for each model, an `oplist` of unique operations. If an exhaustive mapping of backends to operation sequences is preferred, then `gen_oplist.py` also generates a so-called `opmap` list for uniquely observed `<operation name, data size>` pairs.
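+
+To make the encoding concrete, here is a rough sketch of how one such integer id can be decoded back into per-operation backend assignments (the digit order is illustrative; the authoritative mapping is the one produced by `gen_oplist.py`):
+
+```
+def decode_assignment(value, num_ops, backends=("cpu", "acl_cl")):
+    # interpret `value` as a base-len(backends) number with one digit per operation
+    assignment = []
+    for _ in range(num_ops):
+        value, digit = divmod(value, len(backends))
+        assignment.append(backends[digit])
+    return assignment
+
+# three operations, two backends -> ids 0..7
+print(decode_assignment(0, 3))  # ['cpu', 'cpu', 'cpu']
+print(decode_assignment(7, 3))  # ['acl_cl', 'acl_cl', 'acl_cl']
+```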
 
 `gen_oplist.py` is run on the development environment (read: *Desktop PC*) as shown below:
 ```
@@ -31,26 +31,26 @@ python brute_force_profiler.py <model> <target> <run_folder> [--dumpfile=<filena
 For details, type `python brute_force_profiler.py --help`. Below is an example of the dump generated by the brute-force profiler:
 
 ```
-{"oplist": ["Pool2D", "BinaryArithmetic", "DepthwiseConv2D", "Conv2D", "Reshape"], 
+{"oplist": ["Pool2D", "BinaryArithmetic", "DepthwiseConv2D", "Conv2D", "Reshape"],
   "solutions": [
-    {"memory": 56388, "id": 0, "time": 72.525}, 
-    {"memory": 63624, "id": 1, "time": 86.532}, 
-    {"memory": 64320, "id": 2, "time": 69.352}, 
-    {"memory": 65376, "id": 3, "time": 76.436}, 
-    {"memory": 73016, "id": 4, "time": 69.634}, 
-    {"memory": 73492, "id": 5, "time": 47.013}, 
-    {"memory": 74488, "id": 6, "time": 95.01}, 
-    {"memory": 74844, "id": 7, "time": 111.329}, 
-    {"memory": 393324, "id": 8, "time": 98.956}, 
-    {"memory": 395088, "id": 9, "time": 103.24}, 
-    {"memory": 396180, "id": 10, "time": 68.107}, 
-    {"memory": 395932, "id": 11, "time": 86.109}, 
-    {"memory": 402468, "id": 12, "time": 25.477}, 
-    {"memory": 402800, "id": 13, "time": 25.42}, 
-    {"memory": 403904, "id": 14, "time": 9.168}, 
-    {"memory": 404476, "id": 15, "time": 7.801}, 
+    {"memory": 56388, "id": 0, "time": 72.525},
+    {"memory": 63624, "id": 1, "time": 86.532},
+    {"memory": 64320, "id": 2, "time": 69.352},
+    {"memory": 65376, "id": 3, "time": 76.436},
+    {"memory": 73016, "id": 4, "time": 69.634},
+    {"memory": 73492, "id": 5, "time": 47.013},
+    {"memory": 74488, "id": 6, "time": 95.01},
+    {"memory": 74844, "id": 7, "time": 111.329},
+    {"memory": 393324, "id": 8, "time": 98.956},
+    {"memory": 395088, "id": 9, "time": 103.24},
+    {"memory": 396180, "id": 10, "time": 68.107},
+    {"memory": 395932, "id": 11, "time": 86.109},
+    {"memory": 402468, "id": 12, "time": 25.477},
+    {"memory": 402800, "id": 13, "time": 25.42},
+    {"memory": 403904, "id": 14, "time": 9.168},
+    {"memory": 404476, "id": 15, "time": 7.801},
 ....
-    {"memory": 403940, "id": 30, "time": 9.145}, 
+    {"memory": 403940, "id": 30, "time": 9.145},
     {"memory": 403568, "id": 31, "time": 8.034}]}
 ```
 
@@ -71,21 +71,21 @@ After profiling, the results can be viewed under the filename provided by the `-
 
 ```
 {"configs": {
-  "4": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=cpu ", 
-  "10": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=cpu OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=cpu ", 
-  "14": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=cpu ", 
-  "16": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=cpu OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ", 
-  "20": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ", 
-  "21": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=acl_cl OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ", 
-  "31": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=acl_cl OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=acl_cl "}, 
-  "oplist": ["Pool2D", "DepthwiseConv2D", "Reshape", "Conv2D", "BinaryArithmetic"], 
+  "4": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=cpu ",
+  "10": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=cpu OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=cpu ",
+  "14": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=cpu ",
+  "16": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=cpu OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ",
+  "20": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ",
+  "21": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=acl_cl OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ",
+  "31": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=acl_cl OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=acl_cl "},
+  "oplist": ["Pool2D", "DepthwiseConv2D", "Reshape", "Conv2D", "BinaryArithmetic"],
   "solutions": [
-    {"exec_time": 76.138, "max_rss": 62712, "id": 4}, 
-    {"exec_time": 72.719, "max_rss": 65272, "id": 16}, 
-    {"exec_time": 22.409, "max_rss": 403120, "id": 14}, 
-    {"exec_time": 28.138, "max_rss": 403064, "id": 10}, 
-    {"exec_time": 70.656, "max_rss": 65536, "id": 20}, 
-    {"exec_time": 68.805, "max_rss": 66076, "id": 21}, 
+    {"exec_time": 76.138, "max_rss": 62712, "id": 4},
+    {"exec_time": 72.719, "max_rss": 65272, "id": 16},
+    {"exec_time": 22.409, "max_rss": 403120, "id": 14},
+    {"exec_time": 28.138, "max_rss": 403064, "id": 10},
+    {"exec_time": 70.656, "max_rss": 65536, "id": 20},
+    {"exec_time": 68.805, "max_rss": 66076, "id": 21},
     {"exec_time": 8.201, "max_rss": 404656, "id": 31}], "mode": "name"}
 ```
 **Note**: The pareto-estimation algorithms require the use of python `numpy` package, so make sure to install it beforehand.
index 9516fc3..f30a798 100644 (file)
@@ -8,7 +8,7 @@ from utils import progressbar
 
 if __name__ == "__main__":
     parser = ProfileArgs(
-        prog="brute_force_profiler.py", description="Profiles nnpackage_run using oplist")
+        prog="brute_force_profiler.py", description="Profiles onert_run using oplist")
     # Parse arguments
     args = parser.parse_args()
     modelfile = args.model
index c4e019d..4690d12 100644 (file)
@@ -22,7 +22,7 @@ class ProfileArgs(argparse.ArgumentParser):
         super(ProfileArgs, self).__init__(args, kwargs)
         self.add_argument(
             'model', type=str, default=None, help='nnpackage name with path')
-        self.add_argument('run_folder', type=str, help="path to nnpackage_run executable")
+        self.add_argument('run_folder', type=str, help="path to onert_run executable")
         self.add_argument(
             '--mode',
             type=str.lower,
index d2b66d6..05532e3 100644 (file)
@@ -104,8 +104,8 @@ class Runner:
 
     def run_inference(self, solution):
         cmd_str = [
-            ". /tmp/envvars.sh && " + self._run_folder + "/nnpackage_run -w1 -r1 -m1 -l "
-            self._model + "/metadata/tc/input.h5 " + self._model + " 2> /dev/null"
+            ". /tmp/envvars.sh && " + self._run_folder + "/onert_run -w1 -r1 -m1 -l " +
+            self._model + "/metadata/tc/input.h5 " + self._model + " 2> /dev/null"
         ]
         res = exec_shell(cmd_str, newline_split=True)
         try:
index d630211..6e7e353 100644 (file)
@@ -66,7 +66,7 @@ class RemoteSSH():
              self.remote(remote_trace_path), local_trace_path])
 
     def profile_backend(self, backend, backend_op_list):
-        nnpkg_run_path = self.base_dir / 'out/bin/nnpackage_run'
+        nnpkg_run_path = self.base_dir / 'out/bin/onert_run'
         nnpkg_path = self.base_dir / self.nnpkg_dir.name
 
         cmd = ["ssh", f"{self.host}"]
index 9253fd1..200efb4 100644 (file)
@@ -88,6 +88,55 @@ opcodelist.txt test.tflite -g 1
 
 Above selects operator index 11, 12, 13 in subgraph 1
 
+### Generating separate models for multi-model from a model file with the model generator
+
+To build a multi-model package from one model, you need the separate model files and the input/output information of each model.
+To get them, run the model generator with the `--store-io-info` option.
+
+#### How to use
+
+```
+./select_operator.py <base model file> <opcode list txt file> <output file name> --store-io-info <output json file name>
+```
+
+#### Example
+
+This example splits one model into two separate models.
+
+```
+$ cat 0-26.txt
+0-26
+
+$ cat 27-30.txt
+27-30
+
+$ ./tools/tflitefile_tool/select_operator.py mobilenet_v1_1.0_224.tflite 0-26.txt m1.tflite --store-io-info m1.json
+Input tensor(s): [81]
+Output tensor(s): [44]
+Append subgraphs, original index :  0 , new index :  0
+
+$ ./tools/tflitefile_tool/select_operator.py mobilenet_v1_1.0_224.tflite 27-30.txt m2.tflite --store-io-info m2.json
+Input tensor(s): [6]
+Output tensor(s): [7]
+Append subgraphs, original index :  0 , new index :  0
+
+$ cat m1.json
+{"org-model-io": {"inputs": {"new-indices": [81]}, "outputs": {"new-indices": [-1]}}, "new-model-io": {"inputs": {"org-indices": [88], "new-indices": [81]}, "outputs": {"org-indices": [50], "new-indices": [44]}}}
+
+$ cat m2.json
+{"org-model-io": {"inputs": {"new-indices": [-1]}, "outputs": {"new-indices": [7]}}, "new-model-io": {"inputs": {"org-indices": [50], "new-indices": [6]}, "outputs": {"org-indices": [87], "new-indices": [7]}}}
+
+```
+The meaning of `m1.json` above is as follows:
+- The original model has 1 input and 1 output.
+  - The only input is located at tensors[81] of the new model.
+  - The only output has new-index -1, which means it is not present in the new model.
+- The new model has 1 input and 1 output.
+  - The only input was located at tensors[88] of the original model and is now at tensors[81] of the new model.
+  - The only output was located at tensors[50] of the original model and is now at tensors[44] of the new model.
+
+With the model files and the input/output information files generated above, you can use `model2nnpkg.py` to create an nnpackage for multi-model.
+
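+A quick way to sanity-check how the two generated models connect, using the io-info files from the example above (a sketch; the file names are the ones used in the example):
+
+```
+import json
+
+with open("m1.json") as f:
+    m1 = json.load(f)
+with open("m2.json") as f:
+    m2 = json.load(f)
+
+# m1's outputs and m2's inputs should refer to the same tensor indices
+# of the original model (tensors[50] in the example above)
+m1_out_org = m1["new-model-io"]["outputs"]["org-indices"]
+m2_in_org = m2["new-model-io"]["inputs"]["org-indices"]
+assert m1_out_org == m2_in_org, (m1_out_org, m2_in_org)
+print("models connect via original tensor(s):", m1_out_org)
+```
+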
 ## Collaboration of model parser and model generator
 
 1. Get information about the base model using the model parser
index afd6a27..5eb35e6 100755 (executable)
@@ -54,7 +54,7 @@ def GetTypeSize(type_name):
 
 
 TYPE_TO_NPTYPE = {
-    'BOOL': np.bool,
+    'BOOL': np.bool_,
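+    # np.bool was removed in NumPy 1.24; np.bool_ is the scalar type to use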
     'COMPLEX64': np.cdouble,
     'FLOAT16': np.float16,
     'FLOAT32': np.float32,
index fdef38c..5ec2b8b 100755 (executable)
@@ -23,6 +23,7 @@ import tflite.SubGraph
 import tflite.BuiltinOptions
 import argparse
 import pkg_resources
+import json
 
 
 # On flatbuffers 2.0, EndVector doesn't require length argument any more.
@@ -1142,10 +1143,10 @@ def GenerateSubgraphs(args, new_builder, sample_model, operator_list, new_input_
     # The selected subgraph will be primary subgraph of the model to be created newly
     selected_subgraph = sample_model.Subgraphs(args.subgraph)
 
-    # k: old subg index, v: new subg index
+    # k: original subg index, v: new subg index
     # new subg index is sequential in used_subgraphs_dic
     for k, v in used_subgraphs_dic.items():
-        print("Append subgraphs, old index : ", k, ", new index : ", v)
+        print("Append subgraphs, orginal index : ", k, ", new index : ", v)
         if k == args.subgraph:
             assert v == 0
             new_subgraph = GenerateSubgraph(new_builder, selected_subgraph, operator_list,
@@ -1253,6 +1254,55 @@ def GenerateModel(args, new_builder, sample_model, operator_list, new_input_tens
     return tflite.Model.ModelEnd(new_builder)
 
 
+def StoreIOInfo(path, used_tensors, org_inputs, org_outputs, new_inputs, new_outputs):
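+    """Store the original/new model I/O tensor index mapping as a JSON file at path."""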
+    ioinfo = {}
+
+    # For inputs and outputs of org model
+    ioinfo["org-model-io"] = {
+        "inputs": {
+            "new-indices": []
+        },
+        "outputs": {
+            "new-indices": []
+        }
+    }
+    for input_tensor_idx in org_inputs:
+        if input_tensor_idx in used_tensors:
+            ioinfo["org-model-io"]["inputs"]["new-indices"].append(
+                used_tensors[input_tensor_idx])
+        else:
+            ioinfo["org-model-io"]["inputs"]["new-indices"].append(-1)
+    for output_tensor_idx in org_outputs:
+        if output_tensor_idx in used_tensors:
+            ioinfo["org-model-io"]["outputs"]["new-indices"].append(
+                used_tensors[output_tensor_idx])
+        else:
+            ioinfo["org-model-io"]["outputs"]["new-indices"].append(-1)
+
+    # For inputs and outputs of new model
+    ioinfo["new-model-io"] = {
+        "inputs": {
+            "org-indices": [],
+            "new-indices": []
+        },
+        "outputs": {
+            "org-indices": [],
+            "new-indices": []
+        }
+    }
+    for input_tensor_idx in new_inputs:
+        ioinfo["new-model-io"]["inputs"]["org-indices"].append(int(input_tensor_idx))
+        ioinfo["new-model-io"]["inputs"]["new-indices"].append(
+            used_tensors[input_tensor_idx])
+    for output_tensor_idx in new_outputs:
+        ioinfo["new-model-io"]["outputs"]["org-indices"].append(int(output_tensor_idx))
+        ioinfo["new-model-io"]["outputs"]["new-indices"].append(
+            used_tensors[output_tensor_idx])
+
+    with open(path, "w") as json_file:
+        json_file.write(f'{json.dumps(ioinfo, indent=2)}\n')
+
+
 def main(args):
     input_model_file = args.input_model
     oplist_file = args.opcode_list
@@ -1332,6 +1382,7 @@ def main(args):
             if tensor.Buffer() != 0:
                 used_buffers.append(tensor.Buffer())
 
+    used_buffers = list(set(used_buffers))
     used_buffers.sort()
 
     # Assign new index for operator
@@ -1408,6 +1459,12 @@ def main(args):
 
     output_model_file.write(new_buf)
 
+    org_inputs = sample_subgraph.InputsAsNumpy()
+    org_outputs = sample_subgraph.OutputsAsNumpy()
+    if args.store_io_info != "":
+        StoreIOInfo(args.store_io_info, used_tensors_dic, org_inputs, org_outputs,
+                    new_input_tensors, new_output_tensors)
+
 
 if __name__ == '__main__':
     # Define argument and read
@@ -1424,6 +1481,13 @@ if __name__ == '__main__':
         "output_model", type=argparse.FileType('wb'), help="output tflite model file")
     arg_parser.add_argument(
         '-g', '--subgraph', type=int, default=0, help="subgraph to use (default: 0)")
+    arg_parser.add_argument(
+        '-s',
+        '--store-io-info',
+        type=str,
+        required=False,
+        default="",
+        help="Path to io information to be stored")
 
     # TODO
     #   Select multiple subgraph