From: Chunseok Lee Date: Mon, 14 Dec 2020 05:43:43 +0000 (+0900) Subject: Imported Upstream version 1.12.0 X-Git-Tag: upstream/1.12.0^0 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=62529acabbafce7730601ed01d5709d7bc0d378a;p=platform%2Fcore%2Fml%2Fnnfw.git Imported Upstream version 1.12.0 --- diff --git a/.clang-format b/.clang-format index 7dcf11c..5699ccf 100644 --- a/.clang-format +++ b/.clang-format @@ -23,16 +23,16 @@ BinPackParameters: true BraceWrapping: AfterClass: true AfterControlStatement: true - AfterEnum: false + AfterEnum: true AfterFunction: true - AfterNamespace: false + AfterNamespace: true AfterObjCDeclaration: false AfterStruct: true - AfterUnion: true + AfterUnion: false BeforeCatch: true BeforeElse: true IndentBraces: false -BreakBeforeBraces: Allman +BreakBeforeBraces: Custom BreakBeforeTernaryOperators: true BreakConstructorInitializersBeforeComma: false BreakAfterJavaFieldAnnotations: false diff --git a/.clang-format.8 b/.clang-format.8 new file mode 100644 index 0000000..d2db976 --- /dev/null +++ b/.clang-format.8 @@ -0,0 +1,92 @@ +Language: Cpp +BasedOnStyle: Google +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignEscapedNewlinesLeft: true +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: false +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: false + AfterStruct: true + AfterUnion: false + AfterExternBlock: false + BeforeCatch: true + BeforeElse: true + IndentBraces: false +BreakBeforeBraces: Custom +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 100 +CommentPragmas: '^ IWYU pragma:' +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 2 +ContinuationIndentWidth: 2 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: false +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + - Regex: '^(<|"(gtest|isl|json)/)' + Priority: 3 + - Regex: '.*' + Priority: 1 +IndentCaseLabels: true +IndentWidth: 2 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +ReflowComments: true +SortIncludes: false +SortUsingDeclarations: false +SpaceAfterCStyleCast: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 
+SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 2 +UseTab: Never diff --git a/compiler/bcq-tools/generate_bcq_output_arrays b/compiler/bcq-tools/generate_bcq_output_arrays index b71a374..8544bbd 100644 --- a/compiler/bcq-tools/generate_bcq_output_arrays +++ b/compiler/bcq-tools/generate_bcq_output_arrays @@ -112,128 +112,22 @@ def print_bcqinfo_output_arrays_v1(flags): if infoname == "bcqinfo_dequant_weight": has_dequant_weight = True - # Ideal situation is that the user nodes of BCQ applicable constant nodes - # are BCQ applicable operations such as MatMul, GatherV2, etc. - # However, operations which do not change original values such as - # Ideneity or Transpose can exist between them. In view of TensorFlow Lite, - # real user nodes of BCQ applicable constant nodes must be found first. - # This work is done by BFS search with queue. - - prefix_node_dict = {} # key : prefix / value : list of candidates - matmul_node_prefix_dict = {} # key : Name of MatMul node / value : prefix - - queue_prefix = list(prefix_set) - queue_nodename = [queue_prefix[idx] + ":0" for idx in range(len(queue_prefix))] - - while len(queue_prefix) > 0: - prefix = queue_prefix.pop(0) - nodename = queue_nodename.pop(0) - if prefix not in prefix_node_dict.keys(): - prefix_node_dict[prefix] = [] - - # Usually, output name of op is like "outputname:0" - # -2 is for removing ":0" - for op in ops: - if op.type == "MatMul" and (op.inputs[0].name == nodename - or op.inputs[1].name == nodename): - prefix_node_dict[prefix].append(op.outputs[0].name[:-2]) - matmul_node_prefix_dict[op.outputs[0].name[:-2]] = prefix - elif op.type == "Einsum" and (op.inputs[0].name == nodename - or op.inputs[1].name == nodename): - prefix_node_dict[prefix].append(op.outputs[0].name[:-2]) - elif op.type == "GatherV2" and op.inputs[0].name == nodename: - prefix_node_dict[prefix].append(op.outputs[0].name[:-2]) - elif len(op.outputs) == 1: - for i in range(len(op.inputs)): - if op.inputs[i].name == nodename: - queue_prefix.append(prefix) - queue_nodename.append(op.outputs[0].name) - break - - # When TensorFlow model is converted to TensorFlow Lite model, - # more than one operation can be fused as one. - # For example, MatMul + BiasAdd + ReLU in TensorFlow can be fused as - # one FullyConnected in TensorFlow Lite. - # It means that even real user nodes of BCQ applicable constant nodes - # in TensorFlow are found, they may be real user nodes in TensorFlow Lite. - # Therefore additional candidates of real user nodes should be found either. - # Finding additional candidates is done by BFS search with queue. - - fuseop_prefix_dict = {} # key : Candidate operation / Value : prefix - - # These ops can be candidate. However other candidates may exists after these ops. - mark_type = ["Add", "AddV2", "BiasAdd", "Reshape", "Transpose"] - - # These ops can be candidate. And no more candidates will be found after these ops. - mark_and_stop_type = ["Relu", "Relu6", "Tanh"] - - # These ops cannot be candidates but other candidates may exists after these ops. - # NOTE : Some of following ops may be removed from the list but not sure for now. 
- pass_type = [ - "BatchToSpaceND", "Cast", "DepthToSpace", "ExpandDims", "ResizeBilinear", - "ResizeNearestNeighbor", "ScatterNd", "SpaceToBatchND", "SpaceToDepth", "Squeeze", - "Identity", "Pack", "Unpack", "Stack" - ] - - queue_prefix = list(matmul_node_prefix_dict.values()) - queue_nodename = [matmul + ":0" for matmul in matmul_node_prefix_dict.keys()] - - visited_nodes = set(queue_nodename) - while len(queue_prefix) > 0: - prefix = queue_prefix.pop(0) - nodename = queue_nodename.pop(0) - - # Usually, output name of op is like "outputname:0" - # -2 is for removing ":0" - for op in ops: - for i in range(len(op.inputs)): - if nodename == op.inputs[i].name: - if op.type in mark_type: - if op.outputs[0].name[:-2] not in fuseop_prefix_dict.keys(): - fuseop_prefix_dict[op.outputs[0].name[:-2]] = set() - fuseop_prefix_dict[op.outputs[0].name[:-2]].add(prefix) - if op.outputs[0].name not in visited_nodes: - queue_prefix.append(prefix) - queue_nodename.append(op.outputs[0].name) - visited_nodes.add(op.outputs[0].name) - elif op.type in mark_and_stop_type: - if op.outputs[0].name[:-2] not in fuseop_prefix_dict.keys(): - fuseop_prefix_dict[op.outputs[0].name[:-2]] = set() - fuseop_prefix_dict[op.outputs[0].name[:-2]].add(prefix) - elif op.type in pass_type and op.outputs[0].name not in visited_nodes: - queue_prefix.append(prefix) - queue_nodename.append(op.outputs[0].name) - visited_nodes.add(op.outputs[0].name) - # Write the name of metadata node with open(flags.metadata_path, 'w') as f_metadata: f_metadata.write("one_compiler/bcqinfo_one_metadata,") - # Write all pairs of candidate operations and related BCQ information nodes. + # Write all pairs of a constant node and related BCQ information nodes. with open(flags.output_arrays_path, 'w') as f_arrays: for prefix in prefix_set: - for fusable_op in prefix_node_dict[prefix]: - f_arrays.write("," + prefix + "/bcqinfo_do_w_x") - f_arrays.write("," + prefix + "/bcqinfo_alpha") - f_arrays.write("," + prefix + "/bcqinfo_packed_binary_code") - f_arrays.write("," + prefix + "/bcqinfo_number_of_clusters") - f_arrays.write("," + prefix + "/bcqinfo_size_of_clusters") - f_arrays.write("," + prefix + "/bcqinfo_qbits_of_clusters") - f_arrays.write("," + fusable_op) - if has_dequant_weight: - f_arrays.write("," + prefix + "/bcqinfo_dequant_weight") - for fuseop in fuseop_prefix_dict.keys(): - if len(fuseop_prefix_dict[fuseop]) == 1: - prefix = fuseop_prefix_dict[fuseop].pop() - f_arrays.write("," + prefix + "/bcqinfo_do_w_x") - f_arrays.write("," + prefix + "/bcqinfo_alpha") - f_arrays.write("," + prefix + "/bcqinfo_packed_binary_code") - f_arrays.write("," + prefix + "/bcqinfo_number_of_clusters") - f_arrays.write("," + prefix + "/bcqinfo_size_of_clusters") - f_arrays.write("," + prefix + "/bcqinfo_qbits_of_clusters") - f_arrays.write("," + fuseop) - if has_dequant_weight: - f_arrays.write("," + prefix + "/bcqinfo_dequant_weight") + f_arrays.write("," + prefix + "/bcqinfo_do_w_x") + f_arrays.write("," + prefix + "/bcqinfo_alpha") + f_arrays.write("," + prefix + "/bcqinfo_packed_binary_code") + f_arrays.write("," + prefix + "/bcqinfo_number_of_clusters") + f_arrays.write("," + prefix + "/bcqinfo_size_of_clusters") + f_arrays.write("," + prefix + "/bcqinfo_qbits_of_clusters") + f_arrays.write("," + prefix) + if has_dequant_weight: + f_arrays.write("," + prefix + "/bcqinfo_dequant_weight") def print_bcq_output_arrays(flags): diff --git a/compiler/bcq-tools/generate_bcq_output_arrays.py b/compiler/bcq-tools/generate_bcq_output_arrays.py index 0cc1318..5d9fbe6 
100644 --- a/compiler/bcq-tools/generate_bcq_output_arrays.py +++ b/compiler/bcq-tools/generate_bcq_output_arrays.py @@ -81,129 +81,23 @@ def get_bcqinfo_output_arrays_v1(input_path, output_arrays): if infoname == "bcqinfo_dequant_weight": has_dequant_weight = True - # Ideal situation is that the user nodes of BCQ applicable constant nodes - # are BCQ applicable operations such as MatMul, GatherV2, etc. - # However, operations which do not change original values such as - # Ideneity or Transpose can exist between them. In view of TensorFlow Lite, - # real user nodes of BCQ applicable constant nodes must be found first. - # This work is done by BFS search with queue. - - prefix_node_dict = {} # key : prefix / value : list of candidates - matmul_node_prefix_dict = {} # key : Name of MatMul node / value : prefix - - queue_prefix = list(prefix_set) - queue_nodename = [queue_prefix[idx] + ":0" for idx in range(len(queue_prefix))] - - while len(queue_prefix) > 0: - prefix = queue_prefix.pop(0) - nodename = queue_nodename.pop(0) - if prefix not in prefix_node_dict.keys(): - prefix_node_dict[prefix] = [] - - # Usually, output name of op is like "outputname:0" - # -2 is for removing ":0" - for op in ops: - if op.type == "MatMul" and (op.inputs[0].name == nodename - or op.inputs[1].name == nodename): - prefix_node_dict[prefix].append(op.outputs[0].name[:-2]) - matmul_node_prefix_dict[op.outputs[0].name[:-2]] = prefix - elif op.type == "Einsum" and (op.inputs[0].name == nodename - or op.inputs[1].name == nodename): - prefix_node_dict[prefix].append(op.outputs[0].name[:-2]) - elif op.type == "GatherV2" and op.inputs[0].name == nodename: - prefix_node_dict[prefix].append(op.outputs[0].name[:-2]) - elif len(op.outputs) == 1: - for i in range(len(op.inputs)): - if op.inputs[i].name == nodename: - queue_prefix.append(prefix) - queue_nodename.append(op.outputs[0].name) - break - - # When TensorFlow model is converted to TensorFlow Lite model, - # more than one operation can be fused as one. - # For example, MatMul + BiasAdd + ReLU in TensorFlow can be fused as - # one FullyConnected in TensorFlow Lite. - # It means that even real user nodes of BCQ applicable constant nodes - # in TensorFlow are found, they may be real user nodes in TensorFlow Lite. - # Therefore additional candidates of real user nodes should be found either. - # Finding additional candidates is done by BFS search with queue. - - fuseop_prefix_dict = {} # key : Candidate operation / Value : prefix - - # These ops can be candidate. However other candidates may exists after these ops. - mark_type = ["Add", "AddV2", "BiasAdd", "Reshape", "Transpose"] - - # These ops can be candidate. And no more candidates will be found after these ops. - mark_and_stop_type = ["Relu", "Relu6", "Tanh"] - - # These ops cannot be candidates but other candidates may exists after these ops. - # NOTE : Some of following ops may be removed from the list but not sure for now. 
- pass_type = [ - "BatchToSpaceND", "Cast", "DepthToSpace", "ExpandDims", "ResizeBilinear", - "ResizeNearestNeighbor", "ScatterNd", "SpaceToBatchND", "SpaceToDepth", "Squeeze", - "Identity", "Pack", "Unpack", "Stack" - ] - - queue_prefix = list(matmul_node_prefix_dict.values()) - queue_nodename = [matmul + ":0" for matmul in matmul_node_prefix_dict.keys()] - - visited_nodes = set(queue_nodename) - while len(queue_prefix) > 0: - prefix = queue_prefix.pop(0) - nodename = queue_nodename.pop(0) - - # Usually, output name of op is like "outputname:0" - # -2 is for removing ":0" - for op in ops: - for i in range(len(op.inputs)): - if nodename == op.inputs[i].name: - if op.type in mark_type: - if op.outputs[0].name[:-2] not in fuseop_prefix_dict.keys(): - fuseop_prefix_dict[op.outputs[0].name[:-2]] = set() - fuseop_prefix_dict[op.outputs[0].name[:-2]].add(prefix) - if op.outputs[0].name not in visited_nodes: - queue_prefix.append(prefix) - queue_nodename.append(op.outputs[0].name) - visited_nodes.add(op.outputs[0].name) - elif op.type in mark_and_stop_type: - if op.outputs[0].name[:-2] not in fuseop_prefix_dict.keys(): - fuseop_prefix_dict[op.outputs[0].name[:-2]] = set() - fuseop_prefix_dict[op.outputs[0].name[:-2]].add(prefix) - elif op.type in pass_type and op.outputs[0].name not in visited_nodes: - queue_prefix.append(prefix) - queue_nodename.append(op.outputs[0].name) - visited_nodes.add(op.outputs[0].name) - # the name of metadata node ret_output_arrays = ['one_compiler/bcqinfo_one_metadata'] # given node from user - ret_output_arrays.append(output_arrays) + ret_output_arrays += output_arrays.split(',') - # all pairs of candidate operations and related BCQ information nodes + # all pairs of a constant node and related BCQ information nodes. for prefix in prefix_set: - for fusable_op in prefix_node_dict[prefix]: - ret_output_arrays.append(prefix + '/bcqinfo_do_w_x') - ret_output_arrays.append(prefix + '/bcqinfo_alpha') - ret_output_arrays.append(prefix + '/bcqinfo_packed_binary_code') - ret_output_arrays.append(prefix + '/bcqinfo_number_of_clusters') - ret_output_arrays.append(prefix + '/bcqinfo_size_of_clusters') - ret_output_arrays.append(prefix + '/bcqinfo_qbits_of_clusters') - ret_output_arrays.append(fusable_op) - if has_dequant_weight: - ret_output_arrays.append(prefix + '/bcqinfo_dequant_weight') - for fuseop in fuseop_prefix_dict.keys(): - if len(fuseop_prefix_dict[fuseop]) == 1: - prefix = fuseop_prefix_dict[fuseop].pop() - ret_output_arrays.append(prefix + '/bcqinfo_do_w_x') - ret_output_arrays.append(prefix + '/bcqinfo_alpha') - ret_output_arrays.append(prefix + '/bcqinfo_packed_binary_code') - ret_output_arrays.append(prefix + '/bcqinfo_number_of_clusters') - ret_output_arrays.append(prefix + '/bcqinfo_size_of_clusters') - ret_output_arrays.append(prefix + '/bcqinfo_qbits_of_clusters') - ret_output_arrays.append(fuseop) - if has_dequant_weight: - ret_output_arrays.append(prefix + '/bcqinfo_dequant_weight') + ret_output_arrays.append(prefix + '/bcqinfo_do_w_x') + ret_output_arrays.append(prefix + '/bcqinfo_alpha') + ret_output_arrays.append(prefix + '/bcqinfo_packed_binary_code') + ret_output_arrays.append(prefix + '/bcqinfo_number_of_clusters') + ret_output_arrays.append(prefix + '/bcqinfo_size_of_clusters') + ret_output_arrays.append(prefix + '/bcqinfo_qbits_of_clusters') + ret_output_arrays.append(prefix) + if has_dequant_weight: + ret_output_arrays.append(prefix + '/bcqinfo_dequant_weight') return ret_output_arrays @@ -216,7 +110,7 @@ def get_bcq_output_arrays(input_path, 
output_arrays): if model_version == 1: return get_bcqinfo_output_arrays_v1(input_path, output_arrays) elif model_version == -1: - return None + return output_arrays.split(',') else: err_msg = "BCQ version of the model(v{}) ".format(model_version) err_msg += "is higher than " diff --git a/compiler/circle2circle/src/Circle2Circle.cpp b/compiler/circle2circle/src/Circle2Circle.cpp index 20e3ea9..cde5de8 100644 --- a/compiler/circle2circle/src/Circle2Circle.cpp +++ b/compiler/circle2circle/src/Circle2Circle.cpp @@ -110,6 +110,18 @@ int entry(int argc, char **argv) .default_value(false) .help("This will fuse BatchNorm operators of pre-activations to Convolution operator"); + arser.add_argument("--remove_redundant_transpose") + .nargs(0) + .required(false) + .default_value(false) + .help("This will fuse or remove subsequent Transpose operators"); + + arser.add_argument("--replace_cw_mul_add_with_depthwise_conv") + .nargs(0) + .required(false) + .default_value(false) + .help("This will replace channel-wise mul/add with DepthwiseConv2D operator"); + arser.add_argument("--resolve_customop_add") .nargs(0) .required(false) @@ -128,6 +140,19 @@ int entry(int argc, char **argv) .default_value(false) .help("This will convert Custom(Matmul) to Matmul operator"); + arser.add_argument("--shuffle_weight_to_16x1float32") + .nargs(0) + .required(false) + .default_value(false) + .help("This will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32. Note that " + "it only converts weights whose row is a multiple of 16"); + + arser.add_argument("--substitute_pack_to_reshape") + .nargs(0) + .required(false) + .default_value(false) + .help("This will convert single input Pack to Reshape"); + arser.add_argument("--mute_warnings") .nargs(0) .required(false) @@ -196,6 +221,8 @@ int entry(int argc, char **argv) options->enable(Algorithms::ResolveCustomOpAdd); options->enable(Algorithms::ResolveCustomOpBatchMatMul); options->enable(Algorithms::ResolveCustomOpMatMul); + options->enable(Algorithms::RemoveRedundantTranspose); + options->enable(Algorithms::SubstitutePackToReshape); } if (arser.get("--fold_dequantize")) options->enable(Algorithms::FoldDequantize); @@ -213,12 +240,20 @@ int entry(int argc, char **argv) options->enable(Algorithms::MakeBatchNormGammaPositive); if (arser.get("--fuse_preactivation_batchnorm")) options->enable(Algorithms::FusePreActivationBatchNorm); + if (arser.get("--remove_redundant_transpose")) + options->enable(Algorithms::RemoveRedundantTranspose); + if (arser.get("--replace_cw_mul_add_with_depthwise_conv")) + options->enable(Algorithms::ReplaceMulAddWithDepthwiseConv); if (arser.get("--resolve_customop_add")) options->enable(Algorithms::ResolveCustomOpAdd); if (arser.get("--resolve_customop_batchmatmul")) options->enable(Algorithms::ResolveCustomOpBatchMatMul); if (arser.get("--resolve_customop_matmul")) options->enable(Algorithms::ResolveCustomOpMatMul); + if (arser.get("--shuffle_weight_to_16x1float32")) + options->enable(Algorithms::ShuffleWeightTo16x1Float32); + if (arser.get("--substitute_pack_to_reshape")) + options->enable(Algorithms::SubstitutePackToReshape); if (arser.get("--mute_warnings")) settings->set(luci::UserSettings::Key::MuteWarnings, true); @@ -281,11 +316,14 @@ int entry(int argc, char **argv) luci::Importer importer; auto module = importer.importModule(circle_model); + // call luci optimizations for module + optimizer.optimize(module.get()); + for (size_t idx = 0; idx < module->size(); ++idx) { auto graph = module->graph(idx); - // call luci optimizations + 
// call luci optimizations for graph optimizer.optimize(graph); optimizer.sparsify(graph); diff --git a/compiler/circlechef/tests/CMakeLists.txt b/compiler/circlechef/tests/CMakeLists.txt index 4dc58ad..773ff54 100644 --- a/compiler/circlechef/tests/CMakeLists.txt +++ b/compiler/circlechef/tests/CMakeLists.txt @@ -26,6 +26,32 @@ foreach(RECIPE IN ITEMS ${RECIPES}) list(APPEND TESTFILES ${RECIPE_OUTPUT_FILE}) endforeach(RECIPE) +# Add local files +file(GLOB RECIPES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*/test.recipe") + +foreach(RECIPE IN ITEMS ${RECIPES}) + get_filename_component(RECIPE_PREFIX ${RECIPE} DIRECTORY) + + set(RECIPE_SOURCE_FILE "${RECIPE_PREFIX}.recipe") + set(RECIPE_OUTPUT_FILE "${RECIPE_PREFIX}.circle") + + # Copy .recipe + add_custom_command(OUTPUT ${RECIPE_SOURCE_FILE} + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${CMAKE_CURRENT_SOURCE_DIR}/${RECIPE}" ${RECIPE_SOURCE_FILE} + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RECIPE}" + COMMENT "Generating ${RECIPE_SOURCE_FILE}") + + # Generate .circle + add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE} + COMMAND circlechef-file ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE} + DEPENDS circlechef-file ${RECIPE_SOURCE_FILE} + COMMENT "Generating ${RECIPE_OUTPUT_FILE}") + + list(APPEND TESTS ${RECIPE_PREFIX}) + list(APPEND TESTFILES ${RECIPE_OUTPUT_FILE}) +endforeach(RECIPE) + #Test circlechef-reverse file(GLOB GEN_CIRCLEFILES RELATIVE ${CIRCLERECIPES_DIR} "${CIRCLERECIPES_DIR}/*/test.reverse") # Note: While in development, circlechef-reverse may not handle the operator. @@ -58,6 +84,31 @@ foreach(CIRCLEFILE IN ITEMS ${GEN_CIRCLEFILES}) list(APPEND TESTFILES ${RECIPE_GEN_OUTPUT_FILE2}) endforeach(CIRCLEFILE) +# Test local circlechef-reverse +file(GLOB GEN_CIRCLEFILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*/test.reverse") + +foreach(CIRCLEFILE IN ITEMS ${GEN_CIRCLEFILES}) + get_filename_component(CIRCLE_PREFIX ${CIRCLEFILE} DIRECTORY) + + set(RECIPE_OUTPUT_FILE "${CIRCLE_PREFIX}.circle") + set(RECIPE_GEN_OUTPUT_FILE "${CIRCLE_PREFIX}.gen.recipe") + set(RECIPE_GEN_OUTPUT_FILE2 "${CIRCLE_PREFIX}.gen.circle") + + # Generate .gen.recipe from generated .circle + add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE} + COMMAND circlechef-reverse ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE} + DEPENDS circlechef-reverse ${RECIPE_OUTPUT_FILE} + COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}") + + add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2} + COMMAND circlechef-file ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2} + DEPENDS circlechef-file ${RECIPE_GEN_OUTPUT_FILE} + COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}") + + list(APPEND TESTS ${CIRCLE_PREFIX}.gen) + list(APPEND TESTFILES ${RECIPE_GEN_OUTPUT_FILE2}) +endforeach(CIRCLEFILE) + # Add a dummy target to create a target-level dependency. # TODO Find a way to create a dependency between circlechef_test and generated testfiles. 
add_custom_target(circlechef_testfiles ALL DEPENDS ${TESTFILES}) diff --git a/compiler/circlechef/tests/shape_signature/test.recipe b/compiler/circlechef/tests/shape_signature/test.recipe new file mode 100644 index 0000000..37968ab --- /dev/null +++ b/compiler/circlechef/tests/shape_signature/test.recipe @@ -0,0 +1,45 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 6 dim: 12 } + shape_signature { dim: -1 dim: 8 dim: 6 dim: 12 } +} +operand { + name: "gamma" + type: FLOAT32 + shape { dim: 12 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "beta" + type: FLOAT32 + shape { dim: 12 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 6 dim: 12 } + shape_signature { dim: -1 dim: 8 dim: 6 dim: 12 } +} +operation { + type: "InstanceNorm" + input: "ifm" + input: "gamma" + input: "beta" + output: "ofm" + instance_norm_options { + epsilon: 0.00001 + activation: NONE + } +} +input: "ifm" +output: "ofm" diff --git a/compiler/circlechef/tests/shape_signature/test.reverse b/compiler/circlechef/tests/shape_signature/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/compiler/common-artifacts/exclude.lst b/compiler/common-artifacts/exclude.lst index b2abfd5..34a4d2c 100644 --- a/compiler/common-artifacts/exclude.lst +++ b/compiler/common-artifacts/exclude.lst @@ -16,10 +16,6 @@ tcgenerate(AddN_000) tcgenerate(Add_001) # runtime doesn't support tcgenerate(Add_U8_000) tcgenerate(All_000) -tcgenerate(ArgMax_U8_000) -tcgenerate(ArgMax_U8_001) -tcgenerate(ArgMax_U8_002) -tcgenerate(ArgMax_U8_003) tcgenerate(ArgMin_000) tcgenerate(ArgMin_001) tcgenerate(ArgMin_002) @@ -35,58 +31,35 @@ tcgenerate(BatchToSpaceND_000) tcgenerate(Cast_000) tcgenerate(Cast_001) tcgenerate(Ceil_000) -tcgenerate(Concatenation_U8_000) tcgenerate(Conv2D_003) # runtime doesn't support dilation -tcgenerate(Conv2D_U8_000) -tcgenerate(Conv2D_U8_001) tcgenerate(Cos_000) tcgenerate(DepthwiseConv2D_001) # runtime doesn't support dilation tcgenerate(DepthwiseConv2D_003) # runtime doesn't support dilation -tcgenerate(DepthwiseConv2D_U8_000) tcgenerate(DepthwiseConv2D_U8_001) # luci-interpreter doesn't support channel-wise quantization yet tcgenerate(Dequantize_000) # runtime and luci-interpreter doesn't support Dequantize op yet -tcgenerate(Div_000) -tcgenerate(Equal_000) -tcgenerate(Exp_000) tcgenerate(ExpandDims_000) tcgenerate(ExpandDims_001) tcgenerate(ExpandDims_002) tcgenerate(ExpandDims_003) tcgenerate(Fill_000) tcgenerate(Fill_001) -tcgenerate(Floor_000) -tcgenerate(FloorDiv_000) -tcgenerate(FloorDiv_001) tcgenerate(FloorMod_000) tcgenerate(FloorMod_001) -tcgenerate(FullyConnected_002) tcgenerate(FullyConnected_U8_000) tcgenerate(Gather_000) tcgenerate(GatherNd_000) tcgenerate(GatherNd_001) -tcgenerate(Greater_000) -tcgenerate(GreaterEqual_000) tcgenerate(If_000) tcgenerate(If_001) tcgenerate(L2Pool2D_U8_000) -tcgenerate(Less_000) -tcgenerate(LessEqual_000) tcgenerate(Log_000) -tcgenerate(LogicalAnd_000) -tcgenerate(LogicalNot_000) -tcgenerate(LogicalOr_000) -tcgenerate(LogSoftmax_000) tcgenerate(MatMul_000) tcgenerate(MatrixBandPart_000) tcgenerate(MatrixDiag_000) tcgenerate(MatrixSetDiag_000) -tcgenerate(Maximum_000) -tcgenerate(MaxPool2D_U8_000) tcgenerate(MaxPoolWithArgMax_000) tcgenerate(MaxPoolWithArgMax_001) tcgenerate(MaxPoolWithArgMax_002) -tcgenerate(Mean_U8_000) -tcgenerate(Minimum_000) tcgenerate(NonMaxSuppressionV4_000) tcgenerate(NonMaxSuppressionV4_001) 
tcgenerate(NonMaxSuppressionV5_000) @@ -99,36 +72,38 @@ tcgenerate(Net_InstanceNorm_001) tcgenerate(Net_InstanceNorm_002) tcgenerate(Net_InstanceNorm_003) tcgenerate(Net_ZeroDim_001) # luci-interpreter doesn't support zero dim -tcgenerate(NotEqual_000) tcgenerate(OneHot_000) tcgenerate(OneHot_001) tcgenerate(OneHot_002) tcgenerate(OneHot_003) tcgenerate(Pack_000) tcgenerate(Pack_U8_000) -tcgenerate(Pad_U8_000) tcgenerate(PadV2_000) -tcgenerate(Pow_000) tcgenerate(Range_000) tcgenerate(Rank_000) tcgenerate(ReduceAny_000) tcgenerate(ReduceAny_001) tcgenerate(ReduceAny_002) tcgenerate(ReduceAny_003) +tcgenerate(ReduceAny_dynamic_000) +tcgenerate(ReduceAny_dynamic_001) +tcgenerate(ReduceAny_dynamic_002) +tcgenerate(ReduceAny_dynamic_003) tcgenerate(ReduceMax_000) +tcgenerate(ReduceMax_dynamic_000) tcgenerate(ReduceMin_000) +tcgenerate(ReduceMin_dynamic_000) tcgenerate(ReduceProd_000) tcgenerate(ReduceProd_001) tcgenerate(ReduceProd_002) tcgenerate(ReduceProd_003) -tcgenerate(ReLU_000) -tcgenerate(ReLU6_000) +tcgenerate(ReduceProd_dynamic_000) +tcgenerate(ReduceProd_dynamic_001) +tcgenerate(ReduceProd_dynamic_002) +tcgenerate(ReduceProd_dynamic_003) tcgenerate(ReLUN1To1_000) +tcgenerate(ReLUN1To1_dynamic_000) tcgenerate(Reshape_003) # luci-interpreter doesn't support reshape without built-in option -tcgenerate(Reshape_U8_000) -tcgenerate(ResizeBilinear_000) -tcgenerate(ResizeBilinear_U8_000) # luci-interpreter -tcgenerate(ResizeNearestNeighbor_000) tcgenerate(ReverseSequence_000) tcgenerate(ReverseV2_000) tcgenerate(Round_000) @@ -142,7 +117,6 @@ tcgenerate(SelectV2_001) tcgenerate(SelectV2_002) tcgenerate(Shape_000) tcgenerate(Sin_000) -tcgenerate(Softmax_U8_000) tcgenerate(SpaceToBatchND_000) tcgenerate(SpaceToBatchND_001) tcgenerate(SpaceToBatchND_002) @@ -151,11 +125,10 @@ tcgenerate(SparseToDense_000) tcgenerate(SplitV_000) tcgenerate(Square_000) tcgenerate(SquaredDifference_000) -tcgenerate(Sub_000) -tcgenerate(Sub_001) -tcgenerate(Sub_U8_000) tcgenerate(Sum_000) tcgenerate(Sum_001) +tcgenerate(Sum_dynamic_000) +tcgenerate(Sum_dynamic_001) tcgenerate(Tile_000) tcgenerate(Tile_U8_000) tcgenerate(TopKV2_000) @@ -184,3 +157,4 @@ tcgenerate(BCQFullyConnected_001) tcgenerate(BCQGather_000) tcgenerate(CircleBatchMatMul_000) tcgenerate(InstanceNorm_000) +tcgenerate(InstanceNorm_001) diff --git a/compiler/exo/src/Circle/CircleExporterUtils.h b/compiler/exo/src/Circle/CircleExporterUtils.h index fdd162b..78f0cf7 100644 --- a/compiler/exo/src/Circle/CircleExporterUtils.h +++ b/compiler/exo/src/Circle/CircleExporterUtils.h @@ -65,7 +65,7 @@ namespace circle_detail { /** - * @breif Record the information of T/F Lite SubGraph and its mapping to loco + * @brief Record the information of T/F Lite SubGraph and its mapping to loco */ struct SubGraphContext { diff --git a/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.cpp b/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.cpp index f4bb103..26cc561 100644 --- a/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.cpp +++ b/compiler/exo/src/Dialect/Service/TFLShapeInferenceRule.cpp @@ -116,7 +116,7 @@ private: }; /** - * @breif Expand shape x and y to same rank by align right and filling with 1 + * @brief Expand shape x and y to same rank by align right and filling with 1 */ void expand_rank(loco::TensorShape &x, loco::TensorShape &y) { @@ -136,7 +136,7 @@ void expand_rank(loco::TensorShape &x, loco::TensorShape &y) } /** - * @breif Returns shape of expanded dimension of input x and y having same rank + * @brief Returns shape of expanded 
dimension of input x and y having same rank */ loco::TensorShape expand_dimension(const loco::TensorShape &x, const loco::TensorShape &y) { diff --git a/compiler/exo/src/TFLite/TFLExporterUtils.h b/compiler/exo/src/TFLite/TFLExporterUtils.h index dbd7a52..f2fe607 100644 --- a/compiler/exo/src/TFLite/TFLExporterUtils.h +++ b/compiler/exo/src/TFLite/TFLExporterUtils.h @@ -65,7 +65,7 @@ namespace tflite_detail { /** - * @breif Record the information of T/F Lite SubGraph and its mapping to loco + * @brief Record the information of T/F Lite SubGraph and its mapping to loco */ struct SubGraphContext { diff --git a/compiler/hermes/include/hermes/core/Message.h b/compiler/hermes/include/hermes/core/Message.h index 28cfd79..460163f 100644 --- a/compiler/hermes/include/hermes/core/Message.h +++ b/compiler/hermes/include/hermes/core/Message.h @@ -37,7 +37,7 @@ public: public: /// @brief The number of lines uint32_t lines(void) const { return _lines.size(); } - /// @breif The content of a specific line + /// @brief The content of a specific line const std::string &line(uint32_t n) const { return _lines.at(n); } private: diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.cpp index 47e2498..c5069e4 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.cpp +++ b/compiler/luci-interpreter/src/kernels/Conv2D.cpp @@ -135,7 +135,17 @@ void Conv2D::execute() const } throw std::runtime_error("Unsupported type."); case DataType::U8: - evalQuantized(); + if (filter()->scales().size() == 1) + { + evalQuantized(); + } + else if (filter()->scales().size() > 1) + { + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(filter()->scales().size() == + static_cast(filter()->shape().dim(0))); + evalQuantizedPerChannel(); + } break; case DataType::S16: evalQuantizedS16(); @@ -219,6 +229,92 @@ void Conv2D::evalQuantized() const getTensorData(_im2col.get()), gemmlowp_context.get()); } +void Conv2D::evalQuantizedPerChannel() const +{ + const auto *input_data = getTensorData(input()); + const auto *filter_data = getTensorData(filter()); + const auto *bias_data = getTensorData(bias()); + auto *output_data = getTensorData(output()); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t output_depth = filter_shape.dim(0); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + const int32_t dilation_height_factor = _params.dilation_height_factor; + const int32_t dilation_width_factor = _params.dilation_width_factor; + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + const std::vector effective_output_scale = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + const std::vector multipliers_raw = + quantizeMultipliers(effective_output_scale); + BroadcastableWrapper quant_multipliers(multipliers_raw); + + for (int32_t 
batch = 0; batch < batches; ++batch) + { + for (int32_t out_y = 0; out_y < output_height; ++out_y) + { + for (int32_t out_x = 0; out_x < output_width; ++out_x) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + const int32_t in_y_origin = out_y * stride_height - _padding_height; + const int32_t in_x_origin = out_x * stride_width - _padding_width; + int32_t acc = 0; + for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int32_t in_y = in_y_origin + dilation_height_factor * filter_y; + const int32_t in_x = in_x_origin + dilation_width_factor * filter_x; + if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width)) + { + for (int32_t in_c = 0; in_c < input_depth; ++in_c) + { + const uint8_t input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)]; + const uint8_t filter_val = + filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)]; + acc += static_cast(input_val - input()->zero_point()) * + static_cast(filter_val - filter()->zero_points()[out_c]); + } + } + } + } + if (bias_data) + { + acc += bias_data[out_c]; + } + + int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier( + acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift); + + scaled_acc += output()->zero_point(); + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc; + } + } + } + } +} + void Conv2D::evalQuantizedS16() const { const auto *input_data = getTensorData(input()); diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.h b/compiler/luci-interpreter/src/kernels/Conv2D.h index 83ac67d..86f73c2 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.h +++ b/compiler/luci-interpreter/src/kernels/Conv2D.h @@ -44,6 +44,7 @@ public: private: void evalFloat() const; void evalQuantized() const; + void evalQuantizedPerChannel() const; void evalQuantizedS16() const; private: diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp index 7aa66a8..35a0c54 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp @@ -169,6 +169,78 @@ TEST(Conv2DTest, Uint8) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } +TEST(Conv2DTest, Uint8_CWQ) +{ + const int output_channels = 3; + std::vector input_data{ + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 3, 4, // row = 2 + }; + std::vector filter_data{ + 1, 2, 3, 4, // first 2x2 filter + -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter + }; + std::vector bias_data{1, 2, 3}; + Shape filter_shape{output_channels, 2, 2, 1}; + + std::pair input_quant_param = quantizationParams(0, 4); + std::pair output_quant_param = quantizationParams(-127, 128); + + std::vector> filter_quant_params; + filter_quant_params.push_back(quantizationParams(0, 4)); + filter_quant_params.push_back(quantizationParams(-1, 1)); + filter_quant_params.push_back(quantizationParams(-1, 1)); + + std::vector filter_scales; + std::vector filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector bias_scales; + for (int i = 0; i < output_channels; ++i) + 
bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first); + std::vector zerop(output_channels, 0); + + Tensor input_tensor = makeInputTensor({2, 2, 4, 1}, input_quant_param.first, + input_quant_param.second, input_data); + Tensor filter_tensor = + makeInputTensor(filter_shape, filter_scales, filter_zerops, 0, filter_data); + Tensor bias_tensor = + makeInputTensor({output_channels}, bias_scales, zerop, 0, bias_data); + Tensor output_tensor = + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + std::vector ref_output_data{ + 18, 2, 5, // first batch, left + 18, 2, 5, // first batch, right + 17, 4, 3, // second batch, left + 37, 4, 3, // second batch, right + }; + std::vector ref_output_shape{2, 1, 2, 3}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + TEST(Conv2DTest, SInt16) { Shape input_shape{1, 4, 3, 2}; diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp index 1957f3c..9211331 100644 --- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp +++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp @@ -111,7 +111,17 @@ void DepthwiseConv2D::execute() const } throw std::runtime_error("Unsupported type."); case DataType::U8: - evalQuantized(); + if (filter()->scales().size() == 1) + { + evalQuantized(); + } + else if (filter()->scales().size() > 1) + { + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(filter()->scales().size() == + static_cast(filter()->shape().dim(3))); + evalQuantizedPerChannel(); + } break; case DataType::S16: evalQuantizedS16(); @@ -144,6 +154,97 @@ void DepthwiseConv2D::evalFloat() const getTensorShape(output()), getTensorData(output())); } +void DepthwiseConv2D::evalQuantizedPerChannel() const +{ + const auto *input_data = getTensorData(input()); + const auto *filter_data = getTensorData(filter()); + const auto *bias_data = getTensorData(bias()); + auto *output_data = getTensorData(output()); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + const int32_t dilation_height_factor = _params.dilation_height_factor; + const int32_t dilation_width_factor = _params.dilation_width_factor; + const int32_t depth_multiplier = _params.depth_multiplier; + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, 
&activation_max); + + const std::vector effective_output_scales = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + std::vector quant_multipliers_raw = + quantizeMultipliers(effective_output_scales); + BroadcastableWrapper quant_multipliers(quant_multipliers_raw); + + for (int batch = 0; batch < batches; ++batch) + { + for (int out_y = 0; out_y < output_height; ++out_y) + { + for (int out_x = 0; out_x < output_width; ++out_x) + { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) + { + for (int m = 0; m < depth_multiplier; ++m) + { + const int output_channel = m + in_channel * depth_multiplier; + const int in_x_origin = (out_x * stride_width) - _padding_width; + const int in_y_origin = (out_y * stride_height) - _padding_height; + int32 acc = 0; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + const int in_y = in_y_origin + dilation_height_factor * filter_y; + // Zero padding by omitting the areas outside the image. + const bool is_point_inside_image = + (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height); + if (is_point_inside_image) + { + int32 input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_channel)]; + int32 filter_val = + filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)]; + acc += (filter_val - filter()->zero_points()[output_channel]) * + (input_val - input()->zero_point()); + } + } + } + if (bias_data) + { + acc += bias_data[output_channel]; + } + int32_t output_multiplier = quant_multipliers[output_channel].multiplier; + int output_shift = quant_multipliers[output_channel].shift; + int32_t scaled_acc = + tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + scaled_acc += output()->zero_point(); + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + output_data[calcOffset(output_shape, batch, out_y, out_x, output_channel)] = + static_cast(scaled_acc); + } + } + } + } + } +} + void DepthwiseConv2D::evalQuantized() const { const auto input_scale = static_cast(input()->scale()); diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h index 400bebe..6d700dd 100644 --- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h +++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h @@ -42,6 +42,7 @@ public: private: void evalFloat() const; void evalQuantized() const; + void evalQuantizedPerChannel() const; void evalQuantizedS16() const; private: diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp index 0c76b58..f79e888 100644 --- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp @@ -220,6 +220,79 @@ TEST(DepthwiseConv2DTest, SInt16_CWQ_weights) EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); } +TEST(DepthwiseConv2DTest, Uint8_CWQ_weights) +{ + const int output_channels = 4; + Shape input_shape{1, 3, 2, 2}; + Shape filter_shape{1, 2, 2, output_channels}; + Shape bias_shape{4}; + std::vector ref_output_shape{1, 2, 1, output_channels}; + + std::vector input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + }; + std::vector filter_data{ + 1, 
2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector bias_data{1, 2, 3, 4}; + std::vector ref_output_data{ + 71, -34, 99, -20, // + 91, -26, 127, -4, // + }; + + std::pair input_quant_param = quantizationParams(0, 16); + std::pair output_quant_param = quantizationParams(-127, 128); + + std::vector> filter_quant_params; + filter_quant_params.push_back(quantizationParams(-9, 13)); + filter_quant_params.push_back(quantizationParams(-14, 10)); + filter_quant_params.push_back(quantizationParams(-11, 15)); + filter_quant_params.push_back(quantizationParams(-16, 12)); + + std::vector filter_scales; + std::vector filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first); + std::vector zerop(output_channels, 0); + + Tensor input_tensor = makeInputTensor(input_shape, input_quant_param.first, + input_quant_param.second, input_data); + Tensor filter_tensor = + makeInputTensor(filter_shape, filter_scales, filter_zerops, 3, filter_data); + Tensor bias_tensor = makeInputTensor(bias_shape, bias_scales, zerop, 0, bias_data); + Tensor output_tensor = + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 1; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, output_quant_param.first)); +} + TEST(DepthwiseConv2DTest, InvalidBiasType_NEG) { Shape input_shape{1, 4, 2, 2}; diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp index b0ee905..491ae51 100644 --- a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp +++ b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp @@ -93,7 +93,17 @@ void TransposeConv::execute() const evalFloat(); break; case DataType::U8: - evalQuantized(); + if (filter()->scales().size() == 1) + { + evalQuantized(); + } + else if (filter()->scales().size() > 1) + { + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(filter()->scales().size() == + static_cast(filter()->shape().dim(0))); + evalQuantizedPerChannel(); + } break; case DataType::S16: evalQuantizedS16(); @@ -147,6 +157,98 @@ void TransposeConv::evalQuantized() const getTensorData(_scratch_tensor.get())); } +void TransposeConv::evalQuantizedPerChannel() const +{ + const auto *input_data = getTensorData(input()); + const auto *filter_data = getTensorData(filter()); + const auto *bias_data = getTensorData(bias()); + auto *output_data = getTensorData(output()); + auto *scratch_data = getTensorData(_scratch_tensor.get()); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t 
input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t output_depth = filter_shape.dim(0); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max); + + std::memset(scratch_data, 0, _scratch_tensor->shape().num_elements() * sizeof(int32_t)); + + BroadcastableWrapper output_multipliers(_quant_multipliers); + for (int32_t batch = 0; batch < batches; ++batch) + { + for (int32_t in_y = 0; in_y < input_height; ++in_y) + { + for (int32_t in_x = 0; in_x < input_width; ++in_x) + { + for (int32_t in_c = 0; in_c < input_depth; ++in_c) + { + const int32_t out_y_origin = in_y * stride_height - _padding_height; + const int32_t out_x_origin = in_x * stride_width - _padding_width; + for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int32_t out_x = out_x_origin + filter_x; + const int32_t out_y = out_y_origin + filter_y; + if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width)) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + const uint8_t input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)]; + const uint8_t filter_val = + filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)]; + scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] += + static_cast(input_val - input()->zero_point()) * + static_cast(filter_val - filter()->zero_points()[out_c]); + } + } + } + } + } + } + } + for (int32_t out_y = 0; out_y < output_height; ++out_y) + { + for (int32_t out_x = 0; out_x < output_width; ++out_x) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + int32_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)]; + if (bias_data) + { + acc += bias_data[out_c]; + } + + int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier( + acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift); + + scaled_acc += output()->zero_point(); + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + + output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc; + } + } + } + } +} + void TransposeConv::evalQuantizedS16() const { const auto *input_data = getTensorData(input()); diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.h b/compiler/luci-interpreter/src/kernels/TransposeConv.h index f51e169..2e0beec 100644 --- a/compiler/luci-interpreter/src/kernels/TransposeConv.h +++ b/compiler/luci-interpreter/src/kernels/TransposeConv.h @@ -47,6 +47,7 @@ public: private: void evalFloat() const; void evalQuantized() const; + void evalQuantizedPerChannel() const; void evalQuantizedS16() const; private: diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp index 8564de0..b1309c1 100644 --- a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp +++ b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp @@ -154,6 +154,65 @@ TEST(TransposeConvTest, UInt8) 
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); } +TEST(TransposeConvTest, UInt8_CWQ) +{ + const int32_t output_channels = 2; + std::vector input_data{1, 2, 3, 4}; + std::vector filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}; + std::vector bias_data{3, 4}; + std::vector output_shape_data{1, 5, 5, 2}; + std::vector ref_output_data{ + 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, // + 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, // + 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, // + 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, // + 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, // + }; + + // Choose quantization parameters carefully. + auto input_quant = quantizationParams(-8.0, 7.9375); // s = 1 / 16, zp = 128 + auto output_quant = quantizationParams(-64.0, 191.0); // s = 1, zp = 64 + + std::vector> filter_quant_params; + filter_quant_params.push_back(quantizationParams(0, 17)); + filter_quant_params.push_back(quantizationParams(0, 18)); + + std::vector filter_scales; + std::vector filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant.first); + std::vector zerop(output_channels, 0); + + Tensor input_tensor = makeInputTensor({1, 2, 2, 1}, input_quant.first, + input_quant.second, input_data); + Tensor filter_tensor = makeInputTensor({output_channels, 3, 3, 1}, filter_scales, + filter_zerops, 0, filter_data); + Tensor bias_tensor = + makeInputTensor({output_channels}, bias_scales, zerop, 0, bias_data); + Tensor output_shape_tensor = makeInputTensor({4}, output_shape_data); + Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second); + + TransposeConvParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + + TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor, + &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + TEST(TransposeConvTest, SInt16) { std::vector input_data{1, 2, 3, 4}; diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-interpreter/src/loader/GraphLoader.cpp index c52d99e..09e9235 100644 --- a/compiler/luci-interpreter/src/loader/GraphLoader.cpp +++ b/compiler/luci-interpreter/src/loader/GraphLoader.cpp @@ -57,8 +57,12 @@ const void *getNodeData(const luci::CircleConst *node, size_t *data_size) return getNodeDataImpl(node, data_size); case DataType::FLOAT32: return getNodeDataImpl(node, data_size); + case DataType::S16: + return getNodeDataImpl(node, data_size); case DataType::S32: return getNodeDataImpl(node, data_size); + case DataType::S64: + return getNodeDataImpl(node, data_size); default: throw std::runtime_error("Unsupported type."); } diff --git a/compiler/luci/export/src/CircleExporterImpl.cpp b/compiler/luci/export/src/CircleExporterImpl.cpp index 860cebf..df75427 100644 --- a/compiler/luci/export/src/CircleExporterImpl.cpp +++ b/compiler/luci/export/src/CircleExporterImpl.cpp @@ -16,7 +16,6 @@ #include "CircleExporterImpl.h" #include "Optimize.h" -#include "TypeBridge.h" #include "CircleTensorExporter.h" #include "CircleOperationExporter.h" #include 
"CircleExporterUtils.h" @@ -150,9 +149,6 @@ void CircleExporterImpl::exportGraph(loco::Graph *graph) // do graph optimization optimize(graph); - // copy shape/dtype inference data to CircleNode - copy_shape_dtype(graph); - _builder.Clear(); SerializedModelData md; @@ -223,9 +219,6 @@ void CircleExporterImpl::exportModule(Module *module) optimize(graph); - // copy shape/dtype inference data to CircleNode - copy_shape_dtype(graph); - SerializedGraphData gd; // set Subgraph name diff --git a/compiler/luci/export/src/CircleExporterUtils.cpp b/compiler/luci/export/src/CircleExporterUtils.cpp index 1fdb40e..3715513 100644 --- a/compiler/luci/export/src/CircleExporterUtils.cpp +++ b/compiler/luci/export/src/CircleExporterUtils.cpp @@ -87,6 +87,22 @@ circle::MirrorPadMode to_circle_mirrorpadmode(luci::MirrorPadMode mode) } } +circle::FullyConnectedOptionsWeightsFormat +to_circle_weightsformat(luci::CircleFullyConnected::WeightsFormat format) +{ + switch (format) + { + case luci::CircleFullyConnected::WeightsFormat::DEFAULT: + return circle::FullyConnectedOptionsWeightsFormat_DEFAULT; + case luci::CircleFullyConnected::WeightsFormat::SHUFFLED4x16INT8: + return circle::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8; + case luci::CircleFullyConnected::WeightsFormat::SHUFFLED16x1FLOAT32: + return circle::FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32; + default: + INTERNAL_EXN_V("trying to convert unsupported luci::WeightsFormat", oops::to_uint32(format)); + } +} + circle::DimensionType to_circle_dimensiontype(luci::DimensionType type) { switch (type) diff --git a/compiler/luci/export/src/CircleExporterUtils.h b/compiler/luci/export/src/CircleExporterUtils.h index 7857213..95310b3 100644 --- a/compiler/luci/export/src/CircleExporterUtils.h +++ b/compiler/luci/export/src/CircleExporterUtils.h @@ -32,6 +32,8 @@ namespace luci circle::ActivationFunctionType to_circle_actfunc(luci::FusedActFunc func); circle::TensorType to_circle_tensortype(loco::DataType type); circle::MirrorPadMode to_circle_mirrorpadmode(luci::MirrorPadMode mode); +circle::FullyConnectedOptionsWeightsFormat +to_circle_weightsformat(luci::CircleFullyConnected::WeightsFormat format); circle::DimensionType to_circle_dimensiontype(luci::DimensionType type); flatbuffers::Offset to_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb, const SparseIndexVector &sparse_idx_vec); diff --git a/compiler/luci/export/src/CircleOperationExporter.cpp b/compiler/luci/export/src/CircleOperationExporter.cpp index c937109..4343cf3 100644 --- a/compiler/luci/export/src/CircleOperationExporter.cpp +++ b/compiler/luci/export/src/CircleOperationExporter.cpp @@ -21,7 +21,6 @@ #include #include #include -#include #include #include @@ -930,7 +929,8 @@ void OperationExporter::visit(luci::CircleFullyConnected *node) { export_simple( node, circle::BuiltinOperator_FULLY_CONNECTED, circle::BuiltinOptions_FullyConnectedOptions, - CreateFullyConnectedOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())) + CreateFullyConnectedOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()), + to_circle_weightsformat(node->weights_format())) .Union()); } diff --git a/compiler/luci/export/src/CircleTensorExporter.cpp b/compiler/luci/export/src/CircleTensorExporter.cpp index 1429d28..9bdfa00 100644 --- a/compiler/luci/export/src/CircleTensorExporter.cpp +++ b/compiler/luci/export/src/CircleTensorExporter.cpp @@ -111,10 +111,10 @@ void allocateCircleTensorInfo(CircleNode *node, CircleTensorContext &ctx) CircleTensoInfo 
tensor_info; tensor_info.name(tensor_name); - tensor_info.dtype(to_circle_tensortype(luci::node_dtype(node))); + tensor_info.dtype(to_circle_tensortype(node->dtype())); tensor_info.shape_signature(node->shape_signature()); if (node->shape_status() == ShapeStatus::VALID) - tensor_info.shape(to_shape_description(luci::node_shape(node))); + tensor_info.shape(to_shape_description(node)); tensor_info.shape_status(node->shape_status()); tensor_info.content(dynamic_cast(node)); @@ -243,6 +243,9 @@ flatbuffers::Offset> encodeShape(FlatBufferBuilder &builder, flatbuffers::Offset> encodeShapeSignature(FlatBufferBuilder &builder, const ShapeSignature &shape_signature) { + if (shape_signature.rank() == 0) + return 0; + return builder.CreateVector(shape_signature.as_vector()); } diff --git a/compiler/luci/export/src/Optimize.cpp b/compiler/luci/export/src/Optimize.cpp index 6fa50b5..036a4a2 100644 --- a/compiler/luci/export/src/Optimize.cpp +++ b/compiler/luci/export/src/Optimize.cpp @@ -18,6 +18,7 @@ #include "ProgressReporter.h" #include +#include #include #include @@ -34,6 +35,7 @@ void optimize(loco::Graph *g) // prepare type and shape before optimization phase.emplace_back(std::make_unique()); phase.emplace_back(std::make_unique()); + phase.emplace_back(std::make_unique()); // TODO add more optimization passes (with a knob) } diff --git a/compiler/luci/export/src/SerializedData.h b/compiler/luci/export/src/SerializedData.h index 46b1ac2..c41f50e 100644 --- a/compiler/luci/export/src/SerializedData.h +++ b/compiler/luci/export/src/SerializedData.h @@ -64,7 +64,7 @@ namespace luci { /** - * @breif Record the information of T/F Lite SubGraph and its mapping to loco + * @brief Record the information of T/F Lite SubGraph and its mapping to loco */ struct SubGraphContext { diff --git a/compiler/luci/import/include/luci/Import/CircleReader.h b/compiler/luci/import/include/luci/Import/CircleReader.h index 8636b1d..8e210dd 100644 --- a/compiler/luci/import/include/luci/Import/CircleReader.h +++ b/compiler/luci/import/include/luci/Import/CircleReader.h @@ -46,6 +46,8 @@ loco::DataType luci_datatype(circle::TensorType type); FusedActFunc luci_actfunc(const circle::ActivationFunctionType type); Padding luci_padding(const circle::Padding padding); MirrorPadMode luci_mirrorpad_mode(const circle::MirrorPadMode mode); +luci::CircleFullyConnected::WeightsFormat +luci_weights_format(const circle::FullyConnectedOptionsWeightsFormat weights_format); std::unique_ptr luci_quantparam(const circle::QuantizationParametersT *quantization); diff --git a/compiler/luci/import/src/CircleReader.cpp b/compiler/luci/import/src/CircleReader.cpp index 068de52..b33c920 100644 --- a/compiler/luci/import/src/CircleReader.cpp +++ b/compiler/luci/import/src/CircleReader.cpp @@ -151,6 +151,22 @@ MirrorPadMode luci_mirrorpad_mode(const circle::MirrorPadMode mode) return MirrorPadMode::UNDEFINED; } +luci::CircleFullyConnected::WeightsFormat +luci_weights_format(const circle::FullyConnectedOptionsWeightsFormat weights_format) +{ + switch (weights_format) + { + case circle::FullyConnectedOptionsWeightsFormat_DEFAULT: + return luci::CircleFullyConnected::WeightsFormat::DEFAULT; + case circle::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8: + return luci::CircleFullyConnected::WeightsFormat::SHUFFLED4x16INT8; + case circle::FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32: + return luci::CircleFullyConnected::WeightsFormat::SHUFFLED16x1FLOAT32; + default: + throw std::runtime_error("Invalid FullyConnectedOptionsWeightsFormat"); + } 
+} + DimensionType luci_dim_type(const circle::DimensionType dim_type) { switch (dim_type) diff --git a/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp b/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp index 65a863b..17293ad 100644 --- a/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp +++ b/compiler/luci/import/src/Nodes/CircleFullyConnected.cpp @@ -53,12 +53,7 @@ CircleNode *CircleFullyConnectedGraphBuilder::build_node(const circle::OperatorT const auto *options = op.builtin_options.AsFullyConnectedOptions(); node->fusedActivationFunction(luci_actfunc(options->fused_activation_function)); - if (options->weights_format != circle::FullyConnectedOptionsWeightsFormat_DEFAULT) - { - throw oops::UserExn( - "Unsupported weights format", - circle::EnumNameFullyConnectedOptionsWeightsFormat(options->weights_format)); - } + node->weights_format(luci_weights_format(options->weights_format)); return node; } diff --git a/compiler/luci/lang/include/luci/IR/AttrDilation.h b/compiler/luci/lang/include/luci/IR/AttrDilation.h index c2b28d7..ed82325 100644 --- a/compiler/luci/lang/include/luci/IR/AttrDilation.h +++ b/compiler/luci/lang/include/luci/IR/AttrDilation.h @@ -27,15 +27,17 @@ class Dilation final public: Dilation() : _w(1), _h(1) {} - int32_t w() const { return _w; } - void w(int32_t w) { _w = w; } + uint32_t w() const { return _w; } + void w(uint32_t w) { _w = w; } + void w(int32_t w); - int32_t h() const { return _h; } - void h(int32_t h) { _h = h; } + uint32_t h() const { return _h; } + void h(uint32_t h) { _h = h; } + void h(int32_t h); private: - int32_t _w; - int32_t _h; + uint32_t _w; + uint32_t _h; }; } // namespace luci diff --git a/compiler/luci/lang/include/luci/IR/AttrFilter.h b/compiler/luci/lang/include/luci/IR/AttrFilter.h index 7909fa5..af9d751 100644 --- a/compiler/luci/lang/include/luci/IR/AttrFilter.h +++ b/compiler/luci/lang/include/luci/IR/AttrFilter.h @@ -27,15 +27,17 @@ class Filter final public: Filter() : _w(1), _h(1) {} - int32_t w() const { return _w; } - void w(int32_t w) { _w = w; } + uint32_t w() const { return _w; } + void w(uint32_t w) { _w = w; } + void w(int32_t w); - int32_t h() const { return _h; } - void h(int32_t h) { _h = h; } + uint32_t h() const { return _h; } + void h(uint32_t h) { _h = h; } + void h(int32_t h); private: - int32_t _w; - int32_t _h; + uint32_t _w; + uint32_t _h; }; } // namespace luci diff --git a/compiler/luci/lang/include/luci/IR/AttrStride.h b/compiler/luci/lang/include/luci/IR/AttrStride.h index 654967d..6be6979 100644 --- a/compiler/luci/lang/include/luci/IR/AttrStride.h +++ b/compiler/luci/lang/include/luci/IR/AttrStride.h @@ -27,15 +27,17 @@ class Stride final public: Stride() : _w(1), _h(1) {} - int32_t w() const { return _w; } - void w(int32_t w) { _w = w; } + uint32_t w() const { return _w; } + void w(uint32_t w) { _w = w; } + void w(int32_t w); - int32_t h() const { return _h; } - void h(int32_t h) { _h = h; } + uint32_t h() const { return _h; } + void h(uint32_t h) { _h = h; } + void h(int32_t h); private: - int32_t _w; - int32_t _h; + uint32_t _w; + uint32_t _h; }; } // namespace luci diff --git a/compiler/luci/lang/include/luci/IR/CircleShapeSignature.h b/compiler/luci/lang/include/luci/IR/CircleShapeSignature.h index 970f1b5..18a2604 100644 --- a/compiler/luci/lang/include/luci/IR/CircleShapeSignature.h +++ b/compiler/luci/lang/include/luci/IR/CircleShapeSignature.h @@ -46,6 +46,8 @@ private: std::vector _shape_signature{}; }; +bool operator==(const ShapeSignature &lhs, const ShapeSignature &rhs); + } 
// namespace luci #endif // __LUCI_IR_SHAPE_SIGNATURE_H__ diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h index d78f394..952befc 100644 --- a/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h +++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h @@ -35,6 +35,16 @@ class CircleFullyConnected final public LuciNodeMixin { public: + enum class WeightsFormat + { + UNDEFINED, // This is not defined by Circle. This was added to prevent programming error. + + DEFAULT, + SHUFFLED4x16INT8, + SHUFFLED16x1FLOAT32, + }; + +public: loco::Node *input(void) const { return at(0)->node(); } void input(loco::Node *node) { at(0)->node(node); } @@ -43,6 +53,13 @@ public: loco::Node *bias(void) const override { return at(2)->node(); } void bias(loco::Node *node) override { at(2)->node(node); } + +public: + WeightsFormat weights_format(void) const { return _weights_format; } + void weights_format(WeightsFormat weights_format) { _weights_format = weights_format; } + +private: + WeightsFormat _weights_format{WeightsFormat::DEFAULT}; }; } // namespace luci diff --git a/tools/nnapi_quickcheck/inc/memory.h b/compiler/luci/lang/src/AttrDilation.cpp similarity index 64% rename from tools/nnapi_quickcheck/inc/memory.h rename to compiler/luci/lang/src/AttrDilation.cpp index 3f1bca8..a9f4795 100644 --- a/tools/nnapi_quickcheck/inc/memory.h +++ b/compiler/luci/lang/src/AttrDilation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,21 +14,23 @@ * limitations under the License. */ -#ifndef __MEMORY_H__ -#define __MEMORY_H__ +#include "luci/IR/AttrDilation.h" -#include +#include -template inline T *make_alloc(void) +namespace luci { - auto ptr = malloc(sizeof(T)); - if (ptr == nullptr) - { - throw std::bad_alloc{}; - } +void Dilation::w(int32_t w) +{ + assert(w >= 0); + _w = static_cast(w); +} - return reinterpret_cast(ptr); +void Dilation::h(int32_t h) +{ + assert(h >= 0); + _h = static_cast(h); } -#endif // __MEMORY_H__ +} // namespace luci diff --git a/compiler/luci/lang/src/AttrDilation.test.cpp b/compiler/luci/lang/src/AttrDilation.test.cpp new file mode 100644 index 0000000..3e46589 --- /dev/null +++ b/compiler/luci/lang/src/AttrDilation.test.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/IR/AttrDilation.h" + +#include + +TEST(CircleAttrDilationTest, set) +{ + auto d = luci::Dilation(); + + d.h(10u); + d.w(10u); + + ASSERT_EQ(d.h(), 10u); + ASSERT_EQ(d.w(), 10u); + + d.h(10); // int32_t + d.w(10); + + ASSERT_EQ(d.h(), 10u); + ASSERT_EQ(d.w(), 10u); +} diff --git a/tools/nnapi_quickcheck/lib/env.test.cpp b/compiler/luci/lang/src/AttrFilter.cpp similarity index 55% rename from tools/nnapi_quickcheck/lib/env.test.cpp rename to compiler/luci/lang/src/AttrFilter.cpp index dd9ac8b..9c571e7 100644 --- a/tools/nnapi_quickcheck/lib/env.test.cpp +++ b/compiler/luci/lang/src/AttrFilter.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,32 +14,23 @@ * limitations under the License. */ -#include "env.h" +#include "luci/IR/AttrFilter.h" -#include - -#include #include -inline void ensure(int err) { assert(err == 0); } - -int main(int argc, char **argv) +namespace luci { - const std::string key{"TEST"}; - const int num{3}; - - const auto str = std::to_string(num); - - ensure(unsetenv(key.c_str())); - ensure(setenv(key.c_str(), str.c_str(), 0)); - - int value = 0; - - assert(value != num); - IntVar buffer(key, value); - - assert(buffer() == num); +void Filter::w(int32_t w) +{ + assert(w >= 0); + _w = static_cast(w); +} - return 0; +void Filter::h(int32_t h) +{ + assert(h >= 0); + _h = static_cast(h); } + +} // namespace luci diff --git a/compiler/luci/lang/src/AttrFilter.test.cpp b/compiler/luci/lang/src/AttrFilter.test.cpp new file mode 100644 index 0000000..06dbcac --- /dev/null +++ b/compiler/luci/lang/src/AttrFilter.test.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/IR/AttrFilter.h" + +#include + +TEST(CircleAttrFilterTest, set) +{ + auto f = luci::Filter(); + + f.h(10u); + f.w(10u); + + ASSERT_EQ(f.h(), 10u); + ASSERT_EQ(f.w(), 10u); + + f.h(10); // int32_t + f.w(10); + + ASSERT_EQ(f.h(), 10u); + ASSERT_EQ(f.w(), 10u); +} diff --git a/compiler/luci/lang/src/AttrStride.cpp b/compiler/luci/lang/src/AttrStride.cpp new file mode 100644 index 0000000..9720d12 --- /dev/null +++ b/compiler/luci/lang/src/AttrStride.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/IR/AttrStride.h" + +#include + +namespace luci +{ + +void Stride::w(int32_t w) +{ + assert(w >= 0); + _w = static_cast(w); +} + +void Stride::h(int32_t h) +{ + assert(h >= 0); + _h = static_cast(h); +} + +} // namespace luci diff --git a/compiler/luci/lang/src/AttrStride.test.cpp b/compiler/luci/lang/src/AttrStride.test.cpp new file mode 100644 index 0000000..e91365b --- /dev/null +++ b/compiler/luci/lang/src/AttrStride.test.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/IR/AttrStride.h" + +#include + +TEST(CircleAttrStrideTest, set) +{ + auto s = luci::Stride(); + + s.h(10u); + s.w(10u); + + ASSERT_EQ(s.h(), 10u); + ASSERT_EQ(s.w(), 10u); + + s.h(10); // int32_t + s.w(10); + + ASSERT_EQ(s.h(), 10u); + ASSERT_EQ(s.w(), 10u); +} diff --git a/compiler/luci/lang/src/CircleShapeSignature.cpp b/compiler/luci/lang/src/CircleShapeSignature.cpp new file mode 100644 index 0000000..9700002 --- /dev/null +++ b/compiler/luci/lang/src/CircleShapeSignature.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/IR/CircleShapeSignature.h" + +namespace luci +{ + +bool operator==(const ShapeSignature &lhs, const ShapeSignature &rhs) +{ + if (lhs.rank() != rhs.rank()) + return false; + + for (uint32_t i = 0; i < lhs.rank(); ++i) + if (lhs.dim(i) != rhs.dim(i)) + return false; + + return true; +} + +} // namespace luci diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h index db5bdb5..906760e 100644 --- a/compiler/luci/pass/include/luci/CircleOptimizer.h +++ b/compiler/luci/pass/include/luci/CircleOptimizer.h @@ -19,6 +19,8 @@ #include +#include + #include #include @@ -47,6 +49,10 @@ public: FusePreActivationBatchNorm, MakeBatchNormGammaPositive, FuseActivationFunction, + ShuffleWeightTo16x1Float32, + RemoveRedundantTranspose, + ReplaceMulAddWithDepthwiseConv, + SubstitutePackToReshape, }; enum AlgorithmParameters @@ -77,6 +83,8 @@ public: Options *options(void); public: + void optimize(luci::Module *) const; + void optimize(loco::Graph *) const; void quantize(loco::Graph *) const; diff --git a/compiler/luci/pass/include/luci/ModulePass.h b/compiler/luci/pass/include/luci/ModulePass.h new file mode 100644 index 0000000..1835f6e --- /dev/null +++ b/compiler/luci/pass/include/luci/ModulePass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MODULE_PASS_H__ +#define __MODULE_PASS_H__ + +#include +#include + +#include + +namespace luci +{ + +class Pass : public logo::Pass +{ +public: + // Run module pass and return false if there was nothing changed + virtual bool run(luci::Module *) = 0; +}; + +} // namespace luci + +#endif // __MODULE_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/CircleTypeInferencePass.h b/compiler/luci/pass/include/luci/Pass/CircleTypeInferencePass.h new file mode 100644 index 0000000..379b44c --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/CircleTypeInferencePass.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_CIRCLE_TYPE_INFERENCE_PASS_H__ +#define __LUCI_CIRCLE_TYPE_INFERENCE_PASS_H__ + +#include + +#include + +namespace luci +{ + +/** + * @brief Pass to infer type of circle nodes + */ +class CircleTypeInferencePass : public luci::Pass +{ +public: + virtual const char *name(void) const { return "luci::CircleTypeInferencePass"; } + +public: + bool run(luci::Module *m); + bool run(loco::Graph *g); +}; + +} // namespace luci + +#endif //__LUCI_CIRCLE_TYPE_INFERENCE_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/FuseBCQPass.h b/compiler/luci/pass/include/luci/Pass/FuseBCQPass.h index 4404a9f..912ad42 100644 --- a/compiler/luci/pass/include/luci/Pass/FuseBCQPass.h +++ b/compiler/luci/pass/include/luci/Pass/FuseBCQPass.h @@ -17,7 +17,7 @@ #ifndef __LUCI_FUSE_BCQ_PASS_H__ #define __LUCI_FUSE_BCQ_PASS_H__ -#include +#include namespace luci { @@ -26,10 +26,11 @@ namespace luci * @brief Class to fuse certain pattern of subgraph into CircleBCQFullyConnected or CircleBCQGather * */ -struct FuseBCQPass final : public logo::Pass +struct FuseBCQPass final : public luci::Pass { const char *name(void) const final { return "luci::FuseBCQPass"; } + bool run(luci::Module *m) final; bool run(loco::Graph *g) final; }; diff --git a/compiler/luci/pass/include/luci/Pass/MigrateLegacyShapeDtypePass.h b/compiler/luci/pass/include/luci/Pass/MigrateLegacyShapeDtypePass.h new file mode 100644 index 0000000..c0ebc4e --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/MigrateLegacyShapeDtypePass.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_MIGRATE_LEGACY_SHAPE_DTYPE_PASS_H__ +#define __LUCI_MIGRATE_LEGACY_SHAPE_DTYPE_PASS_H__ + +#include + +#include + +namespace luci +{ + +/** + * @brief Pass to copy shape/dtype of loco to circle node + * + * CAUTION : This pass will be removed after refactoring is finished + */ +class MigrateLegacyShapeDtypePass : public luci::Pass +{ +public: + virtual const char *name(void) const { return "luci::MigrateLegacyShapeDtypePass"; } + +public: + bool run(luci::Module *m); + bool run(loco::Graph *graph); +}; + +} // namespace luci + +#endif //__LUCI_MIGRATE_LEGACY_SHAPE_DTYPE_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/PropagateQuantParamPass.h b/compiler/luci/pass/include/luci/Pass/PropagateQuantParamPass.h new file mode 100644 index 0000000..7e0c44b --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/PropagateQuantParamPass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_PROPAGATE_QUANT_PARAM_PASS_H__ +#define __LUCI_PROPAGATE_QUANT_PARAM_PASS_H__ + +#include + +namespace luci +{ + +/** + * @brief Class to propagate quantization parameters of an operator's output to input + */ +struct PropagateQuantParamPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::PropagateQuantParamPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_PROPAGATE_QUANT_PARAM_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/RemoveRedundantTransposePass.h b/compiler/luci/pass/include/luci/Pass/RemoveRedundantTransposePass.h new file mode 100644 index 0000000..ca20da5 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/RemoveRedundantTransposePass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_REMOVE_REDUNDANT_TRANSPOSE_H__ +#define __LUCI_REMOVE_REDUNDANT_TRANSPOSE_H__ + +#include + +namespace luci +{ + +/** + * @brief fuse or remove subsequent Transpose operators + */ +struct RemoveRedundantTransposePass final : public logo::Pass +{ + const char *name(void) const final { return "luci::RemoveRedundantTransposePass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_REMOVE_REDUNDANT_TRANSPOSE_H__ diff --git a/compiler/luci/pass/include/luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h b/compiler/luci/pass/include/luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h new file mode 100644 index 0000000..5dbcc8f --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_REPLACE_MUL_ADD_WITH_DEPTHWISE_CONV_PASS_H__ +#define __LUCI_REPLACE_MUL_ADD_WITH_DEPTHWISE_CONV_PASS_H__ + +#include + +namespace luci +{ + +/** + * @brief Class to replace channel-wise mul/add with CircleDepthwiseConv2D + */ +struct ReplaceMulAddWithDepthwiseConvPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::ReplaceMulAddWithDepthwiseConvPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_REPLACE_MUL_ADD_WITH_DEPTHWISE_CONV_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/ShapeInferencePass.h b/compiler/luci/pass/include/luci/Pass/ShapeInferencePass.h index 86bb2ab..e21ab4c 100644 --- a/compiler/luci/pass/include/luci/Pass/ShapeInferencePass.h +++ b/compiler/luci/pass/include/luci/Pass/ShapeInferencePass.h @@ -19,7 +19,7 @@ #include -#include +#include namespace luci { @@ -27,12 +27,13 @@ namespace luci /** * @brief Pass to infer shape of nodes */ -class ShapeInferencePass : public logo::Pass +class ShapeInferencePass : public luci::Pass { public: virtual const char *name(void) const { return "luci::ShapeInferencePass"; } public: + bool run(luci::Module *m); bool run(loco::Graph *graph); }; diff --git a/compiler/luci/pass/include/luci/Pass/ShapeSignatureInferencePass.h b/compiler/luci/pass/include/luci/Pass/ShapeSignatureInferencePass.h new file mode 100644 index 0000000..2c6ffcf --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/ShapeSignatureInferencePass.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_SHAPE_SIGNATURE_INFERENCE_PASS_H__ +#define __LUCI_SHAPE_SIGNATURE_INFERENCE_PASS_H__ + +#include + +#include + +namespace luci +{ + +/** + * @brief Pass to infer shape_signature of nodes + */ +class ShapeSignatureInferencePass : public luci::Pass +{ +public: + virtual const char *name(void) const { return "luci::ShapeSignatureInferencePass"; } + +public: + bool run(luci::Module *m); + bool run(loco::Graph *graph); +}; + +} // namespace luci + +#endif //__LUCI_SHAPE_SIGNATURE_INFERENCE_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/ShuffleWeightTo16x1Float32Pass.h b/compiler/luci/pass/include/luci/Pass/ShuffleWeightTo16x1Float32Pass.h new file mode 100644 index 0000000..3d84f51 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/ShuffleWeightTo16x1Float32Pass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_SHUFFLE_WEIGHT_TO_16X1_FLOAT32_PASS_H__ +#define __LUCI_SHUFFLE_WEIGHT_TO_16X1_FLOAT32_PASS_H__ + +#include + +namespace luci +{ + +/** + * @brief Class to convert weight format of FullyConnected to SHUFFLED16x1FLOAT32 + */ +struct ShuffleWeightTo16x1Float32Pass final : public logo::Pass +{ + const char *name(void) const final { return "luci::ShuffleWeightTo16x1Float32Pass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_SHUFFLE_WEIGHT_TO_16X1_FLOAT32_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/SubstitutePackToReshapePass.h b/compiler/luci/pass/include/luci/Pass/SubstitutePackToReshapePass.h new file mode 100644 index 0000000..36d13f1 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/SubstitutePackToReshapePass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_SUBSTITUTE_PACK_TO_RESHAPE_PASS_H__ +#define __LUCI_SUBSTITUTE_PACK_TO_RESHAPE_PASS_H__ + +#include + +namespace luci +{ + +/** + * @brief Class to Substitute Pack with 1 input to single reshape node. 
+ */ +struct SubstitutePackToReshapePass final : public logo::Pass +{ + const char *name(void) const final { return "luci::SubstitutePackToReshapePass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_SUBSTITUTE_PACK_TO_RESHAPE_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/TypeInferencePass.h b/compiler/luci/pass/include/luci/Pass/TypeInferencePass.h index c607ac6..9d964bd 100644 --- a/compiler/luci/pass/include/luci/Pass/TypeInferencePass.h +++ b/compiler/luci/pass/include/luci/Pass/TypeInferencePass.h @@ -20,7 +20,7 @@ #include -#include +#include namespace luci { @@ -28,12 +28,13 @@ namespace luci /** * @brief Pass to infer type of nodes */ -class TypeInferencePass : public logo::Pass +class TypeInferencePass : public luci::Pass { public: virtual const char *name(void) const { return "luci::TypeInferencePass"; } public: + bool run(luci::Module *m); bool run(loco::Graph *graph); }; diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp index 34f6473..cc9fe48 100644 --- a/compiler/luci/pass/src/CircleOptimizer.cpp +++ b/compiler/luci/pass/src/CircleOptimizer.cpp @@ -24,6 +24,9 @@ #include "luci/Pass/FuseInstanceNormPass.h" #include "luci/Pass/FusePreActivationBatchNormPass.h" #include "luci/Pass/MakeBatchNormGammaPositivePass.h" +#include "luci/Pass/PropagateQuantParamPass.h" +#include "luci/Pass/RemoveRedundantTransposePass.h" +#include "luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h" #include "luci/Pass/ResolveCustomOpAddPass.h" #include "luci/Pass/ResolveCustomOpBatchMatMulPass.h" #include "luci/Pass/ResolveCustomOpMatMulPass.h" @@ -31,14 +34,21 @@ #include "luci/Pass/QuantizeWithMinMaxPass.h" #include "luci/Pass/QuantizeDequantizeWeightsPass.h" #include "luci/Pass/SparsifyTensorPass.h" +#include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h" +#include "luci/Pass/SubstitutePackToReshapePass.h" // TODO add more passes #include "luci/Pass/ShapeInferencePass.h" +#include "luci/Pass/ShapeSignatureInferencePass.h" #include "luci/Pass/TypeInferencePass.h" +// Following passes will be removed after refactoring is finished +#include "luci/Pass/MigrateLegacyShapeDtypePass.h" + // logo passes #include +#include "ModulePhase.h" #include "ProgressReporter.h" #include "CircleOptimizerUtils.h" @@ -124,11 +134,44 @@ CircleOptimizer::Options *CircleOptimizer::options(void) return _options.get(); } +void CircleOptimizer::optimize(luci::Module *m) const +{ + luci::Phase phase; + + // Following passes will be deprecated after refactoring is finished. + phase.emplace_back(std::make_unique()); + + // Following passes are needed everytime when other passes create new node or modify some nodes. + phase.emplace_back(std::make_unique()); + phase.emplace_back(std::make_unique()); + phase.emplace_back(std::make_unique()); + + if (_options->query(Options::Algorithm::FuseBCQ)) + { + phase.emplace_back(std::make_unique()); + } + + ModuleProgressReporter prog(m, logo::PhaseStrategy::Restart); + PhaseRunner phase_runner{m}; + phase_runner.attach(&prog); + phase_runner.run(phase); +} + void CircleOptimizer::optimize(loco::Graph *g) const { logo::Phase phase; /* TRANSFORM DECLARATION BEGIN */ + phase.emplace_back(std::make_unique()); + + // Following passes will be deprecated after refactoring is finished. + phase.emplace_back(std::make_unique()); + + // Following passes are needed everytime when other passes create new node or modify some nodes. 
+ phase.emplace_back(std::make_unique()); + phase.emplace_back(std::make_unique()); + phase.emplace_back(std::make_unique()); + if (_options->query(Options::Algorithm::ResolveCustomOpAdd)) { phase.emplace_back(std::make_unique()); @@ -145,10 +188,6 @@ void CircleOptimizer::optimize(loco::Graph *g) const { phase.emplace_back(std::make_unique()); } - if (_options->query(Options::Algorithm::FuseBCQ)) - { - phase.emplace_back(std::make_unique()); - } if (_options->query(Options::Algorithm::FuseBatchNormWithTConv)) { phase.emplace_back(std::make_unique()); @@ -173,15 +212,27 @@ void CircleOptimizer::optimize(loco::Graph *g) const { phase.emplace_back(std::make_unique()); } + if (_options->query(Options::Algorithm::ShuffleWeightTo16x1Float32)) + { + phase.emplace_back(std::make_unique()); + } + if (_options->query(Options::Algorithm::RemoveRedundantTranspose)) + { + phase.emplace_back(std::make_unique()); + } + if (_options->query(Options::Algorithm::ReplaceMulAddWithDepthwiseConv)) + { + phase.emplace_back(std::make_unique()); + } + if (_options->query(Options::Algorithm::SubstitutePackToReshape)) + { + phase.emplace_back(std::make_unique()); + } - // Shape inference is needed for added nodes doing above transformations - phase.emplace_back(std::make_unique()); - phase.emplace_back(std::make_unique()); - phase.emplace_back(std::make_unique()); /* TRANSFORM DECLARATION END */ - ProgressReporter prog(g, logo::PhaseStrategy::Saturate); - logo::PhaseRunner phase_runner{g}; + ProgressReporter prog(g, logo::PhaseStrategy::Restart); + logo::PhaseRunner phase_runner{g}; phase_runner.attach(&prog); phase_runner.run(phase); } @@ -258,6 +309,20 @@ void CircleOptimizer::quantize(loco::Graph *g) const luci::QuantizeWithMinMaxPass quantizer(str_to_dtype(input_dtype), str_to_dtype(output_dtype), str_to_granularity(granularity)); quantizer.run(g); + + // Post-quantization optimizations + logo::Phase phase; + + phase.emplace_back(std::make_unique()); + + phase.emplace_back(std::make_unique()); + phase.emplace_back(std::make_unique()); + phase.emplace_back(std::make_unique()); + + ProgressReporter prog(g, logo::PhaseStrategy::Saturate); + logo::PhaseRunner phase_runner{g}; + phase_runner.attach(&prog); + phase_runner.run(phase); } // Requantize diff --git a/compiler/luci/pass/src/CircleTypeInferencePass.cpp b/compiler/luci/pass/src/CircleTypeInferencePass.cpp new file mode 100644 index 0000000..67bd253 --- /dev/null +++ b/compiler/luci/pass/src/CircleTypeInferencePass.cpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/CircleTypeInferencePass.h" + +#include + +#include + +namespace luci +{ + +bool CircleTypeInferencePass::run(luci::Module *m) +{ + bool changed = false; + + for (size_t g = 0; g < m->size(); ++g) + { + if (run(m->graph(g))) + changed = true; + } + + return changed; +} + +bool CircleTypeInferencePass::run(loco::Graph *g) +{ + luci::tinf::Rule type_infer_rule; + bool changed = false; + + for (auto node : loco::postorder_traversal(loco::output_nodes(g))) + { + loco::DataType dtype; + auto circle_node = loco::must_cast(node); + + if (type_infer_rule.infer(circle_node, dtype) && circle_node->dtype() != dtype) + { + circle_node->dtype(dtype); + changed = true; + } + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/FuseBCQPass.cpp b/compiler/luci/pass/src/FuseBCQPass.cpp index ebf2877..c0583d8 100644 --- a/compiler/luci/pass/src/FuseBCQPass.cpp +++ b/compiler/luci/pass/src/FuseBCQPass.cpp @@ -25,6 +25,85 @@ namespace { +bool is_fusable_const(luci::CircleConst *before, luci::CircleConst *after, bool do_w_x) +{ + if (after->dtype() != loco::DataType::FLOAT32) + return false; + + if (after->rank() != 2) + return false; + + if (after->size() != before->size()) + return false; + + auto after_dim0 = after->dim(0).value(); + auto after_dim1 = after->dim(1).value(); + + if (before->rank() == 2) + { + if (do_w_x) + { + // Check for [dim0, dim1] --> [dim0, dim1] + if (!(after->dim(0) == before->dim(0) && after->dim(1) == before->dim(1))) + return false; + + for (uint32_t i = 0; i < after->size(); ++i) + if (after->at(i) != before->at(i)) + return false; + } + else + { + // Check for [dim0, dim1] --> [dim1, dim0] + if (!(after->dim(0) == before->dim(1) && after->dim(1) == before->dim(0))) + return false; + + for (uint32_t i = 0; i < after_dim0; ++i) + for (uint32_t j = 0; j < after_dim1; ++j) + if (after->at(i * after_dim1 + j) != + before->at(j * after_dim0 + i)) + return false; + } + + return true; + } + else if (before->rank() == 3) + { + if (do_w_x) + { + // This case is not found yet. + return false; + } + else + { + // When Einsum op is converted to FullyConnected, original rank can be 3. + auto before_dim0 = before->dim(0).value(); + auto before_dim1 = before->dim(1).value(); + auto before_dim2 = before->dim(2).value(); + + // Check if [dim0, dim1, dim2] --> [dim2, dim0 * dim1] or + // [dim0, dim1, dim2] --> [dim1 * dim2, dim0] + if ((after_dim0 == before_dim1 * before_dim2 && after_dim1 == before_dim0) || + (after_dim0 == before_dim2 && after_dim1 == before_dim0 * before_dim1)) + { + for (uint32_t i = 0; i < after_dim0; ++i) + for (uint32_t j = 0; j < after_dim1; ++j) + if (after->at(i * after_dim1 + j) != + before->at(j * after_dim0 + i)) + return false; + } + } + + return true; + } + + return false; +} + +} // namespace + +namespace +{ + // V means the version of BCQ. 
template class BCQFuser; @@ -38,11 +117,9 @@ public: } public: - bool fuseBCQ(loco::Graph *g) + void register_bcq_info(loco::Graph *g) { - - const auto output_nodes = loco::output_nodes(g); - for (auto node : output_nodes) + for (auto node : loco::output_nodes(g)) { auto output_node = loco::must_cast(node); @@ -61,28 +138,29 @@ public: add_BCQ_info_node(prefix, metadata_type, circle_node); } } + } + bool fuseBCQ(loco::Graph *g) + { if (!is_bcqinfo_valid()) return false; - for (auto f : _fusable_op) + for (auto node : loco::postorder_traversal(loco::output_nodes(g))) { - auto prefix = f.first; - luci::CircleNode *node = f.second; - - if (!is_valid_prefix(prefix)) - continue; - // Fuse Gather to BCQGather if (auto gather = dynamic_cast(node)) { if (auto params = dynamic_cast(gather->params())) { + auto prefix = get_prefix_of_const(params); + if (prefix == -1 || !is_valid_prefix(prefix)) + continue; + auto bcq_gather = g->nodes()->create(); bcq_gather->op_version(1); - bcq_gather->input_scales(_alpha[prefix]); - bcq_gather->input_binary(_packed_binary_code[prefix]); + bcq_gather->input_scales(alpha(g, prefix)); + bcq_gather->input_binary(packed_binary_code(g, prefix)); bcq_gather->indices(gather->indices()); bcq_gather->input_clusters(packed_clusters(g, prefix)); @@ -122,29 +200,20 @@ public: } } - // Einsum is unpacked to FullyConnected, Pack and Reshape - if (auto reshape = dynamic_cast(node)) - { - node = dynamic_cast(reshape->tensor()); - } - if (auto pack = dynamic_cast(node)) - { - if (pack->values_count() == 1 && pack->rank() == 3) - { - node = dynamic_cast(pack->values(0)); - } - } - // Fuse FullyConnected to BCQFullyConnected if (auto fully_connected = dynamic_cast(node)) { if (auto weights = dynamic_cast(fully_connected->weights())) { + auto prefix = get_prefix_of_const(weights); + if (prefix == -1 || !is_valid_prefix(prefix)) + continue; + auto bcq_fc = g->nodes()->create(); bcq_fc->op_version(1); - bcq_fc->weights_scales(_alpha[prefix]); - bcq_fc->weights_binary(_packed_binary_code[prefix]); + bcq_fc->weights_scales(alpha(g, prefix)); + bcq_fc->weights_binary(packed_binary_code(g, prefix)); bcq_fc->bias(fully_connected->bias()); bcq_fc->weights_clusters(packed_clusters(g, prefix)); bcq_fc->fusedActivationFunction(fully_connected->fusedActivationFunction()); @@ -179,43 +248,69 @@ public: } // If x_w formation, we should insert Transpose in front and back of BCQFullyConnected - if (_do_w_x[prefix]->at(0)) - { - bcq_fc->weights_hidden_size(weights->dim(0).value()); - bcq_fc->input(bcq_input); - loco::replace(fully_connected).with(bcq_fc); - } - else - { - bcq_fc->weights_hidden_size(weights->dim(1).value()); + bcq_fc->weights_hidden_size(weights->dim(1).value()); - auto perm = g->nodes()->create(); - perm->dtype(loco::DataType::S32); - perm->size(2); - perm->rank(1); - perm->dim(0) = 2; - perm->at(0) = 1; - perm->at(1) = 0; - perm->shape_status(luci::ShapeStatus::VALID); + auto perm = g->nodes()->create(); + perm->dtype(loco::DataType::S32); + perm->size(2); + perm->rank(1); + perm->dim(0) = 2; + perm->at(0) = 1; + perm->at(1) = 0; + perm->shape_status(luci::ShapeStatus::VALID); - auto input_transpose = g->nodes()->create(); - input_transpose->a(bcq_input); - input_transpose->perm(perm); + auto input_transpose = g->nodes()->create(); + input_transpose->a(bcq_input); + input_transpose->perm(perm); - bcq_fc->input(input_transpose); + bcq_fc->input(input_transpose); - auto output_transpose = g->nodes()->create(); - output_transpose->a(bcq_fc); - output_transpose->perm(perm); + auto 
output_transpose = g->nodes()->create(); + output_transpose->a(bcq_fc); + output_transpose->perm(perm); - loco::replace(fully_connected).with(output_transpose); - } + loco::replace(fully_connected).with(output_transpose); return true; } - else + else if (auto weights_as_input = + dynamic_cast(fully_connected->input())) { - // TODO Is there any case that input() is constant, instead of weights()? + auto prefix = get_prefix_of_const(weights_as_input); + if (prefix == -1 || !is_valid_prefix(prefix)) + continue; + + assert(_do_w_x[prefix]->at(0) == true); + + auto perm = g->nodes()->create(); + perm->dtype(loco::DataType::S32); + perm->size(2); + perm->rank(1); + perm->dim(0) = 2; + perm->at(0) = 1; + perm->at(1) = 0; + perm->shape_status(luci::ShapeStatus::VALID); + + auto input_transpose = g->nodes()->create(); + input_transpose->a(fully_connected->weights()); + input_transpose->perm(perm); + + auto bcq_fc = g->nodes()->create(); + + assert(dynamic_cast(fully_connected->bias()) != nullptr); + + bcq_fc->op_version(1); + bcq_fc->weights_scales(alpha(g, prefix)); + bcq_fc->weights_binary(packed_binary_code(g, prefix)); + bcq_fc->bias(fully_connected->bias()); + bcq_fc->weights_clusters(packed_clusters(g, prefix)); + bcq_fc->fusedActivationFunction(fully_connected->fusedActivationFunction()); + + bcq_fc->weights_hidden_size(weights_as_input->dim(1).value()); + bcq_fc->input(input_transpose); + loco::replace(fully_connected).with(bcq_fc); + + return true; } } } @@ -268,6 +363,19 @@ private: _dequant_weight[prefix] = const_node; } + int32_t get_prefix_of_const(luci::CircleConst *w_after) + { + for (auto n : _fusable_op) + { + auto prefix = n.first; + auto w_before = loco::must_cast(n.second); + if (is_fusable_const(w_before, w_after, _do_w_x[prefix]->at(0))) + return prefix; + } + + return -1; + } + bool is_bcqinfo_valid() { LOGGER(l); @@ -332,6 +440,16 @@ private: } } + for (auto n : _fusable_op) + { + // fusable_op should be FLOAT32 type + if (n.second->dtype() != loco::DataType::FLOAT32) + { + WARN(l) << "FuseBCQPass : fusable_op has wrong type" << std::endl; + return false; + } + } + // As dequant_weight is not used for fusing, skip validation. return true; @@ -377,12 +495,50 @@ private: return false; } + if (_fusable_op.find(prefix) == _fusable_op.end()) + { + WARN(l) << "fusable_op is not found" << std::endl; + return false; + } + // As dequant_weight is not used for fusing, skip validation. 
return true; } private: + luci::CircleConst *alpha(loco::Graph *graph, int32_t prefix) + { + auto new_alpha = graph->nodes()->create(); + + new_alpha->dtype(loco::DataType::FLOAT32); + new_alpha->size(_alpha[prefix]->size()); + new_alpha->rank(1); + new_alpha->dim(0) = _alpha[prefix]->dim(0); + for (uint32_t i = 0; i < _alpha[prefix]->size(); ++i) + new_alpha->at(i) = _alpha[prefix]->at(i); + new_alpha->shape_status(luci::ShapeStatus::VALID); + + return new_alpha; + } + + luci::CircleConst *packed_binary_code(loco::Graph *graph, int32_t prefix) + { + auto new_beta = graph->nodes()->create(); + + new_beta->dtype(loco::DataType::S32); + new_beta->size(_packed_binary_code[prefix]->size()); + new_beta->rank(2); + new_beta->dim(0) = _packed_binary_code[prefix]->dim(0); + new_beta->dim(1) = _packed_binary_code[prefix]->dim(1); + for (uint32_t i = 0; i < _packed_binary_code[prefix]->size(); ++i) + new_beta->at(i) = + _packed_binary_code[prefix]->at(i); + new_beta->shape_status(luci::ShapeStatus::VALID); + + return new_beta; + } + luci::CircleConst *packed_clusters(loco::Graph *graph, int32_t prefix) { auto qbits_of_clusters = _qbits_of_clusters[prefix]; @@ -428,15 +584,17 @@ private: namespace luci { -bool FuseBCQPass::run(loco::Graph *g) +bool FuseBCQPass::run(luci::Module *m) { bool changed = false; const int32_t start_magicnum = -2e9 + 27; const int32_t end_magicnum = 2e9 - 27; + loco::Graph *main_graph = m->graph(0); + luci::CircleConst *metadata_node = nullptr; - for (auto node : loco::output_nodes(g)) + for (auto node : loco::output_nodes(main_graph)) { auto output_node = loco::must_cast(node); @@ -474,8 +632,11 @@ bool FuseBCQPass::run(loco::Graph *g) const auto bundle_cnt = metadata_node->at(3); BCQFuser<1> fuser{original_output_cnt, bundle_cnt}; - if (fuser.fuseBCQ(g)) - changed = true; + fuser.register_bcq_info(main_graph); + + for (size_t g = 0; g < m->size(); ++g) + if (fuser.fuseBCQ(m->graph(g))) + changed = true; } else { @@ -486,12 +647,12 @@ bool FuseBCQPass::run(loco::Graph *g) // Remove all of BCQ information nodes iff there is no change if (changed == false) { - for (auto node : loco::output_nodes(g)) + for (auto node : loco::output_nodes(main_graph)) { auto output_node = loco::must_cast(node); if (output_node->index() == 0 || (int)output_node->index() > original_output_cnt) { - auto noOp = g->nodes()->create(); + auto noOp = main_graph->nodes()->create(); noOp->dtype(loco::DataType::FLOAT32); // TODO Remove this setting output_node->from(noOp); changed = true; @@ -503,4 +664,10 @@ bool FuseBCQPass::run(loco::Graph *g) return changed; } +bool FuseBCQPass::run(loco::Graph *) +{ + // Do nothing for graph + return false; +} + } // namespace luci diff --git a/compiler/luci/pass/src/MigrateLegacyShapeDtypePass.cpp b/compiler/luci/pass/src/MigrateLegacyShapeDtypePass.cpp new file mode 100644 index 0000000..beb962a --- /dev/null +++ b/compiler/luci/pass/src/MigrateLegacyShapeDtypePass.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/MigrateLegacyShapeDtypePass.h" + +#include +#include + +#include + +#include + +namespace +{ + +bool has_same_shape(luci::CircleNode *node, loco::TensorShape shape) +{ + if (node->rank() != shape.rank()) + return false; + + for (uint32_t i = 0; i < shape.rank(); ++i) + if (!(node->dim(i) == shape.dim(i))) + return false; + + return true; +} + +} // namespace + +namespace luci +{ + +bool MigrateLegacyShapeDtypePass::run(luci::Module *m) +{ + bool changed = false; + + for (size_t g = 0; g < m->size(); ++g) + { + if (run(m->graph(g))) + changed = true; + } + + return changed; +} + +bool MigrateLegacyShapeDtypePass::run(loco::Graph *g) +{ + bool changed = false; + + for (auto node : loco::all_nodes(g)) + { + auto circle_node = loco::must_cast(node); + if (loco::shape_known(node)) + { + auto loco_shape = loco::shape_get(node).as(); + + assert(circle_node->shape_signature().rank() == 0 || + circle_node->shape_signature().rank() == loco_shape.rank()); + + // When shape of loco is copied to circle node, ShapeSignature should be applied. + loco::TensorShape new_shape; + new_shape.rank(loco_shape.rank()); + for (uint32_t i = 0; i < loco_shape.rank(); ++i) + { + if (circle_node->shape_signature().rank() > 0 && + circle_node->shape_signature().dim(i) == -1) + new_shape.dim(i) = 1; + else + new_shape.dim(i) = loco_shape.dim(i); + } + + if (circle_node->shape_status() == luci::ShapeStatus::UNDEFINED || + !has_same_shape(circle_node, new_shape)) + { + circle_node->rank(new_shape.rank()); + for (uint32_t i = 0; i < new_shape.rank(); ++i) + circle_node->dim(i) = new_shape.dim(i); + + if (circle_node->shape_status() == luci::ShapeStatus::UNDEFINED) + circle_node->shape_status(luci::ShapeStatus::VALID); + + changed = true; + } + } + + if (loco::dtype_known(node)) + { + if (loco::dtype_get(node) != circle_node->dtype()) + { + circle_node->dtype(loco::dtype_get(node)); + changed = true; + } + } + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/ModulePhase.cpp b/compiler/luci/pass/src/ModulePhase.cpp new file mode 100644 index 0000000..46819a0 --- /dev/null +++ b/compiler/luci/pass/src/ModulePhase.cpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ModulePhase.h" + +namespace luci +{ + +void PhaseRunner::run(const Phase &phase) const +{ + notifyPhaseBegin(); + + for (bool changed = true; changed;) + { + changed = false; + + for (auto &pass : phase) + { + notifyPassBegin(pass.get()); + + bool pass_changed = pass->run(_module); + changed = changed || pass_changed; + + notifyPassEnd(pass.get(), pass_changed); + } + } + + notifyPhaseEnd(); +} + +void PhaseRunner::run(const Phase &phase) const +{ + notifyPhaseBegin(); + + for (bool changed = true; changed;) + { + changed = false; + + for (auto &pass : phase) + { + notifyPassBegin(pass.get()); + + bool pass_changed = pass->run(_module); + changed = changed || pass_changed; + + notifyPassEnd(pass.get(), pass_changed); + + if (changed) + { + break; + } + } + } + + notifyPhaseEnd(); +} + +} // namespace luci diff --git a/compiler/luci/pass/src/ModulePhase.h b/compiler/luci/pass/src/ModulePhase.h new file mode 100644 index 0000000..05966cc --- /dev/null +++ b/compiler/luci/pass/src/ModulePhase.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MODULE_PHASE_H__ +#define __MODULE_PHASE_H__ + +#include + +#include + +#include + +namespace luci +{ + +using Phase = std::vector>; + +template class PhaseRunner; + +template <> +class PhaseRunner final : public logo::PhaseRunnerMixinObservable +{ +public: + PhaseRunner(luci::Module *module) : _module{module} + { + // DO NOTHING + } + +public: + void run(const Phase &) const; + +private: + luci::Module *_module; +}; + +template <> +class PhaseRunner final : public logo::PhaseRunnerMixinObservable +{ +public: + PhaseRunner(luci::Module *module) : _module{module} + { + // DO NOTHING + } + +public: + void run(const Phase &) const; + +private: + luci::Module *_module; +}; + +} // namespace luci + +#endif // __MODULE_PHASE_H__ diff --git a/compiler/luci/pass/src/ProgressReporter.cpp b/compiler/luci/pass/src/ProgressReporter.cpp index dcf47ab..515739d 100644 --- a/compiler/luci/pass/src/ProgressReporter.cpp +++ b/compiler/luci/pass/src/ProgressReporter.cpp @@ -81,4 +81,46 @@ void ProgressReporter::notify(const logo::PhaseEventInfo *) +{ + LOGGER(prime); + + INFO(prime) << "=============================================================="; + INFO(prime) << "ModulePhaseRunner<" << to_str(strategy()) << ">"; + INFO(prime) << "Initial graphs"; + for (size_t g = 0; g < module()->size(); ++g) + { + INFO(prime) << "graphs #" << g; + INFO(prime) << luci::fmt(module()->graph(g)); + } +} + +void ModuleProgressReporter::notify(const logo::PhaseEventInfo *) +{ + LOGGER(prime); + + INFO(prime) << "ModulePhaseRunner<" << to_str(strategy()) << "> - done"; +} + +void ModuleProgressReporter::notify(const logo::PhaseEventInfo *info) +{ + LOGGER(prime); + + INFO(prime) << "--------------------------------------------------------------"; + INFO(prime) << "Before " << logo::pass_name(info->pass()); +} + +void ModuleProgressReporter::notify(const 
logo::PhaseEventInfo *info) +{ + LOGGER(prime); + + INFO(prime) << "After " << logo::pass_name(info->pass()) + << " (changed: " << to_char(info->changed()) << ")"; + for (size_t g = 0; g < module()->size(); ++g) + { + INFO(prime) << "graphs #" << g; + INFO(prime) << luci::fmt(module()->graph(g)); + } +} + } // namespace luci diff --git a/compiler/luci/pass/src/ProgressReporter.h b/compiler/luci/pass/src/ProgressReporter.h index bd2ba98..cf30da7 100644 --- a/compiler/luci/pass/src/ProgressReporter.h +++ b/compiler/luci/pass/src/ProgressReporter.h @@ -21,6 +21,8 @@ #include +#include + namespace luci { @@ -48,6 +50,30 @@ private: logo::PhaseStrategy _strategy; }; +class ModuleProgressReporter : public logo::PhaseEventListener +{ +public: + ModuleProgressReporter(luci::Module *module, logo::PhaseStrategy strategy) + : _module{module}, _strategy{strategy} + { + // DO NOTHING + } + +public: + void notify(const logo::PhaseEventInfo *) override; + void notify(const logo::PhaseEventInfo *) override; + void notify(const logo::PhaseEventInfo *) override; + void notify(const logo::PhaseEventInfo *) override; + +public: + luci::Module *module(void) const { return _module; } + logo::PhaseStrategy strategy(void) const { return _strategy; } + +private: + luci::Module *_module; + logo::PhaseStrategy _strategy; +}; + } // namespace luci #endif // __LUCI_PROGRESSREPORTER_H__ diff --git a/compiler/luci/pass/src/PropagateQuantParamPass.cpp b/compiler/luci/pass/src/PropagateQuantParamPass.cpp new file mode 100644 index 0000000..af83cd8 --- /dev/null +++ b/compiler/luci/pass/src/PropagateQuantParamPass.cpp @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/PropagateQuantParamPass.h" + +#include +#include +#include + +#include + +namespace +{ + +bool copy_qparam(luci::CircleQuantParam *src, luci::CircleQuantParam *dst) +{ + assert(src->scale.size() == dst->scale.size()); + assert(src->zerop.size() == dst->zerop.size()); + + // src and dst have the same qparam + if (std::equal(src->scale.begin(), src->scale.end(), dst->scale.begin()) && + std::equal(src->zerop.begin(), src->zerop.end(), dst->zerop.begin()) && + src->quantized_dimension == dst->quantized_dimension) + return false; + + dst->scale.assign(src->scale.begin(), src->scale.end()); + dst->zerop.assign(src->zerop.begin(), src->zerop.end()); + dst->quantized_dimension = src->quantized_dimension; + return true; +} + +bool copy_qparam(luci::CircleNode *src, luci::CircleNode *dst) +{ + // Skip nodes that do not have quantparams + auto src_qparam = src->quantparam(); + if (not src_qparam) + return false; + + auto dst_qparam = dst->quantparam(); + if (not dst_qparam) + return false; + + return copy_qparam(src_qparam, dst_qparam); +} + +// Visitor to propagate quantization parameters +struct PropagateQuantParam final : public luci::CircleNodeMutableVisitor +{ + PropagateQuantParam() = default; + + bool visit(luci::CircleNode *) { return false; } + + bool visit(luci::CircleReshape *node) + { + auto input = node->tensor(); + if (loco::succs(input).size() != 1) + return false; + + auto input_node = loco::must_cast(input); + return copy_qparam(node, input_node); + } + + // TODO : Add more Ops (e.g., Transpose) +}; + +} // namespace + +namespace luci +{ + +bool PropagateQuantParamPass::run(loco::Graph *g) +{ + bool changed = false; + LOGGER(l); + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto circle_node = loco::must_cast(node); + INFO(l) << "PropagateQuantParamPass visit node: " << circle_node->name() << std::endl; + + PropagateQuantParam pqp; + changed = circle_node->accept(&pqp); + if (changed) + break; + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/PropagateQuantParamPass.test.cpp b/compiler/luci/pass/src/PropagateQuantParamPass.test.cpp new file mode 100644 index 0000000..15adbfc --- /dev/null +++ b/compiler/luci/pass/src/PropagateQuantParamPass.test.cpp @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/PropagateQuantParamPass.h" + +#include + +#include + +namespace +{ + +void addQuantParam(luci::CircleNode *node, const std::vector &scale, + const std::vector &zp) +{ + assert(node->quantparam() == nullptr); + + auto quantparam = std::make_unique(); + quantparam->scale = scale; + quantparam->zerop = zp; + node->quantparam(std::move(quantparam)); +} + +/** + * Simple graph for test + * + * BEFORE + * + * [Conv] (qparam 1) + * | + * [Reshape] (qparam 2) + * + * AFTER + * + * [Conv] (qparam 2) + * | + * [Reshape] (qparam 2) + * + */ +class SimpleGraph +{ +public: + SimpleGraph() + { + input = g.nodes()->create(); + conv = g.nodes()->create(); + reshape = g.nodes()->create(); + output = g.nodes()->create(); + + auto graph_input = g.inputs()->create(); + input->index(graph_input->index()); + auto graph_output = g.outputs()->create(); + output->index(graph_output->index()); + + addQuantParam(conv, {0.1, 0.2, 0.3}, {0, 10, 20}); + addQuantParam(reshape, {0.2, 0.4, 0.6}, {-10, 0, 10}); + + conv->input(input); + reshape->tensor(conv); + output->from(reshape); + } + +public: + loco::Graph g; + luci::CircleInput *input; + luci::CircleConv2D *conv; + luci::CircleReshape *reshape; + luci::CircleOutput *output; +}; + +} // namespace + +TEST(PropagateQuantParam, simple) +{ + SimpleGraph g; + + luci::PropagateQuantParamPass pass; + while (pass.run(&g.g)) + ; + + EXPECT_FLOAT_EQ(0.2, g.conv->quantparam()->scale[0]); + EXPECT_FLOAT_EQ(0.4, g.conv->quantparam()->scale[1]); + EXPECT_FLOAT_EQ(0.6, g.conv->quantparam()->scale[2]); + EXPECT_EQ(-10, g.conv->quantparam()->zerop[0]); + EXPECT_EQ(0, g.conv->quantparam()->zerop[1]); + EXPECT_EQ(10, g.conv->quantparam()->zerop[2]); +} + +TEST(PropagateQuantParam, wrong_op_NEG) +{ + SimpleGraph g; + g.output->from(g.conv); + g.reshape->drop(); + + luci::PropagateQuantParamPass pass; + while (pass.run(&g.g)) + ; + + EXPECT_FLOAT_EQ(0.1, g.conv->quantparam()->scale[0]); + EXPECT_FLOAT_EQ(0.2, g.conv->quantparam()->scale[1]); + EXPECT_FLOAT_EQ(0.3, g.conv->quantparam()->scale[2]); + EXPECT_EQ(0, g.conv->quantparam()->zerop[0]); + EXPECT_EQ(10, g.conv->quantparam()->zerop[1]); + EXPECT_EQ(20, g.conv->quantparam()->zerop[2]); +} diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp index 0ecab00..f6eebe3 100644 --- a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp +++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp @@ -86,6 +86,100 @@ void quant_const_values(luci::CircleConst *const_node, float scaling_factor, flo } } +// Quantize const per channel +// +// The last dimension of const is the same as the dimension of channel +// And the rest of the const dimensions should be 1 +// So, a 'single value' is quantized per channel +// +// Quantization spec (f: fp value, q: quantized value) +// +// uint8 +// Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0] +// Negative f: f = (-f) * (q - 1) [q = 0, scale = -f, zp = 1] +// +// int16 +// Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0] +// Negative f: f = (-f) * (q - 0) [q = -1, scale = -f, zp = 0] +void quant_const_per_channel(CircleConst *node, loco::DataType quant_type) +{ + assert(node->dtype() == loco::DataType::FLOAT32); + assert(node->rank() > 0); + + for (uint32_t i = 0; i < node->rank() - 1; i++) + { + // Caller should call this function when the below condition is satisfied + if (node->dim(i).value() != 1) + throw std::runtime_error("Non-channel dimension of const node must be 1"); + } + + uint32_t size = node->size(); 
+ assert(size == node->dim(node->rank() - 1).value()); + + auto quantparam = std::make_unique(); + quantparam->quantized_dimension = node->rank() - 1; + std::vector quantized_data(size); + + for (uint32_t i = 0; i < size; ++i) + { + auto data = node->at(i); + if (quant_type == loco::DataType::U8) + { + if (data >= 0) + { + quantparam->scale.push_back(data); + quantparam->zerop.push_back(0); + quantized_data[i] = 1; + } + else + { + quantparam->scale.push_back(-data); + quantparam->zerop.push_back(1); + quantized_data[i] = 0; + } + } + else if (quant_type == loco::DataType::S16) + { + if (data >= 0) + { + quantparam->scale.push_back(data); + quantized_data[i] = 1; + } + else + { + quantparam->scale.push_back(-data); + quantized_data[i] = -1; + } + quantparam->zerop.push_back(0); + } + } + node->quantparam(std::move(quantparam)); + + switch (quant_type) + { + case loco::DataType::U8: + node->dtype(loco::DataType::U8); + node->size(size); + for (uint32_t i = 0; i < size; ++i) + { + assert(quantized_data[i] == 0 || quantized_data[i] == 1); + node->at(i) = quantized_data[i]; + } + break; + case loco::DataType::S16: + node->dtype(loco::DataType::S16); + node->size(size); + for (uint32_t i = 0; i < size; ++i) + { + assert(quantized_data[i] == -1 || quantized_data[i] == 1); + node->at(i) = quantized_data[i]; + } + break; + default: + throw std::runtime_error("Unsupported data type"); + } +} + void quant_const(CircleConst *node, loco::DataType quant_type) { assert(node->dtype() == loco::DataType::FLOAT32); @@ -612,10 +706,51 @@ struct QuantizeWeights final : public luci::CircleNodeMutableVisitor } }; +void quant_instnorm(luci::CircleInstanceNorm *node, loco::DataType output_type, + QuantizationGranularity granularity) +{ + auto gamma = loco::must_cast(node->gamma()); + auto beta = loco::must_cast(node->beta()); + assert(gamma->dtype() == loco::DataType::FLOAT32); + assert(beta->dtype() == loco::DataType::FLOAT32); + + if (granularity == QuantizationGranularity::LayerWise) + { + quant_const(gamma, output_type); + quant_const(beta, output_type); + } + else if (granularity == QuantizationGranularity::ChannelWise) + { + quant_const_per_channel(gamma, output_type); + quant_const_per_channel(beta, output_type); + } + else + throw std::runtime_error("Quantization granularity must be either 'layer' or 'channel'"); +} + +void quant_prelu(luci::CirclePRelu *node, loco::DataType output_type, + QuantizationGranularity granularity) +{ + auto alpha = loco::must_cast(node->alpha()); + assert(alpha->dtype() == loco::DataType::FLOAT32); + + if (granularity == QuantizationGranularity::LayerWise) + { + quant_const(alpha, output_type); + } + else if (granularity == QuantizationGranularity::ChannelWise) + { + quant_const_per_channel(alpha, output_type); + } + else + throw std::runtime_error("Quantization granularity must be either 'layer' or 'channel'"); +} + /** * @brief Quantize const input tensors using min/max of const values */ -void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type) +void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type, + QuantizationGranularity granularity) { auto opcode = node->opcode(); auto arity = node->arity(); @@ -660,20 +795,26 @@ void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type) quant_const(const_node, output_type); break; + case luci::CircleOpcode::INSTANCE_NORM: + quant_instnorm(loco::must_cast(node), output_type, granularity); + break; + + case luci::CircleOpcode::PRELU: + quant_prelu(loco::must_cast(node), 
output_type, granularity); + break; + case luci::CircleOpcode::ADD: case luci::CircleOpcode::ADD_N: case luci::CircleOpcode::DIV: case luci::CircleOpcode::EQUAL: case luci::CircleOpcode::GREATER: case luci::CircleOpcode::GREATER_EQUAL: - case luci::CircleOpcode::INSTANCE_NORM: case luci::CircleOpcode::LESS: case luci::CircleOpcode::LESS_EQUAL: case luci::CircleOpcode::MAXIMUM: case luci::CircleOpcode::MINIMUM: case luci::CircleOpcode::MUL: case luci::CircleOpcode::NOT_EQUAL: - case luci::CircleOpcode::PRELU: case luci::CircleOpcode::SUB: // Quantize all const inputs using their values for (uint32_t i = 0; i < arity; i++) @@ -817,7 +958,7 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g) for (auto node : loco::active_nodes(loco::output_nodes(g))) { auto circle_node = loco::must_cast(node); - quantize_const_inputs(circle_node, _output_dtype); + quantize_const_inputs(circle_node, _output_dtype, _granularity); } // Propagate quantization parameters of concat Op diff --git a/compiler/luci/pass/src/RemoveRedundantTranspose.cpp b/compiler/luci/pass/src/RemoveRedundantTranspose.cpp new file mode 100644 index 0000000..33cb765 --- /dev/null +++ b/compiler/luci/pass/src/RemoveRedundantTranspose.cpp @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/RemoveRedundantTransposePass.h" + +#include + +namespace +{ + +/// @brief Return true if first_perm[second_perm[i]] == i +bool check_perm(const luci::CircleConst *first_perm, const luci::CircleConst *second_perm) +{ + assert(first_perm->rank() == 1); + assert(second_perm->rank() == 1); + assert(second_perm->size() == first_perm->size()); + for (int32_t i = 0; i < static_cast(first_perm->size()); i++) + { + if (first_perm->at(second_perm->at(i)) != i) + return false; + } + return true; +} + +bool remove_consecutive_transpose_function(luci::CircleNode *node) +{ + auto target_node = dynamic_cast(node); + if (target_node == nullptr) + return false; + auto pred_node = dynamic_cast(target_node->a()); + if (pred_node == nullptr) + return false; + if (loco::succs(pred_node).size() != 1) + return false; + + auto pred_perm = dynamic_cast(target_node->perm()); + if (pred_perm == nullptr) + return false; + + auto main_perm = dynamic_cast(pred_node->perm()); + if (main_perm == nullptr) + return false; + + auto main_node = loco::must_cast(pred_node->a()); + if (check_perm(pred_perm, main_perm)) + { + replace(node).with(main_node); + } + else + { + auto g = main_perm->graph(); + auto new_const_node = g->nodes()->create(); + + new_const_node->dtype(loco::DataType::S32); + new_const_node->rank(1); + new_const_node->dim(0) = main_perm->dim(0); + new_const_node->size(main_perm->dim(0).value()); + new_const_node->shape_status(luci::ShapeStatus::VALID); + for (uint32_t i = 0; i < main_perm->size(); i++) + { + new_const_node->at(i) = + pred_perm->at(main_perm->at(i)); + } + pred_node->perm(new_const_node); + replace(node).with(pred_node); + } + return true; +} + +} // namespace + +namespace luci +{ +/** + * BEFORE + * | + * [CircleNode] [CircleConst] + * (main_node) (main_perm) + * \ / + * [CircleTranspose] [CircleConst] + * (pred_node) (pred_perm) + * \ / + * [CircleTranspose] + * (target_node) + * | + * + * AFTER + * + * + * | | | + * [CircleNode] [CircleConst] | + * (main_node) (new_const_node) | + * \ / or [CircleNode] + * [CircleTranspose] (main_node) + * (pred_node) | + * | | + * + */ +bool RemoveRedundantTransposePass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto circle_node = loco::must_cast(node); + if (remove_consecutive_transpose_function(circle_node)) + { + changed = true; + break; + } + } + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/RemoveRedundantTranspose.test.cpp b/compiler/luci/pass/src/RemoveRedundantTranspose.test.cpp new file mode 100644 index 0000000..db608b6 --- /dev/null +++ b/compiler/luci/pass/src/RemoveRedundantTranspose.test.cpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "luci/Pass/RemoveRedundantTransposePass.h" + +#include + +#include + +#include + +namespace +{ + +void setValue(luci::CircleConst *node, const std::vector &v) +{ + node->dtype(loco::DataType::S32); + node->size(v.size()); + node->rank(1); + node->dim(0).set(v.size()); + for (int i = 0; i < v.size(); ++i) + { + node->at(i) = v[i]; + } +} + +/** + * Type1 + * BEFORE + * | + * [CircleNode] [CircleConst] + * \ / + * [CircleTranspose] [CircleConst] + * \ / + * [CircleTranspose] + * | + * + * AFTER + * | + * [CircleNode] + * | Remove Both + * + * -------------------------------------------- + * + * Type2 + * BEFORE + * | + * [CircleNode] [CircleConst] + * \ / + * [CircleTranspose] [CircleConst] + * \ / + * [CircleTranspose] + * | + * + * AFTER + * | | + * [CircleNode] [CircleConst] + * \ / + * [CircleTranspose] + * | + * + */ +void create_redundunt_transpose(loco::Graph *g, const std::vector &perm1, + const std::vector &perm2) +{ + assert(g); + + auto input = g->nodes()->create(); + auto graph_input = g->inputs()->create(); + input->index(graph_input->index()); + + // Create perm1 + auto perm1_node = g->nodes()->create(); + setValue(perm1_node, perm1); + + auto transpose1 = g->nodes()->create(); + transpose1->dtype(loco::DataType::FLOAT32); + transpose1->a(input); + transpose1->perm(perm1_node); + + // Create perm2 + auto perm2_node = g->nodes()->create(); + setValue(perm2_node, perm2); + + auto transpose2 = g->nodes()->create(); + transpose2->dtype(loco::DataType::FLOAT32); + transpose2->a(transpose1); + transpose2->perm(perm2_node); + + // Output + auto output = g->nodes()->create(); + output->from(transpose2); + auto graph_output = g->outputs()->create(); + output->index(graph_output->index()); +} + +} // namespace + +TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type1) +{ + auto graph = loco::make_graph(); + create_redundunt_transpose(graph.get(), {1, 0, 2, 3}, {1, 0, 2, 3}); + + luci::RemoveRedundantTransposePass pass; + while (pass.run(graph.get())) + ; + luci::CircleTranspose *transpose_node = nullptr; + for (auto node : loco::active_nodes(loco::output_nodes(graph.get()))) + { + auto trans = dynamic_cast(node); + if (not trans) + continue; + transpose_node = trans; + break; + } + // No transpose node is in graph. + ASSERT_EQ(nullptr, transpose_node); +} + +TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type2) +{ + auto graph = loco::make_graph(); + create_redundunt_transpose(graph.get(), {0, 1, 3, 2}, {1, 0, 2, 3}); + + luci::RemoveRedundantTransposePass pass; + while (pass.run(graph.get())) + ; + luci::CircleTranspose *transpose_node = nullptr; + for (auto node : loco::active_nodes(loco::output_nodes(graph.get()))) + { + auto trans = dynamic_cast(node); + if (not trans) + continue; + transpose_node = trans; + break; + } + // Just one transpose node, with updated perm constant. + ASSERT_NE(nullptr, transpose_node); + auto perm = loco::must_cast(transpose_node->perm()); + ASSERT_EQ(1, perm->at(0)); + ASSERT_EQ(0, perm->at(1)); + ASSERT_EQ(3, perm->at(2)); + ASSERT_EQ(2, perm->at(3)); +} diff --git a/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.cpp b/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.cpp new file mode 100644 index 0000000..7096c25 --- /dev/null +++ b/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.cpp @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h" + +#include + +namespace +{ + +luci::CircleConst *create_weights_from_gamma(luci::CircleConst *gamma) +{ + assert(gamma->rank() == 1); + auto channel_size = gamma->dim(0).value(); + + // Channel-wise MUL is the same as DEPTHWISE_CONV2D with filter shape (1,1,1,channel_size) + auto weights = gamma->graph()->nodes()->create(); + weights->dtype(loco::DataType::FLOAT32); + weights->rank(4); + weights->dim(0).set(1); + weights->dim(1).set(1); + weights->dim(2).set(1); + weights->dim(3).set(channel_size); + weights->shape_status(luci::ShapeStatus::VALID); + weights->size(channel_size); + for (uint32_t i = 0; i < channel_size; i++) + { + weights->at(i) = gamma->at(i); + } + + return weights; +} + +luci::CircleConst *create_bias_from_beta(luci::CircleConst *beta) +{ + assert(beta->rank() == 1); + auto channel_size = beta->dim(0).value(); + + // Channel-wise ADD is the same as bias (shape = (channel_size)) of DEPTHWISE_CONV2D + auto bias = beta->graph()->nodes()->create(); + bias->dtype(loco::DataType::FLOAT32); + bias->rank(1); + bias->dim(0).set(channel_size); + bias->size(channel_size); + bias->shape_status(luci::ShapeStatus::VALID); + for (uint32_t i = 0; i < channel_size; i++) + { + bias->at(i) = beta->at(i); + } + + return bias; +} + +bool is_batchnorm_add(const luci::CircleAdd *add, luci::CircleMul *&mul, luci::CircleConst *&beta) +{ + auto x = loco::must_cast(add->x()); + auto y = loco::must_cast(add->y()); + + luci::CircleMul *pred = nullptr; + luci::CircleConst *constant = nullptr; + + if (x->opcode() == luci::CircleOpcode::CIRCLECONST && y->opcode() == luci::CircleOpcode::MUL) + { + pred = loco::must_cast(y); + constant = loco::must_cast(x); + } + else if (x->opcode() == luci::CircleOpcode::MUL && y->opcode() == luci::CircleOpcode::CIRCLECONST) + { + pred = loco::must_cast(x); + constant = loco::must_cast(y); + } + else + { + return false; + } + + if (constant->rank() != 1) + return false; + + auto channel_dim = constant->dim(0); + // Assumption: Layout is channel-last + if (!(channel_dim == add->dim(add->rank() - 1))) + return false; + + mul = pred; + beta = constant; + return true; +} + +// Check if mul is batchnorm mul +bool is_batchnorm_mul(const luci::CircleMul *mul, luci::CircleNode *&pred_node, + luci::CircleConst *&gamma) +{ + auto x = dynamic_cast(mul->x()); + auto y = dynamic_cast(mul->y()); + + luci::CircleNode *pred = nullptr; + luci::CircleConst *constant = nullptr; + + if (x != nullptr && y == nullptr) + { + pred = loco::must_cast(mul->y()); + constant = x; + } + else if (x == nullptr && y != nullptr) + { + pred = loco::must_cast(mul->x()); + constant = y; + } + else + { + return false; + } + + if (constant->rank() != 1) + return false; + + auto channel_dim = constant->dim(0); + if (!(channel_dim == mul->dim(mul->rank() - 1))) + return false; + + pred_node = pred; + gamma = constant; + return true; +} + +/** + * Replace channel-wise 
Mul/Add with DepthwiseConv2D + * + * BEFORE + * + * [Node] [gamma] + * | / + * [Mul] [beta] + * | / + * [Add] + * + * AFTER + * + * [Node] [weights] [bias] + * \ / / + * [DepthwiseConv2D] + */ +bool replace_mul_add_with_dwconv(luci::CircleAdd *add) +{ + luci::CircleNode *pred_node = nullptr; + luci::CircleMul *mul = nullptr; + luci::CircleConst *beta = nullptr; + luci::CircleConst *gamma = nullptr; + + if (!is_batchnorm_add(add, mul, beta)) + return false; + + if (loco::succs(mul).size() != 1) + return false; + + if (!is_batchnorm_mul(mul, pred_node, gamma)) + return false; + + if (pred_node->rank() != 4) + return false; + + if (pred_node->dtype() != loco::DataType::FLOAT32 || beta->dtype() != loco::DataType::FLOAT32 || + gamma->dtype() != loco::DataType::FLOAT32) + return false; + + auto weights = create_weights_from_gamma(gamma); + auto bias = create_bias_from_beta(beta); + + auto dwconv = add->graph()->nodes()->create(); + dwconv->input(pred_node); + dwconv->filter(weights); + dwconv->bias(bias); + dwconv->padding(luci::Padding::SAME); + dwconv->stride()->w(1); + dwconv->stride()->h(1); + dwconv->depthMultiplier(1); + dwconv->dilation()->w(1); + dwconv->dilation()->h(1); + dwconv->fusedActivationFunction(add->fusedActivationFunction()); + + loco::replace(add).with(dwconv); + return true; +} + +} // namespace + +namespace luci +{ + +bool ReplaceMulAddWithDepthwiseConvPass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto add = dynamic_cast(node); + if (not add) + continue; + + if (replace_mul_add_with_dwconv(add)) + { + changed = true; + break; + } + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.test.cpp b/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.test.cpp new file mode 100644 index 0000000..a90182a --- /dev/null +++ b/compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.test.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
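// --- Illustrative sketch (not part of the diff, not the luci API) ---
// Why the rewrite above is value-preserving: a channel-wise Mul by gamma
// followed by a channel-wise Add of beta computes y[c] = x[c] * gamma[c] + beta[c],
// which is exactly a 1x1 DepthwiseConv2D whose (1, 1, 1, C) filter holds gamma
// and whose bias holds beta, applied with stride 1 and depth multiplier 1.
// The helpers below are standalone stand-ins operating on one pixel's channels.
#include <cstddef>
#include <vector>

inline std::vector<float> mul_add(const std::vector<float> &x, const std::vector<float> &gamma,
                                  const std::vector<float> &beta)
{
  std::vector<float> y(x.size());
  for (std::size_t c = 0; c < x.size(); ++c)
    y[c] = x[c] * gamma[c] + beta[c];
  return y;
}

inline std::vector<float> dwconv_1x1(const std::vector<float> &x, const std::vector<float> &filter,
                                     const std::vector<float> &bias)
{
  // With a 1x1 kernel each output pixel depends on one input pixel only, so
  // per channel the convolution reduces to x[c] * filter[c] + bias[c].
  return mul_add(x, filter, bias);
}
// mul_add(x, gamma, beta) == dwconv_1x1(x, gamma, beta) for every pixel.
// --- end sketch ---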
+ */ + +#include "luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h" + +#include + +#include + +namespace +{ + +/** + * Simple graph for test + * + * BEFORE + * + * [Node] [gamma] + * | / + * [Mul] [beta] + * | / + * [Add] + * + * AFTER + * + * [Node] [weights] [bias] + * \ / / + * [DepthwiseConv2D] + */ +class SimpleGraph +{ +public: + SimpleGraph() + { + input = g.nodes()->create(); + mul = g.nodes()->create(); + gamma = g.nodes()->create(); + add = g.nodes()->create(); + beta = g.nodes()->create(); + output = g.nodes()->create(); + + auto graph_input = g.inputs()->create(); + input->index(graph_input->index()); + auto graph_output = g.outputs()->create(); + output->index(graph_output->index()); + + input->dtype(loco::DataType::FLOAT32); + mul->dtype(loco::DataType::FLOAT32); + gamma->dtype(loco::DataType::FLOAT32); + add->dtype(loco::DataType::FLOAT32); + beta->dtype(loco::DataType::FLOAT32); + output->dtype(loco::DataType::FLOAT32); + + uint32_t channel_size = 16; + input->shape({1, 4, 4, channel_size}); + mul->shape({1, 4, 4, channel_size}); + gamma->shape({channel_size}); + add->shape({1, 4, 4, channel_size}); + beta->shape({channel_size}); + output->shape({1, 4, 4, channel_size}); + + gamma->size(channel_size); + beta->size(channel_size); + for (uint32_t i = 0; i < channel_size; i++) + { + gamma->at(i) = i; + beta->at(i) = i; + } + + mul->x(input); + mul->y(gamma); + add->x(mul); + add->y(beta); + output->from(add); + } + +public: + loco::Graph g; + luci::CircleInput *input = nullptr; + luci::CircleMul *mul = nullptr; + luci::CircleConst *gamma = nullptr; + luci::CircleAdd *add = nullptr; + luci::CircleConst *beta = nullptr; + luci::CircleOutput *output = nullptr; +}; + +} // namespace + +TEST(ReplaceMulAddWithDepthwiseConv, simple) +{ + SimpleGraph g; + + luci::ReplaceMulAddWithDepthwiseConvPass pass; + while (pass.run(&g.g)) + ; + + auto dwconv = dynamic_cast(g.output->from()); + EXPECT_NE(nullptr, dwconv); + + uint32_t channel_size = 16; + auto weights = dynamic_cast(dwconv->filter()); + auto bias = dynamic_cast(dwconv->bias()); + EXPECT_NE(nullptr, weights); + EXPECT_EQ(4, weights->rank()); + EXPECT_EQ(channel_size, weights->dim(3).value()); + EXPECT_NE(nullptr, bias); + EXPECT_EQ(1, bias->rank()); + EXPECT_EQ(channel_size, bias->dim(0).value()); + + for (int i = 0; i < channel_size; i++) + { + EXPECT_FLOAT_EQ(i, weights->at(i)); + EXPECT_FLOAT_EQ(i, bias->at(i)); + } +} + +TEST(ReplaceMulAddWithDepthwiseConv, wrong_op_NEG) +{ + SimpleGraph g; + // swap mul/add (changed to add->mul) + g.add->x(g.input); + loco::replace(g.add).with(g.mul); + g.mul->x(g.add); + + luci::ReplaceMulAddWithDepthwiseConvPass pass; + auto changed = pass.run(&g.g); + + EXPECT_EQ(false, changed); +} diff --git a/compiler/luci/pass/src/ShapeInferencePass.cpp b/compiler/luci/pass/src/ShapeInferencePass.cpp index f681b3d..4bd0aae 100644 --- a/compiler/luci/pass/src/ShapeInferencePass.cpp +++ b/compiler/luci/pass/src/ShapeInferencePass.cpp @@ -28,6 +28,19 @@ namespace luci { +bool ShapeInferencePass::run(luci::Module *m) +{ + bool changed = false; + + for (size_t g = 0; g < m->size(); ++g) + { + if (run(m->graph(g))) + changed = true; + } + + return changed; +} + bool ShapeInferencePass::run(loco::Graph *g) { loco::CanonicalShapeInferenceRule canonical_rule; diff --git a/compiler/luci/pass/src/ShapeSignatureInferencePass.cpp b/compiler/luci/pass/src/ShapeSignatureInferencePass.cpp new file mode 100644 index 0000000..115b77a --- /dev/null +++ b/compiler/luci/pass/src/ShapeSignatureInferencePass.cpp @@ -0,0 +1,63 
@@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/ShapeSignatureInferencePass.h" + +#include +#include + +#include + +namespace luci +{ + +bool ShapeSignatureInferencePass::run(luci::Module *m) +{ + bool changed = false; + + for (size_t g = 0; g < m->size(); ++g) + { + if (run(m->graph(g))) + changed = true; + } + + return changed; +} + +bool ShapeSignatureInferencePass::run(loco::Graph *g) +{ + luci::ssinf::Rule signature_inference_rule; + bool changed = false; + + for (auto node : loco::postorder_traversal(loco::output_nodes(g))) + { + luci::ShapeSignature shape_signature; + + auto circle_node = loco::must_cast(node); + if (signature_inference_rule.infer(circle_node, shape_signature)) + { + if (!(circle_node->shape_signature() == shape_signature)) + { + circle_node->shape_signature(shape_signature); + changed = true; + } + } + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/ShuffleWeightTo16x1Float32Pass.cpp b/compiler/luci/pass/src/ShuffleWeightTo16x1Float32Pass.cpp new file mode 100644 index 0000000..6a58f18 --- /dev/null +++ b/compiler/luci/pass/src/ShuffleWeightTo16x1Float32Pass.cpp @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h" + +#include + +#include +#include + +namespace +{ + +bool satisfy_precondition(luci::CircleFullyConnected *fc) +{ + // check if it's already been shuffled + if (fc->weights_format() != luci::CircleFullyConnected::WeightsFormat::DEFAULT) + return false; + + // check if its data type is FLOAT32 + if (fc->dtype() != loco::DataType::FLOAT32) + return false; + + auto weights = loco::must_cast(fc->weights()); + // rank must be 2 + if (weights->rank() != 2) + return false; + + // check if it has sparsity parameter + if (weights->sparsityparam()) + return false; + + // check if the number of row of FullyConnected's weight is a multiple of 16 + const uint32_t MULTIPLE = 16; + uint32_t rows = weights->dim(0).value(); + if (rows % MULTIPLE) + return false; + + return true; +} + +// get FullyConnected op vector that has same tensor +void get_FCs_having_same_tensor(std::vector &fc_vec, loco::Graph *g, + luci::CircleFullyConnected *fc) +{ + auto the_tensor = fc->weights(); + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto fc = dynamic_cast(node); + if (not fc) + continue; + + if (fc->weights() == the_tensor) + fc_vec.push_back(fc); + } +} + +luci::CircleConst *shuffle_weight(luci::CircleFullyConnected *fc) +{ + auto the_weights = loco::must_cast(fc->weights()); + + // create CircleConst where shuffled data will be stored + luci::CircleConst *new_weights = fc->graph()->nodes()->create(); + new_weights->dtype(loco::DataType::FLOAT32); + new_weights->size(the_weights->size()); + new_weights->rank(the_weights->rank()); + new_weights->shape_status(the_weights->shape_status()); + for (uint32_t r = 0; r < new_weights->rank(); r++) + { + new_weights->dim(r).set(the_weights->dim(r).value()); + } + + // suffle weight + const uint32_t MULTIPLE = 16; + const uint32_t rows = the_weights->dim(0).value(); + const uint32_t cols = the_weights->dim(1).value(); + const uint32_t r_step = rows / MULTIPLE; + uint32_t index = 0; + for (uint32_t r = 0; r < r_step; r++) + { + for (uint32_t c = 0; c < cols; c++) + { + for (uint32_t i = 0; i < MULTIPLE; i++) + { + new_weights->at(index++) = + the_weights->at((r * MULTIPLE + i) * cols + c); + } + } + } + + return new_weights; +} + +} // namespace + +namespace luci +{ + +bool ShuffleWeightTo16x1Float32Pass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto fc = dynamic_cast(node); + if (not fc) + continue; + + if (not satisfy_precondition(fc)) + continue; + + std::vector fc_vec; + get_FCs_having_same_tensor(fc_vec, g, fc); + auto new_weights = shuffle_weight(fc); + + // replace to new weights + for (const auto fc : fc_vec) + { + fc->weights(new_weights); + fc->weights_format(luci::CircleFullyConnected::WeightsFormat::SHUFFLED16x1FLOAT32); + } + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/ShuffleWeightTo16x1Float32Pass.test.cpp b/compiler/luci/pass/src/ShuffleWeightTo16x1Float32Pass.test.cpp new file mode 100644 index 0000000..9745e57 --- /dev/null +++ b/compiler/luci/pass/src/ShuffleWeightTo16x1Float32Pass.test.cpp @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h" + +#include + +#include + +void create_fc_net(loco::Graph *g) +{ + assert(g); + + const uint32_t ROW = 16; + const uint32_t COL = 2; + const uint32_t elements_num = ROW * COL; + + // input + auto input = g->nodes()->create(); + auto graph_input = g->inputs()->create(); + input->index(graph_input->index()); + + // fc weights + auto weights = g->nodes()->create(); + weights->dtype(loco::DataType::FLOAT32); + weights->size(elements_num); + weights->rank(2); + weights->dim(0).set(ROW); + weights->dim(1).set(COL); + for (uint32_t idx = 0; idx < elements_num; idx++) + { + weights->at(idx) = idx; + } + + // fc + auto fc = g->nodes()->create(); + fc->dtype(loco::DataType::FLOAT32); + fc->input(input); + fc->weights(weights); + + // output + auto output = g->nodes()->create(); + output->from(fc); + auto graph_output = g->outputs()->create(); + output->index(graph_output->index()); +} + +TEST(ShuffleWeightTo16x1Float32PassTest, SimpleTest1) +{ + auto graph = loco::make_graph(); + create_fc_net(graph.get()); + + luci::CircleFullyConnected *fc_node = nullptr; + for (auto node : loco::active_nodes(loco::output_nodes(graph.get()))) + { + auto fc = dynamic_cast(node); + if (not fc) + continue; + + fc_node = fc; + break; + } + ASSERT_NE(fc_node, nullptr); + auto weights = loco::must_cast(fc_node->weights()); + // before + ASSERT_EQ(0, weights->at(0)); + ASSERT_EQ(1, weights->at(1)); + ASSERT_EQ(2, weights->at(2)); + ASSERT_EQ(3, weights->at(3)); + ASSERT_EQ(4, weights->at(4)); + ASSERT_EQ(5, weights->at(5)); + ASSERT_EQ(6, weights->at(6)); + ASSERT_EQ(7, weights->at(7)); + ASSERT_EQ(8, weights->at(8)); + ASSERT_EQ(9, weights->at(9)); + ASSERT_EQ(10, weights->at(10)); + ASSERT_EQ(11, weights->at(11)); + ASSERT_EQ(12, weights->at(12)); + ASSERT_EQ(13, weights->at(13)); + ASSERT_EQ(14, weights->at(14)); + ASSERT_EQ(15, weights->at(15)); + + luci::ShuffleWeightTo16x1Float32Pass pass; + while (pass.run(graph.get())) + ; + + weights = loco::must_cast(fc_node->weights()); + // after + ASSERT_EQ(0, weights->at(0)); + ASSERT_EQ(2, weights->at(1)); + ASSERT_EQ(4, weights->at(2)); + ASSERT_EQ(6, weights->at(3)); + ASSERT_EQ(8, weights->at(4)); + ASSERT_EQ(10, weights->at(5)); + ASSERT_EQ(12, weights->at(6)); + ASSERT_EQ(14, weights->at(7)); + ASSERT_EQ(16, weights->at(8)); + ASSERT_EQ(18, weights->at(9)); + ASSERT_EQ(20, weights->at(10)); + ASSERT_EQ(22, weights->at(11)); + ASSERT_EQ(24, weights->at(12)); + ASSERT_EQ(26, weights->at(13)); + ASSERT_EQ(28, weights->at(14)); + ASSERT_EQ(30, weights->at(15)); +} diff --git a/compiler/luci/pass/src/SubstitutePackToReshapePass.cpp b/compiler/luci/pass/src/SubstitutePackToReshapePass.cpp new file mode 100644 index 0000000..44e974b --- /dev/null +++ b/compiler/luci/pass/src/SubstitutePackToReshapePass.cpp @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/SubstitutePackToReshapePass.h" + +#include + +namespace +{ + +bool substitute_pack_to_reshape(luci::CircleNode *node) +{ + auto target_node = dynamic_cast(node); + if (target_node == nullptr) + return false; + if (target_node->values_count() != 1) + return false; + auto value_node = loco::must_cast(target_node->values(0)); + if (value_node->shape_status() != luci::ShapeStatus::VALID) + return false; + int32_t axis = target_node->axis(); + if (axis < 0) + axis = axis + static_cast(value_node->rank()) + 1; + + auto graph = target_node->graph(); + auto reshape_node = graph->nodes()->create(); + reshape_node->tensor(value_node); + + auto const_node = graph->nodes()->create(); + const_node->dtype(loco::DataType::S32); + const_node->size(value_node->rank() + 1); + const_node->shape_status(luci::ShapeStatus::VALID); + const_node->rank(1); + const_node->dim(0).set(value_node->rank() + 1); + for (int32_t i = 0; i < static_cast(value_node->rank()) + 1; i++) + { + if (i == axis) + { + const_node->at(i) = 1; + } + else if (i < axis) + { + const_node->at(i) = value_node->dim(i).value(); + } + else + { + const_node->at(i) = value_node->dim(i - 1).value(); + } + } + reshape_node->shape(const_node); + replace(target_node).with(reshape_node); + return true; +} + +} // namespace + +namespace luci +{ + +/** + * BEFORE + * | + * [CircleNode] + * | + * [CirclePack] + * | + * [CircleNode] + * | + * + * AFTER + * | + * [CircleNode] [CircleConst] + * \ / + * [CircleReshape] + * | + * [CircleNode] + * | + * + */ +bool SubstitutePackToReshapePass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto circle_node = loco::must_cast(node); + if (substitute_pack_to_reshape(circle_node)) + { + changed = true; + } + } + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/SubstitutePackToReshapePass.test.cpp b/compiler/luci/pass/src/SubstitutePackToReshapePass.test.cpp new file mode 100644 index 0000000..143b888 --- /dev/null +++ b/compiler/luci/pass/src/SubstitutePackToReshapePass.test.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
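// --- Illustrative sketch (not part of the diff, not the luci API) ---
// The shape computation used by SubstitutePackToReshapePass above: packing a
// single tensor along `axis` is the same as reshaping it with an extra 1
// inserted at that axis (a negative axis is normalized against rank + 1 first).
// E.g. shape {1, 2, 3, 4} with axis 0 -> {1, 1, 2, 3, 4}; axis -1 -> {1, 2, 3, 4, 1},
// which matches the two tests below.
#include <cstdint>
#include <vector>

inline std::vector<int32_t> pack1_as_reshape(const std::vector<int32_t> &shape, int32_t axis)
{
  const int32_t rank = static_cast<int32_t>(shape.size());
  if (axis < 0)
    axis = axis + rank + 1;

  std::vector<int32_t> new_shape(shape.begin(), shape.end());
  new_shape.insert(new_shape.begin() + axis, 1);
  return new_shape;
}
// --- end sketch ---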
+ */ +#include "luci/Pass/SubstitutePackToReshapePass.h" + +#include + +#include + +namespace +{ + +/** + * BEFORE + * | + * [CircleNode] + * | + * [CirclePack] + * | + * [CircleNode] + * | + * + * AFTER + * | + * [CircleNode] [CircleConst] + * \ / + * [CircleReshape] + * | + * [CircleNode] + * | + * + */ +void create_substitute_pack_to_reshape(loco::Graph *g, const std::initializer_list shape, + int32_t axis) +{ + assert(g); + + // Input Create. + auto input = g->nodes()->create(); + auto graph_input = g->inputs()->create(); + input->index(graph_input->index()); + input->shape_status(luci::ShapeStatus::VALID); + input->rank(shape.size()); + input->shape(shape); + + // Pack Node create. + auto pack = g->nodes()->create(1); + pack->values(0, input); + pack->axis(axis); + + // Output Connect. + auto output = g->nodes()->create(); + output->from(pack); + auto graph_output = g->outputs()->create(); + output->index(graph_output->index()); + + return; +} + +} // namespace + +TEST(SubstitutePackToReshapePass, simple_case) +{ + auto graph = loco::make_graph(); + create_substitute_pack_to_reshape(graph.get(), {1, 2, 3, 4}, 0); + luci::SubstitutePackToReshapePass pass; + while (pass.run(graph.get())) + ; + luci::CircleReshape *reshape_node = nullptr; + luci::CirclePack *pack_node = nullptr; + for (auto node : loco::active_nodes(loco::output_nodes(graph.get()))) + { + if (auto reshape = dynamic_cast(node)) + reshape_node = reshape; + else if (auto pack = dynamic_cast(node)) + pack_node = pack; + } + ASSERT_NE(nullptr, reshape_node); + ASSERT_EQ(nullptr, pack_node); + auto new_shape = loco::must_cast(reshape_node->shape()); + ASSERT_EQ(1, new_shape->at(0)); + ASSERT_EQ(1, new_shape->at(1)); + ASSERT_EQ(2, new_shape->at(2)); + ASSERT_EQ(3, new_shape->at(3)); + ASSERT_EQ(4, new_shape->at(4)); +} + +TEST(SubstitutePackToReshapePass, simple_case_neg_axis) +{ + auto graph = loco::make_graph(); + create_substitute_pack_to_reshape(graph.get(), {1, 2, 3, 4}, -1); + luci::SubstitutePackToReshapePass pass; + while (pass.run(graph.get())) + ; + luci::CircleReshape *reshape_node = nullptr; + luci::CirclePack *pack_node = nullptr; + for (auto node : loco::active_nodes(loco::output_nodes(graph.get()))) + { + if (auto reshape = dynamic_cast(node)) + reshape_node = reshape; + else if (auto pack = dynamic_cast(node)) + pack_node = pack; + } + ASSERT_NE(nullptr, reshape_node); + ASSERT_EQ(nullptr, pack_node); + auto new_shape = loco::must_cast(reshape_node->shape()); + ASSERT_EQ(1, new_shape->at(0)); + ASSERT_EQ(2, new_shape->at(1)); + ASSERT_EQ(3, new_shape->at(2)); + ASSERT_EQ(4, new_shape->at(3)); + ASSERT_EQ(1, new_shape->at(4)); +} diff --git a/compiler/luci/pass/src/TypeInferencePass.cpp b/compiler/luci/pass/src/TypeInferencePass.cpp index 2c7b3a8..6374404 100644 --- a/compiler/luci/pass/src/TypeInferencePass.cpp +++ b/compiler/luci/pass/src/TypeInferencePass.cpp @@ -26,6 +26,19 @@ namespace luci { +bool TypeInferencePass::run(luci::Module *m) +{ + bool changed = false; + + for (size_t g = 0; g < m->size(); ++g) + { + if (run(m->graph(g))) + changed = true; + } + + return changed; +} + bool TypeInferencePass::run(loco::Graph *g) { loco::CanonicalTypeInferenceRule canonical_rule; diff --git a/compiler/luci/service/include/luci/Service/CircleShapeInference.h b/compiler/luci/service/include/luci/Service/CircleShapeInference.h index fb934c2..c301db5 100644 --- a/compiler/luci/service/include/luci/Service/CircleShapeInference.h +++ b/compiler/luci/service/include/luci/Service/CircleShapeInference.h @@ -21,6 +21,10 @@ 
#include +#include +#include +#include + namespace luci { @@ -36,6 +40,155 @@ struct ShapeInference static ShapeDescription get(loco::Node *node); }; +namespace sinf // namespace for Shape Inference +{ + +struct Rule +{ + bool infer(const luci::CircleNode *, loco::TensorShape &) const; +}; + +class Algorithm final : public luci::CircleNodeVisitor +{ +public: + // TODO Remove this when all of visit function is implemented + loco::TensorShape visit(const luci::CircleNode *node) final { return sinf::circle_shape(node); } + + // loco::TensorShape visit(const luci::CircleAbs *node) final; + // loco::TensorShape visit(const luci::CircleAdd *node) final; + // loco::TensorShape visit(const luci::CircleAddN *node) final; + // loco::TensorShape visit(const luci::CircleArgMax *node) final; + // loco::TensorShape visit(const luci::CircleArgMin *node) final; + // loco::TensorShape visit(const luci::CircleAveragePool2D *node) final; + // loco::TensorShape visit(const luci::CircleBatchMatMul *node) final; + // loco::TensorShape visit(const luci::CircleBatchToSpaceND *node) final; + // loco::TensorShape visit(const luci::CircleCast *node) final; + // loco::TensorShape visit(const luci::CircleCeil *node) final; + // loco::TensorShape visit(const luci::CircleConcatenation *node) final; + // loco::TensorShape visit(const luci::CircleConst *node) final; + // loco::TensorShape visit(const luci::CircleConv2D *node) final; + // loco::TensorShape visit(const luci::CircleCos *node) final; + // loco::TensorShape visit(const luci::CircleCustom *node) final; + // loco::TensorShape visit(const luci::CircleDepthToSpace *node) final; + // loco::TensorShape visit(const luci::CircleDepthwiseConv2D *node) final; + // loco::TensorShape visit(const luci::CircleDequantize *node) final; + // loco::TensorShape visit(const luci::CircleDiv *node) final; + // loco::TensorShape visit(const luci::CircleElu *node) final; + // loco::TensorShape visit(const luci::CircleEqual *node) final; + // loco::TensorShape visit(const luci::CircleExp *node) final; + // loco::TensorShape visit(const luci::CircleExpandDims *node) final; + // loco::TensorShape visit(const luci::CircleFill *node) final; + // loco::TensorShape visit(const luci::CircleFloor *node) final; + // loco::TensorShape visit(const luci::CircleFloorDiv *node) final; + // loco::TensorShape visit(const luci::CircleFloorMod *node) final; + // loco::TensorShape visit(const luci::CircleFullyConnected *node) final; + // loco::TensorShape visit(const luci::CircleGather *node) final; + // loco::TensorShape visit(const luci::CircleGatherNd *node) final; + // loco::TensorShape visit(const luci::CircleGreater *node) final; + // loco::TensorShape visit(const luci::CircleGreaterEqual *node) final; + // loco::TensorShape visit(const luci::CircleIf *node) final; + // loco::TensorShape visit(const luci::CircleL2Normalize *node) final; + // loco::TensorShape visit(const luci::CircleL2Pool2D *node) final; + // loco::TensorShape visit(const luci::CircleLeakyRelu *node) final; + // loco::TensorShape visit(const luci::CircleLess *node) final; + // loco::TensorShape visit(const luci::CircleLessEqual *node) final; + // loco::TensorShape visit(const luci::CircleLocalResponseNormalization *node) final; + // loco::TensorShape visit(const luci::CircleLog *node) final; + // loco::TensorShape visit(const luci::CircleLogicalAnd *node) final; + // loco::TensorShape visit(const luci::CircleLogicalNot *node) final; + // loco::TensorShape visit(const luci::CircleLogicalOr *node) final; + // loco::TensorShape 
visit(const luci::CircleLogistic *node) final; + // loco::TensorShape visit(const luci::CircleLogSoftmax *node) final; + // loco::TensorShape visit(const luci::CircleMatrixDiag *node) final; + // loco::TensorShape visit(const luci::CircleMatrixSetDiag *node) final; + // loco::TensorShape visit(const luci::CircleMaximum *node) final; + // loco::TensorShape visit(const luci::CircleMaxPool2D *node) final; + // loco::TensorShape visit(const luci::CircleMean *node) final; + // loco::TensorShape visit(const luci::CircleMinimum *node) final; + // loco::TensorShape visit(const luci::CircleMirrorPad *node) final; + // loco::TensorShape visit(const luci::CircleNeg *node) final; + // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV4 *node) final; + // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV5 *node) final; + // loco::TensorShape visit(const luci::CircleNotEqual *node) final; + // loco::TensorShape visit(const luci::CirclePack *node) final; + // loco::TensorShape visit(const luci::CirclePad *node) final; + // loco::TensorShape visit(const luci::CirclePadV2 *node) final; + // loco::TensorShape visit(const luci::CirclePow *node) final; + // loco::TensorShape visit(const luci::CirclePRelu *node) final; + // loco::TensorShape visit(const luci::CircleRange *node) final; + // loco::TensorShape visit(const luci::CircleRank *node) final; + // loco::TensorShape visit(const luci::CircleMul *node) final; + // loco::TensorShape visit(const luci::CircleOneHot *node) final; + // loco::TensorShape visit(const luci::CircleReduceAny *node) final; + // loco::TensorShape visit(const luci::CircleReduceMax *node) final; + // loco::TensorShape visit(const luci::CircleReduceMin *node) final; + // loco::TensorShape visit(const luci::CircleReduceProd *node) final; + // loco::TensorShape visit(const luci::CircleRelu *node) final; + // loco::TensorShape visit(const luci::CircleRelu6 *node) final; + // loco::TensorShape visit(const luci::CircleReluN1To1 *node) final; + // loco::TensorShape visit(const luci::CircleReshape *node) final; + // loco::TensorShape visit(const luci::CircleResizeBilinear *node) final; + // loco::TensorShape visit(const luci::CircleResizeNearestNeighbor *node) final; + // loco::TensorShape visit(const luci::CircleReverseSequence *node) final; + // loco::TensorShape visit(const luci::CircleReverseV2 *node) final; + // loco::TensorShape visit(const luci::CircleRound *node) final; + // loco::TensorShape visit(const luci::CircleRsqrt *node) final; + // loco::TensorShape visit(const luci::CircleScatterNd *node) final; + // loco::TensorShape visit(const luci::CircleSegmentSum *node) final; + // loco::TensorShape visit(const luci::CircleSelect *node) final; + // loco::TensorShape visit(const luci::CircleSelectV2 *node) final; + // loco::TensorShape visit(const luci::CircleShape *node) final; + // loco::TensorShape visit(const luci::CircleSin *node) final; + // loco::TensorShape visit(const luci::CircleSlice *node) final; + // loco::TensorShape visit(const luci::CircleSoftmax *node) final; + // loco::TensorShape visit(const luci::CircleSpaceToBatchND *node) final; + // loco::TensorShape visit(const luci::CircleSpaceToDepth *node) final; + // loco::TensorShape visit(const luci::CircleSparseToDense *node) final; + // loco::TensorShape visit(const luci::CircleSplit *node) final; + // loco::TensorShape visit(const luci::CircleSplitV *node) final; + // loco::TensorShape visit(const luci::CircleSqrt *node) final; + // loco::TensorShape visit(const luci::CircleSquare *node) final; + // 
loco::TensorShape visit(const luci::CircleSquaredDifference *node) final; + // loco::TensorShape visit(const luci::CircleSqueeze *node) final; + // loco::TensorShape visit(const luci::CircleStridedSlice *node) final; + // loco::TensorShape visit(const luci::CircleSub *node) final; + // loco::TensorShape visit(const luci::CircleSum *node) final; + // loco::TensorShape visit(const luci::CircleTanh *node) final; + // loco::TensorShape visit(const luci::CircleTile *node) final; + // loco::TensorShape visit(const luci::CircleTopKV2 *node) final; + // loco::TensorShape visit(const luci::CircleTranspose *node) final; + // loco::TensorShape visit(const luci::CircleTransposeConv *node) final; + // loco::TensorShape visit(const luci::CircleUnidirectionalSequenceLSTM *node) final; + // loco::TensorShape visit(const luci::CircleUnique *node) final; + // loco::TensorShape visit(const luci::CircleUnpack *node) final; + // loco::TensorShape visit(const luci::CircleWhere *node) final; + // loco::TensorShape visit(const luci::CircleWhile *node) final; + // loco::TensorShape visit(const luci::CircleZerosLike *node) final; + + // Circle Only + // loco::TensorShape visit(const luci::CircleBCQFullyConnected *node) final; + // loco::TensorShape visit(const luci::CircleBCQGather *node) final; + // loco::TensorShape visit(const luci::CircleInstanceNorm *node) final; + + // Virtual + // loco::TensorShape visit(const luci::CircleInput *node) final; + // loco::TensorShape visit(const luci::CircleOutput *node) final; + // loco::TensorShape visit(const luci::CircleOutputDummy *node) final; + // loco::TensorShape visit(const luci::CircleOutputExclude *node) final; + // loco::TensorShape visit(const luci::CircleCustomOut *node) final; + // loco::TensorShape visit(const luci::CircleIfOut *node) final; + // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV4Out *node) final; + // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV5Out *node) final; + // loco::TensorShape visit(const luci::CircleSplitOut *node) final; + // loco::TensorShape visit(const luci::CircleSplitVOut *node) final; + // loco::TensorShape visit(const luci::CircleTopKV2Out *node) final; + // loco::TensorShape visit(const luci::CircleUniqueOut *node) final; + // loco::TensorShape visit(const luci::CircleUnpackOut *node) final; + // loco::TensorShape visit(const luci::CircleWhileOut *node) final; +}; + +} // namespace sinf + } // namespace luci #endif // __LUCI_CIRCLE_SHAPE_INFERENCE_H__ diff --git a/compiler/luci/service/include/luci/Service/CircleShapeInferenceHelper.h b/compiler/luci/service/include/luci/Service/CircleShapeInferenceHelper.h new file mode 100644 index 0000000..dd6a5a4 --- /dev/null +++ b/compiler/luci/service/include/luci/Service/CircleShapeInferenceHelper.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__ +#define __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__ + +#include + +#include +#include + +namespace luci +{ +namespace sinf // Namespace for Shape Inference +{ + +// Return shape of circle node as loco::TensorShape +loco::TensorShape circle_shape(const luci::CircleNode *node); + +} // namespace sinf +} // namespace luci + +#endif // __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__ diff --git a/compiler/luci/service/include/luci/Service/CircleShapeSignatureInferenceRule.h b/compiler/luci/service/include/luci/Service/CircleShapeSignatureInference.h similarity index 87% rename from compiler/luci/service/include/luci/Service/CircleShapeSignatureInferenceRule.h rename to compiler/luci/service/include/luci/Service/CircleShapeSignatureInference.h index 4d1d830..f7ea89b 100644 --- a/compiler/luci/service/include/luci/Service/CircleShapeSignatureInferenceRule.h +++ b/compiler/luci/service/include/luci/Service/CircleShapeSignatureInference.h @@ -14,22 +14,26 @@ * limitations under the License. */ -#ifndef __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_RULE_H__ -#define __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_RULE_H__ +#ifndef __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_H__ +#define __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_H__ #include #include #include +#include namespace luci { -struct CircleShapeSignatureInferenceRule +namespace ssinf // namespace for Shape Signature Inference +{ + +struct Rule { bool infer(const luci::CircleNode *, ShapeSignature &) const; }; -class ShapeSignatureInferenceAlgorithm final : public luci::CircleNodeVisitor +class Algorithm final : public luci::CircleNodeVisitor { public: // TODO Remove this when visit function is implemented for all the operations. @@ -84,7 +88,7 @@ public: // ShapeSignature visit(const luci::CircleMatrixSetDiag *node) final; // ShapeSignature visit(const luci::CircleMaximum *node) final; // ShapeSignature visit(const luci::CircleMaxPool2D *node) final; - // ShapeSignature visit(const luci::CircleMean *node) final; + ShapeSignature visit(const luci::CircleMean *node) final; // ShapeSignature visit(const luci::CircleMinimum *node) final; // ShapeSignature visit(const luci::CircleMirrorPad *node) final; // ShapeSignature visit(const luci::CircleNeg *node) final; @@ -100,13 +104,13 @@ public: // ShapeSignature visit(const luci::CircleRank *node) final; // ShapeSignature visit(const luci::CircleMul *node) final; // ShapeSignature visit(const luci::CircleOneHot *node) final; - // ShapeSignature visit(const luci::CircleReduceAny *node) final; - // ShapeSignature visit(const luci::CircleReduceMax *node) final; - // ShapeSignature visit(const luci::CircleReduceMin *node) final; - // ShapeSignature visit(const luci::CircleReduceProd *node) final; - // ShapeSignature visit(const luci::CircleRelu *node) final; - // ShapeSignature visit(const luci::CircleRelu6 *node) final; - // ShapeSignature visit(const luci::CircleReluN1To1 *node) final; + ShapeSignature visit(const luci::CircleReduceAny *node) final; + ShapeSignature visit(const luci::CircleReduceMax *node) final; + ShapeSignature visit(const luci::CircleReduceMin *node) final; + ShapeSignature visit(const luci::CircleReduceProd *node) final; + ShapeSignature visit(const luci::CircleRelu *node) final; + ShapeSignature visit(const luci::CircleRelu6 *node) final; + ShapeSignature visit(const luci::CircleReluN1To1 *node) final; // ShapeSignature visit(const luci::CircleReshape *node) final; // ShapeSignature visit(const luci::CircleResizeBilinear *node) final; // 
ShapeSignature visit(const luci::CircleResizeNearestNeighbor *node) final; @@ -133,7 +137,7 @@ public: // ShapeSignature visit(const luci::CircleSqueeze *node) final; // ShapeSignature visit(const luci::CircleStridedSlice *node) final; // ShapeSignature visit(const luci::CircleSub *node) final; - // ShapeSignature visit(const luci::CircleSum *node) final; + ShapeSignature visit(const luci::CircleSum *node) final; // ShapeSignature visit(const luci::CircleTanh *node) final; // ShapeSignature visit(const luci::CircleTile *node) final; // ShapeSignature visit(const luci::CircleTopKV2 *node) final; @@ -152,10 +156,10 @@ public: // ShapeSignature visit(const luci::CircleInstanceNorm *node) final; // Virtual - // ShapeSignature visit(const luci::CircleInput *node) final; - // ShapeSignature visit(const luci::CircleOutput *node) final; - // ShapeSignature visit(const luci::CircleOutputDummy *node) final; - // ShapeSignature visit(const luci::CircleOutputExclude *node) final; + ShapeSignature visit(const luci::CircleInput *node) final; + ShapeSignature visit(const luci::CircleOutput *node) final; + ShapeSignature visit(const luci::CircleOutputDummy *node) final; + ShapeSignature visit(const luci::CircleOutputExclude *node) final; // ShapeSignature visit(const luci::CircleCustomOut *node) final; // ShapeSignature visit(const luci::CircleIfOut *node) final; // ShapeSignature visit(const luci::CircleNonMaxSuppressionV4Out *node) final; @@ -168,6 +172,8 @@ public: // ShapeSignature visit(const luci::CircleWhileOut *node) final; }; +} // namespace ssinf + } // namespace luci -#endif // __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_RULE_H__ +#endif // __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_H__ diff --git a/compiler/luci/service/include/luci/Service/CircleShapeSignatureInferenceHelper.h b/compiler/luci/service/include/luci/Service/CircleShapeSignatureInferenceHelper.h new file mode 100644 index 0000000..fb5b3b3 --- /dev/null +++ b/compiler/luci/service/include/luci/Service/CircleShapeSignatureInferenceHelper.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_HELPER_H__ +#define __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_HELPER_H__ + +#include +#include + +namespace luci +{ + +namespace ssinf // Namespace for Shape Signature Inference +{ + +// Return empty signature if all of dimensions are known. +// If at least one of dimensions is unknown, return signature without change. +ShapeSignature legalized_signature(const luci::ShapeSignature &signature); + +// Return reduced input_signature with indices and keep_dims. +// - indices : reduction index +// - keep_dims : If true, rank is not changed. If false, rank is reduced along indices. +ShapeSignature reduced_signature(const loco::Node *node, const loco::Node *indices, bool keep_dims); + +// Return signature of index-th argument of node. 
+ShapeSignature input_arg_signature(const luci::CircleNode *node, uint32_t index); + +} // namespace ssinf + +} // namespace luci + +#endif // __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_HELPER_H__ diff --git a/compiler/luci/service/include/luci/Service/CircleTypeInference.h b/compiler/luci/service/include/luci/Service/CircleTypeInference.h index ea7a3c5..3422148 100644 --- a/compiler/luci/service/include/luci/Service/CircleTypeInference.h +++ b/compiler/luci/service/include/luci/Service/CircleTypeInference.h @@ -21,6 +21,10 @@ #include +#include +#include +#include + namespace luci { @@ -37,6 +41,155 @@ struct TypeInference static circle::TensorType get(loco::Node *node); }; +namespace tinf // namespace for Type Inference +{ + +struct Rule +{ + bool infer(const luci::CircleNode *, loco::DataType &) const; +}; + +class Algorithm final : public luci::CircleNodeVisitor +{ +public: + // TODO Remove this when all of visit function is implemented + loco::DataType visit(const luci::CircleNode *node) final { return node->dtype(); } + + // loco::DataType visit(const luci::CircleAbs *node) final; + // loco::DataType visit(const luci::CircleAdd *node) final; + // loco::DataType visit(const luci::CircleAddN *node) final; + // loco::DataType visit(const luci::CircleArgMax *node) final; + // loco::DataType visit(const luci::CircleArgMin *node) final; + // loco::DataType visit(const luci::CircleAveragePool2D *node) final; + // loco::DataType visit(const luci::CircleBatchMatMul *node) final; + // loco::DataType visit(const luci::CircleBatchToSpaceND *node) final; + // loco::DataType visit(const luci::CircleCast *node) final; + // loco::DataType visit(const luci::CircleCeil *node) final; + // loco::DataType visit(const luci::CircleConcatenation *node) final; + // loco::DataType visit(const luci::CircleConst *node) final; + // loco::DataType visit(const luci::CircleConv2D *node) final; + // loco::DataType visit(const luci::CircleCos *node) final; + // loco::DataType visit(const luci::CircleCustom *node) final; + // loco::DataType visit(const luci::CircleDepthToSpace *node) final; + // loco::DataType visit(const luci::CircleDepthwiseConv2D *node) final; + // loco::DataType visit(const luci::CircleDequantize *node) final; + // loco::DataType visit(const luci::CircleDiv *node) final; + // loco::DataType visit(const luci::CircleElu *node) final; + // loco::DataType visit(const luci::CircleEqual *node) final; + // loco::DataType visit(const luci::CircleExp *node) final; + // loco::DataType visit(const luci::CircleExpandDims *node) final; + // loco::DataType visit(const luci::CircleFill *node) final; + // loco::DataType visit(const luci::CircleFloor *node) final; + // loco::DataType visit(const luci::CircleFloorDiv *node) final; + // loco::DataType visit(const luci::CircleFloorMod *node) final; + // loco::DataType visit(const luci::CircleFullyConnected *node) final; + // loco::DataType visit(const luci::CircleGather *node) final; + // loco::DataType visit(const luci::CircleGatherNd *node) final; + // loco::DataType visit(const luci::CircleGreater *node) final; + // loco::DataType visit(const luci::CircleGreaterEqual *node) final; + // loco::DataType visit(const luci::CircleIf *node) final; + // loco::DataType visit(const luci::CircleL2Normalize *node) final; + // loco::DataType visit(const luci::CircleL2Pool2D *node) final; + // loco::DataType visit(const luci::CircleLeakyRelu *node) final; + // loco::DataType visit(const luci::CircleLess *node) final; + // loco::DataType visit(const luci::CircleLessEqual *node) 
final; + // loco::DataType visit(const luci::CircleLocalResponseNormalization *node) final; + // loco::DataType visit(const luci::CircleLog *node) final; + // loco::DataType visit(const luci::CircleLogicalAnd *node) final; + // loco::DataType visit(const luci::CircleLogicalNot *node) final; + // loco::DataType visit(const luci::CircleLogicalOr *node) final; + // loco::DataType visit(const luci::CircleLogistic *node) final; + // loco::DataType visit(const luci::CircleLogSoftmax *node) final; + // loco::DataType visit(const luci::CircleMatrixDiag *node) final; + // loco::DataType visit(const luci::CircleMatrixSetDiag *node) final; + // loco::DataType visit(const luci::CircleMaximum *node) final; + // loco::DataType visit(const luci::CircleMaxPool2D *node) final; + // loco::DataType visit(const luci::CircleMean *node) final; + // loco::DataType visit(const luci::CircleMinimum *node) final; + // loco::DataType visit(const luci::CircleMirrorPad *node) final; + // loco::DataType visit(const luci::CircleNeg *node) final; + // loco::DataType visit(const luci::CircleNonMaxSuppressionV4 *node) final; + // loco::DataType visit(const luci::CircleNonMaxSuppressionV5 *node) final; + // loco::DataType visit(const luci::CircleNotEqual *node) final; + // loco::DataType visit(const luci::CirclePack *node) final; + // loco::DataType visit(const luci::CirclePad *node) final; + // loco::DataType visit(const luci::CirclePadV2 *node) final; + // loco::DataType visit(const luci::CirclePow *node) final; + // loco::DataType visit(const luci::CirclePRelu *node) final; + // loco::DataType visit(const luci::CircleRange *node) final; + // loco::DataType visit(const luci::CircleRank *node) final; + // loco::DataType visit(const luci::CircleMul *node) final; + // loco::DataType visit(const luci::CircleOneHot *node) final; + // loco::DataType visit(const luci::CircleReduceAny *node) final; + // loco::DataType visit(const luci::CircleReduceMax *node) final; + // loco::DataType visit(const luci::CircleReduceMin *node) final; + // loco::DataType visit(const luci::CircleReduceProd *node) final; + // loco::DataType visit(const luci::CircleRelu *node) final; + // loco::DataType visit(const luci::CircleRelu6 *node) final; + // loco::DataType visit(const luci::CircleReluN1To1 *node) final; + // loco::DataType visit(const luci::CircleReshape *node) final; + // loco::DataType visit(const luci::CircleResizeBilinear *node) final; + // loco::DataType visit(const luci::CircleResizeNearestNeighbor *node) final; + // loco::DataType visit(const luci::CircleReverseSequence *node) final; + // loco::DataType visit(const luci::CircleReverseV2 *node) final; + // loco::DataType visit(const luci::CircleRound *node) final; + // loco::DataType visit(const luci::CircleRsqrt *node) final; + // loco::DataType visit(const luci::CircleScatterNd *node) final; + // loco::DataType visit(const luci::CircleSegmentSum *node) final; + // loco::DataType visit(const luci::CircleSelect *node) final; + // loco::DataType visit(const luci::CircleSelectV2 *node) final; + // loco::DataType visit(const luci::CircleShape *node) final; + // loco::DataType visit(const luci::CircleSin *node) final; + // loco::DataType visit(const luci::CircleSlice *node) final; + // loco::DataType visit(const luci::CircleSoftmax *node) final; + // loco::DataType visit(const luci::CircleSpaceToBatchND *node) final; + // loco::DataType visit(const luci::CircleSpaceToDepth *node) final; + // loco::DataType visit(const luci::CircleSparseToDense *node) final; + // loco::DataType visit(const 
luci::CircleSplit *node) final; + // loco::DataType visit(const luci::CircleSplitV *node) final; + // loco::DataType visit(const luci::CircleSqrt *node) final; + // loco::DataType visit(const luci::CircleSquare *node) final; + // loco::DataType visit(const luci::CircleSquaredDifference *node) final; + // loco::DataType visit(const luci::CircleSqueeze *node) final; + // loco::DataType visit(const luci::CircleStridedSlice *node) final; + // loco::DataType visit(const luci::CircleSub *node) final; + // loco::DataType visit(const luci::CircleSum *node) final; + // loco::DataType visit(const luci::CircleTanh *node) final; + // loco::DataType visit(const luci::CircleTile *node) final; + // loco::DataType visit(const luci::CircleTopKV2 *node) final; + // loco::DataType visit(const luci::CircleTranspose *node) final; + // loco::DataType visit(const luci::CircleTransposeConv *node) final; + // loco::DataType visit(const luci::CircleUnidirectionalSequenceLSTM *node) final; + // loco::DataType visit(const luci::CircleUnique *node) final; + // loco::DataType visit(const luci::CircleUnpack *node) final; + // loco::DataType visit(const luci::CircleWhere *node) final; + // loco::DataType visit(const luci::CircleWhile *node) final; + // loco::DataType visit(const luci::CircleZerosLike *node) final; + + // Circle Only + // loco::DataType visit(const luci::CircleBCQFullyConnected *node) final; + // loco::DataType visit(const luci::CircleBCQGather *node) final; + // loco::DataType visit(const luci::CircleInstanceNorm *node) final; + + // Virtual + // loco::DataType visit(const luci::CircleInput *node) final; + // loco::DataType visit(const luci::CircleOutput *node) final; + // loco::DataType visit(const luci::CircleOutputDummy *node) final; + // loco::DataType visit(const luci::CircleOutputExclude *node) final; + // loco::DataType visit(const luci::CircleCustomOut *node) final; + // loco::DataType visit(const luci::CircleIfOut *node) final; + // loco::DataType visit(const luci::CircleNonMaxSuppressionV4Out *node) final; + // loco::DataType visit(const luci::CircleNonMaxSuppressionV5Out *node) final; + // loco::DataType visit(const luci::CircleSplitOut *node) final; + // loco::DataType visit(const luci::CircleSplitVOut *node) final; + // loco::DataType visit(const luci::CircleTopKV2Out *node) final; + // loco::DataType visit(const luci::CircleUniqueOut *node) final; + // loco::DataType visit(const luci::CircleUnpackOut *node) final; + // loco::DataType visit(const luci::CircleWhileOut *node) final; +}; + +} // namespace tinf + } // namespace luci #endif // __LUCI_CIRCLE_TYPE_INFERENCE_H__ diff --git a/compiler/luci/service/include/luci/Service/CircleTypeInferenceHelper.h b/compiler/luci/service/include/luci/Service/CircleTypeInferenceHelper.h new file mode 100644 index 0000000..296f993 --- /dev/null +++ b/compiler/luci/service/include/luci/Service/CircleTypeInferenceHelper.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__ +#define __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__ + +#include + +#include + +namespace luci +{ +namespace tinf // Namespace for Type Inference +{ + +// Helper function will be added + +} // namespace tinf +} // namespace luci + +#endif // __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__ diff --git a/compiler/luci/service/include/luci/Service/ShapeDescription.h b/compiler/luci/service/include/luci/Service/ShapeDescription.h index 949cce5..4d92be1 100644 --- a/compiler/luci/service/include/luci/Service/ShapeDescription.h +++ b/compiler/luci/service/include/luci/Service/ShapeDescription.h @@ -20,6 +20,8 @@ #include #include +#include + #include #include @@ -33,6 +35,7 @@ struct ShapeDescription }; // TODO remove these when CircleDialect is fully functional +ShapeDescription to_shape_description(const luci::CircleNode *node); ShapeDescription to_shape_description(const loco::TensorShape &shape); ShapeDescription to_shape_description(const loco::FeatureShape &shape); ShapeDescription to_shape_description(const loco::FilterShape &shape); diff --git a/compiler/luci/service/src/CircleShapeInference.cpp b/compiler/luci/service/src/CircleShapeInference.cpp index 0732849..db8ffd8 100644 --- a/compiler/luci/service/src/CircleShapeInference.cpp +++ b/compiler/luci/service/src/CircleShapeInference.cpp @@ -20,7 +20,10 @@ #include #include +#include + #include +#include namespace luci { @@ -32,3 +35,60 @@ ShapeDescription ShapeInference::get(loco::Node *node) } } // namespace luci + +namespace +{ + +std::ostream &operator<<(std::ostream &os, const loco::TensorShape &tensor_shape) +{ + os << "["; + for (uint32_t r = 0; r < tensor_shape.rank(); ++r) + { + if (r) + os << ","; + os << tensor_shape.dim(r).value(); + } + os << "]"; + return os; +} + +bool inputs_shape_ready(const luci::CircleNode *node) +{ + for (uint32_t arity = 0; arity < node->arity(); ++arity) + { + auto node_input = loco::must_cast(node->arg(arity)); + if (node_input->shape_status() == luci::ShapeStatus::UNDEFINED) + return false; + } + + return true; +} + +} // namespace + +namespace luci +{ +namespace sinf +{ + +bool Rule::infer(const luci::CircleNode *circle_node, loco::TensorShape &shape) const +{ + LOGGER(l); + VERBOSE(l, 1) << "[CircleShapeInference] " << circle_node->name(); + VERBOSE(l, 1) << " before: " << circle_shape(circle_node); + + if (!inputs_shape_ready(circle_node)) + { + VERBOSE(l, 1) << " after: Some inputs are not ready for inference"; + return false; + } + + Algorithm alg; + shape = circle_node->accept(&alg); + VERBOSE(l, 1) << " after: " << shape; + + return true; +} + +} // namespace sinf +} // namespace luci diff --git a/compiler/luci/service/src/CircleShapeInferenceHelper.cpp b/compiler/luci/service/src/CircleShapeInferenceHelper.cpp new file mode 100644 index 0000000..f7eb6c3 --- /dev/null +++ b/compiler/luci/service/src/CircleShapeInferenceHelper.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Service/CircleShapeInferenceHelper.h" + +namespace luci +{ +namespace sinf +{ + +loco::TensorShape circle_shape(const luci::CircleNode *node) +{ + loco::TensorShape shape; + shape.rank(node->rank()); + for (uint32_t r = 0; r < node->rank(); ++r) + shape.dim(r) = loco::Dimension(node->dim(r).value()); + return shape; +} + +} // namespace sinf +} // namespace luci diff --git a/compiler/luci/service/src/CircleShapeInferenceRule.cpp b/compiler/luci/service/src/CircleShapeInferenceRule.cpp index a55f50b..38ff619 100644 --- a/compiler/luci/service/src/CircleShapeInferenceRule.cpp +++ b/compiler/luci/service/src/CircleShapeInferenceRule.cpp @@ -102,7 +102,7 @@ private: }; /** - * @breif Expand shape x and y to same rank by align right and filling with 1 + * @brief Expand shape x and y to same rank by align right and filling with 1 */ void expand_rank(loco::TensorShape &x, loco::TensorShape &y) { @@ -122,7 +122,7 @@ void expand_rank(loco::TensorShape &x, loco::TensorShape &y) } /** - * @breif Returns shape of expanded dimension of input x and y having same rank + * @brief Returns shape of expanded dimension of input x and y having same rank */ loco::TensorShape expand_dimension(const loco::TensorShape &x, const loco::TensorShape &y) { diff --git a/compiler/luci/service/src/CircleShapeSignatureInferenceRule.cpp b/compiler/luci/service/src/CircleShapeSignatureInference.cpp similarity index 83% rename from compiler/luci/service/src/CircleShapeSignatureInferenceRule.cpp rename to compiler/luci/service/src/CircleShapeSignatureInference.cpp index dc7df3e..1ccaa19 100644 --- a/compiler/luci/service/src/CircleShapeSignatureInferenceRule.cpp +++ b/compiler/luci/service/src/CircleShapeSignatureInference.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "luci/Service/CircleShapeSignatureInferenceRule.h" +#include "luci/Service/CircleShapeSignatureInference.h" #include @@ -39,14 +39,16 @@ std::ostream &operator<<(std::ostream &os, const luci::ShapeSignature &shape_sig namespace luci { -bool CircleShapeSignatureInferenceRule::infer(const luci::CircleNode *circle_node, - ShapeSignature &shape_signature) const +namespace ssinf +{ + +bool Rule::infer(const luci::CircleNode *circle_node, ShapeSignature &shape_signature) const { LOGGER(l); // There is nothing to check before ShapeSignatureInference. - ShapeSignatureInferenceAlgorithm alg; + Algorithm alg; shape_signature = circle_node->accept(&alg); @@ -57,4 +59,6 @@ bool CircleShapeSignatureInferenceRule::infer(const luci::CircleNode *circle_nod return true; } +} // namespace ssinf + } // namespace luci diff --git a/compiler/luci/service/src/CircleShapeSignatureInferenceHelper.cpp b/compiler/luci/service/src/CircleShapeSignatureInferenceHelper.cpp new file mode 100644 index 0000000..d7d1a24 --- /dev/null +++ b/compiler/luci/service/src/CircleShapeSignatureInferenceHelper.cpp @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Service/CircleShapeSignatureInferenceHelper.h" + +#include + +#include + +#include + +namespace luci +{ + +namespace ssinf +{ + +luci::ShapeSignature legalized_signature(const luci::ShapeSignature &signature) +{ + // If shape signature has at least one -1, it is not static. + for (uint32_t i = 0; i < signature.rank(); ++i) + if (signature.dim(i) == -1) + return signature; + + // If all dimensions are static, return empty shape signature. + return luci::ShapeSignature(); +} + +ShapeSignature reduced_signature(const loco::Node *node, const loco::Node *indices, bool keep_dims) +{ + LOGGER(l); + + ShapeSignature input_signature; + ShapeSignature output_signature; + + auto circle_node = loco::must_cast(node); + if (circle_node->shape_signature().rank() > 0) + input_signature = circle_node->shape_signature(); + else + { + input_signature.rank(circle_node->rank()); + for (uint32_t i = 0; i < circle_node->rank(); ++i) + input_signature.dim(i) = circle_node->dim(i).value(); + } + + // If input rank is 0, it means that one of the following cases has occurred. + // - Input is scalar : result is always scalar + // - Input shape signature has not been inferred : cannot infer output shape signature + // Therefore, when input signature rank is 0, always return empty signature. + if (input_signature.rank() == 0) + return output_signature; + + // When reduction_indices is not constant + auto reduction_indices = dynamic_cast(indices); + if (reduction_indices == nullptr) + { + if (keep_dims) + { + // If keep_dims is true, rank is not changed. + output_signature.rank(input_signature.rank()); + for (uint32_t i = 0; i < output_signature.rank(); ++i) + output_signature.dim(i) = -1; + } + else + { + // There is no way to infer the signature for this case. + // Do nothing so that the empty signature is returned.
+ INFO(l) << "[CircleShapeSignatureInferenceHelper] " << circle_node->name() << std::endl; + INFO(l) << " reduced_signature : cannot infer because of non-constant node" << std::endl; + } + + return output_signature; + } + + std::vector reduction_values; + if (reduction_indices->dtype() == loco::DataType::S32) + { + auto reduction_size = reduction_indices->size(); + for (uint32_t i = 0; i < reduction_size; ++i) + { + int32_t axis = reduction_indices->at(i); + if (axis < 0) + axis += input_signature.rank(); + + if (!(0 <= axis && axis < static_cast(input_signature.rank()))) + INTERNAL_EXN_V("Invalid reduction axis for REDUCER", oops::to_uint32(axis)); + + reduction_values.push_back(axis); + } + } + else if (reduction_indices->dtype() == loco::DataType::S64) + { + auto reduction_size = reduction_indices->size(); + for (uint32_t i = 0; i < reduction_size; ++i) + { + int32_t axis = static_cast(reduction_indices->at(i)); + if (axis < 0) + axis += input_signature.rank(); + + if (!(0 <= axis && axis < static_cast(input_signature.rank()))) + INTERNAL_EXN_V("Invalid reduction axis for REDUCER", oops::to_uint32(axis)); + + reduction_values.push_back(axis); + } + } + else + { + INTERNAL_EXN("Wrong reduction axis type, Only INT32, INT64 supported."); + } + + if (keep_dims) + { + output_signature.rank(input_signature.rank()); + for (uint32_t i = 0; i < input_signature.rank(); ++i) + output_signature.dim(i) = input_signature.dim(i); + for (uint32_t i = 0; i < reduction_values.size(); ++i) + output_signature.dim(reduction_values.at(i)) = 1; + } + else + { + std::vector check_reduce(input_signature.rank(), false); + for (uint32_t i = 0; i < reduction_values.size(); ++i) + check_reduce.at(reduction_values.at(i)) = true; + + uint32_t reduce_cnt = 0; + for (uint32_t i = 0; i < check_reduce.size(); ++i) + if (check_reduce.at(i)) + ++reduce_cnt; + + output_signature.rank(input_signature.rank() - reduce_cnt); + for (uint32_t i = 0, j = 0; i < check_reduce.size(); ++i) + if (check_reduce.at(i) == false) + output_signature.dim(j++) = input_signature.dim(i); + } + + return output_signature; +} + +ShapeSignature input_arg_signature(const luci::CircleNode *node, uint32_t index) +{ + auto circle_input = loco::must_cast(node->arg(index)); + return circle_input->shape_signature(); +} + +} // namespace ssinf + +} // namespace luci diff --git a/compiler/luci/service/src/CircleTypeInference.cpp b/compiler/luci/service/src/CircleTypeInference.cpp index aa8524a..b4755b5 100644 --- a/compiler/luci/service/src/CircleTypeInference.cpp +++ b/compiler/luci/service/src/CircleTypeInference.cpp @@ -16,6 +16,8 @@ #include "luci/Service/CircleTypeInference.h" +#include + #include #include @@ -70,3 +72,47 @@ circle::TensorType TypeInference::get(loco::Node *node) } } // namespace luci + +namespace +{ + +bool inputs_dtype_ready(const luci::CircleNode *node) +{ + for (uint32_t arity = 0; arity < node->arity(); ++arity) + { + if (node->dtype() == loco::DataType::Unknown) + return false; + } + + return true; +} + +} // namespace + +namespace luci +{ +namespace tinf +{ + +bool Rule::infer(const luci::CircleNode *circle_node, loco::DataType &dtype) const +{ + LOGGER(l); + VERBOSE(l, 1) << "[CircleTypeInference] " << circle_node->name(); + VERBOSE(l, 1) << " before: " << static_cast(circle_node->dtype()); + + if (!inputs_dtype_ready(circle_node)) + { + VERBOSE(l, 1) << " after: Some inputs are not ready for inference"; + return false; + } + + Algorithm alg; + dtype = circle_node->accept(&alg); + + VERBOSE(l, 1) << " after: " << 
static_cast(dtype); + + return true; +} + +} // namespace tinf +} // namespace luci diff --git a/compiler/luci/service/src/CircleTypeInferenceHelper.cpp b/compiler/luci/service/src/CircleTypeInferenceHelper.cpp new file mode 100644 index 0000000..75cd9f7 --- /dev/null +++ b/compiler/luci/service/src/CircleTypeInferenceHelper.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Service/CircleTypeInferenceHelper.h" + +namespace luci +{ +namespace tinf +{ + +// Helper function will be added + +} // namespace tinf +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleInput.cpp b/compiler/luci/service/src/Nodes/CircleInput.cpp new file mode 100644 index 0000000..24eab7b --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleInput.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleInput *node) +{ + return node->shape_signature(); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleMean.cpp b/compiler/luci/service/src/Nodes/CircleMean.cpp new file mode 100644 index 0000000..a787136 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleMean.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleMean *node) +{ + return legalized_signature( + reduced_signature(node->input(), node->reduction_indices(), node->keep_dims())); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleOutput.cpp b/compiler/luci/service/src/Nodes/CircleOutput.cpp new file mode 100644 index 0000000..d4c8da2 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleOutput.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleOutput *node) +{ + return input_arg_signature(node, 0); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleOutputDummy.cpp b/compiler/luci/service/src/Nodes/CircleOutputDummy.cpp new file mode 100644 index 0000000..e0f13c4 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleOutputDummy.cpp @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleOutputDummy *) { return ShapeSignature(); } + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleOutputExclude.cpp b/compiler/luci/service/src/Nodes/CircleOutputExclude.cpp new file mode 100644 index 0000000..75bbbb3 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleOutputExclude.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleOutputExclude *) +{ + return ShapeSignature(); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleReduceAny.cpp b/compiler/luci/service/src/Nodes/CircleReduceAny.cpp new file mode 100644 index 0000000..27da814 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleReduceAny.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleReduceAny *node) +{ + return legalized_signature( + reduced_signature(node->input(), node->reduction_indices(), node->keep_dims())); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleReduceMax.cpp b/compiler/luci/service/src/Nodes/CircleReduceMax.cpp new file mode 100644 index 0000000..48d9cb9 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleReduceMax.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleReduceMax *node) +{ + return legalized_signature( + reduced_signature(node->input(), node->reduction_indices(), node->keep_dims())); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleReduceMin.cpp b/compiler/luci/service/src/Nodes/CircleReduceMin.cpp new file mode 100644 index 0000000..9a99971 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleReduceMin.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleReduceMin *node) +{ + return legalized_signature( + reduced_signature(node->input(), node->reduction_indices(), node->keep_dims())); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleReduceProd.cpp b/compiler/luci/service/src/Nodes/CircleReduceProd.cpp new file mode 100644 index 0000000..a9d381a --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleReduceProd.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleReduceProd *node) +{ + return legalized_signature( + reduced_signature(node->input(), node->reduction_indices(), node->keep_dims())); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleRelu.cpp b/compiler/luci/service/src/Nodes/CircleRelu.cpp new file mode 100644 index 0000000..a7a7f6f --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleRelu.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleRelu *node) +{ + return input_arg_signature(node, 0); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleRelu6.cpp b/compiler/luci/service/src/Nodes/CircleRelu6.cpp new file mode 100644 index 0000000..92a596d --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleRelu6.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleRelu6 *node) +{ + return input_arg_signature(node, 0); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleReluN1To1.cpp b/compiler/luci/service/src/Nodes/CircleReluN1To1.cpp new file mode 100644 index 0000000..1e8d997 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleReluN1To1.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleReluN1To1 *node) +{ + return input_arg_signature(node, 0); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleSum.cpp b/compiler/luci/service/src/Nodes/CircleSum.cpp new file mode 100644 index 0000000..9ef90e8 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleSum.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +namespace luci +{ + +ShapeSignature ssinf::Algorithm::visit(const luci::CircleSum *node) +{ + return legalized_signature( + reduced_signature(node->input(), node->reduction_indices(), node->keep_dims())); +} + +} // namespace luci diff --git a/compiler/luci/service/src/ShapeDescription.cpp b/compiler/luci/service/src/ShapeDescription.cpp index cbc302f..01a638f 100644 --- a/compiler/luci/service/src/ShapeDescription.cpp +++ b/compiler/luci/service/src/ShapeDescription.cpp @@ -23,6 +23,19 @@ namespace luci { +ShapeDescription to_shape_description(const luci::CircleNode *circle_node) +{ + ShapeDescription res; + + res._rank_known = true; + + res._dims.resize(circle_node->rank()); + for (uint32_t i = 0; i < circle_node->rank(); ++i) + res._dims.at(i) = circle_node->dim(i).value(); + + return res; +} + ShapeDescription to_shape_description(const loco::TensorShape &shape) { ShapeDescription res; diff --git a/compiler/luci/service/src/Validate.cpp b/compiler/luci/service/src/Validate.cpp index d224fd1..3f732b6 100644 --- a/compiler/luci/service/src/Validate.cpp +++ b/compiler/luci/service/src/Validate.cpp @@ -42,6 +42,19 @@ std::ostream &operator<<(std::ostream &os, const loco::TensorShape &tensor_shape return os; } +std::ostream &operator<<(std::ostream &os, const luci::CircleNode *circle_node) +{ + os << "["; + for (uint32_t r = 0; r < circle_node->rank(); ++r) + { + if (r) + os << ","; + os << circle_node->dim(r).value(); + } + os << "]"; + return os; +} + /** * @brief returns a node that is CircleOutput with index is out_index in nodes */ @@ -80,23 +93,28 @@ bool validate_shape_dtype(loco::Graph *g) if (dynamic_cast(circle_node)) continue; - assert(loco::shape_known(circle_node)); + assert(circle_node->shape_status() != luci::ShapeStatus::UNDEFINED); // check if output node shape is same as graph output shape - auto co_tensor_shape = loco::shape_get(circle_node).as(); auto go_tensor_shape = graph_out->shape(); assert(go_tensor_shape); - if (!(co_tensor_shape == *go_tensor_shape)) + + bool is_shape_valid = (circle_node->rank() == go_tensor_shape->rank()); + for (uint32_t i = 0; is_shape_valid && i < circle_node->rank(); ++i) + if (circle_node->dim(i).value() != go_tensor_shape->dim(i).value()) + is_shape_valid = false; + + if (is_shape_valid == false) { INFO(l) << "[luci] Shape for output #" << out_index << " not same " << std::endl; - INFO(l) << "[luci] " << circle_node->name() << " " << co_tensor_shape << " vs " + INFO(l) << "[luci] " << circle_node->name() << " " << circle_node << " vs " << *go_tensor_shape << std::endl; return false; } // check if data type match - assert(loco::dtype_known(circle_node)); - if (graph_out->dtype() != loco::dtype_get(circle_node)) + assert(circle_node->dtype() != loco::DataType::Unknown); + if (graph_out->dtype() != circle_node->dtype()) { INFO(l) << "[luci] Type for output #" << out_index << " not same " << std::endl; return false; @@ -106,6 +124,55 @@ bool validate_shape_dtype(loco::Graph *g) return true; } +bool validate_shape_signature(loco::Graph *g) +{ + LOGGER(l); + + for (auto node : loco::postorder_traversal(loco::output_nodes(g))) + { + auto circle_node = loco::must_cast(node); + const auto shape_signature = circle_node->shape_signature(); + + if (shape_signature.rank() == 0) + continue; + + // Rank of shape and shape signature should be same + if (circle_node->rank() != shape_signature.rank()) + { + INFO(l) << "[luci] Rank of shape signature for " << circle_node->name() << " do not match" + << std::endl; + return false; + } + + 
bool has_unknown = false; + + // If shape signature is not -1, dimension value should be the same + for (uint32_t d = 0; d < shape_signature.rank(); ++d) + { + if (shape_signature.dim(d) != -1 && + shape_signature.dim(d) != (int32_t)(circle_node->dim(d).value())) + { + INFO(l) << "[luci] Dimension " << d << " of shape signature for " << circle_node->name() + << " does not match" << std::endl; + return false; + } + + if (shape_signature.dim(d) == -1) + has_unknown = true; + } + + // Shape signature should have at least one -1 value. + if (!has_unknown) + { + INFO(l) << "[luci] Shape signature in " << circle_node->name() + << " does not have an unknown dimension" << std::endl; + return false; + } + } + + return true; +} + } // namespace namespace luci @@ -119,6 +186,9 @@ bool validate(loco::Graph *g) if (!validate_shape_dtype(g)) return false; + if (!validate_shape_signature(g)) + return false; + // TODO add more validation return true; diff --git a/compiler/luci/tester/src/ReadTester.cpp b/compiler/luci/tester/src/ReadTester.cpp index a1aead1..f270a23 100644 --- a/compiler/luci/tester/src/ReadTester.cpp +++ b/compiler/luci/tester/src/ReadTester.cpp @@ -21,6 +21,9 @@ #include #include +// Following passes will be removed after refactoring is finished +#include + #include #include #include @@ -95,6 +98,12 @@ int entry(int argc, char **argv) while (pass.run(graph) == true) ; } + { + // This pass will be removed after refactoring is finished + luci::MigrateLegacyShapeDtypePass pass; + while (pass.run(graph) == true) + ; + } if (!luci::validate(graph)) return 255; diff --git a/compiler/luci/tester/src/WriteTester.cpp b/compiler/luci/tester/src/WriteTester.cpp index aa7085c..9a6e8de 100644 --- a/compiler/luci/tester/src/WriteTester.cpp +++ b/compiler/luci/tester/src/WriteTester.cpp @@ -23,6 +23,9 @@ #include #include +// Following passes will be removed after refactoring is finished +#include + #include #include #include @@ -139,6 +142,12 @@ int entry(int argc, char **argv) while (pass.run(graph) == true) ; } + { + // This pass will be removed after refactoring is finished + luci::MigrateLegacyShapeDtypePass pass; + while (pass.run(graph) == true) + ; + } if (!luci::validate(graph)) return 255; diff --git a/compiler/moco/support/src/TFShapeInferenceHelper.cpp b/compiler/moco/support/src/TFShapeInferenceHelper.cpp index 13e514a..605fb9c 100644 --- a/compiler/moco/support/src/TFShapeInferenceHelper.cpp +++ b/compiler/moco/support/src/TFShapeInferenceHelper.cpp @@ -66,7 +66,7 @@ private: }; /** - * @breif Expand shape x and y to same rank by align right and filling with 1 + * @brief Expand shape x and y to same rank by align right and filling with 1 */ void expand_rank(loco::TensorShape &x, loco::TensorShape &y) { @@ -86,7 +86,7 @@ void expand_rank(loco::TensorShape &x, loco::TensorShape &y) } /** - * @breif Returns shape of expanded dimension of input x and y having same rank + * @brief Returns shape of expanded dimension of input x and y having same rank */ loco::TensorShape expand_dimension(const loco::TensorShape &x, const loco::TensorShape &y) { diff --git a/compiler/nnc/include/Definitions.h.in b/compiler/nnc/include/Definitions.h.in index 070cdd2..bd86429 100644 --- a/compiler/nnc/include/Definitions.h.in +++ b/compiler/nnc/include/Definitions.h.in @@ -7,12 +7,12 @@ */ /** - * @breif absolute path to installation directory of *nnc* project + * @brief absolute path to installation directory of *nnc* project */ #define NNC_ROOT_PATH "@NNC_INSTALL_PATH@" /** - * @breif absolute path to directory contains
libraries + * @brief absolute path to directory contains libraries */ #define NNC_LIB_PATH "@NNC_INSTALL_LIB_PATH@" diff --git a/compiler/one-cmds/how-to-use-one-commands.txt b/compiler/one-cmds/how-to-use-one-commands.txt index 62a4978..d4e3269 100644 --- a/compiler/one-cmds/how-to-use-one-commands.txt +++ b/compiler/one-cmds/how-to-use-one-commands.txt @@ -161,6 +161,7 @@ Current transformation options are - make_batchnorm_gamma_positive: This makes negative gamma of batch normalization into a small positive value (1e-10). Note that this pass can change the execution result of the model. So, use it only when the impact is known to be acceptable. +- replace_cw_mul_add_with_depthwise_conv: This will replace channel-wise Mul/Add with DepthwiseConv2D. - resolve_customop_add: This will convert Custom(Add) to normal Add operator - resolve_customop_batchmatmul: This will convert Custom(BatchMatMul) to normal BatchMatMul operator diff --git a/compiler/one-cmds/one-codegen b/compiler/one-cmds/one-codegen index f2d8230..fbe3d52 100644 --- a/compiler/one-cmds/one-codegen +++ b/compiler/one-cmds/one-codegen @@ -87,24 +87,19 @@ def main(): # verify arguments _verify_arg(parser, args) - # get file path to log + # make a command to run given backend driver dir_path = os.path.dirname(os.path.realpath(__file__)) - logfile_path = os.path.realpath(args.output_path) + '.log' - - with open(logfile_path, 'wb') as f: - # make a command to run given backend driver - codegen_path = os.path.join(dir_path, getattr(args, 'backend') + '-compile') - codegen_cmd = [codegen_path] + unknown_args - - f.write((' '.join(codegen_cmd) + '\n').encode()) - - # run backend driver - with subprocess.Popen( - codegen_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - bufsize=1) as p: - for line in p.stdout: - sys.stdout.buffer.write(line) - f.write(line) + codegen_path = os.path.join(dir_path, getattr(args, 'backend') + '-compile') + codegen_cmd = [codegen_path] + unknown_args + if _utils._is_valid_attr(args, 'command'): + codegen_cmd += getattr(args, 'command').split() + + # run backend driver + with subprocess.Popen( + codegen_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + bufsize=1) as p: + for line in p.stdout: + sys.stdout.buffer.write(line) if __name__ == '__main__': diff --git a/compiler/one-cmds/one-import-bcq b/compiler/one-cmds/one-import-bcq index 5ea1f57..50f5879 100644 --- a/compiler/one-cmds/one-import-bcq +++ b/compiler/one-cmds/one-import-bcq @@ -43,13 +43,13 @@ def _get_parser(): converter_version.add_argument( '--v1', action='store_const', - dest='converter_version', + dest='converter_version_cmd', const='--v1', help='use TensorFlow Lite Converter 1.x') converter_version.add_argument( '--v2', action='store_const', - dest='converter_version', + dest='converter_version_cmd', const='--v2', help='use TensorFlow Lite Converter 2.x') diff --git a/compiler/one-cmds/one-import-tf b/compiler/one-cmds/one-import-tf index 49009d3..3a7c69a 100644 --- a/compiler/one-cmds/one-import-tf +++ b/compiler/one-cmds/one-import-tf @@ -52,8 +52,6 @@ def _get_parser(): const='--v2', help='use TensorFlow Lite Converter 2.x') - #converter_version.set_defaults(converter_version='--v1') - parser.add_argument('--converter_version', type=str, help=argparse.SUPPRESS) # input model format diff --git a/compiler/one-cmds/one-optimize b/compiler/one-cmds/one-optimize index 4c5f109..f03bb8d 100644 --- a/compiler/one-cmds/one-optimize +++ b/compiler/one-cmds/one-optimize @@ -73,6 +73,10 @@ def _get_parser(): 
circle2circle_group.add_argument( '--fuse_instnorm', action='store_true', help='fuse ops to InstanceNorm operator') circle2circle_group.add_argument( + '--replace_cw_mul_add_with_depthwise_conv', + action='store_true', + help='replace channel-wise Mul/Add with DepthwiseConv2D') + circle2circle_group.add_argument( '--resolve_customop_add', action='store_true', help='convert Custom(Add) op to Add op') diff --git a/compiler/one-cmds/tests/one-build_001.cfg b/compiler/one-cmds/tests/one-build_001.cfg index 8524bbd..b022ba7 100644 --- a/compiler/one-cmds/tests/one-build_001.cfg +++ b/compiler/one-cmds/tests/one-build_001.cfg @@ -13,7 +13,7 @@ output_path=inception_v3.circle input_arrays=input input_shapes=1,299,299,3 output_arrays=InceptionV3/Predictions/Reshape_1 -v2=True +converter_version=v2 [one-optimize] input_path=inception_v3.circle diff --git a/compiler/one-cmds/tests/one-build_002.cfg b/compiler/one-cmds/tests/one-build_002.cfg index 1830776..bbf0915 100644 --- a/compiler/one-cmds/tests/one-build_002.cfg +++ b/compiler/one-cmds/tests/one-build_002.cfg @@ -13,7 +13,7 @@ output_path=inception_v3.circle input_arrays=input input_shapes=1,299,299,3 output_arrays=InceptionV3/Predictions/Reshape_1 -v2=True +converter_version=v2 [one-optimize] input_path=inception_v3.circle diff --git a/compiler/one-cmds/tests/one-build_neg_002.cfg b/compiler/one-cmds/tests/one-build_neg_002.cfg index 360c601..99db966 100644 --- a/compiler/one-cmds/tests/one-build_neg_002.cfg +++ b/compiler/one-cmds/tests/one-build_neg_002.cfg @@ -13,7 +13,7 @@ output_path=inception_v3.circle input_arrays=input input_shapes=1,299,299,3 output_arrays=InceptionV3/Predictions/Reshape_1 -v2=True +converter_version=v2 [one-optimize] input_path=inception_v3.circle diff --git a/compiler/one-cmds/tests/one-build_neg_003.cfg b/compiler/one-cmds/tests/one-build_neg_003.cfg index 91e7875..fa027cb 100644 --- a/compiler/one-cmds/tests/one-build_neg_003.cfg +++ b/compiler/one-cmds/tests/one-build_neg_003.cfg @@ -4,7 +4,7 @@ output_path=inception_v3.circle input_arrays=input input_shapes=1,299,299,3 output_arrays=InceptionV3/Predictions/Reshape_1 -v2=True +converter_version=v2 [one-optimize] input_path=inception_v3.circle diff --git a/compiler/one-cmds/tests/one-build_neg_004.cfg b/compiler/one-cmds/tests/one-build_neg_004.cfg index 4d312c4..571077b 100644 --- a/compiler/one-cmds/tests/one-build_neg_004.cfg +++ b/compiler/one-cmds/tests/one-build_neg_004.cfg @@ -13,7 +13,7 @@ output_path=inception_v3.circle input_arrays=input input_shapes=1,299,299,3 output_arrays=InceptionV3/Predictions/Reshape_1 -v2=True +converter_version=v2 [one-optimize] input_path=inception_v3.circle diff --git a/compiler/one-cmds/tests/one-import_002.cfg b/compiler/one-cmds/tests/one-import_002.cfg index 9a90abe..8d6ae2c 100644 --- a/compiler/one-cmds/tests/one-import_002.cfg +++ b/compiler/one-cmds/tests/one-import_002.cfg @@ -13,4 +13,4 @@ output_path=inception_v3.circle input_arrays=input input_shapes=1,299,299,3 output_arrays=InceptionV3/Predictions/Reshape_1 -v2=True +converter_version=v2 diff --git a/compiler/one-cmds/tests/one-import_003.cfg b/compiler/one-cmds/tests/one-import_003.cfg new file mode 100644 index 0000000..b679ebd --- /dev/null +++ b/compiler/one-cmds/tests/one-import_003.cfg @@ -0,0 +1,13 @@ +[one-build] +one-import-tf=True +one-import-tflite=False +one-import-bcq=False +one-optimize=False +one-quantize=False +one-pack=False +one-codegen=False + +[one-import-tf] +model_format=saved_model +input_path=test_saved_model 
+output_path=test_saved_model.circle diff --git a/compiler/one-cmds/tests/one-import_003.test b/compiler/one-cmds/tests/one-import_003.test new file mode 100644 index 0000000..6093f14 --- /dev/null +++ b/compiler/one-cmds/tests/one-import_003.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# import of TF 2.x saved model + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +configfile="one-import_003.cfg" +outputfile="test_saved_model.circle" + +rm -f ${outputfile} + +# run test +one-import tf -C ${configfile} > /dev/null + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/one-import_004.cfg b/compiler/one-cmds/tests/one-import_004.cfg new file mode 100644 index 0000000..d28c8df --- /dev/null +++ b/compiler/one-cmds/tests/one-import_004.cfg @@ -0,0 +1,13 @@ +[one-build] +one-import-tf=True +one-import-tflite=False +one-import-bcq=False +one-optimize=False +one-quantize=False +one-pack=False +one-codegen=False + +[one-import-tf] +model_format=keras_model +input_path=test_keras_model.h5 +output_path=test_keras_model.circle diff --git a/compiler/one-cmds/tests/one-import_004.test b/compiler/one-cmds/tests/one-import_004.test new file mode 100644 index 0000000..9d10c43 --- /dev/null +++ b/compiler/one-cmds/tests/one-import_004.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# import of TF 2.x keras model + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +configfile="one-import_004.cfg" +outputfile="test_keras_model.circle" + +rm -f ${outputfile} + +# run test +one-import tf -C ${configfile} > /dev/null + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/prepare_test_materials.sh b/compiler/one-cmds/tests/prepare_test_materials.sh index cb1067e..bc3d65d 100644 --- a/compiler/one-cmds/tests/prepare_test_materials.sh +++ b/compiler/one-cmds/tests/prepare_test_materials.sh @@ -63,6 +63,20 @@ if [[ ! -s "inception_v3_test_data.h5" ]]; then --output_path inception_v3_test_data.h5 fi +if [[ ! 
-d "test_saved_model" ]]; then + rm -rf test_saved_model.zip + wget https://github.com/Samsung/ONE/files/5516226/test_saved_model.zip + unzip test_saved_model.zip + # https://github.com/Samsung/ONE/issues/4268#issuecomment-724578237 +fi + +if [[ ! -s "test_keras_model.h5" ]]; then + rm -rf test_keras_model.zip + wget https://github.com/Samsung/ONE/files/5520777/test_keras_model.zip + unzip test_keras_model.zip + # https://github.com/Samsung/ONE/issues/4268#issuecomment-725025805 +fi + # prepare 'inception_v3.circle' file used for quantization test inputfile="./inception_v3.pb" outputfile="./inception_v3.circle" diff --git a/compiler/oops/include/oops/InternalExn.h b/compiler/oops/include/oops/InternalExn.h index 0e11085..e14332b 100644 --- a/compiler/oops/include/oops/InternalExn.h +++ b/compiler/oops/include/oops/InternalExn.h @@ -40,20 +40,20 @@ class InternalExn : public std::exception { public: InternalExn(const char *filename, const int line, const std::string &msg) - : _filename(filename), _line(line), _msg(msg) + : _filename(filename), _line(to_uint32(line)), _msg(msg) { construct_full_msg(); } explicit InternalExn(const char *filename, const int line, const std::string &msg, uint32_t val) - : _filename(filename), _line(line), _msg(msg + ": " + std::to_string(val)) + : _filename(filename), _line(to_uint32(line)), _msg(msg + ": " + std::to_string(val)) { construct_full_msg(); } explicit InternalExn(const char *filename, const int line, const std::string &msg, const std::string &val) - : _filename(filename), _line(line), _msg(msg + ": " + val) + : _filename(filename), _line(to_uint32(line)), _msg(msg + ": " + val) { construct_full_msg(); } diff --git a/compiler/pota-quantization-value-test/CMakeLists.txt b/compiler/pota-quantization-value-test/CMakeLists.txt index 73b9ead..80661e5 100644 --- a/compiler/pota-quantization-value-test/CMakeLists.txt +++ b/compiler/pota-quantization-value-test/CMakeLists.txt @@ -1,6 +1,12 @@ unset(QUANTIZATION_VALUE_TEST) unset(QUANTIZATION_VALUE_TEST_WITH_PARAM) +nnas_find_package(FlatBuffers QUIET) +if(NOT FlatBuffers_FOUND) + message(STATUS "Build pota-quantization-value-test: FAILED (missing FlatBuffers)") + return() +endif(NOT FlatBuffers_FOUND) + macro(addTest NAME GRANULARITY DTYPE) list(APPEND QUANTIZATION_VALUE_TEST ${NAME}) list(APPEND QUANTIZATION_VALUE_TEST_WITH_PARAM ${NAME} ${GRANULARITY} ${DTYPE}) @@ -14,8 +20,12 @@ include("test.local.lst" OPTIONAL) unset(TEST_DEPS) get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR) +get_target_property(SCHEMA_BIN_PATH mio_circle BINARY_DIR) + +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/gen_h5_explicit_inputs.py" + "${CMAKE_CURRENT_BINARY_DIR}/gen_h5_explicit_inputs.py" COPYONLY) -set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_1_13_2") +set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_3_0") ### ### Generate test.config @@ -35,7 +45,21 @@ add_custom_command( COMMENT "Generate test configuration" ) -list(APPEND TEST_DEPS "${TEST_CONFIG}") +### +### Generate python interface for circle schema +### +set(CIRCLE_SCHEMA_PYTHON_DIR "${CMAKE_CURRENT_BINARY_DIR}/circle") + +add_custom_command( + OUTPUT ${CIRCLE_SCHEMA_PYTHON_DIR} + COMMAND ${CMAKE_COMMAND} -E remove_directory "${CIRCLE_SCHEMA_PYTHON_DIR}" + COMMAND "$" --python + -o "${CMAKE_CURRENT_BINARY_DIR}" "${SCHEMA_BIN_PATH}/schema.fbs" + DEPENDS flatbuffers::flatc + COMMENT "Generate python interface for circle schema" +) + +list(APPEND TEST_DEPS "${TEST_CONFIG}" "${CIRCLE_SCHEMA_PYTHON_DIR}") # This enforces CMake to generate all the dependencies 
during "build" phase add_custom_target(pota_quantization_value_test_deps ALL DEPENDS ${TEST_DEPS}) diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/beta.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/beta.json new file mode 100644 index 0000000..fa2cdae --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/beta.json @@ -0,0 +1,20 @@ +{ + "weights": [ + 1, + 0, + 1, + 1 + ], + "scale": [ + 0.7023000121116638, + 0.3091999888420105, + 0.7552000284194946, + 0.2728999853134155 + ], + "zero_point": [ + 0, + 1, + 0, + 0 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/gamma.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/gamma.json new file mode 100644 index 0000000..393a44a --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/gamma.json @@ -0,0 +1,20 @@ +{ + "weights": [ + 1, + 0, + 1, + 0 + ], + "scale": [ + 0.012299999594688416, + 0.33239999413490295, + 0.23240000009536743, + 3.3359999656677246 + ], + "zero_point": [ + 0, + 1, + 0, + 1 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/ifm.json new file mode 100644 index 0000000..94c4e0f --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/ifm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.003919127397239208, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/ofm.json new file mode 100644 index 0000000..27a1c85 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.051219820976257324, + "zero_point": 104.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/record_minmax/ifm.json new file mode 100644 index 0000000..910e855 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/record_minmax/ifm.json @@ -0,0 +1,4 @@ +{ + "min": 0.006417479291558266, + "max": 0.9993774032592774 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/record_minmax/ofm.json new file mode 100644 index 0000000..190da30 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/channel/uint8/record_minmax/ofm.json @@ -0,0 +1,4 @@ +{ + "min": -5.316554107666015, + "max": 7.744499607086182 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/beta.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/beta.json new file mode 100644 index 0000000..9dcefd5 --- /dev/null +++ 
b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/beta.json @@ -0,0 +1,10 @@ +{ + "weights": [ + 242, + 0, + 255, + 139 + ], + "scale": 0.004174117464572191, + "zero_point": 74.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/gamma.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/gamma.json new file mode 100644 index 0000000..6d85a1e --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/gamma.json @@ -0,0 +1,10 @@ +{ + "weights": [ + 239, + 214, + 255, + 0 + ], + "scale": 0.013993725180625916, + "zero_point": 238.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/ifm.json new file mode 100644 index 0000000..df3df56 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/ifm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.003914226312190294, + "zero_point": 0.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/ofm.json new file mode 100644 index 0000000..098816a --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.04870154336094856, + "zero_point": 122.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/record_minmax/ifm.json new file mode 100644 index 0000000..d2e7923 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/record_minmax/ifm.json @@ -0,0 +1,4 @@ +{ + "min": 0.011221568882465362, + "max": 0.9981276893615723 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/record_minmax/ofm.json new file mode 100644 index 0000000..b4ea586 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/InstanceNorm_001/layer/uint8/record_minmax/ofm.json @@ -0,0 +1,4 @@ +{ + "min": -5.94246238708496, + "max": 6.4764308166503906 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/alpha.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/alpha.json index 5f6db8d..6f99899 100644 --- a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/alpha.json +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/alpha.json @@ -2,12 +2,20 @@ "weights": [ [ [ - 6553, - 19660, - 32767 + 1, + 1, + 1 ] ] ], - "scale": 1.5259254723787308e-05, - "zero_point": 0.0 + "scale": [ + 0.10000000149011612, + 0.30000001192092896, + 0.5 + ], + "zero_point": [ + 0, + 0, + 0 + ] } diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ifm.json 
b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ifm.json index e75377c..7d1f4c7 100644 --- a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ifm.json +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ifm.json @@ -1,4 +1,4 @@ { - "scale": 0.0001509107678430155, + "scale": 0.00015214986342471093, "zero_point": 0.0 } diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ofm.json index e4a89e2..533c1e3 100644 --- a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ofm.json +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/quantization/ofm.json @@ -1,4 +1,4 @@ { - "scale": 0.00015084103506524116, + "scale": 0.00015159364556893706, "zero_point": 0.0 } diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ifm.json index a34d48c..edbbff9 100644 --- a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ifm.json +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ifm.json @@ -1,4 +1,4 @@ { - "min": -4.944893226623535, - "max": 4.942608108520508 + "min": -4.985494499206543, + "max": 4.967269058227539 } diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ofm.json index 640397c..954d5ef 100644 --- a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ofm.json +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/int16/record_minmax/ofm.json @@ -1,4 +1,4 @@ { - "min": -2.451441249847412, - "max": 4.942608108520508 + "min": -2.4895002365112306, + "max": 4.967269058227539 } diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/alpha.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/alpha.json new file mode 100644 index 0000000..6f99899 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/alpha.json @@ -0,0 +1,21 @@ +{ + "weights": [ + [ + [ + 1, + 1, + 1 + ] + ] + ], + "scale": [ + 0.10000000149011612, + 0.30000001192092896, + 0.5 + ], + "zero_point": [ + 0, + 0, + 0 + ] +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/ifm.json new file mode 100644 index 0000000..d661df3 --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/ifm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.03893596678972244, + "zero_point": 128.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/ofm.json new file mode 100644 index 0000000..6dfffd5 --- /dev/null +++ 
b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/quantization/ofm.json @@ -0,0 +1,4 @@ +{ + "scale": 0.029139429330825806, + "zero_point": 85.0 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/record_minmax/ifm.json new file mode 100644 index 0000000..8de6b3d --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/record_minmax/ifm.json @@ -0,0 +1,4 @@ +{ + "min": -4.977406520843505, + "max": 4.951265411376953 +} diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/record_minmax/ofm.json new file mode 100644 index 0000000..c88f6ca --- /dev/null +++ b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001/channel/uint8/record_minmax/ofm.json @@ -0,0 +1,4 @@ +{ + "min": -2.4792890548706055, + "max": 4.951265411376953 +} diff --git a/compiler/pota-quantization-value-test/gen_h5_explicit_inputs.py b/compiler/pota-quantization-value-test/gen_h5_explicit_inputs.py index 9863c80..a00cbeb 100755 --- a/compiler/pota-quantization-value-test/gen_h5_explicit_inputs.py +++ b/compiler/pota-quantization-value-test/gen_h5_explicit_inputs.py @@ -1,16 +1,17 @@ #!/usr/bin/env python3 import h5py as h5 import numpy as np -import tensorflow as tf +from circle.Model import Model +from circle.TensorType import TensorType import argparse import glob # -# This script generates a pack of random input data (.h5) expected by the input tflite model +# This script generates a pack of random input data (.h5) expected by the input circle model # # Basic usage: # gen_h5_explicit_inputs.py --model --input --output -# ex: gen_h5_explicit_inputs.py --model Add_000.tflite --input Add_000 --output Add_000.input.h5 +# ex: gen_h5_explicit_inputs.py --model Add_000.circle --input Add_000 --output Add_000.input.h5 # (This will create Add_000.input.h5) # # The input directory should be organized as follows @@ -33,15 +34,30 @@ model = args.model input = args.input output = args.output -# Build TFLite interpreter. 
(to get the information of model input) -interpreter = tf.lite.Interpreter(model) -input_details = interpreter.get_input_details() +with open(model, 'rb') as f: + buf = f.read() + circle_model = Model.GetRootAsModel(buf, 0) + +# Assume one subgraph +assert (circle_model.SubgraphsLength() == 1) +graph = circle_model.Subgraphs(0) +inputs = graph.InputsAsNumpy() # Create h5 file h5_file = h5.File(output, 'w') group = h5_file.create_group("value") group.attrs['desc'] = "Input data for " + model + +def toNumpyType(circle_type): + if circle_type == TensorType.UINT8: + return np.uint8 + if circle_type == TensorType.FLOAT32: + return np.float32 + if circle_type == TensorType.INT16: + return np.int16 + + # Input files records = sorted(glob.glob(input + "/*.txt")) for i, record in enumerate(records): @@ -51,9 +67,10 @@ for i, record in enumerate(records): lines = f.readlines() for j, line in enumerate(lines): data = np.array(line.split(',')) - input_detail = input_details[j] - input_data = np.array( - data.reshape(input_detail["shape"]), input_detail["dtype"]) + input_index = inputs[j] + tensor = graph.Tensors(input_index) + np_type = toNumpyType(tensor.Type()) + input_data = np.array(data.reshape(tensor.ShapeAsNumpy()), np_type) sample.create_dataset(str(j), data=input_data) h5_file.close() diff --git a/compiler/pota-quantization-value-test/test.lst b/compiler/pota-quantization-value-test/test.lst index 15606b8..dd16404 100644 --- a/compiler/pota-quantization-value-test/test.lst +++ b/compiler/pota-quantization-value-test/test.lst @@ -13,6 +13,8 @@ addTest(DepthwiseConv2D_002 layer uint8) addTest(FullyConnected_003 channel uint8) addTest(FullyConnected_003 channel int16) addTest(FullyConnected_003 layer uint8) +addTest(InstanceNorm_001 layer uint8) +addTest(InstanceNorm_001 channel uint8) addTest(Mean_000 layer uint8) addTest(Mean_000 channel int16) addTest(MaxPool2D_000 layer uint8) @@ -20,6 +22,7 @@ addTest(MaxPool2D_000 channel int16) addTest(Mul_001 layer uint8) addTest(Mul_001 channel int16) addTest(PRelu_001 layer uint8) +addTest(PRelu_001 channel uint8) addTest(PRelu_001 channel int16) addTest(ReLU_000 layer uint8) addTest(ReLU_000 channel int16) diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/0.txt new file mode 100644 index 0000000..5e926a2 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/0.txt @@ -0,0 +1 @@ +0.15500909,0.32379007,0.12717001,0.60674316,0.07691418,0.437071 ,0.3737046 ,0.798342 ,0.65901846,0.40579247,0.15460491,0.80063623,0.591834 ,0.6617658 ,0.5617774 ,0.44884747,0.7996519 ,0.75895494,0.6239346 ,0.56500244,0.8955974 ,0.32503998,0.05756519,0.11889575,0.19635268,0.33958906,0.916527 ,0.16366032,0.51954055,0.2615102 ,0.07677322,0.6970092 ,0.27848312,0.97694606,0.73990864,0.96292055 diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/1.txt new file mode 100644 index 0000000..eb5de0c --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/1.txt @@ -0,0 +1 @@ +0.85332185,0.03102963,0.54344934,0.6300742 ,0.3323267 ,0.1701224 ,0.36199054,0.23949413,0.11960976,0.668403 ,0.7907452 ,0.4377144 ,0.87145853,0.75605077,0.37314144,0.3622036 ,0.4321453 ,0.8770253 ,0.10936793,0.0734281 ,0.2922192 ,0.5829591 ,0.5422962 
,0.84274834,0.48475483,0.23154257,0.20037153,0.27911612,0.30018023,0.23753181,0.98804647,0.61455756,0.90376633,0.8255312 ,0.21020697,0.6272272 diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/2.txt new file mode 100644 index 0000000..16561ef --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/2.txt @@ -0,0 +1 @@ +0.29736656,0.5712386 ,0.55447775,0.9014779 ,0.6208391 ,0.3413809 ,0.043885 ,0.5474101 ,0.8642339 ,0.05225753,0.36101478,0.15561381,0.776422 ,0.9997885 ,0.35188794,0.23418508,0.0882741 ,0.5797471 ,0.99945694,0.22190607,0.12337059,0.3701574 ,0.65161157,0.9830193 ,0.46270686,0.10077237,0.23681253,0.8734158 ,0.8358533 ,0.08817147,0.3845248 ,0.12799203,0.66830546,0.14838815,0.90201443,0.21123447 diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/3.txt new file mode 100644 index 0000000..deba38b --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/3.txt @@ -0,0 +1 @@ +0.92424273,0.35776526,0.0776509 ,0.93697083,0.6559925 ,0.78421926,0.7511033 ,0.71389145,0.52217877,0.41876563,0.3560251 ,0.5862293 ,0.53027606,0.32203177,0.24654935,0.55851364,0.35312092,0.38102064,0.21245371,0.87299466,0.94972914,0.54950166,0.3445233 ,0.98951054,0.37458083,0.3778964 ,0.64035404,0.10410193,0.18511558,0.1942945 ,0.07018933,0.6113747 ,0.38076922,0.08337755,0.98258 ,0.91440874 diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/4.txt new file mode 100644 index 0000000..78b783a --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/channel/uint8/4.txt @@ -0,0 +1 @@ +0.3790198 ,0.6347678 ,0.42544237,0.37033263,0.08057033,0.49041638,0.61705315,0.15411597,0.6455052 ,0.6857795 ,0.9613043 ,0.60357374,0.57679754,0.22550431,0.05105425,0.8641173 ,0.65559083,0.18274343,0.8963692 ,0.22369736,0.3133119 ,0.27507883,0.00539197,0.6846556 ,0.5969273 ,0.78488904,0.87746257,0.15459861,0.23133573,0.59048635,0.07172906,0.28935516,0.02084327,0.09926946,0.02687503,0.7306079 diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/0.txt new file mode 100644 index 0000000..25b600c --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/0.txt @@ -0,0 +1 @@ +0.641226 ,0.68639857,0.87044334,0.9448475 ,0.21544299,0.5202749 ,0.5077167 ,0.23931624,0.5712026 ,0.4167988 ,0.56711906,0.52392703,0.42762014,0.5277072 ,0.03028643,0.18017273,0.8823869 ,0.5752544 ,0.09368648,0.50277 ,0.784248 ,0.04220072,0.55217946,0.75145644,0.7957966 ,0.6563401 ,0.54975605,0.17231019,0.4219812 ,0.27839735,0.5850074 ,0.24070603,0.00957893,0.3669335 ,0.03722228,0.8705231 diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/1.txt new file mode 100644 index 0000000..caadfed --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/1.txt @@ -0,0 +1 @@ +0.76871806,0.65729177,0.946514 ,0.4308198 ,0.65200335,0.5745432 ,0.2990488 
,0.3156028 ,0.3218111 ,0.44709972,0.9411461 ,0.4828708 ,0.5707792 ,0.10645963,0.74497086,0.3563156 ,0.07986172,0.64869064,0.73329425,0.8848129 ,0.3027897 ,0.8753744 ,0.8884493 ,0.3606782 ,0.88617206,0.20232914,0.10251648,0.6366529 ,0.20422891,0.24426484,0.6952833 ,0.21889713,0.11477511,0.40650114,0.9637219 ,0.9751801 diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/2.txt new file mode 100644 index 0000000..bc4a494 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/2.txt @@ -0,0 +1 @@ +0.5773043 ,0.6733178 ,0.22994593,0.32895002,0.74122405,0.6671442 ,0.1899878 ,0.35264668,0.31084946,0.3864719 ,0.7035006 ,0.46563607,0.44263086,0.2414678 ,0.7430625 ,0.72898006,0.9982008 ,0.8989132 ,0.45622516,0.17876478,0.9356994 ,0.85493064,0.73729265,0.9804242 ,0.8735895 ,0.14825071,0.33990774,0.76397645,0.14657325,0.2492199 ,0.43957144,0.20367876,0.43692476,0.28123745,0.24346785,0.21133597 diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/3.txt new file mode 100644 index 0000000..18f8666 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/3.txt @@ -0,0 +1 @@ +0.74837255,0.7530814 ,0.05257462,0.06676125,0.26824346,0.05064487,0.23974492,0.5355457 ,0.97374374,0.38518724,0.3781766 ,0.7047476 ,0.95856845,0.09918232,0.36570287,0.5659468 ,0.8793284 ,0.7967468 ,0.99486005,0.11670698,0.42955273,0.25254622,0.06959745,0.5107888 ,0.88106513,0.3649466 ,0.7039582 ,0.8535825 ,0.3979168 ,0.9560912 ,0.17733434,0.69954944,0.35459924,0.28516313,0.75249106,0.7197228 diff --git a/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/4.txt new file mode 100644 index 0000000..b51c5eb --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/InstanceNorm_001/layer/uint8/4.txt @@ -0,0 +1 @@ +0.73320377,0.33635676,0.05811058,0.7032399 ,0.26380542,0.99637365,0.36622 ,0.47471517,0.5940316 ,0.39782768,0.46486765,0.5167471 ,0.61612487,0.93076104,0.8955697 ,0.5320168 ,0.41166067,0.29174343,0.07476811,0.60023075,0.0961028 ,0.77073896,0.17360727,0.48763612,0.31430086,0.37943754,0.7456216 ,0.16767363,0.9368368 ,0.09397154,0.68992966,0.5829225 ,0.7521187 ,0.06086114,0.13137193,0.22886442 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/0.txt index 107491f..081a1e6 100644 --- a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/0.txt +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/0.txt @@ -1 +1 @@ - 0.5590226 ,-0.2806683 ,-1.6237477 ,-0.9041292 ,-2.2877202 , 3.4275887 , 0.7413508 ,-2.4284103 ,-0.39940628, 2.431437 ,-3.681079 ,-0.24288087, 3.3011584 ,-4.9507365 , 0.63297826, 3.0742207 ,-4.407745 ,-3.1469536 , 0.28014645, 1.7506292 ,-2.2447422 ,-0.5647249 , 4.763762 ,-1.9554822 ,-1.0236452 , 1.4784483 ,-0.15040281, 3.009691 , 4.0685706 ,-4.3577633 , 3.9074588 , 3.3200462 , 0.7937705 ,-4.491444 ,-1.5227276 ,-4.907054 , 3.0078046 ,-3.3134713 ,-4.180262 , 0.42208448,-4.764361 , 1.7373432 ,-2.4944234 , 1.3338212 , 0.5318029 , 2.0201192 , 1.274291 ,-3.891372 +-1.9927613e+00,-1.7386111e+00, 4.0895696e+00, 
3.7818990e+00, 1.9420158e+00, 2.8482721e+00, 1.9165717e+00, 3.0059583e+00, 1.8346788e+00,-1.9055414e-03, 4.9277787e+00,-2.2794118e+00, 4.4005270e+00, 4.9703922e+00,-4.5275192e+00,-4.0446317e-01,-4.9363256e+00, 4.9506269e+00, 5.5874938e-01, 3.9949589e+00,-3.8152415e-01,-4.1024357e-01,-3.8472393e+00, 4.2956004e+00, 4.8097472e+00, 1.7960385e+00, 1.6767026e+00,-2.2773645e+00, 2.6808765e+00,-3.7214172e+00, 4.0978761e+00, 3.6202488e+00,-3.3211513e+00, 3.6200387e+00,-3.6106458e+00,-3.9778764e+00, 3.8779631e+00,-4.8502750e+00,-2.1901150e+00, 3.1800017e+00, 4.6261444e+00, 3.5151103e+00, 2.8659137e-02, 4.5340648e+00, 1.9836371e+00,-2.1751235e+00,-4.6762753e+00,-3.6951694e+00 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/1.txt index f95a6c3..f6b31db 100644 --- a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/1.txt +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/1.txt @@ -1 +1 @@ --2.5172353 , 1.8682998 , 2.6845884 , 1.8813597 ,-4.6693754 ,-3.2414548 ,-3.1801097 ,-1.5670214 , 1.9862102 , 3.857179 ,-3.0402668 ,-1.4183347 ,-2.7983398 ,-4.087585 ,-1.1274861 , 1.8738103 ,-2.563316 ,-2.973781 ,-0.872552 ,-4.4504313 ,-0.9188538 , 4.5734954 , 1.3559026 , 4.943204 ,-3.6803703 , 4.577067 ,-0.6116983 , 4.5055084 , 2.5480487 , 3.7308915 ,-0.3163238 ,-0.00772368, 3.0286303 ,-0.43645218, 0.87748104,-2.6953583 , 0.21743219, 2.431181 ,-1.2284794 , 0.35975334, 0.87034357,-2.5191767 , 4.030477 ,-1.2849646 ,-4.537441 ,-0.8822066 , 4.5059347 ,-0.9273924 +-4.7488093 , 4.805902 ,-0.29828382, 0.57486725,-4.864297 , 1.1832287 ,-1.7611881 ,-2.7058024 , 2.707353 ,-3.9832466 , 3.1243927 ,-4.795229 , 1.9835415 , 3.2291937 , 2.4303932 ,-3.556881 , 4.316894 ,-0.6444627 ,-3.8289468 , 4.012964 , 0.7878584 ,-1.8921386 , 2.779619 ,-3.762597 , 3.4239094 ,-0.9103423 ,-3.9791772 ,-2.5613685 ,-4.4910364 , 0.19411987, 4.6296096 ,-0.6827259 , 3.7645729 , 1.5309091 , 3.5163064 , 3.4726381 , 3.5372822 , 1.7671971 , 1.4374614 , 3.5783768 ,-2.4927518 , 3.9427729 , 2.431568 , 2.6959393 , 3.8100271 ,-2.099064 , 3.3663592 ,-2.0818436 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/2.txt index 106889e..acc01cb 100644 --- a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/2.txt +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/2.txt @@ -1 +1 @@ - 4.523605 ,-2.1303053 , 2.7449381 ,-4.449816 ,-1.4482541 , 4.643309 ,-2.5644886 , 4.3115034 ,-4.7736797 ,-1.9451635 ,-2.1877592 , 2.3639698 ,-1.8480709 ,-4.560132 ,-0.40588248, 4.368528 ,-0.25666243, 1.1258887 , 2.33142 ,-3.8270295 ,-4.337086 ,-0.6709232 , 4.9283085 ,-3.5181348 , 2.225021 ,-0.0831629 , 2.0482597 , 3.161154 ,-0.49435407, 2.9382129 ,-1.248886 ,-3.7053974 , 1.6736145 ,-1.3524985 ,-1.4007242 ,-4.291275 ,-3.391911 , 4.803692 , 1.631321 , 0.13381048,-2.9587808 , 3.9878602 ,-3.3585925 , 4.6802793 ,-1.7605352 , 3.4168313 , 1.2318416 ,-4.40287 + 4.279912 ,-2.2746763 , 4.0609813 , 4.5353827 , 3.624241 ,-3.9593613 , 4.189409 ,-3.9370356 ,-2.7063863 ,-1.9987059 , 4.172294 ,-4.5454354 , 4.362368 , 2.2204642 ,-4.9866576 , 3.31571 , 0.12623785, 4.7834573 ,-1.3521448 ,-1.5408021 ,-4.6578984 ,-2.93307 ,-1.5684534 ,-1.6875995 ,-0.4278419 , 1.1314197 ,-2.9655704 ,-0.48032767,-1.9200082 , 1.3321692 , 0.87586147,-0.1761448 , 3.939337 ,-1.0270193 ,-4.807054 , 
2.8373904 ,-1.1184337 ,-0.8979197 , 2.1442132 ,-2.8509672 ,-3.3741531 , 3.6592414 , 0.7632272 ,-4.11465 , 4.892313 , 4.715815 ,-4.6481915 , 0.24676175 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/3.txt index 488c348..0f0b7a9 100644 --- a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/3.txt +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/3.txt @@ -1 +1 @@ - 1.249105 ,-3.2594535 ,-1.7899538 ,-4.804654 ,-2.0324056 ,-1.9959925 , 3.5215054 , 0.5371311 , 1.9365969 ,-3.130136 ,-2.3590457 ,-4.653209 ,-2.0184708 , 3.5759254 ,-1.3521014 , 1.910826 , 3.8221822 ,-2.8988552 , 0.6571995 , 1.0839036 , 3.5422468 , 2.4680734 , 0.6148754 ,-3.4008195 , 4.558109 , 2.0105803 , 0.58087206, 1.3398736 , 2.770545 , 0.29666626, 4.1851935 , 0.04321287, 2.7680604 , 4.5661645 , 4.0127945 ,-4.8027678 , 4.1711125 ,-0.24452859, 0.4101852 , 1.5963763 ,-2.8356924 , 1.2876563 , 0.90424466, 2.965566 ,-1.9058269 , 4.759825 ,-2.2063546 ,-1.1309439 +-2.0949495 ,-1.1370499 , 4.6457314 ,-2.243915 ,-1.7996464 , 1.2268789 ,-4.938172 ,-3.2802615 , 1.8788282 , 4.4162655 ,-4.8805113 , 3.1269526 , 3.2644348 , 0.89842725,-1.4484432 ,-0.28381723, 3.046261 ,-1.0718596 ,-3.996107 ,-4.9575796 ,-2.2279077 , 1.5326967 , 4.4588428 ,-2.042381 , 4.6604958 , 4.6422915 ,-1.097833 , 3.666126 , 0.4735639 ,-4.480704 ,-4.831033 ,-0.27288163, 4.588138 , 4.5297036 , 4.3675694 ,-1.6098841 ,-3.4147859 , 2.1168516 ,-1.9529305 ,-0.12548867, 3.4388335 ,-1.4071734 , 0.9507897 , 4.8206787 , 1.676873 ,-1.7102181 , 1.7746873 , 0.02711739 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/4.txt index a59688e..d23450d 100644 --- a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/4.txt +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/int16/4.txt @@ -1 +1 @@ --3.0078897 , 1.6800234 , 4.350201 , 0.22538732, 2.9894316 ,-4.234071 , 2.733158 ,-3.8551323 , 3.9647048 , 1.4266169 , 0.78519976,-0.5334222 , 0.6681823 , 2.8409274 , 2.335872 ,-3.757666 ,-3.321705 , 2.9423573 , 1.3080943 , 1.0453726 , 3.222387 , 3.1813147 ,-1.8588669 ,-3.2523947 ,-4.4175825 , 3.7631783 ,-3.4176416 , 1.2141145 , 1.3725096 ,-1.2283872 ,-2.9829195 ,-3.6383085 ,-2.0126016 ,-3.7627625 , 4.916868 , 0.73052526,-0.02047114,-3.9506733 , 2.3569562 ,-4.247723 ,-1.8913685 , 1.7365774 , 4.59158 , 3.654596 ,-4.2133813 ,-4.6193404 ,-1.3968121 ,-3.580963 +-4.707647 ,-4.0921726 , 3.5813692 ,-4.71081 , 3.157816 ,-3.0034213 ,-0.21858999,-1.1736552 ,-1.6042249 ,-3.93102 ,-4.0407577 , 3.7350774 ,-4.9545655 ,-1.5413756 , 0.34996858, 2.0339615 , 0.99290746,-3.9916334 ,-4.149016 ,-3.2332835 , 3.6728513 , 2.4537466 ,-3.103485 ,-0.4829316 , 4.8046784 ,-1.753812 , 4.878712 ,-1.4039769 , 1.6640003 ,-1.2041731 , 0.8046477 , 0.9196048 ,-0.6475092 , 1.1409346 , 2.0324717 ,-0.04227797,-0.5379897 , 3.205104 , 3.3556423 , 4.8447986 ,-1.9695646 ,-2.6304977 ,-3.7261262 ,-4.725599 , 2.1162436 ,-0.5631174 ,-0.5820323 , 0.8398242 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/0.txt new file mode 100644 index 0000000..bcda22c --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/0.txt @@ -0,0 +1 @@ + 0.29413325,-0.5246354 , 
2.5049045 , 4.9534087 , 0.9885207 ,-4.9603324 ,-2.534284 ,-1.2587626 ,-4.6054525 ,-4.0071754 , 3.204513 , 1.9254771 ,-3.0781755 ,-2.225973 , 3.3524523 , 3.817767 , 3.4921055 , 4.3435416 , 3.0849605 ,-1.4030998 ,-1.0506575 ,-0.42979953,-2.2500112 , 3.4057455 , 4.5414543 , 2.9366746 , 4.8639297 ,-0.1028097 , 2.3421814 , 0.6463296 ,-4.906506 ,-0.7544193 ,-4.0089574 , 2.3837643 ,-0.62171113,-3.349577 , 0.63758767,-3.6872568 ,-2.4398334 ,-1.1556609 ,-3.116043 ,-1.9698795 , 0.7246678 , 2.1801088 ,-2.5762403 , 2.5748649 ,-2.8637013 , 2.8755338 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/1.txt new file mode 100644 index 0000000..937e08f --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/1.txt @@ -0,0 +1 @@ +-3.5664022e+00, 3.7696166e+00,-2.0404069e+00,-3.2197843e+00, 2.0149478e-01, 4.1116104e+00, 1.9678035e+00,-7.5975507e-01,-2.1460054e+00, 4.6308274e+00,-1.8927828e+00, 3.0689645e+00,-7.0773923e-01,-6.7477709e-01,-1.6248076e+00, 2.7095401e+00, 2.9545853e+00, 8.5142839e-01,-2.7683893e-01,-2.0586762e+00,-3.5001924e+00,-1.7622359e+00, 2.2262762e+00,-4.0617161e+00,-2.4704919e+00,-3.6333869e+00, 2.3401244e+00,-4.6641917e+00,-4.0812837e-03, 1.1013873e+00, 1.4518824e-01, 2.4135842e+00, 4.1183419e+00, 3.0343807e+00,-3.7195799e-01,-9.7189492e-01,-3.0425618e+00, 4.6822820e+00,-1.7649661e+00, 3.9648254e+00,-3.1084957e+00,-7.3071235e-01,-5.1578474e-01,-3.5188673e+00,-4.7018051e+00,-4.1592669e+00,-3.5443991e-01, 1.3961188e+00 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/2.txt new file mode 100644 index 0000000..fb30491 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/2.txt @@ -0,0 +1 @@ + 4.2618856 , 0.4364266 , 0.5258691 , 3.5147502 ,-4.025428 , 3.143039 , 1.3707066 , 4.7792606 , 1.1539228 , 3.785161 ,-1.9495047 , 2.7047534 , 0.5673139 ,-0.5191105 ,-2.5284607 , 4.076998 , 2.9433093 ,-2.1924984 , 1.1020935 ,-2.126009 , 0.7586875 , 1.1708144 ,-4.594603 ,-3.252912 ,-3.057344 , 3.8008513 ,-4.9164753 ,-4.560891 , 1.724639 ,-3.0877826 , 0.55354726,-3.969067 , 4.17461 ,-1.901139 ,-4.8903475 , 4.7866077 ,-1.3506653 ,-4.2624874 , 0.8842832 , 4.672003 ,-2.5649548 ,-3.6606123 ,-1.6794366 ,-2.0534387 ,-2.9902222 , 3.078469 , 2.846819 , 1.2788221 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/3.txt new file mode 100644 index 0000000..fb9d40a --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/3.txt @@ -0,0 +1 @@ +-2.6751792 ,-2.5436802 , 0.30533552, 1.0443643 ,-4.4327927 , 2.813772 ,-4.27514 , 2.5894637 , 2.8684394 ,-2.2010357 , 1.5827026 , 0.01609957, 0.38605672,-4.978118 ,-0.30794173, 0.7372266 ,-1.2931277 , 2.8435483 , 2.8204155 , 1.5801594 , 0.853025 , 1.0665054 ,-2.3281817 ,-4.2512784 , 2.379218 , 2.6335719 , 0.17575608,-2.7761426 ,-2.8164017 , 1.8392245 , 2.6495574 , 0.82702005, 3.8548648 ,-3.179834 , 0.25908127, 2.4930098 , 0.71019745,-3.193962 ,-1.1381371 ,-3.5847874 ,-1.3353258 , 2.942422 , 0.11944559,-3.0676606 , 3.534187 , 0.86664987,-1.4781127 , 4.8873277 diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/4.txt 
b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/4.txt new file mode 100644 index 0000000..aeecd56 --- /dev/null +++ b/compiler/pota-quantization-value-test/test_inputs/PRelu_001/channel/uint8/4.txt @@ -0,0 +1 @@ + 4.2327642 , 4.644095 ,-2.8978996 , 4.39419 , 2.897952 ,-3.330613 ,-3.9131684 ,-1.4672462 ,-3.9219787 , 2.1286428 ,-4.313653 , 2.65426 ,-4.201722 , 2.5390174 ,-3.821772 ,-1.9420135 , 3.3508427 ,-1.2804624 , 4.899826 ,-4.165279 ,-0.38920662, 3.594253 ,-2.367396 , 3.8604352 , 0.40077925, 3.7654843 ,-2.7208197 , 3.4325044 ,-2.921729 , 2.0519714 ,-0.6181836 ,-0.12342291,-4.1059036 ,-3.653849 ,-3.5340316 ,-0.2782715 , 0.32330513, 3.360021 , 2.5673623 , 2.1614027 ,-4.438277 , 3.3010736 , 0.3992392 , 0.82871836,-2.8720777 , 0.29633927, 0.25286415,-4.191315 diff --git a/compiler/pota-quantization-value-test/test_record_minmax.sh b/compiler/pota-quantization-value-test/test_record_minmax.sh index acb7574..fa8f506 100755 --- a/compiler/pota-quantization-value-test/test_record_minmax.sh +++ b/compiler/pota-quantization-value-test/test_record_minmax.sh @@ -9,11 +9,11 @@ # work_dir : build directory of quantization-value-test (ex: build/compiler/quantization-value-test) SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -GEN_SCRIPT_PATH="${SOURCE_PATH}/gen_h5_explicit_inputs.py" COMPARE_SCRIPT_PATH="${SOURCE_PATH}/compare_tensors.py" CONFIG_PATH="$1"; shift BIN_PATH=$(dirname "${CONFIG_PATH}") TEST_INPUT_PATH="${SOURCE_PATH}/test_inputs" +GEN_SCRIPT_PATH="${BIN_PATH}/gen_h5_explicit_inputs.py" WORKDIR="$1"; shift source "${CONFIG_PATH}" @@ -48,7 +48,7 @@ while [ "$1" != "" ]; do # Generate h5 input data source "${VIRTUALENV}/bin/activate" "${VIRTUALENV}/bin/python" "${GEN_SCRIPT_PATH}" \ - --model "${WORKDIR}/${MODELNAME}.tflite" \ + --model "${WORKDIR}/${MODELNAME}.circle" \ --input "${TEST_INPUT_PATH}/${MODELNAME}/${GRANULARITY}/${DTYPE}" \ --output "${TESTCASE_FILE}.input.h5" diff --git a/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgMax.cpp b/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgMax.cpp index b1c92ec..13bf2e5 100644 --- a/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgMax.cpp +++ b/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgMax.cpp @@ -65,13 +65,13 @@ MaxPoolWithArgMaxChef::custom_value(flatbuffers::FlatBufferBuilder &fbb) const flex_buffers->Add(1); flex_buffers->EndVector(start, /*typed=*/true, /*fixed=*/false); auto output_type = operation.max_pool_with_argmax_options().output_type(); - assert(output_type == tflite::TensorType_INT64 || output_type == tflite::TensorType_INT32); + assert(output_type == tflchef::INT64 || output_type == tflchef::INT32); flex_buffers->Int("Targmax", output_type); std::string padding = operation.max_pool_with_argmax_options().padding() ? 
"VALID" : "SAME"; flex_buffers->String("padding", padding); flex_buffers->Bool("include_batch_in_index", operation.max_pool_with_argmax_options().include_batch_in_index()); - flex_buffers->Int("T", tflite::TensorType_FLOAT32); + flex_buffers->Int("T", tflchef::FLOAT32); flex_buffers->EndMap(map_start); flex_buffers->Finish(); diff --git a/compiler/tfldump/src/Dump.cpp b/compiler/tfldump/src/Dump.cpp index 8c8178f..20e1343 100644 --- a/compiler/tfldump/src/Dump.cpp +++ b/compiler/tfldump/src/Dump.cpp @@ -349,6 +349,7 @@ void dump_model(std::ostream &os, const tflite::Model *model) auto opcodes = reader.opcodes(); auto buffers = reader.buffers(); + auto metadata = reader.metadata(); // dump operator_codes os << "Operator Codes: [order] OpCodeName (OpCode Enum)" << std::endl; @@ -382,6 +383,17 @@ void dump_model(std::ostream &os, const tflite::Model *model) } os << std::endl; + // dump metadata + if (metadata != nullptr) + { + os << "metadata : B(index) name" << std::endl; + for (uint32_t i = 0; i < metadata->Length(); ++i) + { + os << "B(" << metadata->Get(i)->buffer() << ") " << metadata->Get(i)->name()->c_str(); + } + os << std::endl; + } + for (uint32_t sg = 0; sg < num_subgraph; ++sg) { reader.select_subgraph(sg); diff --git a/compiler/tfldump/src/OpPrinter.cpp b/compiler/tfldump/src/OpPrinter.cpp index 5d279632..c358480 100644 --- a/compiler/tfldump/src/OpPrinter.cpp +++ b/compiler/tfldump/src/OpPrinter.cpp @@ -694,6 +694,7 @@ OpPrinterRegistry::OpPrinterRegistry() // There is no Option for LOGISTIC // There is no Option for LOG_SOFTMAX _op_map[tflite::BuiltinOperator_MAX_POOL_2D] = make_unique(); + _op_map[tflite::BuiltinOperator_MEAN] = make_unique(); _op_map[tflite::BuiltinOperator_MIRROR_PAD] = make_unique(); _op_map[tflite::BuiltinOperator_MUL] = make_unique(); // There is no Option for NON_MAX_SUPPRESSION_V4 diff --git a/compiler/tfldump/src/Read.cpp b/compiler/tfldump/src/Read.cpp index f9782d9..856cc56 100644 --- a/compiler/tfldump/src/Read.cpp +++ b/compiler/tfldump/src/Read.cpp @@ -81,6 +81,7 @@ Reader::Reader(const tflite::Model *model) _version = model->version(); _subgraphs = model->subgraphs(); _buffers = model->buffers(); + _metadata = model->metadata(); auto opcodes = model->operator_codes(); for (const ::tflite::OperatorCode *opcode : *opcodes) diff --git a/compiler/tfldump/src/Read.h b/compiler/tfldump/src/Read.h index 7af2fa5..f835be1 100644 --- a/compiler/tfldump/src/Read.h +++ b/compiler/tfldump/src/Read.h @@ -52,6 +52,7 @@ private: using TFliteBuffers_t = flatbuffers::Vector>; using TFliteTensors_t = flatbuffers::Vector>; using TFliteOperators_t = flatbuffers::Vector>; + using TFliteMetadata_t = flatbuffers::Vector>; public: Reader(const tflite::Model *model); @@ -67,6 +68,7 @@ public: const TFliteOperators_t *operators() { return _operators; } const std::vector &inputs() const { return _inputs; } const std::vector &outputs() const { return _outputs; } + const TFliteMetadata_t *metadata() const { return _metadata; } uint32_t num_subgraph() const { return _subgraphs->Length(); } @@ -86,6 +88,7 @@ private: const TFliteBuffers_t *_buffers{nullptr}; const TFliteTensors_t *_tensors{nullptr}; const TFliteOperators_t *_operators{nullptr}; + const TFliteMetadata_t *_metadata{nullptr}; uint32_t _subgraph_index; std::string _subgraph_name; diff --git a/compiler/vconone/CMakeLists.txt b/compiler/vconone/CMakeLists.txt index 9055154..595bbfd 100644 --- a/compiler/vconone/CMakeLists.txt +++ b/compiler/vconone/CMakeLists.txt @@ -1,5 +1,5 @@ if (NOT VCONONE_VERSION) - 
set(VCONONE_VERSION 0x00000000000b0001) + set(VCONONE_VERSION 0x00000000000c0001) # NOTE order is [build patch minor major] # if VCONONE_VERSION is set with -D option, it will be cached # you may have to remove cache file if you remove -D option diff --git a/compute/.clang-format b/compute/.clang-format new file mode 120000 index 0000000..0ff66f3 --- /dev/null +++ b/compute/.clang-format @@ -0,0 +1 @@ +../.clang-format.8 \ No newline at end of file diff --git a/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h b/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h index d29886a..4a37178 100644 --- a/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h +++ b/compute/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h @@ -255,14 +255,14 @@ private: cl::Device _device; /**< Underlying CL device. */ std::string _kernel_path; /**< Path to the kernels folder. */ mutable std::map - _programs_map; /**< Map with all already loaded program data. */ + _programs_map; /**< Map with all already loaded program data. */ mutable std::map - _built_programs_map; /**< Map with all already built program data. */ + _built_programs_map; /**< Map with all already built program data. */ static const std::map - _kernel_program_map; /**< Map that associates kernel names with programs. */ + _kernel_program_map; /**< Map that associates kernel names with programs. */ static const std::map - _program_source_map; /**< Contains sources for all programs. - Used for compile-time kernel inclusion. >*/ + _program_source_map; /**< Contains sources for all programs. + Used for compile-time kernel inclusion. >*/ }; } #endif /* __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__ */ diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h index a614d52..fb689f7 100644 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h +++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h @@ -54,8 +54,8 @@ namespace arm_compute class ICLTensor; /** -* @brief Class to perform EmbeddingLookup operation with opencl kernel -*/ + * @brief Class to perform EmbeddingLookup operation with opencl kernel + */ class CLEmbeddingLookupKernel : public ICLKernel { public: diff --git a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h index 99cfa61..96f8308 100644 --- a/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h +++ b/compute/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h @@ -55,8 +55,8 @@ namespace arm_compute class ICLTensor; /** -* @brief Class to perform HashtableLookup operation with opencl kernel -*/ + * @brief Class to perform HashtableLookup operation with opencl kernel + */ class CLHashtableLookupKernel : public ICLKernel { public: diff --git a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEOneHotKernel.h b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEOneHotKernel.h index 99bb351..963d7b8 100644 --- a/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEOneHotKernel.h +++ b/compute/ARMComputeEx/arm_compute/core/NEON/kernels/NEOneHotKernel.h @@ -68,34 +68,37 @@ public: const char *name() const override { return "NEOneHotKernel"; } /** Initialise the kernel's inputs and outputs * - * @param[in] indices Indices tensor. Supported tensor rank: up to 3. 
Must be one of the - * following types: U32/S32 - * @param[in] depth The tensor for depth of the one hot dimension. Supported tensor rank: up to - * 3. Must be one of the following types: U32/S32 - * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported: - * U8/S8/U16/S16/F16/U32/S32/F32 - * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported: Same - * as @p on_value - * @param[out] output Destination tensor. Data type supported: Same as @p on_value - * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. - * The value must be in range [-indices.rank , indices.rank) + * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the + * following types: U32/S32 + * @param[in] depth The tensor for depth of the one hot dimension. + * Supported tensor rank: up to 3. + * Must be one of the following types: U32/S32 + * @param[in] on_value On value tensor. Supported tensor rank: only 1. + * Data type supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[in] off_value Off value tensor. Supported tensor rank: only 1. + * Data type supported: Same as @p on_value + * @param[out] output Destination tensor. Data type supported: Same as @p on_value + * @param[in] axis (Optional) The axis to fill. Negative values wrap around. + * Defaults to -1. + * The value must be in range [-indices.rank , indices.rank) */ void configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value, const ITensor *off_value, ITensor *output, int axis = -1); /** Static function to check if given info will lead to a valid configuration of @ref - * NEOneHotKernel + * NEOneHotKernel * - * @param[in] indices Indices tensor info. Supported tensor rank: up to 3. Must be one of the - * following types: U32/S32 - * @param[in] depth The tensor info for depth of the one hot dimension. Supported tensor rank: - * up to 3. Must be one of the following types: U32/S32 - * @param[in] on_value On value tensor info. Supported tensor rank: only 1. Data type supported: - * U8/S8/U16/S16/F16/U32/S32/F32 - * @param[in] off_value Off value tensor info. Supported tensor rank: only 1. Data type supported: - * Same as @p on_value - * @param[out] output Destination tensor info. Data type supported: Same as @p on_value - * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. - * The value must be in range [-indices.rank , indices.rank) + * @param[in] indices Indices tensor info. Supported tensor rank: up to 3. + * Must be one of the following types: U32/S32 + * @param[in] depth The tensor info for depth of the one hot dimension. + * Supported tensor rank: up to 3. + * Must be one of the following types: U32/S32 + * @param[in] on_value On value tensor info. Supported tensor rank: only 1. + * Data type supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[in] off_value Off value tensor info. Supported tensor rank: only 1. + * Data type supported: Same as @p on_value + * @param[out] output Destination tensor info. Data type supported: Same as @p on_value + * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. 
+ * The value must be in range [-indices.rank , indices.rank) * * @return a status */ diff --git a/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h b/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h index 1e69f09..2aaab6b 100644 --- a/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h +++ b/compute/ARMComputeEx/arm_compute/core/utils/misc/ShapeCalculatorEx.h @@ -72,10 +72,10 @@ namespace shape_calculator * @return the calculated shape */ inline TensorShape compute_transposeconv_upsampled_shape( - const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &info, - std::pair &out_dims, unsigned int invalid_right, - unsigned int invalid_bottom, unsigned int &pad_left, unsigned int &pad_right, - unsigned int &pad_top, unsigned int &pad_bottom) + const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &info, + std::pair &out_dims, unsigned int invalid_right, + unsigned int invalid_bottom, unsigned int &pad_left, unsigned int &pad_right, + unsigned int &pad_top, unsigned int &pad_bottom) { unsigned int sx = info.stride().first; unsigned int sy = info.stride().second; @@ -103,7 +103,7 @@ inline TensorShape compute_transposeconv_upsampled_shape( unsigned int padx_all_except_invallid = padx + info.pad_left() + info.pad_right() - invalid_right; unsigned int pady_all_except_invallid = - pady + info.pad_top() + info.pad_bottom() - invalid_bottom; + pady + info.pad_top() + info.pad_bottom() - invalid_bottom; pad_left = (padx_all_except_invallid + 1) / 2 - info.pad_left(); pad_right = pady_all_except_invallid / 2 - info.pad_right() + invalid_right; pad_top = (padx_all_except_invallid + 1) / 2 - info.pad_top(); @@ -135,7 +135,7 @@ compute_transposeconv_output_shape(const std::pair & const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const int channel_idx = - get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); const int batch_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); TensorShape out_shape{input_shape}; @@ -160,7 +160,7 @@ inline TensorShape compute_depth_to_space_shape_ex(const ITensorInfo *input, int const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const int idx_channel = - get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); TensorShape output_shape{input->tensor_shape()}; output_shape.set(idx_width, input->dimension(idx_width) * block); diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h index 409eaf5..026209f 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h @@ -106,22 +106,24 @@ public: CLDirectTransposeConvLayer &operator=(CLDirectTransposeConvLayer &&) = default; /** Set the input, weights, biases and output tensors. * - * @param[in,out] input Input tensor. 
3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. - * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type - * supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. - * Data type supported: Should match @p input data type, except for - * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type - * @param[out] output Output tensor. The output has the same number of dimensions as the - * @p input. - * @param[in] info Contains padding and policies to be used in the deconvolution, this - * is decribed in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. - * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, - * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, + * and an optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. + * Data type supported: Should match @p input data type, + * except for input of QASYMM8 and QASYMM8_SIGNED type + * where biases should be of S32 type + * @param[out] output Output tensor. + * The output has the same number of dimensions as the @p input. + * @param[in] info Contains padding and policies to be used in the deconvolution, + * this is decribed in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for + * @ref CLConvolutionLayer, specifies if the weights tensor has been reshaped with + * @ref CLWeightsReshapeKernel. * */ void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, @@ -130,23 +132,24 @@ public: /** Set the input, weights, biases and output tensors. * * @param[in] compile_context The compile context to be used. - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and - * an optional 4th dimension for batch of inputs. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, + * and an optional 4th dimension for batch of inputs. * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data - * type supported: Same as @p input. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input. * @param[in] bias (Optional) The biases have one dimension. * Data type supported: Should match @p input data type, except for - * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type + * input of QASYMM8 and QASYMM8_SIGNED type + * where biases should be of S32 type * @param[out] output Output tensor. The output has the same number of dimensions as - * the @p input. + * the @p input. 
* @param[in] info Contains padding and policies to be used in the deconvolution, - * this is decribed in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. - * @param[in] weights_info (Optional) Weights information needed for @ref - * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref - * CLWeightsReshapeKernel. + * this is decribed in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for + * @ref CLConvolutionLayer, specifies if the weights tensor has + * been reshaped with @ref CLWeightsReshapeKernel. * */ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, @@ -154,24 +157,26 @@ public: unsigned int invalid_right, unsigned int invalid_bottom, const WeightsInfo &weights_info = WeightsInfo()); /** Static function to check if given info will lead to a valid configuration of @ref - * CLDirectTransposeConvLayer + * CLDirectTransposeConvLayer * - * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. - * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data - * type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. - * Data type supported: Should match @p input data type, except for input - * of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type - * @param[in] output Output tensor info. The output has the same number of dimensions as the - * @p input. - * @param[in] info Contains padding and policies to be used in the deconvolution, this is - * decribed in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. - * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, - * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. + * @param[in] input Input tensor info. 3 lower dimensions represent a single input, + * and an optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. + * Data type supported: Should match @p input data type, + * except for input of QASYMM8 and QASYMM8_SIGNED type + * where biases should be of S32 type + * @param[in] output Output tensor info. The output has the same number of dimensions + * as the @p input. + * @param[in] info Contains padding and policies to be used in the deconvolution, + * this is decribed in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, + * specifies if the weights tensor has been reshaped + * with @ref CLWeightsReshapeKernel. 
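The reflowed parameter lists above map one-to-one onto the configure()/validate() entry points of CLDirectTransposeConvLayer. A minimal usage sketch, assuming the tensors are already created and allocated, the CL scheduler is initialised, and the full signatures (elided by the hunk boundaries) follow the documented parameter order:

#include "arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h"

using namespace arm_compute;

// Hypothetical helper, for illustration only: wires up a 2x2-stride transpose convolution.
void run_transpose_conv(ICLTensor &input, ICLTensor &weights, ICLTensor &bias, ICLTensor &output)
{
  PadStrideInfo info(2 /* stride_x */, 2 /* stride_y */, 0 /* pad_x */, 0 /* pad_y */);

  CLDirectTransposeConvLayer layer;
  // invalid_right / invalid_bottom trim the zero columns/rows that upsampling would
  // otherwise leave on the right and bottom edges of the output (see the docs above).
  layer.configure(&input, &weights, &bias, &output, info,
                  0 /* invalid_right */, 0 /* invalid_bottom */);
  layer.run();
}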
* * @return a status */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h index e65a646..f27e991 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h @@ -216,7 +216,7 @@ private: CLConvertFullyConnectedWeights _convert_weights; weights_transformations::CLConvertFullyConnectedWeightsManaged _convert_weights_managed; weights_transformations::CLFullyConnectedLayerReshapeWeightsExManaged - _reshape_weights_managed_function; + _reshape_weights_managed_function; CLFlattenLayer _flatten_layer; CLFullyConnectedLayerReshapeWeightsEx _reshape_weights_function; CLGEMM _mm_gemm; diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h index 289ab16..bdb1686 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h @@ -43,8 +43,8 @@ public: public: CLFullyConnectedReshapingLayer(std::shared_ptr memory_manager = nullptr) - : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{}, - _memory_manager{memory_manager}, _cl_fc{nullptr}, _cl_reshape{}, _needs_reshape(false) + : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{}, + _memory_manager{memory_manager}, _cl_fc{nullptr}, _cl_reshape{}, _needs_reshape(false) { // DO NOTHING } diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h index b01ec42..167554c 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLGatherEx.h @@ -66,7 +66,7 @@ public: * @param[out] output The output tensor, Data types supported: same as @p input. * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0 * @return N/A - */ + */ void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0); /** diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h index 5fb102e..5b27d36 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h +++ b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h @@ -63,20 +63,22 @@ public: /** Set the input, weights, biases and output tensors. * - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type - * supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same - * as @p input. - * @param[out] output Output tensor. The output has the same number of dimensions as the - * @p input. - * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this - * is described in @ref PadStrideInfo. 
- * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. - * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, - * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, + * and an optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. + * Data type supported: Same as @p input. + * @param[out] output Output tensor. The output has the same number of dimensions + * as the @p input. + * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, + * this is described in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for + * @ref CLConvolutionLayer, specifies if the weights tensor has + * been reshaped with @ref CLWeightsReshapeKernel. * */ void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, @@ -85,22 +87,22 @@ public: /** Set the input, weights, biases and output tensors. * * @param[in] compile_context The compile context to be used. - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and - * an optional 4th dimension for batch of inputs. Data types supported: - * QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data - * type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: - * Same as @p input. - * @param[out] output Output tensor. The output has the same number of dimensions as - * the @p input. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, + * and an optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. + * Data type supported: Same as @p input. + * @param[out] output Output tensor. The output has the same number of dimensions + * as the @p input. * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, - * this is described in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. - * @param[in] weights_info (Optional) Weights information needed for @ref - * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref - * CLWeightsReshapeKernel. + * this is described in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. 
+ * @param[in] weights_info (Optional) Weights information needed for + * @ref CLConvolutionLayer, specifies if the weights tensor has + * been reshaped with @ref CLWeightsReshapeKernel. * */ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, @@ -108,22 +110,24 @@ public: unsigned int invalid_right, unsigned int invalid_bottom, const WeightsInfo &weights_info = WeightsInfo()); /** Static function to check if given info will lead to a valid configuration of @ref - * CLTransposeConvLayer + * CLTransposeConvLayer * - * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data - * type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as - * @p input. - * @param[in] output Output tensor info. The output has the same number of dimensions as the - * @p input. - * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is - * described in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. - * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, - * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel. + * @param[in] input Input tensor info. 3 lower dimensions represent a single input, + * and an optional 4th dimension for batch of inputs. + * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. + * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. + * Data type supported: Same as @p input. + * @param[in] output Output tensor info. The output has the same number of dimensions + * as the @p input. + * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, + * this is described in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer, + * specifies if the weights tensor has been reshaped with + * @ref CLWeightsReshapeKernel. 
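As documented above, validate() mirrors configure() but takes ITensorInfo pointers, so a configuration can be checked before any OpenCL resources are touched. A sketch of the usual pattern, assuming the same parameter order as these comments (the declaration itself lies outside this hunk):

// Validate first, then configure only if the combination of shapes/types is supported.
const Status status = CLTransposeConvLayer::validate(input.info(), weights.info(), bias.info(),
                                                     output.info(), deconv_info,
                                                     0 /* invalid_right */, 0 /* invalid_bottom */);
ARM_COMPUTE_ERROR_THROW_ON(status);

CLTransposeConvLayer layer;
layer.configure(&input, &weights, &bias, &output, deconv_info, 0, 0);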
* * @return a status */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h index 18cb61b..e34b4dc 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h @@ -43,8 +43,8 @@ public: public: NEFullyConnectedReshapingLayer(std::shared_ptr memory_manager = nullptr) - : _memory_manager{memory_manager}, _input(nullptr), _weights(nullptr), _biases(nullptr), - _output(nullptr), _neon_buffer{}, _neon_fc{nullptr}, _neon_reshape{}, _needs_reshape(false) + : _memory_manager{memory_manager}, _input(nullptr), _weights(nullptr), _biases(nullptr), + _output(nullptr), _neon_buffer{}, _neon_fc{nullptr}, _neon_reshape{}, _needs_reshape(false) { // DO NOTHING } diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h index b2ea627..1a68f80 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEOneHot.h @@ -66,19 +66,20 @@ public: void configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value, const ITensor *off_value, ITensor *output, int axis = -1); /** Static function to check if given info will lead to a valid configuration of @ref - * NEOneHotKernel + * NEOneHotKernel * - * @param[in] indices Indices tensor info. Supported tensor rank: up to 3. Must be one of the - * following types: U32/S32 - * @param[in] depth The tensor info for depth of the one hot dimension. Supported tensor rank: - * up to 3. Must be one of the following types: U32/S32 - * @param[in] on_value On value tensor info. Supported tensor rank: only 1. Data type supported: - * U8/S8/U16/S16/F16/U32/S32/F32 - * @param[in] off_value Off value tensor info. Supported tensor rank: only 1. Data type supported: - * Same as @p on_value - * @param[out] output Destination tensor info. Data type supported: Same as @p on_value - * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. - * The value must be in range [-indices.rank , indices.rank) + * @param[in] indices Indices tensor info. Supported tensor rank: up to 3. + * Must be one of the following types: U32/S32 + * @param[in] depth The tensor info for depth of the one hot dimension. + * Supported tensor rank: up to 3. + * Must be one of the following types: U32/S32 + * @param[in] on_value On value tensor info. Supported tensor rank: only 1. + * Data type supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[in] off_value Off value tensor info. Supported tensor rank: only 1. + * Data type supported: Same as @p on_value + * @param[out] output Destination tensor info. Data type supported: Same as @p on_value + * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1. 
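The NEOneHot parameters above are easier to read with a tiny worked example. A plain C++ sketch of the operator's semantics for the innermost axis (illustration only; the NEON implementation is what this header actually declares):

#include <cstddef>
#include <vector>

// indices {1, 3}, depth 4, on_value 1, off_value 0  ->  {0,1,0,0, 0,0,0,1}
std::vector<float> one_hot(const std::vector<int> &indices, int depth, float on_value,
                           float off_value)
{
  std::vector<float> out(indices.size() * depth, off_value);
  for (std::size_t i = 0; i < indices.size(); ++i)
    if (indices[i] >= 0 && indices[i] < depth) // out-of-range indices keep off_value here
      out[i * depth + indices[i]] = on_value;
  return out;
}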
+ * The value must be in range [-indices.rank , indices.rank) * * @return a status */ diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h index 24ff5da..7a08dae 100644 --- a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h +++ b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h @@ -110,39 +110,42 @@ public: /** Set the input, weights, biases and output tensors. * - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. - * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type - * supported: Same as @p input. - * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type - * supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 - * for F16 input. - * @param[out] output Output tensor. The output has the same number of dimensions as the @p - * input. - * @param[in] info Contains padding and policies to be used in the deconvolution, this is - * decribed in @ref PadStrideInfo. - * @param[in] invalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, + * and an optional 4th dimension for batch of inputs. + * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. + * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input. + * @param[in] bias Optional, ignored if NULL. The biases have one dimension. + * Data type supported: Data types supported: S32 for QASYMM8 and + * QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. + * @param[out] output Output tensor. The output has the same number of dimensions as + * the @p input. + * @param[in] info Contains padding and policies to be used in the deconvolution, + * this is decribed in @ref PadStrideInfo. + * @param[in] invalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. * */ void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom); /** Static function to check if given info will lead to a valid configuration of @ref - * NETransposeConvLayer + * NETransposeConvLayer * - * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an - * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. - * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type - * supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types - * supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. - * @param[in] output Output tensor info. The output has the same number of dimensions as the @p - * input. - * @param[in] info Contains padding and policies to be used in the deconvolution, this is - * decribed in @ref PadStrideInfo. 
- * @param[in] innvalid_right The number of zeros added to right edge of the output. - * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. + * @param[in] input Input tensor info. 3 lower dimensions represent a single input, + * and an optional 4th dimension for batch of inputs. + * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. + * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. + * Data type supported: Same as @p input. + * @param[in] bias (Optional) The biases have one dimension. + * Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, + * F32 for F32 input, F16 for F16 input. + * @param[in] output Output tensor info. The output has the same number of dimensions as + * the @p input. + * @param[in] info Contains padding and policies to be used in the deconvolution, + * this is decribed in @ref PadStrideInfo. + * @param[in] innvalid_right The number of zeros added to right edge of the output. + * @param[in] invalid_bottom The number of zeros added to bottom edge of the output. * * @return a status */ diff --git a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp index 81d0cb7..1a8ff3e 100644 --- a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp +++ b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp @@ -54,123 +54,123 @@ using namespace arm_compute; const std::map CLKernelLibraryEx::_kernel_program_map = { - // ARMComputeEx kernels - {"arg_min_max_ex_x", "arg_min_max_ex.cl"}, - {"arg_min_max_ex_y", "arg_min_max_ex.cl"}, - {"arg_min_max_ex_z", "arg_min_max_ex.cl"}, - {"arg_min_max_ex_w", "arg_min_max_ex.cl"}, - {"binary_logical_op", "binary_logical_op.cl"}, - {"cast_bool", "cast.cl"}, - {"embedding_lookup", "embedding_lookup.cl"}, - {"gather_ex", "gather_ex.cl"}, - {"gather_ex_1d", "gather_ex.cl"}, - {"gather_ex_1d_out", "gather_ex.cl"}, - {"gemmlowp_mm_midgard_ex", "gemmlowp_ex.cl"}, - {"hashtable_lookup", "hashtable_lookup.cl"}, - {"instance_normalization_ex", "instance_normalization_ex.cl"}, - {"multiply_scale_factor", "multiply_scale_factor.cl"}, - {"neg_tensor", "neg_tensor.cl"}, - {"one_hot", "one_hot.cl"}, - {"one_hot_only_on_value", "one_hot.cl"}, - {"quantization_symm8", "quantization_symm8.cl"}, - {"reduce_min_max", "reduce_operation.cl"}, - {"reduce_sum_mean", "reduce_operation.cl"}, - {"topkv2_init", "topkv2.cl"}, - {"topkv2_find_first_negative", "topkv2.cl"}, - {"topkv2_reorder_negatives", "topkv2.cl"}, - {"topkv2_store", "topkv2.cl"}, - {"radixsort_histogram", "topkv2_radixsort.cl"}, - {"radixsort_scanhistograms", "topkv2_radixsort.cl"}, - {"radixsort_pastehistograms", "topkv2_radixsort.cl"}, - {"radixsort_reorder", "topkv2_radixsort.cl"}, - {"topkv2_quicksort", "topkv2_quicksort.cl"}, - {"scale_factor_symm8", "scale_factor.cl"}, + // ARMComputeEx kernels + {"arg_min_max_ex_x", "arg_min_max_ex.cl"}, + {"arg_min_max_ex_y", "arg_min_max_ex.cl"}, + {"arg_min_max_ex_z", "arg_min_max_ex.cl"}, + {"arg_min_max_ex_w", "arg_min_max_ex.cl"}, + {"binary_logical_op", "binary_logical_op.cl"}, + {"cast_bool", "cast.cl"}, + {"embedding_lookup", "embedding_lookup.cl"}, + {"gather_ex", "gather_ex.cl"}, + {"gather_ex_1d", "gather_ex.cl"}, + {"gather_ex_1d_out", "gather_ex.cl"}, + {"gemmlowp_mm_midgard_ex", "gemmlowp_ex.cl"}, + {"hashtable_lookup", "hashtable_lookup.cl"}, + {"instance_normalization_ex", "instance_normalization_ex.cl"}, + {"multiply_scale_factor", "multiply_scale_factor.cl"}, + {"neg_tensor", "neg_tensor.cl"}, + {"one_hot", 
"one_hot.cl"}, + {"one_hot_only_on_value", "one_hot.cl"}, + {"quantization_symm8", "quantization_symm8.cl"}, + {"reduce_min_max", "reduce_operation.cl"}, + {"reduce_sum_mean", "reduce_operation.cl"}, + {"topkv2_init", "topkv2.cl"}, + {"topkv2_find_first_negative", "topkv2.cl"}, + {"topkv2_reorder_negatives", "topkv2.cl"}, + {"topkv2_store", "topkv2.cl"}, + {"radixsort_histogram", "topkv2_radixsort.cl"}, + {"radixsort_scanhistograms", "topkv2_radixsort.cl"}, + {"radixsort_pastehistograms", "topkv2_radixsort.cl"}, + {"radixsort_reorder", "topkv2_radixsort.cl"}, + {"topkv2_quicksort", "topkv2_quicksort.cl"}, + {"scale_factor_symm8", "scale_factor.cl"}, }; const std::map CLKernelLibraryEx::_program_source_map = { #ifdef EMBEDDED_KERNELS - { - "arg_min_max_ex.cl", + { + "arg_min_max_ex.cl", #include "./cl_kernels/arg_min_max_ex.clembed" - }, - { - "cast.cl", + }, + { + "cast.cl", #include "./cl_kernels/cast.clembed" - }, - { - "embedding_lookup.cl", + }, + { + "embedding_lookup.cl", #include "./cl_kernels/embedding_lookup.clembed" - }, - { - "gather_ex.cl", + }, + { + "gather_ex.cl", #include "./cl_kernels/gather_ex.clembed" - }, - { - "gemmlowp_ex.cl", + }, + { + "gemmlowp_ex.cl", #include "./cl_kernels/gemmlowp_ex.clembed" - }, - { - "hashtable_lookup.cl", + }, + { + "hashtable_lookup.cl", #include "./cl_kernels/hashtable_lookup.clembed" - }, - { - "helpers.h", + }, + { + "helpers.h", #include "./cl_kernels/helpers.hembed" - }, - { - "helpers_asymm.h", + }, + { + "helpers_asymm.h", #include "./cl_kernels/helpers_asymm.hembed" - }, - { - "instance_normalization_ex.cl", + }, + { + "instance_normalization_ex.cl", #include "./cl_kernels/instance_normalization_ex.clembed" - }, - { - "binary_logical_op.cl", + }, + { + "binary_logical_op.cl", #include "./cl_kernels/binary_logical_op.clembed" - }, - { - "multiply_scale_factor.cl", + }, + { + "multiply_scale_factor.cl", #include "./cl_kernels/multiply_scale_factor.clembed" - }, - { - "neg_tensor.cl", + }, + { + "neg_tensor.cl", #include "./cl_kernels/neg_tensor.clembed" - }, - { - "one_hot.cl", + }, + { + "one_hot.cl", #include "./cl_kernels/one_hot.clembed" - }, - { - "quantization_symm8.cl", + }, + { + "quantization_symm8.cl", #include "./cl_kernels/quantization_symm8.clembed" - }, - { - "reduce_operation.cl", + }, + { + "reduce_operation.cl", #include "./cl_kernels/reduce_operation.clembed" - }, - { - "scale_factor.cl", + }, + { + "scale_factor.cl", #include "./cl_kernels/scale_factor.clembed" - }, - { - "topkv2.cl", + }, + { + "topkv2.cl", #include "./cl_kernels/topkv2.clembed" - }, - { - "topkv2_radixsort.cl", + }, + { + "topkv2_radixsort.cl", #include "./cl_kernels/topkv2_radixsort.clembed" - }, - { - "topkv2_quicksort.cl", + }, + { + "topkv2_quicksort.cl", #include "./cl_kernels/topkv2_quicksort.clembed" - }, + }, #endif /* EMBEDDED_KERNELS */ }; CLKernelLibraryEx::CLKernelLibraryEx() - : _context(), _device(), _kernel_path("."), _programs_map(), _built_programs_map() + : _context(), _device(), _kernel_path("."), _programs_map(), _built_programs_map() { opencl_is_available(); // Make sure the OpenCL symbols are initialised *before* the // CLKernelLibraryEx is built @@ -337,8 +337,8 @@ size_t CLKernelLibraryEx::max_local_workgroup_size(const cl::Kernel &kernel) con size_t err = kernel.getWorkGroupInfo(_device, CL_KERNEL_WORK_GROUP_SIZE, &result); ARM_COMPUTE_ERROR_ON_MSG( - err != 0, - "clGetKernelWorkGroupInfo failed to return the maximum workgroup size for the kernel"); + err != 0, + "clGetKernelWorkGroupInfo failed to return the maximum 
workgroup size for the kernel"); ARM_COMPUTE_UNUSED(err); return result; diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_min_max_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_min_max_ex.cl index 0a014d1..135cacf 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_min_max_ex.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_min_max_ex.cl @@ -119,15 +119,15 @@ inline DATA_TYPE_OUTPUT arg_idx_min(__global const DATA_TYPE *input, const int x in.s01234567 = select(in.s89abcdef, in.s01234567, idx_sel); res.s01234567 = select(res.s89abcdef, res.s01234567, CONVERT(idx_sel, int8)); - idx_sel.s0123 = (in.s0123 < in.s4567) || - (in.s0123 == in.s4567 && - CONVERT((res.s0123 < res.s4567), VEC_DATA_TYPE(DATA_TYPE_SELECT, 4))); + idx_sel.s0123 = + (in.s0123 < in.s4567) || + (in.s0123 == in.s4567 && CONVERT((res.s0123 < res.s4567), VEC_DATA_TYPE(DATA_TYPE_SELECT, 4))); in.s0123 = select(in.s4567, in.s0123, idx_sel.s0123); res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel.s0123, int4)); idx_sel.s01 = - (in.s01 < in.s23) || - (in.s01 == in.s23 && CONVERT((res.s01 < res.s23), VEC_DATA_TYPE(DATA_TYPE_SELECT, 2))); + (in.s01 < in.s23) || + (in.s01 == in.s23 && CONVERT((res.s01 < res.s23), VEC_DATA_TYPE(DATA_TYPE_SELECT, 2))); in.s01 = select(in.s23, in.s01, idx_sel.s01); res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, int2)); @@ -204,15 +204,15 @@ inline DATA_TYPE_OUTPUT arg_idx_max(__global const DATA_TYPE *input, const int x in.s01234567 = select(in.s89abcdef, in.s01234567, idx_sel); res.s01234567 = select(res.s89abcdef, res.s01234567, CONVERT(idx_sel, int8)); - idx_sel.s0123 = (in.s0123 > in.s4567) || - (in.s0123 == in.s4567 && - CONVERT((res.s0123 < res.s4567), VEC_DATA_TYPE(DATA_TYPE_SELECT, 4))); + idx_sel.s0123 = + (in.s0123 > in.s4567) || + (in.s0123 == in.s4567 && CONVERT((res.s0123 < res.s4567), VEC_DATA_TYPE(DATA_TYPE_SELECT, 4))); in.s0123 = select(in.s4567, in.s0123, idx_sel.s0123); res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel.s0123, int4)); idx_sel.s01 = - (in.s01 > in.s23) || - (in.s01 == in.s23 && CONVERT((res.s01 < res.s23), VEC_DATA_TYPE(DATA_TYPE_SELECT, 2))); + (in.s01 > in.s23) || + (in.s01 == in.s23 && CONVERT((res.s01 < res.s23), VEC_DATA_TYPE(DATA_TYPE_SELECT, 2))); in.s01 = select(in.s23, in.s01, idx_sel.s01); res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, int2)); @@ -296,22 +296,21 @@ __kernel void arg_min_max_ex_x(IMAGE_DECLARATION(src), const uint x_idx = get_global_id(0); const uint y_idx = get_global_id(1); const __global DATA_TYPE *src_in_row = - (const __global DATA_TYPE *)(src_ptr + src_offset_first_element_in_bytes + - y_idx * src_step_y); + (const __global DATA_TYPE *)(src_ptr + src_offset_first_element_in_bytes + y_idx * src_step_y); for (unsigned int y = 0; y < get_local_size(1); ++y) { #if defined(ARG_MAX) #if defined(PREV_OUTPUT) - local_results[lid] = arg_idx_max_prev_out( - src_in_row, (__global DATA_TYPE_OUTPUT *)offset(&prev_res, 0, y), x_idx); + local_results[lid] = + arg_idx_max_prev_out(src_in_row, (__global DATA_TYPE_OUTPUT *)offset(&prev_res, 0, y), x_idx); #else // !defined(PREV_OUTPUT) local_results[lid] = arg_idx_max((__global DATA_TYPE *)offset(&src, 0, y), x_idx); #endif // defined(PREV_OUTPUT) #else // defined(ARG_MIN) #if defined(PREV_OUTPUT) - local_results[lid] = arg_idx_min_prev_out( - src_in_row, (__global DATA_TYPE_OUTPUT *)offset(&prev_res, 0, y), x_idx); + local_results[lid] = + arg_idx_min_prev_out(src_in_row, (__global DATA_TYPE_OUTPUT *)offset(&prev_res, 0, y), x_idx); 
#else // !defined(PREV_OUTPUT) local_results[lid] = arg_idx_min((__global DATA_TYPE *)offset(&src, 0, y), x_idx); #endif // defined(PREV_OUTPUT) @@ -334,12 +333,12 @@ __kernel void arg_min_max_ex_x(IMAGE_DECLARATION(src), DATA_TYPE tmp1 = *(src_in_row + local_results[lid + i]); #if defined(ARG_MAX) condition_check3 = - ((tmp0 == tmp1) && (local_results[lid + i] < local_results[lid])) || (tmp0 < tmp1); + ((tmp0 == tmp1) && (local_results[lid + i] < local_results[lid])) || (tmp0 < tmp1); local_results[lid] = select(local_results[lid], local_results[lid + i], condition_check3); #else // defined(ARG_MIN) local_results[lid] = select( - local_results[lid], local_results[lid + i], - ((tmp0 == tmp1) && (local_results[lid + i] < local_results[lid])) || (tmp0 > tmp1)); + local_results[lid], local_results[lid + i], + ((tmp0 == tmp1) && (local_results[lid + i] < local_results[lid])) || (tmp0 > tmp1)); #endif // defined(ARG_MAX) || defined(ARG_MIN) } barrier(CLK_LOCAL_MEM_FENCE); @@ -403,7 +402,7 @@ __kernel void arg_min_max_ex_y(IMAGE_DECLARATION(src), IMAGE_DECLARATION(output) { VEC_DATA_TYPE(DATA_TYPE, 16) in = - CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE, 16)); + CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE, 16)); VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16) cond_conv = CONVERT(CONDITION_TO_USE(in, res), VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16)); diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl index e249663..f8b5bbe 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl @@ -111,14 +111,14 @@ __kernel void binary_logical_op(TENSOR3D_DECLARATION(input1), TENSOR3D_DECLARATI #if OP_CODE == 1 // LOGICAL AND VSTORE(VEC_SIZE) (CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input1.ptr) && - VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr), + VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr), VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), 0, (__global DATA_TYPE *)output.ptr); #elif OP_CODE == 2 // LOGICAL OR VSTORE(VEC_SIZE) (CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input1.ptr) || - VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr), + VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr), VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), 0, (__global DATA_TYPE *)output.ptr); diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl index 92e5dfb..5ebc78d 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl @@ -117,15 +117,15 @@ __kernel void embedding_lookup(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION // lookup ids for based on the tensor dimensions int lup_id[4] = {0}; - lup_id[0] = (NUM_DIMS == 1) ? *((__global int *)vector_offset(&lups, get_global_id(0))) - : get_global_id(0); - lup_id[1] = (NUM_DIMS == 2) ? *((__global int *)vector_offset(&lups, get_global_id(1))) - : get_global_id(1); + lup_id[0] = + (NUM_DIMS == 1) ? *((__global int *)vector_offset(&lups, get_global_id(0))) : get_global_id(0); + lup_id[1] = + (NUM_DIMS == 2) ? *((__global int *)vector_offset(&lups, get_global_id(1))) : get_global_id(1); lup_id[2] = (NUM_DIMS == 3) ? *((__global int *)vector_offset(&lups, get_global_id(2))) : get_global_id(2) % DEPTH_OUT; lup_id[3] = (NUM_DIMS == 4) - ? 
*((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT)) - : get_global_id(2) / DEPTH_OUT; + ? *((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT)) + : get_global_id(2) / DEPTH_OUT; in.ptr += input_offset_first_element_in_bytes + lup_id[0] * input_step_x + lup_id[1] * input_step_y + lup_id[2] * input_step_z + lup_id[3] * input_step_w; diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl index 80ba73d..85fc09d 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl @@ -41,7 +41,7 @@ #include "helpers.h" #if defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_Y) && \ - defined(COLS_A) + defined(COLS_A) #define VECTOR_CHAR VEC_DATA_TYPE(char, NUM_ELEMS_PROCESSED_PER_THREAD_X) #define VECTOR_INT VEC_DATA_TYPE(int, NUM_ELEMS_PROCESSED_PER_THREAD_X) #define VECTOR_FLOAT VEC_DATA_TYPE(float, NUM_ELEMS_PROCESSED_PER_THREAD_X) @@ -117,7 +117,7 @@ __kernel void gemmlowp_mm_midgard_ex(IMAGE_DECLARATION(src0), IMAGE_DECLARATION( , uint dst_cross_plane_pad #endif // REINTERPRET_OUTPUT_AS_3D - ) +) { int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X; @@ -208,9 +208,9 @@ __kernel void gemmlowp_mm_midgard_ex(IMAGE_DECLARATION(src0), IMAGE_DECLARATION( #endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 // Load values from matrix B VECTOR_CHAR b0 = - VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1)); + VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1)); VECTOR_CHAR b1 = VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)( - 0, (__global char *)(src1_ptr + src_addr.s1 + src1_stride_y)); + 0, (__global char *)(src1_ptr + src_addr.s1 + src1_stride_y)); // Accumulate acc0 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a0.s0; @@ -251,7 +251,7 @@ __kernel void gemmlowp_mm_midgard_ex(IMAGE_DECLARATION(src0), IMAGE_DECLARATION( #endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 // Load values from matrix B VECTOR_CHAR b0 = - VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1)); + VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1)); // Accumulate acc0 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a0; diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl index a4f7dbd..3ace1fd 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl @@ -115,15 +115,15 @@ __kernel void hashtable_lookup(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION int lup_id[4] = {0}; - lup_id[0] = (NUM_DIMS == 1) ? *((__global int *)vector_offset(&lups, get_global_id(0))) - : get_global_id(0); - lup_id[1] = (NUM_DIMS == 2) ? *((__global int *)vector_offset(&lups, get_global_id(1))) - : get_global_id(1); + lup_id[0] = + (NUM_DIMS == 1) ? *((__global int *)vector_offset(&lups, get_global_id(0))) : get_global_id(0); + lup_id[1] = + (NUM_DIMS == 2) ? *((__global int *)vector_offset(&lups, get_global_id(1))) : get_global_id(1); lup_id[2] = (NUM_DIMS == 3) ? *((__global int *)vector_offset(&lups, get_global_id(2))) : get_global_id(2) % DEPTH_OUT; lup_id[3] = (NUM_DIMS == 4) - ? *((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT)) - : get_global_id(2) / DEPTH_OUT; + ? 
*((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT)) + : get_global_id(2) / DEPTH_OUT; if (lup_id[NUM_DIMS - 1] < 0) { diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h index e07a25e..4a3bc13 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h @@ -49,7 +49,7 @@ #endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8) #if defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && \ - defined(cl_arm_integer_dot_product_accumulate_int8) + defined(cl_arm_integer_dot_product_accumulate_int8) #pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable #endif // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && // defined(cl_arm_integer_dot_product_accumulate_int8) @@ -288,21 +288,21 @@ #define VECTOR_DECLARATION(name) \ __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, \ - uint name##_offset_first_element_in_bytes + uint name##_offset_first_element_in_bytes #define IMAGE_DECLARATION(name) \ __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \ - uint name##_step_y, uint name##_offset_first_element_in_bytes + uint name##_step_y, uint name##_offset_first_element_in_bytes #define TENSOR3D_DECLARATION(name) \ __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \ - uint name##_step_y, uint name##_stride_z, uint name##_step_z, \ - uint name##_offset_first_element_in_bytes + uint name##_step_y, uint name##_stride_z, uint name##_step_z, \ + uint name##_offset_first_element_in_bytes #define TENSOR4D_DECLARATION(name) \ __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \ - uint name##_step_y, uint name##_stride_z, uint name##_step_z, uint name##_stride_w, \ - uint name##_step_w, uint name##_offset_first_element_in_bytes + uint name##_step_y, uint name##_stride_z, uint name##_step_z, uint name##_stride_w, \ + uint name##_step_w, uint name##_offset_first_element_in_bytes #define CONVERT_TO_VECTOR_STRUCT(name) \ update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \ @@ -406,9 +406,9 @@ inline Vector update_vector_workitem_ptr(__global uchar *ptr, uint offset_first_ uint stride_x, uint step_x) { Vector vector = { - .ptr = ptr, - .offset_first_element_in_bytes = offset_first_element_in_bytes, - .stride_x = stride_x, + .ptr = ptr, + .offset_first_element_in_bytes = offset_first_element_in_bytes, + .stride_x = stride_x, }; vector.ptr += vector.offset_first_element_in_bytes + get_global_id(0) * step_x; return vector; @@ -436,7 +436,7 @@ inline Image update_image_workitem_ptr(__global uchar *ptr, uint offset_first_el .stride_x = stride_x, .stride_y = stride_y}; img.ptr += - img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y; + img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y; return img; } diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h index 5f1b3f9..d7f1d08 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h @@ -100,16 +100,16 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale) * * @return quantized values */ -#define QUANTIZE_IMPL(type, size) \ - 
inline VEC_DATA_TYPE(type, size) \ - quantize_##type##size(VEC_DATA_TYPE(float, size) input, float offset, float scale) \ - { \ - VEC_DATA_TYPE(float, size) \ - out_f32 = input / (VEC_DATA_TYPE(float, size))(scale) + (VEC_DATA_TYPE(float, size))(offset); \ - VEC_DATA_TYPE(type, size) \ - res = CONVERT_SAT(CONVERT_DOWN_RTE(out_f32, VEC_DATA_TYPE(int, size)), \ - VEC_DATA_TYPE(type, size)); \ - return res; \ +#define QUANTIZE_IMPL(type, size) \ + inline VEC_DATA_TYPE(type, size) \ + quantize_##type##size(VEC_DATA_TYPE(float, size) input, float offset, float scale) \ + { \ + VEC_DATA_TYPE(float, size) \ + out_f32 = input / (VEC_DATA_TYPE(float, size))(scale) + (VEC_DATA_TYPE(float, size))(offset); \ + VEC_DATA_TYPE(type, size) \ + res = \ + CONVERT_SAT(CONVERT_DOWN_RTE(out_f32, VEC_DATA_TYPE(int, size)), VEC_DATA_TYPE(type, size)); \ + return res; \ } /** Dequantize a vector of values to floating-point @@ -119,11 +119,11 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale) * * @return dequantized values in floating point */ -#define DEQUANTIZE_IMPL(type, size) \ - inline VEC_DATA_TYPE(float, size) \ - dequantize_##type##size(VEC_DATA_TYPE(type, size) input, float offset, float scale) \ - { \ - return (CONVERT(input, VEC_DATA_TYPE(float, size)) - offset) * scale; \ +#define DEQUANTIZE_IMPL(type, size) \ + inline VEC_DATA_TYPE(float, size) \ + dequantize_##type##size(VEC_DATA_TYPE(type, size) input, float offset, float scale) \ + { \ + return (CONVERT(input, VEC_DATA_TYPE(float, size)) - offset) * scale; \ } /** Correctly-rounded-to-nearest division by a power-of-two. @@ -134,7 +134,7 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale) */ #define ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(size) \ inline VEC_DATA_TYPE(int, size) asymm_rounding_divide_by_POW2_##size( \ - VEC_DATA_TYPE(int, size) x, VEC_DATA_TYPE(int, size) exponent) \ + VEC_DATA_TYPE(int, size) x, VEC_DATA_TYPE(int, size) exponent) \ { \ const VEC_DATA_TYPE(int, size) zero = (VEC_DATA_TYPE(int, size))0; \ const VEC_DATA_TYPE(int, size) one = (VEC_DATA_TYPE(int, size))1; \ @@ -152,32 +152,32 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale) * * @return Product of two fixed-point numbers. 
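QUANTIZE_IMPL and DEQUANTIZE_IMPL above expand to OpenCL vector code; a scalar C++ rendering of the same arithmetic makes the round-trip easier to follow (the int8 clamp range here is an assumption for the char instantiation; other types use their own saturation bounds):

#include <algorithm>
#include <cmath>
#include <cstdint>

int8_t quantize_char(float x, float offset, float scale)
{
  // input / scale + offset, rounded to nearest (RTE by default) and saturated, as in QUANTIZE_IMPL
  const float q = x / scale + offset;
  const int v = static_cast<int>(std::nearbyint(q));
  return static_cast<int8_t>(std::min(127, std::max(-128, v)));
}

float dequantize_char(int8_t q, float offset, float scale)
{
  // (value - offset) * scale, as in DEQUANTIZE_IMPL
  return (static_cast<float>(q) - offset) * scale;
}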
*/ -#define ASYMM_MULT_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) \ - asymm_mult##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \ - { \ - VEC_DATA_TYPE(int, size) \ - overflow = a == b && a == INT_MIN; \ - VEC_DATA_TYPE(long, size) \ - a_64 = convert_long##size(a); \ - VEC_DATA_TYPE(long, size) \ - b_64 = convert_long##size(b); \ - VEC_DATA_TYPE(long, size) \ - ab_64 = a_64 * b_64; \ - /* Revert COMPMID-907 */ \ - VEC_DATA_TYPE(long, size) \ - mask1 = 1 << 30; \ - VEC_DATA_TYPE(long, size) \ - mask2 = 1 - (1 << 30); \ - VEC_DATA_TYPE(long, size) \ - is_positive_or_zero = ab_64 >= 0; \ - VEC_DATA_TYPE(long, size) \ - nudge = select(mask2, mask1, is_positive_or_zero); \ - VEC_DATA_TYPE(long, size) \ - mask = 1ll << 31; \ - VEC_DATA_TYPE(int, size) \ - ab_x2_high32 = convert_int##size((ab_64 + nudge) / mask); \ - return select(ab_x2_high32, INT_MAX, overflow); \ +#define ASYMM_MULT_IMPL(size) \ + inline VEC_DATA_TYPE(int, size) \ + asymm_mult##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \ + { \ + VEC_DATA_TYPE(int, size) \ + overflow = a == b && a == INT_MIN; \ + VEC_DATA_TYPE(long, size) \ + a_64 = convert_long##size(a); \ + VEC_DATA_TYPE(long, size) \ + b_64 = convert_long##size(b); \ + VEC_DATA_TYPE(long, size) \ + ab_64 = a_64 * b_64; \ + /* Revert COMPMID-907 */ \ + VEC_DATA_TYPE(long, size) \ + mask1 = 1 << 30; \ + VEC_DATA_TYPE(long, size) \ + mask2 = 1 - (1 << 30); \ + VEC_DATA_TYPE(long, size) \ + is_positive_or_zero = ab_64 >= 0; \ + VEC_DATA_TYPE(long, size) \ + nudge = select(mask2, mask1, is_positive_or_zero); \ + VEC_DATA_TYPE(long, size) \ + mask = 1ll << 31; \ + VEC_DATA_TYPE(int, size) \ + ab_x2_high32 = convert_int##size((ab_64 + nudge) / mask); \ + return select(ab_x2_high32, INT_MAX, overflow); \ } /** Calculates \f$ exp(x) \f$ for x in [-1/4, 0). @@ -186,32 +186,32 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale) * * @return Result in fixed-point format Q0. 
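ASYMM_MULT above is the fixed-point "saturating rounding doubling high multiply" used throughout the quantized kernels. A scalar equivalent, for reference only:

#include <climits>
#include <cstdint>

int32_t asymm_mult(int32_t a, int32_t b)
{
  // Only INT_MIN * INT_MIN overflows the Q0.31 result; saturate it to INT_MAX.
  if (a == b && a == INT_MIN)
    return INT_MAX;
  const int64_t ab = static_cast<int64_t>(a) * static_cast<int64_t>(b);
  // Round to nearest when taking the high 32 bits of the doubled product.
  const int64_t nudge = ab >= 0 ? (1 << 30) : (1 - (1 << 30));
  return static_cast<int32_t>((ab + nudge) / (1LL << 31));
}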
*/ -#define ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) \ - asymm_exp_on_interval_between_negative_one_quarter_and_0_excl##size(VEC_DATA_TYPE(int, size) \ - a) \ - { \ - const VEC_DATA_TYPE(int, size) constant_term = 1895147668; \ - const VEC_DATA_TYPE(int, size) constant_1_over_3 = 715827883; \ - const int k_fractional_bits = 31; \ - VEC_DATA_TYPE(int, size) \ - x = a + (1 << (k_fractional_bits - 3)); \ - VEC_DATA_TYPE(int, size) \ - x2 = ASYMM_MULT(x, x, size); \ - VEC_DATA_TYPE(int, size) \ - x3 = ASYMM_MULT(x2, x, size); \ - VEC_DATA_TYPE(int, size) \ - x4 = ASYMM_MULT(x2, x2, size); \ - VEC_DATA_TYPE(int, size) \ - x4_over_4 = ASYMM_ROUNDING_DIVIDE_BY_POW2(x4, 2, size); \ - VEC_DATA_TYPE(int, size) \ - x4_over_24_plus_x3_over_6_plus_x2 = \ - ASYMM_MULT((x4_over_4 + x3), constant_1_over_3, size) + x2; \ - VEC_DATA_TYPE(int, size) \ - x4_over_24_plus_x3_over_6_plus_x2_over_2 = \ - ASYMM_ROUNDING_DIVIDE_BY_POW2(x4_over_24_plus_x3_over_6_plus_x2, 1, size); \ - return constant_term + \ - ASYMM_MULT(constant_term, x + x4_over_24_plus_x3_over_6_plus_x2_over_2, size); \ +#define ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(size) \ + inline VEC_DATA_TYPE(int, size) \ + asymm_exp_on_interval_between_negative_one_quarter_and_0_excl##size(VEC_DATA_TYPE(int, size) \ + a) \ + { \ + const VEC_DATA_TYPE(int, size) constant_term = 1895147668; \ + const VEC_DATA_TYPE(int, size) constant_1_over_3 = 715827883; \ + const int k_fractional_bits = 31; \ + VEC_DATA_TYPE(int, size) \ + x = a + (1 << (k_fractional_bits - 3)); \ + VEC_DATA_TYPE(int, size) \ + x2 = ASYMM_MULT(x, x, size); \ + VEC_DATA_TYPE(int, size) \ + x3 = ASYMM_MULT(x2, x, size); \ + VEC_DATA_TYPE(int, size) \ + x4 = ASYMM_MULT(x2, x2, size); \ + VEC_DATA_TYPE(int, size) \ + x4_over_4 = ASYMM_ROUNDING_DIVIDE_BY_POW2(x4, 2, size); \ + VEC_DATA_TYPE(int, size) \ + x4_over_24_plus_x3_over_6_plus_x2 = \ + ASYMM_MULT((x4_over_4 + x3), constant_1_over_3, size) + x2; \ + VEC_DATA_TYPE(int, size) \ + x4_over_24_plus_x3_over_6_plus_x2_over_2 = \ + ASYMM_ROUNDING_DIVIDE_BY_POW2(x4_over_24_plus_x3_over_6_plus_x2, 1, size); \ + return constant_term + \ + ASYMM_MULT(constant_term, x + x4_over_24_plus_x3_over_6_plus_x2_over_2, size); \ } /** Each bit of the result is set to the corresponding bit of either then_val or @@ -263,15 +263,15 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale) #define EXP_BARREL_SHIFTER_IMPL(size) \ inline VEC_DATA_TYPE(int, size) exp_barrel_shifter##size( \ - VEC_DATA_TYPE(int, size) result, int exponent, int fp_multiplier, int k_integer_bits, \ - int k_fractional_bits, VEC_DATA_TYPE(int, size) remainder) \ + VEC_DATA_TYPE(int, size) result, int exponent, int fp_multiplier, int k_integer_bits, \ + int k_fractional_bits, VEC_DATA_TYPE(int, size) remainder) \ { \ if (k_integer_bits > exponent) \ { \ const int k_shift_amount = k_integer_bits > exponent ? 
k_fractional_bits + exponent : 0; \ return ASYMM_SELECT_USING_MASK( \ - ASYMM_MASK_IF_NON_ZERO(remainder & (1 << k_shift_amount), size), \ - ASYMM_MULT(result, fp_multiplier, size), result, size); \ + ASYMM_MASK_IF_NON_ZERO(remainder & (1 << k_shift_amount), size), \ + ASYMM_MULT(result, fp_multiplier, size), result, size); \ } \ \ return result; \ @@ -285,7 +285,7 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale) */ #define ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(size) \ inline VEC_DATA_TYPE(int, size) \ - asymm_exp_on_negative_values##size(VEC_DATA_TYPE(int, size) a, int k_integer_bits) \ + asymm_exp_on_negative_values##size(VEC_DATA_TYPE(int, size) a, int k_integer_bits) \ { \ const int k_fractional_bits = 31 - k_integer_bits; \ VEC_DATA_TYPE(int, size) \ @@ -298,7 +298,7 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale) a_mod_quarter_minus_one_quarter_scaled = a_mod_quarter_minus_one_quarter << k_integer_bits; \ VEC_DATA_TYPE(int, size) \ result = ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL( \ - a_mod_quarter_minus_one_quarter_scaled, size); \ + a_mod_quarter_minus_one_quarter_scaled, size); \ VEC_DATA_TYPE(int, size) \ remainder = a_mod_quarter_minus_one_quarter - a; \ \ @@ -312,10 +312,10 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale) remainder, size); \ result = EXP_BARREL_SHIFTER(result, +2, 39332535, k_integer_bits, k_fractional_bits, \ remainder, size); \ - result = EXP_BARREL_SHIFTER(result, +3, 720401, k_integer_bits, k_fractional_bits, remainder, \ - size); \ result = \ - EXP_BARREL_SHIFTER(result, +4, 242, k_integer_bits, k_fractional_bits, remainder, size); \ + EXP_BARREL_SHIFTER(result, +3, 720401, k_integer_bits, k_fractional_bits, remainder, size); \ + result = \ + EXP_BARREL_SHIFTER(result, +4, 242, k_integer_bits, k_fractional_bits, remainder, size); \ \ if (k_integer_bits > 5) \ { \ @@ -335,27 +335,27 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale) * * @return Arithmetic left or right shift. 
*/ -#define ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) \ - asymm_saturating_rounding_mult_by_pow2##size(VEC_DATA_TYPE(int, size) x, int exponent) \ - { \ - if (exponent < 0) \ - { \ - return ASYMM_ROUNDING_DIVIDE_BY_POW2(x, -exponent, size); \ - } \ - \ - const VEC_DATA_TYPE(int, size) min = INT_MIN; \ - const VEC_DATA_TYPE(int, size) max = INT_MAX; \ - int threshold = ((1 << (31 - exponent)) - 1); \ - VEC_DATA_TYPE(int, size) \ - positive_mask = ASYMM_MASK_IF_NON_ZERO(x > threshold, size); \ - VEC_DATA_TYPE(int, size) \ - negative_mask = ASYMM_MASK_IF_NON_ZERO(x < -threshold, size); \ - VEC_DATA_TYPE(int, size) \ - result = x << exponent; \ - result = ASYMM_SELECT_USING_MASK(positive_mask, max, result, size); \ - result = ASYMM_SELECT_USING_MASK(negative_mask, min, result, size); \ - return result; \ +#define ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(size) \ + inline VEC_DATA_TYPE(int, size) \ + asymm_saturating_rounding_mult_by_pow2##size(VEC_DATA_TYPE(int, size) x, int exponent) \ + { \ + if (exponent < 0) \ + { \ + return ASYMM_ROUNDING_DIVIDE_BY_POW2(x, -exponent, size); \ + } \ + \ + const VEC_DATA_TYPE(int, size) min = INT_MIN; \ + const VEC_DATA_TYPE(int, size) max = INT_MAX; \ + int threshold = ((1 << (31 - exponent)) - 1); \ + VEC_DATA_TYPE(int, size) \ + positive_mask = ASYMM_MASK_IF_NON_ZERO(x > threshold, size); \ + VEC_DATA_TYPE(int, size) \ + negative_mask = ASYMM_MASK_IF_NON_ZERO(x < -threshold, size); \ + VEC_DATA_TYPE(int, size) \ + result = x << exponent; \ + result = ASYMM_SELECT_USING_MASK(positive_mask, max, result, size); \ + result = ASYMM_SELECT_USING_MASK(negative_mask, min, result, size); \ + return result; \ } /** Calculates (a+b)/2, rounded to the nearest integer. @@ -365,21 +365,21 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale) * * @return (a+b)/2, rounded to the nearest integer. */ -#define ASYMM_ROUNDING_HALF_SUM_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) \ - asymm_rounding_half_sum##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \ - { \ - VEC_DATA_TYPE(long, size) \ - a64 = convert_long##size(a); \ - VEC_DATA_TYPE(long, size) \ - b64 = convert_long##size(b); \ - VEC_DATA_TYPE(long, size) \ - sum = a64 + b64; \ - const VEC_DATA_TYPE(long, size) one = 1; \ - const VEC_DATA_TYPE(long, size) minus_one = -1; \ - VEC_DATA_TYPE(long, size) \ - sign = select(minus_one, one, sum >= 0); \ - return convert_int##size((sum + sign) / 2); \ +#define ASYMM_ROUNDING_HALF_SUM_IMPL(size) \ + inline VEC_DATA_TYPE(int, size) \ + asymm_rounding_half_sum##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \ + { \ + VEC_DATA_TYPE(long, size) \ + a64 = convert_long##size(a); \ + VEC_DATA_TYPE(long, size) \ + b64 = convert_long##size(b); \ + VEC_DATA_TYPE(long, size) \ + sum = a64 + b64; \ + const VEC_DATA_TYPE(long, size) one = 1; \ + const VEC_DATA_TYPE(long, size) minus_one = -1; \ + VEC_DATA_TYPE(long, size) \ + sign = select(minus_one, one, sum >= 0); \ + return convert_int##size((sum + sign) / 2); \ } /** Calculates \f$ 1 / (1 + x) \f$ for x in (0, 1). 
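The two macros above also have simple scalar counterparts, shown here only to spell out the saturation and rounding behaviour (the rounding right shift is written out as the usual remainder/threshold form; exponents are assumed to stay in (0, 31)):

#include <climits>
#include <cstdint>

int32_t rounding_divide_by_pow2(int32_t x, int exponent)
{
  const int32_t mask = (1 << exponent) - 1;
  const int32_t remainder = x & mask;
  const int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
  return (x >> exponent) + (remainder > threshold ? 1 : 0);
}

int32_t saturating_rounding_mult_by_pow2(int32_t x, int exponent)
{
  if (exponent < 0) // negative exponents fall back to a rounding right shift
    return rounding_divide_by_pow2(x, -exponent);
  const int32_t threshold = (1 << (31 - exponent)) - 1;
  if (x > threshold)  return INT_MAX; // saturate instead of shifting into the sign bit
  if (x < -threshold) return INT_MIN;
  return x << exponent;
}

int32_t rounding_half_sum(int32_t a, int32_t b)
{
  const int64_t sum = static_cast<int64_t>(a) + static_cast<int64_t>(b);
  const int64_t sign = sum >= 0 ? 1 : -1; // ties (odd sums) round away from zero
  return static_cast<int32_t>((sum + sign) / 2);
}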
@@ -390,7 +390,7 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale) */ #define ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(size) \ inline VEC_DATA_TYPE(int, size) \ - asymm_one_over_one_plus_x_for_x_in_0_1##size(VEC_DATA_TYPE(int, size) a) \ + asymm_one_over_one_plus_x_for_x_in_0_1##size(VEC_DATA_TYPE(int, size) a) \ { \ const VEC_DATA_TYPE(int, size) Q0_one = INT_MAX; \ const VEC_DATA_TYPE(int, size) Q2_one = 1 << (31 - 2); \ @@ -462,14 +462,14 @@ inline float dequantize_qasymm8_signed(char input, float offset, float scale) #define ASYMM_RESCALE(value, src_integer_bits, dst_integer_bits, size) \ asymm_rescale##size(value, src_integer_bits, dst_integer_bits) -#define MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(size) \ - inline VEC_DATA_TYPE(int, size) \ - multiply_by_quantized_multiplier##size(VEC_DATA_TYPE(int, size) input, int qmul, int shift) \ - { \ - const int left_shift = shift > 0 ? shift : 0; \ - const int right_shift = shift > 0 ? 0 : -shift; \ - return ASYMM_ROUNDING_DIVIDE_BY_POW2(ASYMM_MULT(input * (1 << left_shift), qmul, size), \ - right_shift, size); \ +#define MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(size) \ + inline VEC_DATA_TYPE(int, size) \ + multiply_by_quantized_multiplier##size(VEC_DATA_TYPE(int, size) input, int qmul, int shift) \ + { \ + const int left_shift = shift > 0 ? shift : 0; \ + const int right_shift = shift > 0 ? 0 : -shift; \ + return ASYMM_ROUNDING_DIVIDE_BY_POW2(ASYMM_MULT(input * (1 << left_shift), qmul, size), \ + right_shift, size); \ } #define MULTIPLY_BY_QUANTIZED_MULTIPLIER(input, qmul, shift, size) \ multiply_by_quantized_multiplier##size(input, qmul, shift) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl index 0148426..96a2431 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl @@ -41,7 +41,7 @@ #include "helpers.h" #if defined(VEC_SIZE) && defined(DATA_TYPE) && defined(EPSILON) && defined(DIM_X) && \ - defined(DIM_Y) && defined(DIM_Z) + defined(DIM_Y) && defined(DIM_Z) /** This function normalizes the input 2D tensor across the first dimension with respect to mean and * standard deviation of the same dimension. 
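For reference, the normalization this kernel documents reduces, per batch and channel, to the usual instance-norm formula; a scalar sketch, assuming the standard definition with the optional gamma/beta vector inputs declared just below acting as scale and shift:

#include <cmath>
#include <vector>

// y = gamma * (x - mean) / sqrt(var + epsilon) + beta, computed over one channel's spatial values.
void instance_norm_channel(std::vector<float> &x, float gamma, float beta, float epsilon)
{
  float mean = 0.f;
  for (float v : x) mean += v;
  mean /= static_cast<float>(x.size());

  float var = 0.f;
  for (float v : x) var += (v - mean) * (v - mean);
  var /= static_cast<float>(x.size());

  // The CL kernel applies a precomputed 'multip' factor; gamma / sqrt(var + eps) is the standard choice.
  const float multip = gamma / std::sqrt(var + epsilon);
  for (float &v : x) v = (v - mean) * multip + beta;
}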
* @@ -108,14 +108,14 @@ __kernel void instance_normalization_ex(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output) #endif /* IN_PLACE */ #ifdef GAMMA - , + , VECTOR_DECLARATION(gamma) #endif // GAMMA #ifdef BETA - , + , VECTOR_DECLARATION(beta) #endif // BETA - ) +) { Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0); #ifndef IN_PLACE @@ -213,12 +213,12 @@ __kernel void instance_normalization_ex(TENSOR4D_DECLARATION(input), for (int i_h = 0; i_h < DIM_Z; ++i_h) { __global DATA_TYPE *input_address = - (__global DATA_TYPE *)tensor4D_offset(&in, ch, i_w, i_h, batch); + (__global DATA_TYPE *)tensor4D_offset(&in, ch, i_w, i_h, batch); #ifdef IN_PLACE __global DATA_TYPE *output_address = input_address; #else /* !IN_PLACE */ __global DATA_TYPE *output_address = - (__global DATA_TYPE *)tensor4D_offset(&out, ch, i_w, i_h, batch); + (__global DATA_TYPE *)tensor4D_offset(&out, ch, i_w, i_h, batch); #endif /* IN_PLACE */ *(output_address) = (*(input_address)-mean) * multip + beta; } @@ -231,12 +231,12 @@ __kernel void instance_normalization_ex(TENSOR4D_DECLARATION(input), for (; x <= (DIM_X - VEC_SIZE); x += VEC_SIZE) { __global DATA_TYPE *input_address = - (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch); + (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch); #ifdef IN_PLACE __global DATA_TYPE *output_address = input_address; #else /* !IN_PLACE */ __global DATA_TYPE *output_address = - (__global DATA_TYPE *)tensor4D_offset(&out, x, y, ch, batch); + (__global DATA_TYPE *)tensor4D_offset(&out, x, y, ch, batch); #endif /* IN_PLACE */ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) @@ -251,12 +251,12 @@ __kernel void instance_normalization_ex(TENSOR4D_DECLARATION(input), for (; x < DIM_X; ++x) { __global DATA_TYPE *input_address = - (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch); + (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch); #ifdef IN_PLACE __global DATA_TYPE *output_address = input_address; #else /* !IN_PLACE */ __global DATA_TYPE *output_address = - (__global DATA_TYPE *)tensor4D_offset(&out, x, y, ch, batch); + (__global DATA_TYPE *)tensor4D_offset(&out, x, y, ch, batch); #endif /* IN_PLACE */ *(output_address) = (*(input_address)-mean) * multip + beta; } diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl index 3943fc4..abbfbd2 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl @@ -114,8 +114,8 @@ __kernel void multiply_scale_factor(IMAGE_DECLARATION(input), VECTOR_DECLARATION (val, 0, (__global DATA_TYPE *)output.ptr); #else // !defined(VEC_SIZE) || !defined(LAST_ACCESSED_X) *((__global DATA_TYPE *)(output.ptr)) = - ((DATA_TYPE)(*((__global int *)(input.ptr)))) * - *(((__global DATA_TYPE *)(scale_ptr)) + get_global_id(1)) * (DATA_TYPE)(multiplier); + ((DATA_TYPE)(*((__global int *)(input.ptr)))) * + *(((__global DATA_TYPE *)(scale_ptr)) + get_global_id(1)) * (DATA_TYPE)(multiplier); #endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X) } diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/one_hot.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/one_hot.cl index c274aba..784a8d6 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/one_hot.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/one_hot.cl @@ -206,16 +206,16 @@ __kernel void one_hot_only_on_value(TENSOR3D_DECLARATION(indices), VECTOR_DECLAR #if AXIS == 0 *(__global 
DATA_TYPE *)tensor4D_offset(&output, index, px, py, pz) = - *((__global const DATA_TYPE *)on_value_ptr); + *((__global const DATA_TYPE *)on_value_ptr); #elif AXIS == 1 *(__global DATA_TYPE *)tensor4D_offset(&output, px, index, py, pz) = - *((__global const DATA_TYPE *)on_value_ptr); + *((__global const DATA_TYPE *)on_value_ptr); #elif AXIS == 2 *(__global DATA_TYPE *)tensor4D_offset(&output, px, py, index, pz) = - *((__global const DATA_TYPE *)on_value_ptr); + *((__global const DATA_TYPE *)on_value_ptr); #elif AXIS == 3 *(__global DATA_TYPE *)tensor4D_offset(&output, px, py, pz, index) = - *((__global const DATA_TYPE *)on_value_ptr); + *((__global const DATA_TYPE *)on_value_ptr); #endif // AXIS } diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl index 76fda90..532000e 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl @@ -138,7 +138,7 @@ __kernel void pixelwise_mul_qasymm8(TENSOR3D_DECLARATION(in1), TENSOR3D_DECLARAT // Multiply with a multiplier smaller than 1 out_val = - ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(out_val, RESULT_MULT_INT, RESULT_SHIFT, 16); + ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(out_val, RESULT_MULT_INT, RESULT_SHIFT, 16); out_val += (VEC_DATA_TYPE(int, 16))(RESULT_OFFSET); VEC_DATA_TYPE(uchar, 16) res = CONVERT(out_val, VEC_DATA_TYPE(uchar, 16)); diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl index 4ae9adb..c829f26 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl @@ -116,7 +116,7 @@ __kernel void quantization_symm8(IMAGE_DECLARATION(input), VECTOR_DECLARATION(sc // Create scale vector const VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) vscale = - *(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1)); + *(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1)); // Quantize VEC_DATA_TYPE(int, VEC_SIZE) @@ -127,10 +127,10 @@ __kernel void quantization_symm8(IMAGE_DECLARATION(input), VECTOR_DECLARATION(sc (CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), 0, (__global DATA_TYPE_OUT *)output.ptr); #else //! 
defined(VEC_SIZE) || !defined(LAST_ACCESSED_X) *((__global DATA_TYPE_OUT *)(output.ptr)) = (DATA_TYPE_OUT)CLAMP( - CONVERT_RTE((*(__global DATA_TYPE_IN *)input.ptr) / - (*(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1))), - int), - MIN_QUANT_VAL, MAX_QUANT_VAL); + CONVERT_RTE((*(__global DATA_TYPE_IN *)input.ptr) / + (*(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1))), + int), + MIN_QUANT_VAL, MAX_QUANT_VAL); #endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X) } #endif // defined(VEC_SIZE) && defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl index 832ac12..d0ef31b 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl @@ -100,12 +100,14 @@ __kernel void reduce_min_max(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(o Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT); int indices[4] = { - get_global_id(0), get_global_id(1), get_global_id(2) % DEPTH_OUT, - get_global_id(2) / DEPTH_OUT, + get_global_id(0), + get_global_id(1), + get_global_id(2) % DEPTH_OUT, + get_global_id(2) / DEPTH_OUT, }; DATA_TYPE value = - *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3])); + *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3])); for (int i = 1; i < dim; ++i) { indices[axis] = i; @@ -186,16 +188,18 @@ __kernel void reduce_sum_mean(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION( Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT); int indices[4] = { - get_global_id(0), get_global_id(1), get_global_id(2) % DEPTH_OUT, - get_global_id(2) / DEPTH_OUT, + get_global_id(0), + get_global_id(1), + get_global_id(2) % DEPTH_OUT, + get_global_id(2) / DEPTH_OUT, }; DATA_TYPE sum_value = (DATA_TYPE)0; for (int i = 0; i < dim; ++i) { indices[axis] = i; - sum_value += *( - (__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3])); + sum_value += + *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3])); } #if OP_CODE == 3 // REDUCE_SUM diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxLayerKernelEx.cpp index 047004d..45307fa 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxLayerKernelEx.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxLayerKernelEx.cpp @@ -63,10 +63,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *prev_outp { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::S32, + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, + DataType::QASYMM8_SIGNED, DataType::S32, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX && - op != ReductionOperation::ARG_IDX_MIN, + op != ReductionOperation::ARG_IDX_MIN, "Only ARG_IDX_MAX and ARG_IDX_MIN are supported"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); @@ -101,13 +102,13 @@ std::tuple validate_and_configure_window(ITensorInfo *input, output_shape.set(axis, 1); DataType output_data_type = 
(prev_output != nullptr) ? (prev_output->data_type()) : DataType::S32; auto_init_if_empty(*output, input->clone() - ->set_tensor_shape(output_shape) - .set_data_type(output_data_type) - .reset_padding() - .set_is_resizable(true)); + ->set_tensor_shape(output_shape) + .set_data_type(output_data_type) + .reset_padding() + .set_is_resizable(true)); - Window win = calculate_max_window((prev_output != nullptr) ? (*prev_output) : (*input), - Steps(vector_size)); + Window win = + calculate_max_window((prev_output != nullptr) ? (*prev_output) : (*input), Steps(vector_size)); bool window_changed = false; switch (axis) @@ -137,15 +138,15 @@ std::tuple validate_and_configure_window(ITensorInfo *input, } Status err = (window_changed) - ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") - : Status{}; + ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") + : Status{}; return std::make_tuple(err, win); } } // namespace CLArgMinMaxLayerKernelEx::CLArgMinMaxLayerKernelEx() - : _input(nullptr), _prev_output(nullptr), _output(nullptr), _reduction_axis(0), - _op(ReductionOperation::ARG_IDX_MAX) + : _input(nullptr), _prev_output(nullptr), _output(nullptr), _reduction_axis(0), + _op(ReductionOperation::ARG_IDX_MAX) { } @@ -155,11 +156,11 @@ void CLArgMinMaxLayerKernelEx::configure(const ICLTensor *input, const ICLTensor { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(input->info(), (prev_output != nullptr) ? prev_output->info() : nullptr, - output->info(), axis, op)); + validate_arguments(input->info(), (prev_output != nullptr) ? prev_output->info() : nullptr, + output->info(), axis, op)); auto win_config = validate_and_configure_window( - input->info(), (prev_output != nullptr) ? prev_output->info() : nullptr, output->info(), axis, - op); + input->info(), (prev_output != nullptr) ? prev_output->info() : nullptr, output->info(), axis, + op); ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); _input = input; @@ -213,7 +214,7 @@ void CLArgMinMaxLayerKernelEx::configure(const ICLTensor *input, const ICLTensor ARM_COMPUTE_ERROR("Not supported"); } _kernel = static_cast(CLKernelLibraryEx::get().create_kernel( - "arg_min_max_ex_" + kernel_axis_name, build_opts.options())); + "arg_min_max_ex_" + kernel_axis_name, build_opts.options())); // Configure kernel window ICLKernel::configure_internal(std::get<1>(win_config), lws_hint); @@ -225,8 +226,8 @@ Status CLArgMinMaxLayerKernelEx::validate(const ITensorInfo *input, const ITenso { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, prev_output, output, axis, op)); ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window( - input->clone().get(), (prev_output != nullptr) ? prev_output->clone().get() : nullptr, - output->clone().get(), axis, op))); + input->clone().get(), (prev_output != nullptr) ? 
prev_output->clone().get() : nullptr, + output->clone().get(), axis, op))); return Status{}; } diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp index fbc76f5..ffa2c5a 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp @@ -55,7 +55,7 @@ Status validate_parameters(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) { const TensorShape &out_shape = - TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape()); + TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape()); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QASYMM8); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QASYMM8); @@ -68,15 +68,15 @@ Status validate_parameters(const ITensorInfo *input1, const ITensorInfo *input2, ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8); ARM_COMPUTE_RETURN_ERROR_ON_MSG( - detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), - "Wrong shape for output"); + detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), + "Wrong shape for output"); } return Status{}; } } // namespace CLBinaryLogicalOpKernel::CLBinaryLogicalOpKernel() - : _input1(nullptr), _input2(nullptr), _output(nullptr) + : _input1(nullptr), _input2(nullptr), _output(nullptr) { } @@ -111,13 +111,13 @@ void CLBinaryLogicalOpKernel::configure(const ICLTensor *input1, const ICLTensor build_opts.emplace(("-DOP_CODE=" + support::cpp11::to_string(op_code))); build_opts.emplace( - ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); + ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); _kernel = - static_cast(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts)); + static_cast(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts)); const std::pair broadcast_pair = - ITensorInfo::broadcast_shape_and_valid_region(*input1->info(), *input2->info()); + ITensorInfo::broadcast_shape_and_valid_region(*input1->info(), *input2->info()); const ValidRegion &valid_region = broadcast_pair.second; @@ -130,8 +130,8 @@ void CLBinaryLogicalOpKernel::configure(const ICLTensor *input1, const ICLTensor AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); update_window_and_padding(win_input1, input1_access) || - update_window_and_padding(win_input2, input2_access) || - update_window_and_padding(win, output_access); + update_window_and_padding(win_input2, input2_access) || + update_window_and_padding(win, output_access); output_access.set_valid_region(win, valid_region); @@ -151,7 +151,7 @@ void CLBinaryLogicalOpKernel::run(const Window &window, cl::CommandQueue &queue) if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1) { can_collapse = - (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ); + (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ); for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++) { can_collapse = (in_shape1[d] == in_shape2[d]); @@ -160,13 +160,13 @@ void CLBinaryLogicalOpKernel::run(const Window &window, cl::CommandQueue &queue) bool has_collapsed = false; Window collapsed = - can_collapse ? 
window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed) - : window; + can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed) + : window; const TensorShape &in_shape1_collapsed = - has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1; + has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1; const TensorShape &in_shape2_collapsed = - has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2; + has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2; Window slice = collapsed.first_slice_window_3D(); Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed); @@ -189,9 +189,9 @@ void CLBinaryLogicalOpKernel::run(const Window &window, cl::CommandQueue &queue) BorderSize CLBinaryLogicalOpKernel::border_size() const { const unsigned int replicateSize = - _output->info()->dimension(0) - - std::min(_input1->info()->dimension(0), _input2->info()->dimension(0)); + _output->info()->dimension(0) - + std::min(_input1->info()->dimension(0), _input2->info()->dimension(0)); const unsigned int border = - std::min(num_elems_processed_per_iteration - 1U, replicateSize); + std::min(num_elems_processed_per_iteration - 1U, replicateSize); return BorderSize(0, border, 0, 0); } diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLCastBoolKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLCastBoolKernel.cpp index 6e0bcde..3f2ae35 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLCastBoolKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLCastBoolKernel.cpp @@ -103,7 +103,7 @@ void CLCastBoolKernel::configure(const ICLTensor *input, ICLTensor *output) // Create kernel const std::string kernel_name = "cast_bool"; _kernel = static_cast( - CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options())); + CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options())); // Configure kernel ICLSimple2DKernel::configure(input, output, num_elems_processed_per_iteration); diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp index 67aaf2d..e4c617c 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp @@ -61,14 +61,14 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen input_access.set_valid_region(win, output->valid_region()); Status err = (window_changed) - ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") - : Status{}; + ? 
ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") + : Status{}; return std::make_pair(err, win); } } // namespace CLEmbeddingLookupKernel::CLEmbeddingLookupKernel() - : _input(nullptr), _output(nullptr), _lookups(nullptr) + : _input(nullptr), _output(nullptr), _lookups(nullptr) { } @@ -77,8 +77,8 @@ Status CLEmbeddingLookupKernel::validate(const ITensorInfo *input, const ITensor { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, lookups); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); + input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, + DataType::U32, DataType::S32, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); @@ -108,8 +108,8 @@ void CLEmbeddingLookupKernel::configure(const ICLTensor *input, ICLTensor *outpu build_opts.emplace("-DNUM_DIMS=" + support::cpp11::to_string(_input->info()->num_dimensions())); // Create kernel - _kernel = static_cast( - CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts)); + _kernel = + static_cast(CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts)); // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info()); diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp index 3bfe3e4..8b58852 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp @@ -62,15 +62,15 @@ inline Status validate_arguments(const ITensorInfo *input, const ITensorInfo *in ARM_COMPUTE_RETURN_ERROR_ON(actual_axis >= input->num_dimensions()); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); + input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, + DataType::U32, DataType::S32, DataType::F16, DataType::F32); if (output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output); TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape_ex( - input->tensor_shape(), indices->tensor_shape(), actual_axis); + input->tensor_shape(), indices->tensor_shape(), actual_axis); ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size()); } @@ -86,7 +86,7 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen const uint32_t actual_axis = wrap_around(axis, static_cast(input->num_dimensions())); std::unique_ptr output_info = input->clone(); output_info->set_tensor_shape(arm_compute::misc::shape_calculator::compute_gather_shape_ex( - input->tensor_shape(), indices->tensor_shape(), actual_axis)); + input->tensor_shape(), indices->tensor_shape(), actual_axis)); // Output auto initialization if not yet initialized auto_init_if_empty((*output), output_info->tensor_shape(), 1, input->data_type()); @@ -100,7 +100,7 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen } // namespace CLGatherExKernel::CLGatherExKernel() - : 
_input(nullptr), _indices(nullptr), _output(nullptr), _axis(0) + : _input(nullptr), _indices(nullptr), _output(nullptr), _axis(0) { } @@ -109,11 +109,11 @@ void CLGatherExKernel::configure(const ICLTensor *input, const ICLTensor *indice { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, indices); ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(input->info(), indices->info(), output->info(), axis)); + validate_arguments(input->info(), indices->info(), output->info(), axis)); // Configure kernel window auto win_config = - validate_and_configure_window(input->info(), indices->info(), output->info(), axis); + validate_and_configure_window(input->info(), indices->info(), output->info(), axis); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); _input = input; @@ -133,7 +133,7 @@ void CLGatherExKernel::configure(const ICLTensor *input, const ICLTensor *indice // Create kernel _kernel = static_cast( - CLKernelLibraryEx::get().create_kernel("gather_ex", build_opts.options())); + CLKernelLibraryEx::get().create_kernel("gather_ex", build_opts.options())); ICLKernel::configure_internal(win_config.second); } @@ -144,7 +144,7 @@ Status CLGatherExKernel::validate(const ITensorInfo *input, const ITensorInfo *i ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), indices->clone().get(), output->clone().get(), axis) - .first); + .first); return Status{}; } diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp index 930e7c9..f0a761b 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp @@ -61,8 +61,8 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen input_access.set_valid_region(win, output->valid_region()); Status err = (window_changed) - ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") - : Status{}; + ? 
ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") + : Status{}; return std::make_pair(err, win); } } // namespace @@ -78,8 +78,8 @@ Status CLHashtableLookupKernel::validate(const ITensorInfo *lookups, const ITens { ARM_COMPUTE_ERROR_ON_NULLPTR(lookups, keys, input, output, hits); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); + input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, + DataType::U32, DataType::S32, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(keys, 1, DataType::S32); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(hits, 1, DataType::U8, DataType::QASYMM8); @@ -102,7 +102,7 @@ void CLHashtableLookupKernel::configure(const ICLTensor *lookups, const ICLTenso const ICLTensor *input, ICLTensor *output, ICLTensor *hits) { ARM_COMPUTE_ERROR_THROW_ON( - validate(lookups->info(), keys->info(), input->info(), output->info(), hits->info())); + validate(lookups->info(), keys->info(), input->info(), output->info(), hits->info())); _lookups = lookups; _keys = keys; @@ -113,7 +113,7 @@ void CLHashtableLookupKernel::configure(const ICLTensor *lookups, const ICLTenso // Make _lookup_indices tensor _lookup_indices = support::cpp14::make_unique(); _lookup_indices->allocator()->init( - TensorInfo(lookups->info()->tensor_shape(), lookups->info()->num_channels(), DataType::S32)); + TensorInfo(lookups->info()->tensor_shape(), lookups->info()->num_channels(), DataType::S32)); _lookup_indices->allocator()->allocate(); // Set kernel build options @@ -127,8 +127,8 @@ void CLHashtableLookupKernel::configure(const ICLTensor *lookups, const ICLTenso build_opts.emplace("-DNUM_DIMS=" + support::cpp11::to_string(_input->info()->num_dimensions())); // Create kernel - _kernel = static_cast( - CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts)); + _kernel = + static_cast(CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts)); // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info()); @@ -148,7 +148,7 @@ void CLHashtableLookupKernel::run(const Window &window, cl::CommandQueue &queue) // Set values of hits const int32_t *lookups_buf = - reinterpret_cast(const_cast(_lookups)->buffer()); + reinterpret_cast(const_cast(_lookups)->buffer()); const int32_t *keys_buf = reinterpret_cast(const_cast(_keys)->buffer()); uint8_t *hits_buf = reinterpret_cast(_hits->buffer()); int32_t *lookup_indices_buf = reinterpret_cast(_lookup_indices->buffer()); diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp index 61c14d2..dab6480 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp @@ -94,8 +94,8 @@ std::tuple validate_and_configure_window(ITensorInfo *input, ITe } // namespace CLInstanceNormalizationLayerKernelEx::CLInstanceNormalizationLayerKernelEx() - : _input(nullptr), _output(nullptr), _gamma(nullptr), _beta(nullptr), _epsilon(1e-12), - _run_in_place(false) + : _input(nullptr), _output(nullptr), _gamma(nullptr), _beta(nullptr), _epsilon(1e-12), + _run_in_place(false) { } @@ -132,7 
+132,7 @@ void CLInstanceNormalizationLayerKernelEx::configure(ICLTensor *input, ICLTensor // Create kernel _kernel = static_cast( - CLKernelLibraryEx::get().create_kernel("instance_normalization_ex", build_opts.options())); + CLKernelLibraryEx::get().create_kernel("instance_normalization_ex", build_opts.options())); // Configure kernel window auto win_config = validate_and_configure_window(_input->info(), _output->info()); @@ -147,7 +147,7 @@ Status CLInstanceNormalizationLayerKernelEx::validate(const ITensorInfo *input, { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, gamma, beta, epsilon)); ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window( - input->clone().get(), (output == nullptr ? input->clone().get() : output->clone().get())))); + input->clone().get(), (output == nullptr ? input->clone().get() : output->clone().get())))); return Status{}; } diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp index 6b27c99..1d4b141 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp @@ -99,7 +99,7 @@ std::tuple validate_and_configure_window(const ITensorInfo *inpu } // namespace CLMultiplyScaleFactorKernel::CLMultiplyScaleFactorKernel() - : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f) + : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f) { } @@ -108,7 +108,7 @@ void CLMultiplyScaleFactorKernel::configure(const ICLTensor *input, const ICLTen { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(input->info(), scale_factor->info(), output->info())); + validate_arguments(input->info(), scale_factor->info(), output->info())); _input = input; _scale_factor = scale_factor; @@ -123,9 +123,9 @@ void CLMultiplyScaleFactorKernel::configure(const ICLTensor *input, const ICLTen Window win = calculate_max_window(*output->info()); if (multi_access_x) { - win.set(Window::DimX, - Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), - vec_size_x)); + win.set( + Window::DimX, + Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x)); } ICLKernel::configure_internal(win); @@ -134,11 +134,11 @@ void CLMultiplyScaleFactorKernel::configure(const ICLTensor *input, const ICLTen build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x)); build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type())); build_opts.add_option_if( - multi_access_x, "-DLAST_ACCESSED_X=" + - support::cpp11::to_string(std::max(output_width_x - vec_size_x, 0))); + multi_access_x, "-DLAST_ACCESSED_X=" + + support::cpp11::to_string(std::max(output_width_x - vec_size_x, 0))); _kernel = static_cast( - CLKernelLibraryEx::get().create_kernel("multiply_scale_factor", build_opts.options())); + CLKernelLibraryEx::get().create_kernel("multiply_scale_factor", build_opts.options())); } Status CLMultiplyScaleFactorKernel::validate(const ITensorInfo *input, @@ -147,7 +147,7 @@ Status CLMultiplyScaleFactorKernel::validate(const ITensorInfo *input, { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, scale_factor, output)); ARM_COMPUTE_RETURN_ON_ERROR( - std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get()))); + 
std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get()))); return Status{}; } diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp index 643c8b1..ee633d4 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp @@ -80,9 +80,9 @@ void CLNegKernel::configure(const ICLTensor *input, ICLTensor *output) std::set build_opts; build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); build_opts.emplace( - ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); + ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); _kernel = - static_cast(CLKernelLibraryEx::get().create_kernel("neg_tensor", build_opts)); + static_cast(CLKernelLibraryEx::get().create_kernel("neg_tensor", build_opts)); // Configure window Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLOneHotKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLOneHotKernel.cpp index 35d70d6..0b8e7cc 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLOneHotKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLOneHotKernel.cpp @@ -65,7 +65,7 @@ inline Status validate_arguments(const ITensorInfo *indices, const ITensorInfo * { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(on_value, output); TensorShape output_shape = arm_compute::misc::shape_calculator::compute_onehot_shape_ex( - indices->tensor_shape(), static_cast(depth), actual_axis); + indices->tensor_shape(), static_cast(depth), actual_axis); ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size()); } return Status{}; @@ -79,7 +79,7 @@ std::pair validate_and_configure_window(ITensorInfo *indices, const uint32_t actual_axis = wrap_around(axis, static_cast(output->num_dimensions())); // Output auto initialization if not yet initialized TensorShape output_shape = arm_compute::misc::shape_calculator::compute_onehot_shape_ex( - indices->tensor_shape(), static_cast(depth), actual_axis); + indices->tensor_shape(), static_cast(depth), actual_axis); auto_init_if_empty((*output), output_shape, 1, on_value->data_type()); // Create window Window win = calculate_max_window(*output, Steps()); @@ -88,8 +88,8 @@ std::pair validate_and_configure_window(ITensorInfo *indices, } } // namespace CLOneHotKernel::CLOneHotKernel() - : _indices(nullptr), _on_value(nullptr), _off_value(nullptr), _output(nullptr), - _is_off_value_memset(false) + : _indices(nullptr), _on_value(nullptr), _off_value(nullptr), _output(nullptr), + _is_off_value_memset(false) { } void CLOneHotKernel::configure(const ICLTensor *indices, const ICLTensor *on_value, @@ -114,10 +114,10 @@ void CLOneHotKernel::configure_common(const ICLTensor *indices, const ICLTensor ICLTensor *output, int depth, int axis) { ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(indices->info(), on_value->info(), output->info(), depth, axis)); + validate_arguments(indices->info(), on_value->info(), output->info(), depth, axis)); // Configure kernel window auto win_config = - validate_and_configure_window(indices->info(), on_value->info(), output->info(), depth, axis); + validate_and_configure_window(indices->info(), on_value->info(), output->info(), depth, axis); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); if (_is_off_value_memset) { @@ -131,7 +131,7 
@@ void CLOneHotKernel::configure_common(const ICLTensor *indices, const ICLTensor // Set build options CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size( - data_size_from_type(on_value->info()->data_type()))); + data_size_from_type(on_value->info()->data_type()))); build_opts.add_option("-DAXIS=" + support::cpp11::to_string(actual_axis)); build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(depth)); build_opts.add_option("-DOUTPUT_DIM_Z=" + @@ -139,7 +139,7 @@ void CLOneHotKernel::configure_common(const ICLTensor *indices, const ICLTensor // Create kernel const std::string kernel_name = _is_off_value_memset ? "one_hot_only_on_value" : "one_hot"; _kernel = static_cast( - CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options())); + CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options())); ICLKernel::configure_internal(win_config.second); } Status CLOneHotKernel::validate(const ITensorInfo *indices, const ITensorInfo *on_value, @@ -153,7 +153,7 @@ Status CLOneHotKernel::validate(const ITensorInfo *indices, const ITensorInfo *o ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(indices->clone().get(), on_value->clone().get(), output->clone().get(), depth, axis) - .first); + .first); return Status{}; } Status CLOneHotKernel::validate(const ITensorInfo *indices, const ITensorInfo *on_value, @@ -163,7 +163,7 @@ Status CLOneHotKernel::validate(const ITensorInfo *indices, const ITensorInfo *o ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(indices->clone().get(), on_value->clone().get(), output->clone().get(), depth, axis) - .first); + .first); return Status{}; } void CLOneHotKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp index 1a7a18c..b417a71 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp @@ -87,9 +87,9 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen if (multi_access_x) { - win.set(Window::DimX, - Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), - vec_size_x)); + win.set( + Window::DimX, + Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x)); } Coordinates coord; @@ -101,7 +101,7 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen } // namespace CLQuantizationSymmetricKernel::CLQuantizationSymmetricKernel() - : _input(nullptr), _scale_factor(nullptr), _output(nullptr) + : _input(nullptr), _scale_factor(nullptr), _output(nullptr) { } @@ -110,7 +110,7 @@ void CLQuantizationSymmetricKernel::configure(const ICLTensor *input, const ICLT { ARM_COMPUTE_ERROR_ON_NULLPTR(input, scale_factor, output); ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(input->info(), scale_factor->info(), output->info())); + validate_arguments(input->info(), scale_factor->info(), output->info())); _input = input; _scale_factor = scale_factor; @@ -132,11 +132,11 @@ void CLQuantizationSymmetricKernel::configure(const ICLTensor *input, const ICLT build_opts.add_option("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); build_opts.add_option_if( - multi_access_x, "-DLAST_ACCESSED_X=" + - support::cpp11::to_string(std::max(input_width_x - vec_size_x, 0))); + multi_access_x, + 
"-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max(input_width_x - vec_size_x, 0))); _kernel = static_cast( - CLKernelLibraryEx::get().create_kernel("quantization_symm8", build_opts.options())); + CLKernelLibraryEx::get().create_kernel("quantization_symm8", build_opts.options())); } Status CLQuantizationSymmetricKernel::validate(const ITensorInfo *input, @@ -145,7 +145,7 @@ Status CLQuantizationSymmetricKernel::validate(const ITensorInfo *input, { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, scale_factor, output)); ARM_COMPUTE_RETURN_ON_ERROR( - validate_and_configure_window(input->clone().get(), output->clone().get()).first); + validate_and_configure_window(input->clone().get(), output->clone().get()).first); return Status{}; } diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp index 3fbebf2..3906009 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp @@ -145,7 +145,7 @@ void CLReduceOperationKernel::configure(const ICLTensor *input, ICLTensor *outpu // Create kernel _kernel = - static_cast(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts)); + static_cast(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts)); // Configure kernel window Window win = calculate_max_window(*output_info, Steps()); diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp index 8d8853c..4a63744 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp @@ -94,8 +94,8 @@ std::tuple validate_and_configure_window(ITensorInfo *input, ITe output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape())); Status err = (window_changed) - ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") - : Status{}; + ? 
ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") + : Status{}; return std::make_tuple(err, win); } } // namespace @@ -115,7 +115,7 @@ void CLScaleFactorSymm8Kernel::configure(const ICLTensor *input, ICLTensor *outp // Create kernel _kernel = static_cast( - CLKernelLibraryEx::get().create_kernel("scale_factor_symm8", build_opts)); + CLKernelLibraryEx::get().create_kernel("scale_factor_symm8", build_opts)); auto win_config = validate_and_configure_window(input->info(), output->info()); @@ -128,7 +128,7 @@ Status CLScaleFactorSymm8Kernel::validate(const ITensorInfo *input, const ITenso { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output)); ARM_COMPUTE_RETURN_ON_ERROR( - std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get()))); + std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get()))); return Status{}; } diff --git a/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp b/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp index dfe5d59..c88bef6 100644 --- a/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp +++ b/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp @@ -53,12 +53,12 @@ namespace using namespace arm_compute; template void elementwise_op_templ( - const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, - OutputScalarType (*scalar_func)(const InputScalarType &, const InputScalarType &), - int (*broadcast_func)(int, int, int, const InputScalarType *, const InputScalarType &, - OutputScalarType *, const bool), - int (*neon_func)(int, int, int, const InputScalarType *, const InputScalarType *, - OutputScalarType *)) + const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, + OutputScalarType (*scalar_func)(const InputScalarType &, const InputScalarType &), + int (*broadcast_func)(int, int, int, const InputScalarType *, const InputScalarType &, + OutputScalarType *, const bool), + int (*neon_func)(int, int, int, const InputScalarType *, const InputScalarType *, + OutputScalarType *)) { // Create input windows Window input1_win = window.broadcast_if_dimension_le_one(in1->info()->tensor_shape()); @@ -88,26 +88,26 @@ void elementwise_op_templ( Iterator non_broadcast_input(non_broadcast_tensor, non_broadcast_win); Iterator output(out, win); - execute_window_loop(win, - [&](const Coordinates &) { - auto output_ptr = reinterpret_cast(output.ptr()); - const auto non_broadcast_input_ptr = - reinterpret_cast(non_broadcast_input.ptr()); - const InputScalarType broadcast_value = - *reinterpret_cast(broadcast_input.ptr()); - - int x = (*broadcast_func)(window_start_x, window_end_x, window_step_x, - non_broadcast_input_ptr, broadcast_value, - output_ptr, !is_broadcast_input_2); - for (; x < window_end_x; ++x) - { - const auto a = *(non_broadcast_input_ptr + x); - *(output_ptr + x) = - (*scalar_func)(!is_broadcast_input_2 ? broadcast_value : a, - !is_broadcast_input_2 ? 
a : broadcast_value); - } - }, - broadcast_input, non_broadcast_input, output); + execute_window_loop( + win, + [&](const Coordinates &) { + auto output_ptr = reinterpret_cast(output.ptr()); + const auto non_broadcast_input_ptr = + reinterpret_cast(non_broadcast_input.ptr()); + const InputScalarType broadcast_value = + *reinterpret_cast(broadcast_input.ptr()); + + int x = + (*broadcast_func)(window_start_x, window_end_x, window_step_x, non_broadcast_input_ptr, + broadcast_value, output_ptr, !is_broadcast_input_2); + for (; x < window_end_x; ++x) + { + const auto a = *(non_broadcast_input_ptr + x); + *(output_ptr + x) = (*scalar_func)(!is_broadcast_input_2 ? broadcast_value : a, + !is_broadcast_input_2 ? a : broadcast_value); + } + }, + broadcast_input, non_broadcast_input, output); } else { @@ -119,24 +119,23 @@ void elementwise_op_templ( Iterator input2(in2, input2_win); Iterator output(out, win); - execute_window_loop(win, - [&](const Coordinates &) { - auto output_ptr = reinterpret_cast(output.ptr()); - const auto input1_ptr = - reinterpret_cast(input1.ptr()); - const auto input2_ptr = - reinterpret_cast(input2.ptr()); - - int x = (*neon_func)(window_start_x, window_end_x, window_step_x, - input1_ptr, input2_ptr, output_ptr); - for (; x < window_end_x; ++x) - { - const auto a = *(input1_ptr + x); - const auto b = *(input2_ptr + x); - *(output_ptr + x) = (*scalar_func)(a, b); - } - }, - input1, input2, output); + execute_window_loop( + win, + [&](const Coordinates &) { + auto output_ptr = reinterpret_cast(output.ptr()); + const auto input1_ptr = reinterpret_cast(input1.ptr()); + const auto input2_ptr = reinterpret_cast(input2.ptr()); + + int x = (*neon_func)(window_start_x, window_end_x, window_step_x, input1_ptr, input2_ptr, + output_ptr); + for (; x < window_end_x; ++x) + { + const auto a = *(input1_ptr + x); + const auto b = *(input2_ptr + x); + *(output_ptr + x) = (*scalar_func)(a, b); + } + }, + input1, input2, output); } } diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp index 32d7d62..a8464af 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp @@ -103,8 +103,10 @@ template inline uint8x16x4_t elementwise_logic_op(const uint8x16x4_t &a, const uint8x16x4_t &b) { uint8x16x4_t out = {{ - elementwise_logic_op(a.val[0], b.val[0]), elementwise_logic_op(a.val[1], b.val[1]), - elementwise_logic_op(a.val[2], b.val[2]), elementwise_logic_op(a.val[3], b.val[3]), + elementwise_logic_op(a.val[0], b.val[0]), + elementwise_logic_op(a.val[1], b.val[1]), + elementwise_logic_op(a.val[2], b.val[2]), + elementwise_logic_op(a.val[3], b.val[3]), }}; return out; } @@ -160,8 +162,8 @@ void elementwise_logic_op(const ITensor *in1, const ITensor *in2, ITensor *out, } std::function configure_func( - const ITensor *input1, const ITensor *input2, ITensor *output, - std::map map_function) + const ITensor *input1, const ITensor *input2, ITensor *output, + std::map map_function) { std::string function_to_call("op_"); function_to_call += string_from_data_type(input1->info()->data_type()) + "_"; @@ -184,8 +186,8 @@ std::function configure_logic_func(const ITensor *input1, const ITensor *input2, ITensor *output) { static std::map map_function = { - {"op_U8_U8_U8", &elementwise_logic_op}, - {"op_QASYMM8_QASYMM8_QASYMM8", &elementwise_logic_op}}; + {"op_U8_U8_U8", 
&elementwise_logic_op}, + {"op_QASYMM8_QASYMM8_QASYMM8", &elementwise_logic_op}}; return configure_func(input1, input2, output, map_function); } @@ -223,7 +225,7 @@ Status NEBinaryLogicalOperationKernel::validate_arguments(const ITensorInfo &inp ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &input2); const TensorShape out_shape = - TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape()); + TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape()); ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible"); @@ -232,8 +234,8 @@ Status NEBinaryLogicalOperationKernel::validate_arguments(const ITensorInfo &inp if (output.total_size() > 0) { ARM_COMPUTE_RETURN_ERROR_ON_MSG( - detail::have_different_dimensions(out_shape, output.tensor_shape(), 0), - "Wrong shape for output"); + detail::have_different_dimensions(out_shape, output.tensor_shape(), 0), + "Wrong shape for output"); } return Status{}; diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NECastBoolKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NECastBoolKernel.cpp index 12017e5..f935596 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NECastBoolKernel.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NECastBoolKernel.cpp @@ -129,125 +129,125 @@ void NECastBoolKernel::run(const Window &window, const ThreadInfo &info) case DataType::S8: { /* Conversion U8 -> S8 */ - execute_window_loop(win, - [&](const Coordinates &) { - const auto input_ptr = reinterpret_cast(input.ptr()); - const auto output_ptr = reinterpret_cast(output.ptr()); - - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x); - - vst1q_s8(output_ptr + x, vreinterpretq_s8_u8(vandq_u8( - texels_u8, vdupq_n_u8(true_val)))); - } - - // Compute left-over elements - for (; x < window_end_x; ++x) - { - *(output_ptr + x) = static_cast(*(input_ptr + x) & true_val); - } - }, - input, output); + execute_window_loop( + win, + [&](const Coordinates &) { + const auto input_ptr = reinterpret_cast(input.ptr()); + const auto output_ptr = reinterpret_cast(output.ptr()); + + int x = window_start_x; + for (; x <= (window_end_x - window_step_x); x += window_step_x) + { + const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x); + + vst1q_s8(output_ptr + x, + vreinterpretq_s8_u8(vandq_u8(texels_u8, vdupq_n_u8(true_val)))); + } + + // Compute left-over elements + for (; x < window_end_x; ++x) + { + *(output_ptr + x) = static_cast(*(input_ptr + x) & true_val); + } + }, + input, output); break; } case DataType::S16: { /* Up-conversion U8 -> S16 */ execute_window_loop( - win, - [&](const Coordinates &) { - const auto input_ptr = reinterpret_cast(input.ptr()); - const auto output_ptr = reinterpret_cast(output.ptr()); - - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x); - - const int16x8x2_t texels = { - {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))), - vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}}; - - vst1q_s16(output_ptr + x, texels.val[0]); - vst1q_s16(output_ptr + x + 8, texels.val[1]); - } - - // Compute left-over elements - for (; x < window_end_x; ++x) - { - *(output_ptr + x) = static_cast(*(input_ptr + x) & true_val); - } - }, - input, output); + win, + [&](const Coordinates &) { + const auto input_ptr = 
reinterpret_cast(input.ptr()); + const auto output_ptr = reinterpret_cast(output.ptr()); + + int x = window_start_x; + for (; x <= (window_end_x - window_step_x); x += window_step_x) + { + const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x); + + const int16x8x2_t texels = { + {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))), + vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}}; + + vst1q_s16(output_ptr + x, texels.val[0]); + vst1q_s16(output_ptr + x + 8, texels.val[1]); + } + + // Compute left-over elements + for (; x < window_end_x; ++x) + { + *(output_ptr + x) = static_cast(*(input_ptr + x) & true_val); + } + }, + input, output); break; } case DataType::S32: { /* Up-conversion U8 -> S32 */ execute_window_loop( - win, - [&](const Coordinates &) { - const auto input_ptr = reinterpret_cast(input.ptr()); - const auto output_ptr = reinterpret_cast(output.ptr()); - - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x); - - const int16x8x2_t texels = { - {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))), - vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}}; - - vst1q_s32(output_ptr + x, vmovl_s16(vget_low_s16(texels.val[0]))); - vst1q_s32(output_ptr + x + 4, vmovl_s16(vget_high_s16(texels.val[0]))); - vst1q_s32(output_ptr + x + 8, vmovl_s16(vget_low_s16(texels.val[1]))); - vst1q_s32(output_ptr + x + 12, vmovl_s16(vget_high_s16(texels.val[1]))); - } - - // Compute left-over elements - for (; x < window_end_x; ++x) - { - *(output_ptr + x) = static_cast(*(input_ptr + x) & true_val); - } - }, - input, output); + win, + [&](const Coordinates &) { + const auto input_ptr = reinterpret_cast(input.ptr()); + const auto output_ptr = reinterpret_cast(output.ptr()); + + int x = window_start_x; + for (; x <= (window_end_x - window_step_x); x += window_step_x) + { + const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x); + + const int16x8x2_t texels = { + {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))), + vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}}; + + vst1q_s32(output_ptr + x, vmovl_s16(vget_low_s16(texels.val[0]))); + vst1q_s32(output_ptr + x + 4, vmovl_s16(vget_high_s16(texels.val[0]))); + vst1q_s32(output_ptr + x + 8, vmovl_s16(vget_low_s16(texels.val[1]))); + vst1q_s32(output_ptr + x + 12, vmovl_s16(vget_high_s16(texels.val[1]))); + } + + // Compute left-over elements + for (; x < window_end_x; ++x) + { + *(output_ptr + x) = static_cast(*(input_ptr + x) & true_val); + } + }, + input, output); break; } case DataType::F32: { /* Up-conversion U8 -> F32 */ execute_window_loop( - win, - [&](const Coordinates &) { - const auto input_ptr = reinterpret_cast(input.ptr()); - const auto output_ptr = reinterpret_cast(output.ptr()); - - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x); - - const int16x8x2_t texels = { - {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))), - vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}}; - vst1q_f32(output_ptr + x, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[0])))); - vst1q_f32(output_ptr + x + 4, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[0])))); - vst1q_f32(output_ptr + x + 8, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[1])))); - 
vst1q_f32(output_ptr + x + 12, - vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[1])))); - } - - // Compute left-over elements - for (; x < window_end_x; ++x) - { - auto in = static_cast(*(input_ptr + x) & true_val); - *(output_ptr + x) = static_cast(in); - } - }, - input, output); + win, + [&](const Coordinates &) { + const auto input_ptr = reinterpret_cast(input.ptr()); + const auto output_ptr = reinterpret_cast(output.ptr()); + + int x = window_start_x; + for (; x <= (window_end_x - window_step_x); x += window_step_x) + { + const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x); + + const int16x8x2_t texels = { + {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))), + vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}}; + vst1q_f32(output_ptr + x, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[0])))); + vst1q_f32(output_ptr + x + 4, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[0])))); + vst1q_f32(output_ptr + x + 8, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[1])))); + vst1q_f32(output_ptr + x + 12, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[1])))); + } + + // Compute left-over elements + for (; x < window_end_x; ++x) + { + auto in = static_cast(*(input_ptr + x) & true_val); + *(output_ptr + x) = static_cast(in); + } + }, + input, output); break; } #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC @@ -255,86 +255,87 @@ void NECastBoolKernel::run(const Window &window, const ThreadInfo &info) { /* Up-conversion U8 -> F16 */ execute_window_loop( - win, - [&](const Coordinates &) { - const auto input_ptr = reinterpret_cast(input.ptr()); - const auto output_ptr = reinterpret_cast(output.ptr()); - - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x); - - const int16x8x2_t texels = { - {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))), - vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}}; - vst1q_f16(output_ptr + x, vcvtq_f16_s16(texels.val[0])); - vst1q_f16(output_ptr + x + 8, vcvtq_f16_s16(texels.val[1])); - } - - // Compute left-over elements - for (; x < window_end_x; ++x) - { - *(output_ptr + x) = static_cast(*(input_ptr + x) & true_val); - } - }, - input, output); + win, + [&](const Coordinates &) { + const auto input_ptr = reinterpret_cast(input.ptr()); + const auto output_ptr = reinterpret_cast(output.ptr()); + + int x = window_start_x; + for (; x <= (window_end_x - window_step_x); x += window_step_x) + { + const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x); + + const int16x8x2_t texels = { + {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))), + vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}}; + vst1q_f16(output_ptr + x, vcvtq_f16_s16(texels.val[0])); + vst1q_f16(output_ptr + x + 8, vcvtq_f16_s16(texels.val[1])); + } + + // Compute left-over elements + for (; x < window_end_x; ++x) + { + *(output_ptr + x) = static_cast(*(input_ptr + x) & true_val); + } + }, + input, output); break; } #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::U8: { /* Conversion U8 -> S8 */ - execute_window_loop(win, - [&](const Coordinates &) { - const auto input_ptr = reinterpret_cast(input.ptr()); - const auto output_ptr = reinterpret_cast(output.ptr()); - - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x); - - vst1q_u8(output_ptr + 
x, vandq_u8(texels_u8, vdupq_n_u8(true_val))); - } - - // Compute left-over elements - for (; x < window_end_x; ++x) - { - *(output_ptr + x) = static_cast(*(input_ptr + x) & true_val); - } - }, - input, output); + execute_window_loop( + win, + [&](const Coordinates &) { + const auto input_ptr = reinterpret_cast(input.ptr()); + const auto output_ptr = reinterpret_cast(output.ptr()); + + int x = window_start_x; + for (; x <= (window_end_x - window_step_x); x += window_step_x) + { + const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x); + + vst1q_u8(output_ptr + x, vandq_u8(texels_u8, vdupq_n_u8(true_val))); + } + + // Compute left-over elements + for (; x < window_end_x; ++x) + { + *(output_ptr + x) = static_cast(*(input_ptr + x) & true_val); + } + }, + input, output); break; } case DataType::U16: { /* Up-conversion U8 -> U16 */ execute_window_loop( - win, - [&](const Coordinates &) { - const auto input_ptr = reinterpret_cast(input.ptr()); - const auto output_ptr = reinterpret_cast(output.ptr()); - - int x = window_start_x; - for (; x <= (window_end_x - window_step_x); x += window_step_x) - { - const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x); - - const uint16x8x2_t texels = {{vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool)), - vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool))}}; - - vst1q_u16(output_ptr + x, texels.val[0]); - vst1q_u16(output_ptr + x + 8, texels.val[1]); - } - - // Compute left-over elements - for (; x < window_end_x; ++x) - { - *(output_ptr + x) = static_cast(*(input_ptr + x) & true_val); - } - }, - input, output); + win, + [&](const Coordinates &) { + const auto input_ptr = reinterpret_cast(input.ptr()); + const auto output_ptr = reinterpret_cast(output.ptr()); + + int x = window_start_x; + for (; x <= (window_end_x - window_step_x); x += window_step_x) + { + const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x); + + const uint16x8x2_t texels = {{vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool)), + vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool))}}; + + vst1q_u16(output_ptr + x, texels.val[0]); + vst1q_u16(output_ptr + x + 8, texels.val[1]); + } + + // Compute left-over elements + for (; x < window_end_x; ++x) + { + *(output_ptr + x) = static_cast(*(input_ptr + x) & true_val); + } + }, + input, output); break; } default: diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp index 091d38c..e3a77c6 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp @@ -50,7 +50,7 @@ using namespace arm_compute; NEEmbeddingLookupKernel::NEEmbeddingLookupKernel() - : _input(nullptr), _lookups(nullptr), _output(nullptr) + : _input(nullptr), _lookups(nullptr), _output(nullptr) { } @@ -79,8 +79,8 @@ Status NEEmbeddingLookupKernel::validate(const arm_compute::ITensorInfo *input, { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, lookups); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); + input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, + DataType::U32, DataType::S32, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32); ARM_COMPUTE_ERROR_ON(input->num_dimensions() < 2 && input->num_dimensions() > 4); @@ -119,16 +119,17 @@ void 
NEEmbeddingLookupKernel::run(const Window &window, const ThreadInfo &info) { Iterator output_it(_output, out_slice); - execute_window_loop(out_slice, - [&](const Coordinates &id) { - const int32_t lookup = *reinterpret_cast( - _lookups->ptr_to_element(Coordinates{id[lookup_dim]})); - Coordinates input_id{id}; - input_id.set(lookup_dim, lookup); - memcpy(output_it.ptr(), _input->ptr_to_element(input_id), - _output->info()->dimension(0) * _output->info()->element_size()); - }, - output_it); + execute_window_loop( + out_slice, + [&](const Coordinates &id) { + const int32_t lookup = + *reinterpret_cast(_lookups->ptr_to_element(Coordinates{id[lookup_dim]})); + Coordinates input_id{id}; + input_id.set(lookup_dim, lookup); + memcpy(output_it.ptr(), _input->ptr_to_element(input_id), + _output->info()->dimension(0) * _output->info()->element_size()); + }, + output_it); } while (window.slide_window_slice_4D(out_slice)); } diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp index 93963a5..c9f0799 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp @@ -71,7 +71,7 @@ template void validate_indices(const ITensor *indices) } // namespace NEGatherKernelEx::NEGatherKernelEx() - : _input{}, _indices{}, _axis{}, _indices_rank{}, _output{}, _func{} + : _input{}, _indices{}, _axis{}, _indices_rank{}, _output{}, _func{} { } @@ -85,36 +85,35 @@ inline void NEGatherKernelEx::gather_0_axis(const Window &window, const ThreadIn Iterator output_it(_output, window); execute_window_loop( - window, - [&](const Coordinates &id) { - Coordinates gather_id(id); - gather_id.collapse(_indices_rank); - - U new_index; - switch (_indices_rank) - { - case 1: - new_index = *(reinterpret_cast(_indices->ptr_to_element(Coordinates(id[0])))); - break; - case 2: - new_index = - *(reinterpret_cast(_indices->ptr_to_element(Coordinates(id[0], id[1])))); - break; - case 3: - new_index = *( - reinterpret_cast(_indices->ptr_to_element(Coordinates(id[0], id[1], id[2])))); - break; - default: - ARM_COMPUTE_ERROR("Wrong num of dimensions"); - break; - } - - gather_id.set(0, new_index); - - std::copy_n(_input->ptr_to_element(gather_id), _output->info()->element_size(), - output_it.ptr()); - }, - output_it); + window, + [&](const Coordinates &id) { + Coordinates gather_id(id); + gather_id.collapse(_indices_rank); + + U new_index; + switch (_indices_rank) + { + case 1: + new_index = *(reinterpret_cast(_indices->ptr_to_element(Coordinates(id[0])))); + break; + case 2: + new_index = *(reinterpret_cast(_indices->ptr_to_element(Coordinates(id[0], id[1])))); + break; + case 3: + new_index = + *(reinterpret_cast(_indices->ptr_to_element(Coordinates(id[0], id[1], id[2])))); + break; + default: + ARM_COMPUTE_ERROR("Wrong num of dimensions"); + break; + } + + gather_id.set(0, new_index); + + std::copy_n(_input->ptr_to_element(gather_id), _output->info()->element_size(), + output_it.ptr()); + }, + output_it); } template @@ -130,37 +129,36 @@ void NEGatherKernelEx::gather_n_axis(const Window &window, const ThreadInfo &inf Iterator output_it(_output, output_window); execute_window_loop( - output_window, - [&](const Coordinates &id) { - Coordinates gather_id(id); - gather_id.collapse(_indices_rank, _axis); - - U new_index; - switch (_indices_rank) - { - case 1: - new_index = *(reinterpret_cast(_indices->ptr_to_element(Coordinates(id[_axis])))); - break; - case 2: - new_index = 
*(reinterpret_cast( - _indices->ptr_to_element(Coordinates(id[_axis], id[_axis + 1])))); - break; - case 3: - new_index = *(reinterpret_cast( - _indices->ptr_to_element(Coordinates(id[_axis], id[_axis + 1], id[_axis + 2])))); - break; - default: - ARM_COMPUTE_ERROR("Wrong num of dimensions"); - break; - } - - gather_id.set(_axis, new_index); - - std::copy_n(_input->ptr_to_element(gather_id), - _input->info()->dimension(0) * _output->info()->element_size(), - output_it.ptr()); - }, - output_it); + output_window, + [&](const Coordinates &id) { + Coordinates gather_id(id); + gather_id.collapse(_indices_rank, _axis); + + U new_index; + switch (_indices_rank) + { + case 1: + new_index = *(reinterpret_cast(_indices->ptr_to_element(Coordinates(id[_axis])))); + break; + case 2: + new_index = *( + reinterpret_cast(_indices->ptr_to_element(Coordinates(id[_axis], id[_axis + 1])))); + break; + case 3: + new_index = *(reinterpret_cast( + _indices->ptr_to_element(Coordinates(id[_axis], id[_axis + 1], id[_axis + 2])))); + break; + default: + ARM_COMPUTE_ERROR("Wrong num of dimensions"); + break; + } + + gather_id.set(_axis, new_index); + + std::copy_n(_input->ptr_to_element(gather_id), + _input->info()->dimension(0) * _output->info()->element_size(), output_it.ptr()); + }, + output_it); } void NEGatherKernelEx::configure(const ITensor *input, const ITensor *indices, ITensor *output, @@ -170,8 +168,8 @@ void NEGatherKernelEx::configure(const ITensor *input, const ITensor *indices, I ARM_COMPUTE_ERROR_ON(indices->info()->num_dimensions() > 3); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32, DataType::S32); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); + input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, + DataType::U32, DataType::S32, DataType::F16, DataType::F32); _input = input; _indices = indices; @@ -217,7 +215,7 @@ void NEGatherKernelEx::configure(const ITensor *input, const ITensor *indices, I } // Output auto initialization if not yet initialized TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape_ex( - input->info()->tensor_shape(), indices->info()->tensor_shape(), _axis); + input->info()->tensor_shape(), indices->info()->tensor_shape(), _axis); auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type()); // Create window @@ -243,15 +241,15 @@ Status NEGatherKernelEx::validate(const ITensorInfo *input, const ITensorInfo *i ARM_COMPUTE_RETURN_ERROR_ON(0 > axis || axis >= static_cast(input->num_dimensions())); ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); + input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, + DataType::U32, DataType::S32, DataType::F16, DataType::F32); if (output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output); TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape_ex( - input->tensor_shape(), indices->tensor_shape(), axis); + input->tensor_shape(), indices->tensor_shape(), axis); 
ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size());
}
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp
index 30787c0..52b40e7 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp
@@ -57,7 +57,7 @@ constexpr size_t NOT_HIT = 0xFFFFFFFF;
} // namespace
NEHashtableLookupKernel::NEHashtableLookupKernel()
- : _lookups(nullptr), _keys(nullptr), _input(nullptr), _output(nullptr), _hits{nullptr}
+ : _lookups(nullptr), _keys(nullptr), _input(nullptr), _output(nullptr), _hits{nullptr}
{
}
@@ -66,7 +66,7 @@ void NEHashtableLookupKernel::configure(const ITensor *lookups, const ITensor *k
{
ARM_COMPUTE_ERROR_ON_NULLPTR(lookups, keys, input, output, hits);
ARM_COMPUTE_ERROR_THROW_ON(
- validate(lookups->info(), keys->info(), input->info(), output->info(), hits->info()));
+ validate(lookups->info(), keys->info(), input->info(), output->info(), hits->info()));
_lookups = lookups;
_keys = keys;
@@ -92,8 +92,8 @@ Status NEHashtableLookupKernel::validate(const ITensorInfo *lookups, const ITens
{
ARM_COMPUTE_ERROR_ON_NULLPTR(lookups, keys, input, output, hits);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(keys, 1, DataType::S32);
@@ -134,8 +134,8 @@ void NEHashtableLookupKernel::run(const Window &window, const ThreadInfo &info)
const size_t lookup_dim = _output->info()->num_dimensions() - 1;
const int const_0 = _output->info()->data_type() == DataType::QASYMM8
- ?
_output->info()->quantization_info().uniform().offset + : 0; std::unordered_map key_index_map; for (size_t n = 0; n < _keys->info()->dimension(0); ++n) @@ -174,24 +174,24 @@ void NEHashtableLookupKernel::run(const Window &window, const ThreadInfo &info) { Iterator output_it(_output, out_slice); - execute_window_loop(out_slice, - [&](const Coordinates &id) { - const auto lookup = lookup_indices.at(id[lookup_dim]); - if (lookup == NOT_HIT) - { - memset(output_it.ptr(), const_0, - _output->info()->dimension(0) * _output->info()->element_size()); - } - else - { - Coordinates input_id{id}; - input_id.set(lookup_dim, lookup); - memcpy(output_it.ptr(), _input->ptr_to_element(input_id), - _output->info()->dimension(0) * _output->info()->element_size()); - } - - }, - output_it); + execute_window_loop( + out_slice, + [&](const Coordinates &id) { + const auto lookup = lookup_indices.at(id[lookup_dim]); + if (lookup == NOT_HIT) + { + memset(output_it.ptr(), const_0, + _output->info()->dimension(0) * _output->info()->element_size()); + } + else + { + Coordinates input_id{id}; + input_id.set(lookup_dim, lookup); + memcpy(output_it.ptr(), _input->ptr_to_element(input_id), + _output->info()->dimension(0) * _output->info()->element_size()); + } + }, + output_it); } while (window.slide_window_slice_4D(out_slice)); } diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp index 49adf14..4dc0f55 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp @@ -63,7 +63,7 @@ void instance_normalization_nchw(ITensor *input, ITensor *output, ITensor *gamma { /** NEON vector tag type. 
*/ using ExactTagType = - typename wrapper::traits::neon_bitvector_tag_t; + typename wrapper::traits::neon_bitvector_tag_t; // Clear X/Y dimensions on execution window as we handle the planes manually Window win = window; @@ -73,107 +73,107 @@ void instance_normalization_nchw(ITensor *input, ITensor *output, ITensor *gamma constexpr int window_step_x = 16 / sizeof(T); const unsigned int elements_plane = input->info()->dimension(0) * output->info()->dimension(1); const auto channel_idx = - get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL); + get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL); Iterator input_it(input, win); execute_window_loop( - win, - [&](const Coordinates &id) { - Window win_plane = window; - win_plane.set(Window::DimX, Window::Dimension(0, 1, 1)); - win_plane.set(Window::DimZ, Window::Dimension(id[2], id[2] + 1, 1)); - win_plane.set(3, Window::Dimension(id[3], id[3] + 1, 1)); - - Iterator input_plane_it(input, win_plane); - Iterator output_plane_it(output, win_plane); - - auto sum_h_w = static_cast(0.f); - auto sum_squares_h_w = static_cast(0.f); - - execute_window_loop( - win_plane, - [&](const Coordinates &) { - const auto input_ptr = reinterpret_cast(input_plane_it.ptr()); - - auto vec_sum_h_w = wrapper::vdup_n(static_cast(0.f), ExactTagType{}); - auto vec_sum_squares_h_w = wrapper::vdup_n(static_cast(0.f), ExactTagType{}); - - // Compute S elements per iteration - int x = window.x().start(); - for (; x <= (window.x().end() - window_step_x); x += window_step_x) - { - auto vec_input_val = wrapper::vloadq(input_ptr + x); - vec_sum_h_w = wrapper::vadd(vec_sum_h_w, vec_input_val); - vec_sum_squares_h_w = - wrapper::vadd(vec_sum_squares_h_w, wrapper::vmul(vec_input_val, vec_input_val)); - } - - auto vec2_sum_h_w = - wrapper::vpadd(wrapper::vgethigh(vec_sum_h_w), wrapper::vgetlow(vec_sum_h_w)); - auto vec2_sum_squares_h_w = wrapper::vpadd(wrapper::vgethigh(vec_sum_squares_h_w), - wrapper::vgetlow(vec_sum_squares_h_w)); - for (int i = 0; i < window_step_x / 4; ++i) - { - vec2_sum_h_w = wrapper::vpadd(vec2_sum_h_w, vec2_sum_h_w); - vec2_sum_squares_h_w = wrapper::vpadd(vec2_sum_squares_h_w, vec2_sum_squares_h_w); - } - sum_h_w += wrapper::vgetlane(vec2_sum_h_w, 0); - sum_squares_h_w += wrapper::vgetlane(vec2_sum_squares_h_w, 0); - - // Compute left-over elements - for (; x < window.x().end(); ++x) - { - const auto value = *(input_ptr + x); - sum_h_w += value; - sum_squares_h_w += value * value; - } - }, - input_plane_it, output_plane_it); - - const auto mean_h_w = sum_h_w / elements_plane; - const auto var_h_w = sum_squares_h_w / elements_plane - mean_h_w * mean_h_w; - - auto gamma_val = 1.0f; - if (gamma != nullptr) - { - gamma_val = *reinterpret_cast(gamma->ptr_to_element({id[channel_idx]})); - } - const auto multip_h_w = gamma_val / std::sqrt(var_h_w + epsilon); - const auto vec_mean_h_w = wrapper::vdup_n(static_cast(mean_h_w), ExactTagType{}); - const auto vec_multip_h_w = wrapper::vdup_n(static_cast(multip_h_w), ExactTagType{}); - auto beta_val = 0.0f; - if (beta != nullptr) - { - beta_val = *reinterpret_cast(beta->ptr_to_element({id[channel_idx]})); - } - const auto vec_beta = wrapper::vdup_n(static_cast(beta_val), ExactTagType{}); - - execute_window_loop( - win_plane, - [&](const Coordinates &) { - auto input_ptr = reinterpret_cast(input_plane_it.ptr()); - auto output_ptr = reinterpret_cast(output_plane_it.ptr()); - - // Compute S elements per iteration - int x = window.x().start(); - 
auto vec_val = wrapper::vdup_n(static_cast(0.0f), ExactTagType{}); - for (; x <= (window.x().end() - window_step_x); x += window_step_x) - { - vec_val = wrapper::vloadq(input_ptr + x); - vec_val = wrapper::vadd( - wrapper::vmul(wrapper::vsub(vec_val, vec_mean_h_w), vec_multip_h_w), vec_beta); - wrapper::vstore(output_ptr + x, vec_val); - } - - // Compute left-over elements - for (; x < window.x().end(); ++x) - { - *(output_ptr + x) = ((*(input_ptr + x)) - mean_h_w) * multip_h_w + beta_val; - } - }, - input_plane_it, output_plane_it); - }, - input_it); + win, + [&](const Coordinates &id) { + Window win_plane = window; + win_plane.set(Window::DimX, Window::Dimension(0, 1, 1)); + win_plane.set(Window::DimZ, Window::Dimension(id[2], id[2] + 1, 1)); + win_plane.set(3, Window::Dimension(id[3], id[3] + 1, 1)); + + Iterator input_plane_it(input, win_plane); + Iterator output_plane_it(output, win_plane); + + auto sum_h_w = static_cast(0.f); + auto sum_squares_h_w = static_cast(0.f); + + execute_window_loop( + win_plane, + [&](const Coordinates &) { + const auto input_ptr = reinterpret_cast(input_plane_it.ptr()); + + auto vec_sum_h_w = wrapper::vdup_n(static_cast(0.f), ExactTagType{}); + auto vec_sum_squares_h_w = wrapper::vdup_n(static_cast(0.f), ExactTagType{}); + + // Compute S elements per iteration + int x = window.x().start(); + for (; x <= (window.x().end() - window_step_x); x += window_step_x) + { + auto vec_input_val = wrapper::vloadq(input_ptr + x); + vec_sum_h_w = wrapper::vadd(vec_sum_h_w, vec_input_val); + vec_sum_squares_h_w = + wrapper::vadd(vec_sum_squares_h_w, wrapper::vmul(vec_input_val, vec_input_val)); + } + + auto vec2_sum_h_w = + wrapper::vpadd(wrapper::vgethigh(vec_sum_h_w), wrapper::vgetlow(vec_sum_h_w)); + auto vec2_sum_squares_h_w = wrapper::vpadd(wrapper::vgethigh(vec_sum_squares_h_w), + wrapper::vgetlow(vec_sum_squares_h_w)); + for (int i = 0; i < window_step_x / 4; ++i) + { + vec2_sum_h_w = wrapper::vpadd(vec2_sum_h_w, vec2_sum_h_w); + vec2_sum_squares_h_w = wrapper::vpadd(vec2_sum_squares_h_w, vec2_sum_squares_h_w); + } + sum_h_w += wrapper::vgetlane(vec2_sum_h_w, 0); + sum_squares_h_w += wrapper::vgetlane(vec2_sum_squares_h_w, 0); + + // Compute left-over elements + for (; x < window.x().end(); ++x) + { + const auto value = *(input_ptr + x); + sum_h_w += value; + sum_squares_h_w += value * value; + } + }, + input_plane_it, output_plane_it); + + const auto mean_h_w = sum_h_w / elements_plane; + const auto var_h_w = sum_squares_h_w / elements_plane - mean_h_w * mean_h_w; + + auto gamma_val = 1.0f; + if (gamma != nullptr) + { + gamma_val = *reinterpret_cast(gamma->ptr_to_element({id[channel_idx]})); + } + const auto multip_h_w = gamma_val / std::sqrt(var_h_w + epsilon); + const auto vec_mean_h_w = wrapper::vdup_n(static_cast(mean_h_w), ExactTagType{}); + const auto vec_multip_h_w = wrapper::vdup_n(static_cast(multip_h_w), ExactTagType{}); + auto beta_val = 0.0f; + if (beta != nullptr) + { + beta_val = *reinterpret_cast(beta->ptr_to_element({id[channel_idx]})); + } + const auto vec_beta = wrapper::vdup_n(static_cast(beta_val), ExactTagType{}); + + execute_window_loop( + win_plane, + [&](const Coordinates &) { + auto input_ptr = reinterpret_cast(input_plane_it.ptr()); + auto output_ptr = reinterpret_cast(output_plane_it.ptr()); + + // Compute S elements per iteration + int x = window.x().start(); + auto vec_val = wrapper::vdup_n(static_cast(0.0f), ExactTagType{}); + for (; x <= (window.x().end() - window_step_x); x += window_step_x) + { + vec_val = wrapper::vloadq(input_ptr 
+ x); + vec_val = wrapper::vadd( + wrapper::vmul(wrapper::vsub(vec_val, vec_mean_h_w), vec_multip_h_w), vec_beta); + wrapper::vstore(output_ptr + x, vec_val); + } + + // Compute left-over elements + for (; x < window.x().end(); ++x) + { + *(output_ptr + x) = ((*(input_ptr + x)) - mean_h_w) * multip_h_w + beta_val; + } + }, + input_plane_it, output_plane_it); + }, + input_it); } Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, @@ -199,8 +199,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, gamma); ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(get_data_layout_dimension_index( - input->data_layout(), DataLayoutDimension::CHANNEL)) != - gamma->dimension(0), + input->data_layout(), DataLayoutDimension::CHANNEL)) != + gamma->dimension(0), "Gamma's size must be the same as size of input's channel"); } @@ -208,8 +208,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, beta); ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(get_data_layout_dimension_index( - input->data_layout(), DataLayoutDimension::CHANNEL)) != - beta->dimension(0), + input->data_layout(), DataLayoutDimension::CHANNEL)) != + beta->dimension(0), "Beta's size must be the same as size of input's channel"); } @@ -234,8 +234,8 @@ std::tuple validate_and_configure_window(ITensorInfo *input, ITe } // namespace NEInstanceNormalizationLayerKernelEx::NEInstanceNormalizationLayerKernelEx() - : _func(nullptr), _input(nullptr), _output(nullptr), _gamma(nullptr), _beta(nullptr), - _epsilon(1e-12) + : _func(nullptr), _input(nullptr), _output(nullptr), _gamma(nullptr), _beta(nullptr), + _epsilon(1e-12) { } @@ -251,7 +251,7 @@ void NEInstanceNormalizationLayerKernelEx::configure(ITensor *input, ITensor *ou _epsilon = epsilon; ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(_input->info(), _output->info(), gamma->info(), beta->info(), epsilon)); + validate_arguments(_input->info(), _output->info(), gamma->info(), beta->info(), epsilon)); if (_input->info()->data_type() == DataType::F32) { @@ -282,7 +282,7 @@ Status NEInstanceNormalizationLayerKernelEx::validate(const ITensorInfo *input, { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, gamma, beta, epsilon)); ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window( - input->clone().get(), (output == nullptr ? input->clone().get() : output->clone().get())))); + input->clone().get(), (output == nullptr ? 
input->clone().get() : output->clone().get())))); return Status{}; } diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp index b92130c..ad47281 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp @@ -123,15 +123,17 @@ inline float32x4x4_t multiply_scale_vec(const int32x4x4_t &iv, float scale) const float32x4_t vscale = vdupq_n_f32(scale); const float32x4x4_t ret = {{ - vmulq_f32(vcvtq_f32_s32(iv.val[0]), vscale), vmulq_f32(vcvtq_f32_s32(iv.val[1]), vscale), - vmulq_f32(vcvtq_f32_s32(iv.val[2]), vscale), vmulq_f32(vcvtq_f32_s32(iv.val[3]), vscale), + vmulq_f32(vcvtq_f32_s32(iv.val[0]), vscale), + vmulq_f32(vcvtq_f32_s32(iv.val[1]), vscale), + vmulq_f32(vcvtq_f32_s32(iv.val[2]), vscale), + vmulq_f32(vcvtq_f32_s32(iv.val[3]), vscale), }}; return ret; } } // namespace NEMultiplyScaleFactorKernel::NEMultiplyScaleFactorKernel() - : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f) + : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f) { } @@ -140,7 +142,7 @@ void NEMultiplyScaleFactorKernel::configure(const ITensor *input, const ITensor { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(input->info(), scale_factor->info(), output->info())); + validate_arguments(input->info(), scale_factor->info(), output->info())); _input = input; _scale_factor = scale_factor; @@ -180,25 +182,25 @@ template void NEMultiplyScaleFactorKernel::multiply(const Window &w Iterator output(_output, win_collapsed); win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); execute_window_loop( - win_collapsed, - [&](const Coordinates &id) { - auto scale = *reinterpret_cast(_scale_factor->ptr_to_element({id.y()})); - scale *= _multiplier; - - const auto input_ptr = reinterpret_cast(input.ptr()); - auto output_ptr = reinterpret_cast(output.ptr()); - int x = window_start_x; - for (; x <= (window_end_x - window_step); x += window_step) - { - store_result(&output_ptr[x], multiply_scale_vec(load_value(&input_ptr[x]), scale)); - } - // Compute left-over elements - for (; x < window_end_x; ++x) - { - output_ptr[x] = input_ptr[x] * scale; - } - }, - input, output); + win_collapsed, + [&](const Coordinates &id) { + auto scale = *reinterpret_cast(_scale_factor->ptr_to_element({id.y()})); + scale *= _multiplier; + + const auto input_ptr = reinterpret_cast(input.ptr()); + auto output_ptr = reinterpret_cast(output.ptr()); + int x = window_start_x; + for (; x <= (window_end_x - window_step); x += window_step) + { + store_result(&output_ptr[x], multiply_scale_vec(load_value(&input_ptr[x]), scale)); + } + // Compute left-over elements + for (; x < window_end_x; ++x) + { + output_ptr[x] = input_ptr[x] * scale; + } + }, + input, output); } void NEMultiplyScaleFactorKernel::run(const Window &window, const ThreadInfo &info) diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEOneHotKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEOneHotKernel.cpp index 0a11eb5..0daff5c 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEOneHotKernel.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEOneHotKernel.cpp @@ -101,8 +101,8 @@ bool isOnValue(U index, U depth) } // namespace NEOneHotKernel::NEOneHotKernel() - : _indices{nullptr}, _depth{nullptr}, _on_value{nullptr}, _off_value{nullptr}, _axis{-1}, - 
_output{nullptr}, _func{} + : _indices{nullptr}, _depth{nullptr}, _on_value{nullptr}, + _off_value{nullptr}, _axis{-1}, _output{nullptr}, _func{} { } @@ -117,22 +117,22 @@ void NEOneHotKernel::onehot_0_axis(const Window &window, const ThreadInfo &info) Iterator output_it(_output, output_window); const U off_value = *reinterpret_cast(_off_value->buffer()); execute_window_loop( - output_window, - [&](const Coordinates &id) { - std::fill_n(output_it.ptr(), - _output->info()->dimension(0) * _output->info()->element_size(), off_value); - Coordinates indices_id(id); - indices_id.remove(0); - const U new_index = *(reinterpret_cast(_indices->ptr_to_element(indices_id))); - if (isOnValue(new_index, *(reinterpret_cast(_depth->buffer())))) - { - Coordinates onehot_id(id); - onehot_id.set(0, new_index); - std::copy_n(_on_value->buffer(), _output->info()->element_size(), - _output->ptr_to_element(onehot_id)); - } - }, - output_it); + output_window, + [&](const Coordinates &id) { + std::fill_n(output_it.ptr(), _output->info()->dimension(0) * _output->info()->element_size(), + off_value); + Coordinates indices_id(id); + indices_id.remove(0); + const U new_index = *(reinterpret_cast(_indices->ptr_to_element(indices_id))); + if (isOnValue(new_index, *(reinterpret_cast(_depth->buffer())))) + { + Coordinates onehot_id(id); + onehot_id.set(0, new_index); + std::copy_n(_on_value->buffer(), _output->info()->element_size(), + _output->ptr_to_element(onehot_id)); + } + }, + output_it); } template @@ -142,22 +142,22 @@ inline void NEOneHotKernel::onehot_n_axis(const Window &window, const ThreadInfo // Validate that the indices are not negative validate_depth(_depth, _output, _axis); Iterator output_it(_output, window); - execute_window_loop(window, - [&](const Coordinates &id) { - Coordinates indices_id(id); - indices_id.remove(_axis); - const U new_index = - *(reinterpret_cast(_indices->ptr_to_element(indices_id))); - if (isOnValue(new_index, *(reinterpret_cast(_depth->buffer())))) - { - Coordinates onehot_id(id); - onehot_id.set(_axis, new_index); - std::copy_n(static_cast(id[_axis]) == new_index ? _on_value->buffer() - : _off_value->buffer(), - _output->info()->element_size(), output_it.ptr()); - } - }, - output_it); + execute_window_loop( + window, + [&](const Coordinates &id) { + Coordinates indices_id(id); + indices_id.remove(_axis); + const U new_index = *(reinterpret_cast(_indices->ptr_to_element(indices_id))); + if (isOnValue(new_index, *(reinterpret_cast(_depth->buffer())))) + { + Coordinates onehot_id(id); + onehot_id.set(_axis, new_index); + std::copy_n(static_cast(id[_axis]) == new_index ? 
_on_value->buffer() + : _off_value->buffer(), + _output->info()->element_size(), output_it.ptr()); + } + }, + output_it); } void NEOneHotKernel::configure(const ITensor *indices, const ITensor *depth, @@ -215,7 +215,7 @@ Status NEOneHotKernel::validate(const ITensorInfo *indices, const ITensorInfo *d const ITensorInfo *output, int axis) { ARM_COMPUTE_RETURN_ON_ERROR( - validate_arguments(indices, depth, on_value, off_value, output, axis)); + validate_arguments(indices, depth, on_value, off_value, output, axis)); return Status{}; } diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp index 5841f1d..2306228 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp @@ -107,19 +107,15 @@ inline int8x16_t vquantizeSymm(const float32x4x4_t &fv, float scale_factor_inv, const int32x4x4_t rf = {{ #ifdef __aarch64__ - vminq_s32(vposend, - vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[0], vinvscale))))), - vminq_s32(vposend, - vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[1], vinvscale))))), - vminq_s32(vposend, - vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[2], vinvscale))))), - vminq_s32(vposend, - vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[3], vinvscale))))), + vminq_s32(vposend, vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[0], vinvscale))))), + vminq_s32(vposend, vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[1], vinvscale))))), + vminq_s32(vposend, vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[2], vinvscale))))), + vminq_s32(vposend, vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[3], vinvscale))))), #else //__aarch64__ - vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[0], vinvscale))))), - vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[1], vinvscale))))), - vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[2], vinvscale))))), - vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[3], vinvscale))))), + vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[0], vinvscale))))), + vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[1], vinvscale))))), + vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[2], vinvscale))))), + vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[3], vinvscale))))), #endif //__aarch64__ }}; const int8x8_t pa = vqmovn_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1]))); @@ -129,7 +125,7 @@ inline int8x16_t vquantizeSymm(const float32x4x4_t &fv, float scale_factor_inv, } // namespace NEQuantizationSymmetricKernel::NEQuantizationSymmetricKernel() - : _input(nullptr), _output(nullptr), _scale_factor(nullptr) + : _input(nullptr), _output(nullptr), _scale_factor(nullptr) { } @@ -138,7 +134,7 @@ void NEQuantizationSymmetricKernel::configure(const ITensor *input, ITensor *out { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(input->info(), output->info(), scale_factor->info())); + validate_arguments(input->info(), output->info(), scale_factor->info())); _input = input; _output = output; @@ -182,40 +178,40 @@ template void NEQuantizationSymmetricKernel::quantize(const Window const auto dim_x = 
_input->info()->dimension(0); win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); execute_window_loop( - win_collapsed, - [&](const Coordinates &id) { - const auto start = reinterpret_cast(input.ptr()); - const auto min_max = std::minmax_element(start, start + dim_x); - const auto int8_scale = 127; - auto range = std::max(std::abs(*min_max.first), std::abs(*min_max.second)); - if (range == 0) - { - *reinterpret_cast(_scale_factor->ptr_to_element({id.y()})) = 1; - range = 1; - } - else - { - *reinterpret_cast(_scale_factor->ptr_to_element({id.y()})) = range / int8_scale; - } - const auto scale_factor_inv = int8_scale / range; - - auto input_ptr = reinterpret_cast(input.ptr()); - auto output_ptr = reinterpret_cast(output.ptr()); - int x = window_start_x; - for (; x <= (window_end_x - window_step); x += window_step) - { - wrapper::vstore(&output_ptr[x], - vquantizeSymm(load_value(&input_ptr[x]), scale_factor_inv, int8_scale)); - } - // Compute left-over elements - for (; x < window_end_x; ++x) - { - int quantized = arm_compute::round(input_ptr[x] * scale_factor_inv, rounding_policy); - quantized = std::min(int8_scale, std::max(quantized, -int8_scale)); - output_ptr[x] = static_cast(quantized); - } - }, - input, output); + win_collapsed, + [&](const Coordinates &id) { + const auto start = reinterpret_cast(input.ptr()); + const auto min_max = std::minmax_element(start, start + dim_x); + const auto int8_scale = 127; + auto range = std::max(std::abs(*min_max.first), std::abs(*min_max.second)); + if (range == 0) + { + *reinterpret_cast(_scale_factor->ptr_to_element({id.y()})) = 1; + range = 1; + } + else + { + *reinterpret_cast(_scale_factor->ptr_to_element({id.y()})) = range / int8_scale; + } + const auto scale_factor_inv = int8_scale / range; + + auto input_ptr = reinterpret_cast(input.ptr()); + auto output_ptr = reinterpret_cast(output.ptr()); + int x = window_start_x; + for (; x <= (window_end_x - window_step); x += window_step) + { + wrapper::vstore(&output_ptr[x], + vquantizeSymm(load_value(&input_ptr[x]), scale_factor_inv, int8_scale)); + } + // Compute left-over elements + for (; x < window_end_x; ++x) + { + int quantized = arm_compute::round(input_ptr[x] * scale_factor_inv, rounding_policy); + quantized = std::min(int8_scale, std::max(quantized, -int8_scale)); + output_ptr[x] = static_cast(quantized); + } + }, + input, output); } void NEQuantizationSymmetricKernel::run(const Window &window, const ThreadInfo &info) diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp index 267228e..b02a48e 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLArgMinMaxLayerEx.cpp @@ -50,8 +50,8 @@ namespace arm_compute { CLArgMinMaxLayerEx::CLArgMinMaxLayerEx(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _results_vector(), _not_reshaped_output(), - _reduction_kernels_vector(), _reshape_kernel(), _num_of_stages(), _reduction_axis() + : _memory_group(std::move(memory_manager)), _results_vector(), _not_reshaped_output(), + _reduction_kernels_vector(), _reshape_kernel(), _num_of_stages(), _reduction_axis() { } @@ -60,13 +60,13 @@ Status CLArgMinMaxLayerEx::validate(const ITensorInfo *input, int axis, const IT { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX && - op != ReductionOperation::ARG_IDX_MIN, + op != 
ReductionOperation::ARG_IDX_MIN, "Invalid reduction operation"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= static_cast(TensorShape::num_max_dimensions), "Reduction axis greater than max number of dimensions"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis"); const unsigned int num_of_stages = - calculate_number_of_stages_only_x_axis(input->dimension(0), axis); + calculate_number_of_stages_only_x_axis(input->dimension(0), axis); DataType output_data_type = DataType::S32; TensorInfo not_reshaped_output; @@ -76,9 +76,9 @@ Status CLArgMinMaxLayerEx::validate(const ITensorInfo *input, int axis, const IT if (output->total_size() != 0) { output_data_type = output->data_type(); - const TensorInfo expected_output_shape = output->clone()->set_tensor_shape( - arm_compute::misc::shape_calculator::compute_reduced_shape(input->tensor_shape(), axis, - false)); + const TensorInfo expected_output_shape = + output->clone()->set_tensor_shape(arm_compute::misc::shape_calculator::compute_reduced_shape( + input->tensor_shape(), axis, false)); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&expected_output_shape, output); } @@ -87,9 +87,9 @@ Status CLArgMinMaxLayerEx::validate(const ITensorInfo *input, int axis, const IT auto initialize_tensorinfo = [](TensorInfo &ti, TensorShape shape, DataType data_type, int num_channels, QuantizationInfo qinfo) { ti.set_data_type(data_type) - .set_tensor_shape(shape) - .set_num_channels(num_channels) - .set_quantization_info(qinfo); + .set_tensor_shape(shape) + .set_num_channels(num_channels) + .set_quantization_info(qinfo); }; initialize_tensorinfo(not_reshaped_output, shape_before_reshape, output_data_type, @@ -98,7 +98,7 @@ Status CLArgMinMaxLayerEx::validate(const ITensorInfo *input, int axis, const IT if (num_of_stages == 1) { ARM_COMPUTE_RETURN_ON_ERROR( - CLArgMinMaxLayerKernelEx::validate(input, nullptr, ¬_reshaped_output, axis, op)); + CLArgMinMaxLayerKernelEx::validate(input, nullptr, ¬_reshaped_output, axis, op)); } else { @@ -118,19 +118,19 @@ Status CLArgMinMaxLayerEx::validate(const ITensorInfo *input, int axis, const IT // Validate ReductionOperation only on first kernel ARM_COMPUTE_RETURN_ON_ERROR( - CLArgMinMaxLayerKernelEx::validate(input, nullptr, &sums_vector[0], axis, op)); + CLArgMinMaxLayerKernelEx::validate(input, nullptr, &sums_vector[0], axis, op)); // Validate ReductionOperation on intermediate stages for (unsigned int i = 1; i < num_of_stages - 1; ++i) { - ARM_COMPUTE_RETURN_ON_ERROR(CLArgMinMaxLayerKernelEx::validate(input, &sums_vector[i - 1], - &sums_vector[i], axis, op)); + ARM_COMPUTE_RETURN_ON_ERROR( + CLArgMinMaxLayerKernelEx::validate(input, &sums_vector[i - 1], &sums_vector[i], axis, op)); } // Validate ReductionOperation on the last stage const unsigned int last_stage = num_of_stages - 1; ARM_COMPUTE_RETURN_ON_ERROR(CLArgMinMaxLayerKernelEx::validate( - input, &sums_vector[last_stage - 1], ¬_reshaped_output, axis, op)); + input, &sums_vector[last_stage - 1], ¬_reshaped_output, axis, op)); } ARM_COMPUTE_RETURN_ON_ERROR(CLReshapeLayerKernel::validate(¬_reshaped_output, output)); return Status{}; @@ -144,16 +144,16 @@ void CLArgMinMaxLayerEx::configure(const ICLTensor *input, int axis, ICLTensor * _reduction_axis = axis; const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape( - input->info()->tensor_shape(), axis, false); + input->info()->tensor_shape(), axis, false); DataType output_data_type = (output->info()->data_type() == DataType::UNKNOWN) - ? 
DataType::S32 - : output->info()->data_type(); + ? DataType::S32 + : output->info()->data_type(); auto_init_if_empty(*output->info(), input->info() - ->clone() - ->set_tensor_shape(output_shape) - .set_data_type(output_data_type) - .reset_padding() - .set_is_resizable(true)); + ->clone() + ->set_tensor_shape(output_shape) + .set_data_type(output_data_type) + .reset_padding() + .set_is_resizable(true)); // Configure reduction operation kernels _reduction_kernels_vector.resize(_num_of_stages); @@ -166,11 +166,11 @@ void CLArgMinMaxLayerEx::configure(const ICLTensor *input, int axis, ICLTensor * TensorShape output_shape{input->info()->tensor_shape()}; output_shape.set(axis, 1); auto_init_if_empty(*_not_reshaped_output.info(), input->info() - ->clone() - ->set_tensor_shape(output_shape) - .set_data_type(output_data_type) - .reset_padding() - .set_is_resizable(true)); + ->clone() + ->set_tensor_shape(output_shape) + .set_data_type(output_data_type) + .reset_padding() + .set_is_resizable(true)); _not_reshaped_output.info()->set_tensor_shape(output_shape); _reduction_kernels_vector[0].configure(input, nullptr, &_not_reshaped_output, axis, op); } @@ -182,7 +182,7 @@ void CLArgMinMaxLayerEx::configure(const ICLTensor *input, int axis, ICLTensor * { shape.set(0, ceil(shape.x() / 128.f)); _results_vector[i].allocator()->init( - input->info()->clone()->set_tensor_shape(shape).set_data_type(output_data_type)); + input->info()->clone()->set_tensor_shape(shape).set_data_type(output_data_type)); } // Apply ReductionOperation only on first kernel diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp index 3dede05..6359b4b 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLDirectTransposeConvLayer.cpp @@ -53,16 +53,10 @@ namespace arm_compute using namespace arm_compute::misc::shape_calculator; CLDirectTransposeConvLayer::CLDirectTransposeConvLayer( - std::shared_ptr memory_manager) // NOLINT - : _memory_group(std::move(memory_manager)), - _scale_f(), - _conv_f(), - _flip_weights(), - _scaled_output(), - _original_weights(nullptr), - _weights_flipped(), - _flip_axis(), - _is_prepared(false) + std::shared_ptr memory_manager) // NOLINT + : _memory_group(std::move(memory_manager)), _scale_f(), _conv_f(), _flip_weights(), + _scaled_output(), _original_weights(nullptr), _weights_flipped(), _flip_axis(), + _is_prepared(false) { } @@ -74,7 +68,7 @@ Status CLDirectTransposeConvLayer::validate(const ITensorInfo *input, const ITen { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32); + input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights); const DataLayout data_layout = input->data_layout(); @@ -86,8 +80,8 @@ Status CLDirectTransposeConvLayer::validate(const ITensorInfo *input, const ITen ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1); auto out_dims = transposeconv_output_dimensions( - input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w), - weights->dimension(idx_h), info, invalid_right, invalid_bottom); + input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w), + weights->dimension(idx_h), info, invalid_right, 
invalid_bottom); const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights); @@ -117,19 +111,19 @@ Status CLDirectTransposeConvLayer::validate(const ITensorInfo *input, const ITen unsigned int pad_right = 0; unsigned int pad_top = 0; unsigned int pad_bottom = 0; - const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( - *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top, - pad_bottom); + const TensorShape scale_out_shape = + compute_transposeconv_upsampled_shape(*input, *weights, info, out_dims, invalid_right, + invalid_bottom, pad_left, pad_right, pad_top, pad_bottom); TensorInfo scale_out_info(input->clone() - ->set_is_resizable(true) - .reset_padding() - .set_tensor_shape(scale_out_shape) - .set_data_layout(data_layout)); + ->set_is_resizable(true) + .reset_padding() + .set_tensor_shape(scale_out_shape) + .set_data_layout(data_layout)); const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionLayerUpsample::validate(input, &scale_out_info, info)); - ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayer::validate(&scale_out_info, weights, bias, output, - conv_info, weights_info)); + ARM_COMPUTE_RETURN_ON_ERROR( + CLConvolutionLayer::validate(&scale_out_info, weights, bias, output, conv_info, weights_info)); return Status{}; } @@ -171,22 +165,22 @@ void CLDirectTransposeConvLayer::configure(const CLCompileContext &compile_conte _flip_weights.configure(compile_context, weights, &_weights_flipped, &_flip_axis); auto out_dims = transposeconv_output_dimensions( - input->info()->dimension(idx_w), input->info()->dimension(idx_h), - weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right, - invalid_bottom); + input->info()->dimension(idx_w), input->info()->dimension(idx_h), + weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right, + invalid_bottom); const TensorShape output_shape = - compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info()); + compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info()); // Output auto initialization if not yet initialized auto_init_if_empty( - *output->info(), - input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout)); + *output->info(), + input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout)); // Perform validation step ARM_COMPUTE_ERROR_THROW_ON(CLDirectTransposeConvLayer::validate( - input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(), - info, invalid_right, invalid_bottom)); + input->info(), weights->info(), bias == nullptr ? 
nullptr : bias->info(), output->info(), info, + invalid_right, invalid_bottom)); _is_prepared = weights_info.retain_internal_weights(); @@ -195,8 +189,8 @@ void CLDirectTransposeConvLayer::configure(const CLCompileContext &compile_conte // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order // to match output shape const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( - *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left, - pad_right, pad_top, pad_bottom); + *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left, + pad_right, pad_top, pad_bottom); TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info()); diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp index 0198946..79d0929 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp @@ -60,7 +60,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I ARM_COMPUTE_UNUSED(weights); ARM_COMPUTE_UNUSED(output); ARM_COMPUTE_RETURN_ON_ERROR( - CLGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output)); + CLGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output)); return Status{}; } @@ -80,12 +80,12 @@ Status CLFullyConnectedHybridLayerReshapeWeights::validate(const ITensorInfo *in } CLFullyConnectedHybridLayer::CLFullyConnectedHybridLayer( - std::shared_ptr memory_manager) - : _memory_group(memory_manager), _reshape_weights_kernel(), _quant_input_kernel(), - _mm_gemmlowp(memory_manager), _multiply_scale_kernel(), _accumulate_biases_kernel(), - _reshape_weights_output(), _quantized_input(), _scale_factor(), _gemmlowp_output(), - _are_weights_reshaped(true), _accumulate_biases(false), _is_prepared(false), - _original_weights(nullptr) + std::shared_ptr memory_manager) + : _memory_group(memory_manager), _reshape_weights_kernel(), _quant_input_kernel(), + _mm_gemmlowp(memory_manager), _multiply_scale_kernel(), _accumulate_biases_kernel(), + _reshape_weights_output(), _quantized_input(), _scale_factor(), _gemmlowp_output(), + _are_weights_reshaped(true), _accumulate_biases(false), _is_prepared(false), + _original_weights(nullptr) { } void CLFullyConnectedHybridLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights, @@ -107,8 +107,8 @@ void CLFullyConnectedHybridLayer::configure(const ICLTensor *input, const ICLTen // Perform validate step ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedHybridLayer::validate( - input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), - fc_info)); + input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), + fc_info)); _are_weights_reshaped = fc_info.transpose_weights ? 
fc_info.are_weights_reshaped : true; _accumulate_biases = false; @@ -140,10 +140,10 @@ void CLFullyConnectedHybridLayer::configure(const ICLTensor *input, const ICLTen bool is_fc_after_conv = false; if (is_batched_fc_layer) { - is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && - (std::equal(input->info()->tensor_shape().cbegin() + 3, - input->info()->tensor_shape().cend(), - output->info()->tensor_shape().cbegin() + 1)); + is_fc_after_conv = + (TensorShape::num_max_dimensions >= 4) && + (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(), + output->info()->tensor_shape().cbegin() + 1)); } else { @@ -158,28 +158,28 @@ void CLFullyConnectedHybridLayer::configure(const ICLTensor *input, const ICLTen { // Reshape the weights _reshape_weights_output.allocator()->init( - weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - compute_transposed_shape(*weights->info()))); + weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( + compute_transposed_shape(*weights->info()))); _reshape_weights_kernel.configure(weights_to_use, &_reshape_weights_output); weights_to_use = &_reshape_weights_output; } // Extract scale factor _scale_factor.allocator()->init( - TensorInfo(TensorShape{output->info()->dimension(1)}, 1, input->info()->data_type())); + TensorInfo(TensorShape{output->info()->dimension(1)}, 1, input->info()->data_type())); _memory_group.manage(&_scale_factor); _scale_factor_kernel.configure(input, &_scale_factor); // Quantize input _quantized_input.allocator()->init( - input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type( - DataType::QASYMM8_SIGNED)); + input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type( + DataType::QASYMM8_SIGNED)); _memory_group.manage(&_quantized_input); _quant_input_kernel.configure(input, &_scale_factor, &_quantized_input); // GEMMLowp _gemmlowp_output.allocator()->init( - output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); + output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); _memory_group.manage(&_gemmlowp_output); configure_mm(&_quantized_input, weights_to_use, &_gemmlowp_output, fc_info.retain_internal_weights); @@ -209,15 +209,15 @@ Status CLFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe const GPUTarget gpu_target = CLScheduler::get().target(); const ITensorInfo &reshaped_weights = - TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - compute_transposed_shape(*weights))); + TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( + compute_transposed_shape(*weights))); // Configure accumulate biases kernel for non quantized asymmetric types if (biases != nullptr) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); ARM_COMPUTE_RETURN_ON_ERROR( - CLGEMMMatrixAccumulateBiasesKernel::validate(output, biases, gpu_target)); + CLGEMMMatrixAccumulateBiasesKernel::validate(output, biases, gpu_target)); } // With the Fully Connected layer we can have 4 different cases: @@ -247,33 +247,32 @@ Status CLFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe { // Validate reshape weights kernel ARM_COMPUTE_RETURN_ON_ERROR( - CLFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights)); + CLFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights)); weights_to_use = 
&reshaped_weights; } // Validate Scale factor kernel const ITensorInfo &scale_factor = - TensorInfo(TensorShape{output->dimension(1)}, 1, input->data_type()); + TensorInfo(TensorShape{output->dimension(1)}, 1, input->data_type()); ARM_COMPUTE_RETURN_ON_ERROR(CLScaleFactorSymm8Kernel::validate(input, &scale_factor)); // Validate quantization symm8 kernel - const ITensorInfo &quantized_input = - TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type( - DataType::QASYMM8_SIGNED)); + const ITensorInfo &quantized_input = TensorInfo( + input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::QASYMM8_SIGNED)); ARM_COMPUTE_RETURN_ON_ERROR( - CLQuantizationSymmetricKernel::validate(input, &scale_factor, &quantized_input)); + CLQuantizationSymmetricKernel::validate(input, &scale_factor, &quantized_input)); // Fully Connected layer after a Fully Connected Layer without batches ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1)); // Validate matrix multiply kernel const ITensorInfo &gemmlowp_output = TensorInfo( - output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); + output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output)); // Multiply scale ARM_COMPUTE_RETURN_ON_ERROR( - CLMultiplyScaleFactorKernel::validate(&gemmlowp_output, &scale_factor, output)); + CLMultiplyScaleFactorKernel::validate(&gemmlowp_output, &scale_factor, output)); return Status{}; } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp index 2ff4b96..13d3acb 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp @@ -79,7 +79,7 @@ Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorIn int output_multiplier = 0; int output_shift = 0; ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one( - multiplier, &output_multiplier, &output_shift)); + multiplier, &output_multiplier, &output_shift)); // Set the GEMMLowp output stage info gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset; @@ -99,7 +99,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I { GEMMLowpOutputStageInfo gemmlowp_output_stage; ARM_COMPUTE_RETURN_ON_ERROR( - construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage)); + construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage)); const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped false, // is_b_reshaped @@ -125,14 +125,14 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I // Validate gemmlowp function ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyCore::validate( - &input.clone()->set_quantization_info(input_quantization_info), - &weights.clone()->set_quantization_info(weights_quantization_info), bias, &output, - gemm_info)); + &input.clone()->set_quantization_info(input_quantization_info), + &weights.clone()->set_quantization_info(weights_quantization_info), bias, &output, + gemm_info)); } else { ARM_COMPUTE_RETURN_ON_ERROR( - CLGEMM::validate(&input, &weights, bias, &output, 1.f, 1.f, gemm_info)); + CLGEMM::validate(&input, &weights, bias, &output, 1.f, 1.f, gemm_info)); } return 
Status{}; @@ -154,12 +154,12 @@ Status CLFullyConnectedLayerReshapeWeightsEx::validate(const ITensorInfo *input, CLFullyConnectedLayerEx::CLFullyConnectedLayerEx(std::shared_ptr memory_manager, IWeightsManager *weights_manager) - : _memory_group(memory_manager), _weights_manager(weights_manager), _convert_weights(), - _convert_weights_managed(), _reshape_weights_managed_function(), _flatten_layer(), - _reshape_weights_function(), _mm_gemm(memory_manager, weights_manager), - _mm_gemmlowp(memory_manager), _flatten_output(), _converted_weights_output(), - _reshape_weights_output(), _are_weights_converted(true), _are_weights_reshaped(true), - _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr) + : _memory_group(memory_manager), _weights_manager(weights_manager), _convert_weights(), + _convert_weights_managed(), _reshape_weights_managed_function(), _flatten_layer(), + _reshape_weights_function(), _mm_gemm(memory_manager, weights_manager), + _mm_gemmlowp(memory_manager), _flatten_output(), _converted_weights_output(), + _reshape_weights_output(), _are_weights_converted(true), _are_weights_reshaped(true), + _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr) { } void CLFullyConnectedLayerEx::configure_mm(const ICLTensor *input, const ICLTensor *weights, @@ -190,9 +190,9 @@ void CLFullyConnectedLayerEx::configure_mm(const ICLTensor *input, const ICLTens const QuantizationInfo weights_quantization_info = weights->info()->quantization_info(); input->info()->set_quantization_info(QuantizationInfo( - input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); + input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); weights->info()->set_quantization_info(QuantizationInfo( - weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); + weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); // Configure gemmlowp function _mm_gemmlowp.configure(input, weights, bias, output, gemm_info); @@ -214,8 +214,8 @@ void CLFullyConnectedLayerEx::configure_conv_fc(const ICLTensor *input, const IC const FullyConnectedLayerInfo &fc_info) { ARM_COMPUTE_ERROR_ON( - (weights->info()->dimension(1) != - (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2)))); + (weights->info()->dimension(1) != + (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2)))); // If the fully connected layer is called after a convolution layer, the input tensor must be // linearized @@ -223,11 +223,11 @@ void CLFullyConnectedLayerEx::configure_conv_fc(const ICLTensor *input, const IC // Initialize output tensor for flatten TensorShape shape_flatten = compute_flatten_shape(input->info()); _flatten_output.allocator()->init(input->info() - ->clone() - ->set_is_resizable(true) - .reset_padding() - .set_tensor_shape(shape_flatten) - .set_data_layout(DataLayout::NCHW)); + ->clone() + ->set_is_resizable(true) + .reset_padding() + .set_tensor_shape(shape_flatten) + .set_data_layout(DataLayout::NCHW)); // Configure flatten kernel _memory_group.manage(&_flatten_output); @@ -258,8 +258,8 @@ void CLFullyConnectedLayerEx::configure(const ICLTensor *input, const ICLTensor // Perform validate step ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedLayerEx::validate( - input->info(), weights->info(), biases != nullptr ? 
biases->info() : nullptr, output->info(), - fc_info)); + input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), + fc_info)); _are_weights_converted = true; _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; @@ -285,10 +285,10 @@ void CLFullyConnectedLayerEx::configure(const ICLTensor *input, const ICLTensor const bool is_batched_fc_layer = output->info()->dimension(1) > 1; if (is_batched_fc_layer) { - _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && - (std::equal(input->info()->tensor_shape().cbegin() + 3, - input->info()->tensor_shape().cend(), - output->info()->tensor_shape().cbegin() + 1)); + _is_fc_after_conv = + (TensorShape::num_max_dimensions >= 4) && + (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(), + output->info()->tensor_shape().cbegin() + 1)); } else { @@ -302,7 +302,7 @@ void CLFullyConnectedLayerEx::configure(const ICLTensor *input, const ICLTensor { _reshape_weights_managed_function.configure(weights); weights_to_use = utils::cast::polymorphic_downcast( - _weights_manager->acquire(weights, &_reshape_weights_managed_function)); + _weights_manager->acquire(weights, &_reshape_weights_managed_function)); } else { @@ -320,7 +320,7 @@ void CLFullyConnectedLayerEx::configure(const ICLTensor *input, const ICLTensor _convert_weights_managed.configure(weights_to_use, input->info()->tensor_shape(), fc_info.weights_trained_layout); weights_to_use = utils::cast::polymorphic_downcast( - _weights_manager->acquire(weights, &_convert_weights_managed)); + _weights_manager->acquire(weights, &_convert_weights_managed)); } else { @@ -359,16 +359,16 @@ Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor bool is_fc_after_conv = true; const ITensorInfo &flatten_input = TensorInfo(input->clone() - ->set_is_resizable(true) - .reset_padding() - .set_tensor_shape(compute_flatten_shape(input)) - .set_data_layout(DataLayout::NCHW)); + ->set_is_resizable(true) + .reset_padding() + .set_tensor_shape(compute_flatten_shape(input)) + .set_data_layout(DataLayout::NCHW)); const ITensorInfo &reshaped_weights = - TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - compute_transposed_shape(*weights))); + TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( + compute_transposed_shape(*weights))); const ITensorInfo &converted_weights = - weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) - : TensorInfo(*reshaped_weights.clone()); + weights_reshaped ? 
TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) + : TensorInfo(*reshaped_weights.clone()); // With the Fully Connected layer we can have 4 different cases: // 1) Convolution layer -> Fully Connected layer without batches @@ -396,7 +396,7 @@ Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor { // Validate reshape weights kernel ARM_COMPUTE_RETURN_ON_ERROR( - CLFullyConnectedLayerReshapeWeightsEx::validate(weights, &reshaped_weights)); + CLFullyConnectedLayerReshapeWeightsEx::validate(weights, &reshaped_weights)); weights_to_use = &reshaped_weights; } @@ -404,7 +404,7 @@ Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor { // Validate convert weights kernel ARM_COMPUTE_RETURN_ON_ERROR(CLConvertFullyConnectedWeights::validate( - weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout)); + weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout)); weights_to_use = &converted_weights; } @@ -412,8 +412,8 @@ Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor { // Fully Connected layer after a Convolution Layer without batches ARM_COMPUTE_RETURN_ERROR_ON( - (weights_to_use->dimension(1) != - (input->dimension(0) * input->dimension(1) * input->dimension(2)))); + (weights_to_use->dimension(1) != + (input->dimension(0) * input->dimension(1) * input->dimension(2)))); // Validate flatten kernel ARM_COMPUTE_RETURN_ON_ERROR(CLFlattenLayer::validate(input, &flatten_input)); @@ -427,7 +427,7 @@ Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor // Validate matrix multiply kernel ARM_COMPUTE_RETURN_ON_ERROR( - validate_mm(*input_to_use, *weights_to_use, biases, *output, fc_info)); + validate_mm(*input_to_use, *weights_to_use, biases, *output, fc_info)); return Status{}; } @@ -457,7 +457,7 @@ void CLFullyConnectedLayerEx::run() if (_weights_manager && _weights_manager->are_weights_managed(cur_weights)) { _original_weights = utils::cast::polymorphic_downcast( - _weights_manager->run(cur_weights, &_reshape_weights_managed_function)); + _weights_manager->run(cur_weights, &_reshape_weights_managed_function)); } else { diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp index 157b4d9..ac6982e 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp @@ -41,7 +41,7 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp // reshape auto_init_if_empty(*_cl_buffer.info(), _input->info()->clone()->set_tensor_shape(reshape).set_data_layout( - _input->info()->data_layout())); + _input->info()->data_layout())); _cl_reshape.configure(_input, &_cl_buffer); input_to_use = &_cl_buffer; } @@ -57,7 +57,7 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp { bool is_hybrid = (input->info()->data_type() == DataType::F32 || input->info()->data_type() == DataType::F16) && - (weights->info()->data_type() == DataType::S8 || + (weights->info()->data_type() == DataType::QSYMM8 || weights->info()->data_type() == DataType::QASYMM8_SIGNED); if (is_hybrid) @@ -81,7 +81,6 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp { throw std::runtime_error("CLFullyConnectedReshapingLayer: Unsupported kernel 
type"); } - }(); if (_needs_reshape) diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp index 02ee4ad..c246041 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp @@ -46,8 +46,8 @@ using namespace arm_compute; CLReduceOperation::CLReduceOperation(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _input(nullptr), _output(nullptr), _axis(), - _keep_dims(false), _interm_tensors(), _reduce_kernels(), _reshape() + : _memory_group(std::move(memory_manager)), _input(nullptr), _output(nullptr), _axis(), + _keep_dims(false), _interm_tensors(), _reduce_kernels(), _reshape() { } @@ -91,13 +91,13 @@ Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo * for (size_t i = 0; i < num_of_kernels; ++i, ++it) { ARM_COMPUTE_RETURN_ON_ERROR( - CLReduceOperationKernel::validate(tensors[i], tensors[i + 1], *it, op)); + CLReduceOperationKernel::validate(tensors[i], tensors[i + 1], *it, op)); } if (!keep_dims) { ARM_COMPUTE_RETURN_ON_ERROR( - CLReshapeLayer::validate(&interm_tensors[num_of_interm_tensors - 1], output)); + CLReshapeLayer::validate(&interm_tensors[num_of_interm_tensors - 1], output)); } return Status{}; diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp index a502f03..12c0aa8 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLSplitVEx.cpp @@ -134,8 +134,8 @@ void configure_slices(const ICLTensor *input, const std::vector &ou // Output auto inizialitation if not yet initialized TensorInfo tmp_output_info = *output->info()->clone(); auto_init_if_empty( - tmp_output_info, - input->info()->clone()->set_is_resizable(true).set_tensor_shape(output_shape)); + tmp_output_info, + input->info()->clone()->set_is_resizable(true).set_tensor_shape(output_shape)); // Update coordinate on axis start_coords.set(split_dim, axis_offset); @@ -153,7 +153,7 @@ void configure_slices(const ICLTensor *input, const std::vector &ou } // namespace CLSplitVEx::CLSplitVEx() - : _input(nullptr), _size_splits(nullptr), _outputs(), _num_splits(0), _slice_functions() + : _input(nullptr), _size_splits(nullptr), _outputs(), _num_splits(0), _slice_functions() { } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp index 3ac95a8..accd513 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp @@ -49,14 +49,14 @@ namespace arm_compute { CLTopKV2::CLTopKV2() - : _k(0), _total_bits(0), _bits(0), _radix(0), _hist_buf_size(0), _glob_sum_buf_size(0), _n(0), - _input(nullptr), _values(nullptr), _indices(nullptr), _qs_idx_buf(), _qs_temp_buf(), - _hist_buf(), _glob_sum_buf(), _temp_buf(), _first_negative_idx_buf(), _in_key_buf(), - _out_key_buf(), _in_ind_buf(), _out_ind_buf(), _p_in_key_buf(nullptr), - _p_out_key_buf(nullptr), _p_in_ind_buf(nullptr), _p_out_ind_buf(nullptr) /*, _qs_kernel(), - _init_kernel(), _hist_kernel(), _scan_hist_kernel(), _glob_scan_hist_kernel(), - _paste_hist_kernel(), _reorder_kernel(), _find_first_negative_kernel(), - _reorder_negatives_kernel(), _store_kernel()*/ + : _k(0), _total_bits(0), _bits(0), _radix(0), _hist_buf_size(0), 
_glob_sum_buf_size(0), _n(0), + _input(nullptr), _values(nullptr), _indices(nullptr), _qs_idx_buf(), _qs_temp_buf(), + _hist_buf(), _glob_sum_buf(), _temp_buf(), _first_negative_idx_buf(), _in_key_buf(), + _out_key_buf(), _in_ind_buf(), _out_ind_buf(), _p_in_key_buf(nullptr), _p_out_key_buf(nullptr), + _p_in_ind_buf(nullptr), _p_out_ind_buf(nullptr) /*, _qs_kernel(), + _init_kernel(), _hist_kernel(), _scan_hist_kernel(), _glob_scan_hist_kernel(), + _paste_hist_kernel(), _reorder_kernel(), _find_first_negative_kernel(), + _reorder_negatives_kernel(), _store_kernel()*/ { } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp index 3215d01..0754fd8 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp @@ -53,7 +53,7 @@ using namespace arm_compute; using namespace arm_compute::misc::shape_calculator; CLTransposeConvLayer::CLTransposeConvLayer(std::shared_ptr memory_manager) - : _memory_manager(std::move(memory_manager)), _function() + : _memory_manager(std::move(memory_manager)), _function() { } @@ -105,20 +105,20 @@ Status CLTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); switch (CLTransposeConvLayer::get_deconvolution_method( - input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info)) + input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info)) { case DeconvolutionMethod::DIRECT: { // Validate direct convolution layer ARM_COMPUTE_RETURN_ON_ERROR(CLDirectTransposeConvLayer::validate( - input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info)); + input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info)); break; } case DeconvolutionMethod::GEMM: { // Validate gemm-based convolution layer ARM_COMPUTE_RETURN_ON_ERROR( - CLGEMMDeconvolutionLayer::validate(input, weights, bias, output, deconv_info)); + CLGEMMDeconvolutionLayer::validate(input, weights, bias, output, deconv_info)); break; } default: @@ -130,9 +130,9 @@ Status CLTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf } DeconvolutionMethod CLTransposeConvLayer::get_deconvolution_method( - const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, - ITensorInfo *output, const PadStrideInfo &deconv_info, unsigned int invalid_right, - unsigned int invalid_bottom, const WeightsInfo &weights_info) + const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, + ITensorInfo *output, const PadStrideInfo &deconv_info, unsigned int invalid_right, + unsigned int invalid_bottom, const WeightsInfo &weights_info) { ARM_COMPUTE_UNUSED(output, bias, weights_info); diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp index a123439..e212a03 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp @@ -58,7 +58,7 @@ namespace Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output) { ARM_COMPUTE_RETURN_ON_ERROR( - NEGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output)); + 
NEGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output)); return Status{}; } @@ -78,11 +78,11 @@ Status NEFullyConnectedHybridLayerReshapeWeights::validate(const ITensorInfo *in } NEFullyConnectedHybridLayer::NEFullyConnectedHybridLayer( - std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _reshape_weights_function(), _quant_input_kernel(), - _mm_gemmlowp(), _accumulate_biases_kernel(), _reshape_weights_output(), _quantized_input(), - _scale_factor(), _original_weights(nullptr), _are_weights_reshaped(false), - _accumulate_biases(false), _is_prepared(false) + std::shared_ptr memory_manager) + : _memory_group(std::move(memory_manager)), _reshape_weights_function(), _quant_input_kernel(), + _mm_gemmlowp(), _accumulate_biases_kernel(), _reshape_weights_output(), _quantized_input(), + _scale_factor(), _original_weights(nullptr), _are_weights_reshaped(false), + _accumulate_biases(false), _is_prepared(false) { } @@ -103,8 +103,8 @@ void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor // Perform validate step ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedHybridLayer::validate( - input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), - fc_info)); + input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), + fc_info)); _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; _accumulate_biases = false; @@ -132,10 +132,10 @@ void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor bool _is_fc_after_conv; if (is_batched_fc_layer) { - _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && - (std::equal(input->info()->tensor_shape().cbegin() + 3, - input->info()->tensor_shape().cend(), - output->info()->tensor_shape().cbegin() + 1)); + _is_fc_after_conv = + (TensorShape::num_max_dimensions >= 4) && + (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(), + output->info()->tensor_shape().cbegin() + 1)); } else { @@ -150,23 +150,23 @@ void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor { // Reshape the weights _reshape_weights_output.allocator()->init( - weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - compute_transposed_shape(*weights->info()))); + weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( + compute_transposed_shape(*weights->info()))); _reshape_weights_function.configure(weights_to_use, &_reshape_weights_output); weights_to_use = &_reshape_weights_output; } // Quantize input _quantized_input.allocator()->init( - input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type( - DataType::QASYMM8_SIGNED)); + input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type( + DataType::QASYMM8_SIGNED)); _scale_factor.allocator()->init( - TensorInfo(TensorShape{output->info()->dimension(1)}, 1, DataType::F32)); + TensorInfo(TensorShape{output->info()->dimension(1)}, 1, DataType::F32)); _quant_input_kernel.configure(input, &_quantized_input, &_scale_factor); // GEMM _gemmlowp_output.allocator()->init( - output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); + output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); configure_mm(&_quantized_input, weights_to_use, &_gemmlowp_output); // Multiply scale @@ -195,8 +195,8 @@ Status 
NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; const ITensorInfo &reshaped_weights = - TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - compute_transposed_shape(*weights))); + TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( + compute_transposed_shape(*weights))); // Configure accumulate biases kernel for non quantized asymmetric types if (biases != nullptr) @@ -217,7 +217,7 @@ Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe { // Validate reshape weights kernel ARM_COMPUTE_RETURN_ON_ERROR( - NEFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights)); + NEFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights)); weights_to_use = &reshaped_weights; } @@ -225,20 +225,19 @@ Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1)); // Validate quantization kernel - const ITensorInfo &quantized_input = - TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type( - DataType::QASYMM8_SIGNED)); + const ITensorInfo &quantized_input = TensorInfo( + input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::QASYMM8_SIGNED)); const ITensorInfo &scale_factor = TensorInfo(TensorShape{output->dimension(1)}, 1, DataType::F32); ARM_COMPUTE_RETURN_ON_ERROR( - NEQuantizationSymmetricKernel::validate(input, &quantized_input, &scale_factor)); + NEQuantizationSymmetricKernel::validate(input, &quantized_input, &scale_factor)); const ITensorInfo &gemmlowp_output = TensorInfo( - output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); + output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); // Validate matrix multiply kernel ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output)); ARM_COMPUTE_RETURN_ON_ERROR(NEMultiplyScaleFactorKernel::validate( - &gemmlowp_output, &scale_factor, output, weights->quantization_info().uniform().scale)); + &gemmlowp_output, &scale_factor, output, weights->quantization_info().uniform().scale)); return Status{}; } diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp index cb7557a..a639f29 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp @@ -69,14 +69,14 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I // Validate gemmlowp function ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate( - &input.clone()->set_quantization_info(input_quantization_info), - &weights.clone()->set_quantization_info(weights_quantization_info), nullptr, &output)); + &input.clone()->set_quantization_info(input_quantization_info), + &weights.clone()->set_quantization_info(weights_quantization_info), nullptr, &output)); } else { - ARM_COMPUTE_RETURN_ON_ERROR(NEGEMM::validate( - &input, &weights, nullptr, &output, 1.f, 0.0f, - GEMMInfo(false, false, false /* Reshape weights only for the first run */))); + ARM_COMPUTE_RETURN_ON_ERROR( + NEGEMM::validate(&input, &weights, nullptr, &output, 1.f, 0.0f, + GEMMInfo(false, 
false, false /* Reshape weights only for the first run */))); } return Status{}; @@ -84,12 +84,12 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I } // namespace NEFullyConnectedLayerEx::NEFullyConnectedLayerEx(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _flatten_kernel(), _convert_weights(), - _reshape_weights_function(), _mm_gemm(), _mm_gemmlowp(), _gemmlowp_output_stage(), - _accumulate_biases_kernel(), _flatten_output(), _gemmlowp_output(), - _converted_weights_output(), _reshape_weights_output(), _original_weights(nullptr), - _are_weights_converted(true), _are_weights_reshaped(false), _is_fc_after_conv(false), - _accumulate_biases(false), _is_quantized(false), _is_prepared(false) + : _memory_group(std::move(memory_manager)), _flatten_kernel(), _convert_weights(), + _reshape_weights_function(), _mm_gemm(), _mm_gemmlowp(), _gemmlowp_output_stage(), + _accumulate_biases_kernel(), _flatten_output(), _gemmlowp_output(), _converted_weights_output(), + _reshape_weights_output(), _original_weights(nullptr), _are_weights_converted(true), + _are_weights_reshaped(false), _is_fc_after_conv(false), _accumulate_biases(false), + _is_quantized(false), _is_prepared(false) { } @@ -105,9 +105,9 @@ void NEFullyConnectedLayerEx::configure_mm(const ITensor *input, const ITensor * const QuantizationInfo weights_quantization_info = weights->info()->quantization_info(); input->info()->set_quantization_info(QuantizationInfo( - input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); + input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); weights->info()->set_quantization_info(QuantizationInfo( - weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); + weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); // Configure gemmlowp function _mm_gemmlowp.configure(input, weights, nullptr, output); @@ -129,8 +129,8 @@ void NEFullyConnectedLayerEx::configure_conv_fc(const ITensor *input, const ITen ITensor *output) { ARM_COMPUTE_ERROR_ON( - (weights->info()->dimension(1) != - (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2)))); + (weights->info()->dimension(1) != + (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2)))); // If the fully connected layer is called after a convolution layer, the input tensor must be // linearized @@ -138,8 +138,7 @@ void NEFullyConnectedLayerEx::configure_conv_fc(const ITensor *input, const ITen // Initialize output tensor for flatten TensorShape shape_flatten = compute_flatten_shape(input->info()); _flatten_output.allocator()->init( - input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - shape_flatten)); + input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_flatten)); // Configure flatten kernel _memory_group.manage(&_flatten_output); @@ -169,8 +168,8 @@ void NEFullyConnectedLayerEx::configure(const ITensor *input, const ITensor *wei // Perform validate step ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedLayerEx::validate( - input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), - fc_info)); + input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), + fc_info)); _are_weights_converted = true; _are_weights_reshaped = fc_info.transpose_weights ? 
fc_info.are_weights_reshaped : true; @@ -183,8 +182,7 @@ void NEFullyConnectedLayerEx::configure(const ITensor *input, const ITensor *wei if (_is_quantized) { _gemmlowp_output.allocator()->init( - output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type( - DataType::S32)); + output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); } // Configure accumulate biases kernel for non quantized asymmetric types @@ -208,10 +206,10 @@ void NEFullyConnectedLayerEx::configure(const ITensor *input, const ITensor *wei const bool is_batched_fc_layer = output->info()->dimension(1) > 1; if (is_batched_fc_layer) { - _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && - (std::equal(input->info()->tensor_shape().cbegin() + 3, - input->info()->tensor_shape().cend(), - output->info()->tensor_shape().cbegin() + 1)); + _is_fc_after_conv = + (TensorShape::num_max_dimensions >= 4) && + (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(), + output->info()->tensor_shape().cbegin() + 1)); } else { @@ -284,16 +282,16 @@ Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor bool is_quantized = is_data_type_quantized_asymmetric(input->data_type()); const ITensorInfo &flatten_input = - TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - compute_flatten_shape(input))); + TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( + compute_flatten_shape(input))); const ITensorInfo &reshaped_weights = - TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( - compute_transposed_shape(*weights))); + TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( + compute_transposed_shape(*weights))); const ITensorInfo &converted_weights = - weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) - : TensorInfo(*reshaped_weights.clone()); + weights_reshaped ? 
TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) + : TensorInfo(*reshaped_weights.clone()); const ITensorInfo &gemmlowp_output = TensorInfo( - output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); + output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); // Configure accumulate biases kernel for non quantized asymmetric types if (biases != nullptr && !is_quantized) @@ -330,7 +328,7 @@ Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor { // Validate reshape weights kernel ARM_COMPUTE_RETURN_ON_ERROR( - NEFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights)); + NEFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights)); weights_to_use = &reshaped_weights; } @@ -338,7 +336,7 @@ Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor { // Validate convert weights kernel ARM_COMPUTE_RETURN_ON_ERROR(NEConvertFullyConnectedWeights::validate( - weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout)); + weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout)); weights_to_use = &converted_weights; } @@ -346,8 +344,8 @@ Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor { // Fully Connected layer after a Convolution Layer without batches ARM_COMPUTE_RETURN_ERROR_ON( - (weights_to_use->dimension(1) != - (input->dimension(0) * input->dimension(1) * input->dimension(2)))); + (weights_to_use->dimension(1) != + (input->dimension(0) * input->dimension(1) * input->dimension(2)))); // Validate flatten kernel ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayerKernel::validate(input, &flatten_input)); @@ -365,7 +363,7 @@ Status NEFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensor if (is_quantized) { ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate( - &gemmlowp_output, biases, output)); + &gemmlowp_output, biases, output)); } return Status{}; diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp index dc6c784..234c783 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedReshapingLayer.cpp @@ -56,7 +56,7 @@ void NEFullyConnectedReshapingLayer::configure(const arm_compute::ITensor *input assert(kernel_type == KernelType::PREPROCESSED_WEIGHTS); bool is_hybrid = input->info()->data_type() == DataType::F32 && - (weights->info()->data_type() == DataType::S8 || + (weights->info()->data_type() == DataType::QSYMM8 || weights->info()->data_type() == DataType::QASYMM8_SIGNED); if (is_hybrid) diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp index 16d74e6..451aa09 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp @@ -46,9 +46,9 @@ namespace arm_compute { NEInstanceNormalizationLayerEx::NEInstanceNormalizationLayerEx( - std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _normalization_kernel(), _is_nchw(false), - _permute_input(), _permute_output(), _permuted_input(), 
_permuted_output() + std::shared_ptr memory_manager) + : _memory_group(std::move(memory_manager)), _normalization_kernel(), _is_nchw(false), + _permute_input(), _permute_output(), _permuted_input(), _permuted_output() { } @@ -88,8 +88,8 @@ Status NEInstanceNormalizationLayerEx::validate(const ITensorInfo *input, const float epsilon) { return NEInstanceNormalizationLayerKernelEx::validate( - &input->clone()->set_data_layout(DataLayout::NCHW), - &output->clone()->set_data_layout(DataLayout::NCHW), gamma, beta, epsilon); + &input->clone()->set_data_layout(DataLayout::NCHW), + &output->clone()->set_data_layout(DataLayout::NCHW), gamma, beta, epsilon); } void NEInstanceNormalizationLayerEx::run() diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp index cb1a263..c45c335 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp @@ -49,8 +49,8 @@ using namespace arm_compute; NEReduceOperation::NEReduceOperation(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(), - _reduction_ops(), _keep_dims() + : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(), + _reduction_ops(), _keep_dims() { } @@ -125,7 +125,7 @@ void NEReduceOperation::configure(ITensor *input, const Coordinates &reduction_a for (unsigned int i = 0; i < _reduction_ops; ++i) { TensorShape out_shape = - i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape(); + i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape(); out_shape.set(axis_local[i], 1); auto in = (i == 0) ? input : (&_reduced_outs[i - 1]); diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp index 26a8879..b21717e 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp @@ -47,8 +47,8 @@ using namespace arm_compute; NEReduceSum::NEReduceSum(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(), - _reduction_ops(), _keep_dims() + : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(), + _reduction_ops(), _keep_dims() { } @@ -122,7 +122,7 @@ void NEReduceSum::configure(ITensor *input, const Coordinates &reduction_axis, b for (unsigned int i = 0; i < _reduction_ops; ++i) { TensorShape out_shape = - i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape(); + i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape(); out_shape.set(axis_local[i], 1); auto in = (i == 0) ? 
input : (&_reduced_outs[i - 1]); @@ -135,7 +135,7 @@ void NEReduceSum::configure(ITensor *input, const Coordinates &reduction_axis, b _reduced_outs[i].allocator()->init(TensorInfo(out_shape, input->info()->num_channels(), input->info()->data_type(), input->info()->quantization_info()) - .set_data_layout(input->info()->data_layout())); + .set_data_layout(input->info()->data_layout())); _memory_group.manage(&_reduced_outs[i]); _reduction_kernels[i].configure(in, &_reduced_outs[i], axis_local[i], ReductionOperation::SUM); diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp index aa165cc..5031107 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp @@ -51,17 +51,9 @@ namespace arm_compute { NETransposeConvLayer::NETransposeConvLayer(std::shared_ptr memory_manager) // NOLINT - : _memory_group(std::move(memory_manager)), - _conv_f(), - _upsample_f(), - _flip_weights(), - _scaled_output(), - _weights_flipped(), - _flip_axis(), - _original_weights(nullptr), - _input(nullptr), - _info(), - _is_prepared(false) + : _memory_group(std::move(memory_manager)), _conv_f(), _upsample_f(), _flip_weights(), + _scaled_output(), _weights_flipped(), _flip_axis(), _original_weights(nullptr), _input(nullptr), + _info(), _is_prepared(false) { } @@ -76,15 +68,15 @@ Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input); const unsigned int width_idx = - get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH); + get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH); const unsigned int height_idx = - get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT); + get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT); ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) != weights->dimension(height_idx)); ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) < 1); auto out_dims = transposeconv_output_dimensions( - input->dimension(width_idx), input->dimension(height_idx), weights->dimension(width_idx), - weights->dimension(height_idx), info, invalid_right, invalid_bottom); + input->dimension(width_idx), input->dimension(height_idx), weights->dimension(width_idx), + weights->dimension(height_idx), info, invalid_right, invalid_bottom); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); if (bias != nullptr) @@ -117,24 +109,24 @@ Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInf unsigned int pad_right = 0; unsigned int pad_top = 0; unsigned int pad_bottom = 0; - const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( - *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top, - pad_bottom); + const TensorShape scale_out_shape = + compute_transposeconv_upsampled_shape(*input, *weights, info, out_dims, invalid_right, + invalid_bottom, pad_left, pad_right, pad_top, pad_bottom); TensorInfo scale_out_info( - input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape)); + input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape)); const PadStrideInfo conv_info(1, 1, 0, 0, 
0, 0, DimensionRoundingType::CEIL); const unsigned int batches_idx = - get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES); + get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES); const unsigned int channel_idx = - get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::CHANNEL); + get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::CHANNEL); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(batches_idx) != scale_out_info.dimension(batches_idx)); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(channel_idx) != scale_out_info.dimension(channel_idx)); - ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayer::validate(&scale_out_info, weights, bias, output, - conv_info, WeightsInfo())); + ARM_COMPUTE_RETURN_ON_ERROR( + NEConvolutionLayer::validate(&scale_out_info, weights, bias, output, conv_info, WeightsInfo())); return Status{}; } @@ -146,21 +138,21 @@ void NETransposeConvLayer::configure(ITensor *input, const ITensor *weights, con // Perform validation step ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_ERROR_THROW_ON(NETransposeConvLayer::validate( - input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(), - info, invalid_right, invalid_bottom)); + input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(), + info, invalid_right, invalid_bottom)); const DataLayout data_layout = input->info()->data_layout(); const unsigned int width_idx = - get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const unsigned int height_idx = - get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); auto out_dims = transposeconv_output_dimensions( - input->info()->dimension(width_idx), input->info()->dimension(height_idx), - weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info, - invalid_right, invalid_bottom); + input->info()->dimension(width_idx), input->info()->dimension(height_idx), + weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info, + invalid_right, invalid_bottom); const TensorShape output_shape = - compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info()); + compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info()); _input = input; _original_weights = weights; @@ -188,8 +180,8 @@ void NETransposeConvLayer::configure(ITensor *input, const ITensor *weights, con const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape( - *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left, - pad_right, pad_top, pad_bottom); + *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left, + pad_right, pad_top, pad_bottom); const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom, DimensionRoundingType::FLOOR); diff --git a/compute/cker/include/cker/CpuBackendThreadpool.h b/compute/cker/include/cker/CpuBackendThreadpool.h new file mode 100644 index 0000000..cc6a9db --- /dev/null +++ b/compute/cker/include/cker/CpuBackendThreadpool.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_CKER_CPU_BACKEND_THREADPOOL_H_ +#define __NNFW_CKER_CPU_BACKEND_THREADPOOL_H_ + +#include // from @ruy +#include // from @ruy + +namespace nnfw +{ +namespace cker +{ +namespace cpu_backend_threadpool +{ + +using Task = ruy::Task; + +template +void Execute(int tasks_count, TaskType *tasks, ruy::Context *ruy_context) +{ + assert(tasks_count <= ruy_context->max_num_threads()); + ruy_context->mutable_thread_pool()->Execute(tasks_count, tasks); +} + +} // namespace cpu_backend_threadpool +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_CPU_BACKEND_THREADPOOL_H_ diff --git a/compute/cker/include/cker/NeonTensorUtils.h b/compute/cker/include/cker/NeonTensorUtils.h index e080406..8bf0bee 100644 --- a/compute/cker/include/cker/NeonTensorUtils.h +++ b/compute/cker/include/cker/NeonTensorUtils.h @@ -131,7 +131,7 @@ inline const int8_t *ShuffleVectors(const int8_t *vectors, const int n_batch, co const int kWeightsPerUint32 = 4; int8 *shuffled_vectors = reinterpret_cast( - aligned_alloc(kWeightsPerUint32, n_batch * m_cols, shuffled_vectors_free)); + aligned_alloc(kWeightsPerUint32, n_batch * m_cols, shuffled_vectors_free)); for (int i = 0; i < n_batch; i += 4) { @@ -145,25 +145,25 @@ inline const int8_t *ShuffleVectors(const int8_t *vectors, const int n_batch, co while (unshuffled_vec0_ptr != end_vec0_ptr) { asm volatile( - // This code path requires that (n_cols % 16) == 0 so we can safely - // read in 16-byte chunks from each row. - "ld1 {v0.16b}, [%[unshuffled_vec0_ptr]], #16\n" - "ld1 {v1.16b}, [%[unshuffled_vec1_ptr]], #16\n" - "ld1 {v2.16b}, [%[unshuffled_vec2_ptr]], #16\n" - "ld1 {v3.16b}, [%[unshuffled_vec3_ptr]], #16\n" - - "st4 {v0.s, v1.s, v2.s, v3.s}[0], [%[shuffled_vectors_ptr]], #16\n" - "st4 {v0.s, v1.s, v2.s, v3.s}[1], [%[shuffled_vectors_ptr]], #16\n" - "st4 {v0.s, v1.s, v2.s, v3.s}[2], [%[shuffled_vectors_ptr]], #16\n" - "st4 {v0.s, v1.s, v2.s, v3.s}[3], [%[shuffled_vectors_ptr]], #16\n" - - : [unshuffled_vec0_ptr] "+r"(unshuffled_vec0_ptr), - [unshuffled_vec1_ptr] "+r"(unshuffled_vec1_ptr), - [unshuffled_vec2_ptr] "+r"(unshuffled_vec2_ptr), - [unshuffled_vec3_ptr] "+r"(unshuffled_vec3_ptr), - [shuffled_vectors_ptr] "+r"(shuffled_vectors_ptr) - : - : "v0", "v1", "v2", "v3", "cc", "memory"); + // This code path requires that (n_cols % 16) == 0 so we can safely + // read in 16-byte chunks from each row. 
+ "ld1 {v0.16b}, [%[unshuffled_vec0_ptr]], #16\n" + "ld1 {v1.16b}, [%[unshuffled_vec1_ptr]], #16\n" + "ld1 {v2.16b}, [%[unshuffled_vec2_ptr]], #16\n" + "ld1 {v3.16b}, [%[unshuffled_vec3_ptr]], #16\n" + + "st4 {v0.s, v1.s, v2.s, v3.s}[0], [%[shuffled_vectors_ptr]], #16\n" + "st4 {v0.s, v1.s, v2.s, v3.s}[1], [%[shuffled_vectors_ptr]], #16\n" + "st4 {v0.s, v1.s, v2.s, v3.s}[2], [%[shuffled_vectors_ptr]], #16\n" + "st4 {v0.s, v1.s, v2.s, v3.s}[3], [%[shuffled_vectors_ptr]], #16\n" + + : [ unshuffled_vec0_ptr ] "+r"(unshuffled_vec0_ptr), + [ unshuffled_vec1_ptr ] "+r"(unshuffled_vec1_ptr), + [ unshuffled_vec2_ptr ] "+r"(unshuffled_vec2_ptr), + [ unshuffled_vec3_ptr ] "+r"(unshuffled_vec3_ptr), + [ shuffled_vectors_ptr ] "+r"(shuffled_vectors_ptr) + : + : "v0", "v1", "v2", "v3", "cc", "memory"); } } @@ -204,104 +204,104 @@ static void DotprodMatrixBatchFourVectorMultiplyAccumulate(const int8_t *__restr const int8 *mat_ptr3 = matrix + ((row + 3) * m_cols); asm volatile( - // Zero out the accumulator registers. - "dup v0.4s, wzr\n" - "dup v1.4s, wzr\n" - "dup v2.4s, wzr\n" - "dup v3.4s, wzr\n" - - "1:\n" // batch_cols_loop - - // Read 16 more bytes from a pair of matrix rows. - "ld1 {v12.16b}, [%[mat_ptr0]], #16\n" - - // Prefetch two rows ahead. - "prfm pldl1strm, [%[mat_ptr2]]\n" - "prfm pldl1strm, [%[mat_ptr3]]\n" - - // Read from input vectors 4 times; 64 bytes total. - // Each 16-byte register contains parts of 4 vectors; see the - // shuffle logic above. - - // From Benoit, places to look in the future: - // - Move load instructions further from sdot - // - Switch loop use-then-reload - // - Do partial unrolling to use register space better - "ld1 {v8.16b}, [%[vec_ptr]], #16\n" - ".word 0x4f8ce100 // sdot v0.4s, v8.16b, v12.4b[0]\n" - "ld1 {v9.16b}, [%[vec_ptr]], #16\n" - ".word 0x4face121 // sdot v1.4s, v9.16b, v12.4b[1]\n" - "ld1 {v10.16b}, [%[vec_ptr]], #16\n" - ".word 0x4f8ce940 // sdot v0.4s, v10.16b, v12.4b[2]\n" - "ld1 {v11.16b}, [%[vec_ptr]], #16\n" - ".word 0x4face961 // sdot v1.4s, v11.16b, v12.4b[3]\n" - - // Update prefetch pointers. - "add %[mat_ptr2], %[mat_ptr2], #16\n" - "add %[mat_ptr3], %[mat_ptr3], #16\n" - - // Re-use those vectors for the next row as well. - "ld1 {v13.16b}, [%[mat_ptr1]], #16\n" - ".word 0x4f8de102 // sdot v2.4s, v8.16b, v13.4b[0]\n" - ".word 0x4fade123 // sdot v3.4s, v9.16b, v13.4b[1]\n" - ".word 0x4f8de942 // sdot v2.4s, v10.16b, v13.4b[2]\n" - ".word 0x4fade963 // sdot v3.4s, v11.16b, v13.4b[3]\n" - - // If we're not done with these rows, continue. - "cmp %[mat_ptr0], %[mat_ptr0_end]\n" - "bne 1b\n" // batch_cols_loop - - // Done with the rows, sum the results. - "add v0.4s, v0.4s, v1.4s\n" - "add v2.4s, v2.4s, v3.4s\n" - - // Convert the per-vector sums to floating point. - "scvtf v0.4s, v0.4s\n" - "scvtf v1.4s, v2.4s\n" - - // Fetch scale factors. - "ld1 {v4.4s}, [%[scaling_factors_ptr]]\n" - - // Multiply scale factors times sums. - "fmul v0.4s, v4.4s, v0.4s\n" - "fmul v1.4s, v4.4s, v1.4s\n" - - // Load previous result values. - // The result position is: - // result[batch * m_rows + row] - // Here that is factored into: - // result_ptr = result + row - // *result_ptr = res[0] - // (uint8*)result_ptr += (m_rows * sizeof(float)) - // *result_ptr = res[1] - // ... 
- // Since we're reading two rows at a time, though, we read both - // result[batch * m_rows + row] - // and - // result[batch * m_rows + row + 1] - "ld2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n" - "ld2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n" - "ld2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n" - "ld2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n" - - // Go back to the starting position (subtract wide_rows * 4). - "sub %[result_ptr], %[result_ptr], %[wide_rows], lsl #2\n" - - // Add previous result values. - "fadd v9.4s, v9.4s, v0.4s\n" - "fadd v10.4s, v10.4s, v1.4s\n" - - // Store results. - "st2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n" - "st2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n" - "st2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n" - "st2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n" - : [mat_ptr0] "+r"(mat_ptr0), [mat_ptr1] "+r"(mat_ptr1), [vec_ptr] "+r"(vec_ptr), - [result_ptr] "+r"(result_ptr), [mat_ptr2] "+r"(mat_ptr2), [mat_ptr3] "+r"(mat_ptr3) - : [mat_ptr0_end] "r"(mat_ptr0_end), [scaling_factors_ptr] "r"(scaling_factors_ptr), - [wide_rows] "r"(wide_rows) - : "x0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", - "v13", "cc", "memory"); + // Zero out the accumulator registers. + "dup v0.4s, wzr\n" + "dup v1.4s, wzr\n" + "dup v2.4s, wzr\n" + "dup v3.4s, wzr\n" + + "1:\n" // batch_cols_loop + + // Read 16 more bytes from a pair of matrix rows. + "ld1 {v12.16b}, [%[mat_ptr0]], #16\n" + + // Prefetch two rows ahead. + "prfm pldl1strm, [%[mat_ptr2]]\n" + "prfm pldl1strm, [%[mat_ptr3]]\n" + + // Read from input vectors 4 times; 64 bytes total. + // Each 16-byte register contains parts of 4 vectors; see the + // shuffle logic above. + + // From Benoit, places to look in the future: + // - Move load instructions further from sdot + // - Switch loop use-then-reload + // - Do partial unrolling to use register space better + "ld1 {v8.16b}, [%[vec_ptr]], #16\n" + ".word 0x4f8ce100 // sdot v0.4s, v8.16b, v12.4b[0]\n" + "ld1 {v9.16b}, [%[vec_ptr]], #16\n" + ".word 0x4face121 // sdot v1.4s, v9.16b, v12.4b[1]\n" + "ld1 {v10.16b}, [%[vec_ptr]], #16\n" + ".word 0x4f8ce940 // sdot v0.4s, v10.16b, v12.4b[2]\n" + "ld1 {v11.16b}, [%[vec_ptr]], #16\n" + ".word 0x4face961 // sdot v1.4s, v11.16b, v12.4b[3]\n" + + // Update prefetch pointers. + "add %[mat_ptr2], %[mat_ptr2], #16\n" + "add %[mat_ptr3], %[mat_ptr3], #16\n" + + // Re-use those vectors for the next row as well. + "ld1 {v13.16b}, [%[mat_ptr1]], #16\n" + ".word 0x4f8de102 // sdot v2.4s, v8.16b, v13.4b[0]\n" + ".word 0x4fade123 // sdot v3.4s, v9.16b, v13.4b[1]\n" + ".word 0x4f8de942 // sdot v2.4s, v10.16b, v13.4b[2]\n" + ".word 0x4fade963 // sdot v3.4s, v11.16b, v13.4b[3]\n" + + // If we're not done with these rows, continue. + "cmp %[mat_ptr0], %[mat_ptr0_end]\n" + "bne 1b\n" // batch_cols_loop + + // Done with the rows, sum the results. + "add v0.4s, v0.4s, v1.4s\n" + "add v2.4s, v2.4s, v3.4s\n" + + // Convert the per-vector sums to floating point. + "scvtf v0.4s, v0.4s\n" + "scvtf v1.4s, v2.4s\n" + + // Fetch scale factors. + "ld1 {v4.4s}, [%[scaling_factors_ptr]]\n" + + // Multiply scale factors times sums. + "fmul v0.4s, v4.4s, v0.4s\n" + "fmul v1.4s, v4.4s, v1.4s\n" + + // Load previous result values. + // The result position is: + // result[batch * m_rows + row] + // Here that is factored into: + // result_ptr = result + row + // *result_ptr = res[0] + // (uint8*)result_ptr += (m_rows * sizeof(float)) + // *result_ptr = res[1] + // ... 
+ // Since we're reading two rows at a time, though, we read both + // result[batch * m_rows + row] + // and + // result[batch * m_rows + row + 1] + "ld2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n" + "ld2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n" + "ld2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n" + "ld2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n" + + // Go back to the starting position (subtract wide_rows * 4). + "sub %[result_ptr], %[result_ptr], %[wide_rows], lsl #2\n" + + // Add previous result values. + "fadd v9.4s, v9.4s, v0.4s\n" + "fadd v10.4s, v10.4s, v1.4s\n" + + // Store results. + "st2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n" + "st2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n" + "st2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n" + "st2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n" + : [ mat_ptr0 ] "+r"(mat_ptr0), [ mat_ptr1 ] "+r"(mat_ptr1), [ vec_ptr ] "+r"(vec_ptr), + [ result_ptr ] "+r"(result_ptr), [ mat_ptr2 ] "+r"(mat_ptr2), [ mat_ptr3 ] "+r"(mat_ptr3) + : [ mat_ptr0_end ] "r"(mat_ptr0_end), [ scaling_factors_ptr ] "r"(scaling_factors_ptr), + [ wide_rows ] "r"(wide_rows) + : "x0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", + "v13", "cc", "memory"); } } @@ -309,9 +309,9 @@ static void DotprodMatrixBatchFourVectorMultiplyAccumulate(const int8_t *__restr } static void DotprodMatrixBatchFourVectorMultiplyAccumulate( - const int8_t *__restrict__ matrix, const int m_rows, const int m_cols, const int8_t *vectors, - const float *scaling_factors, int n_batch, float *__restrict__ result, - const float *per_channel_scale, const int32_t *input_offset, int32_t *row_sums) + const int8_t *__restrict__ matrix, const int m_rows, const int m_cols, const int8_t *vectors, + const float *scaling_factors, int n_batch, float *__restrict__ result, + const float *per_channel_scale, const int32_t *input_offset, int32_t *row_sums) { void *shuffled_vectors_free; const int8_t *shuffled_vectors = ShuffleVectors(vectors, n_batch, m_cols, &shuffled_vectors_free); @@ -332,102 +332,102 @@ static void DotprodMatrixBatchFourVectorMultiplyAccumulate( const int32_t *batch_offsets_ptr = input_offset + batch; const int32_t is_channel_scale_nullptr = per_channel_scale == nullptr; const int32_t is_row_sums_nullptr = row_sums_ptr == nullptr; - asm volatile("dup v0.4s, wzr\n" - "dup v1.4s, wzr\n" - "dup v2.4s, wzr\n" - "dup v3.4s, wzr\n" - // Load zero points. - "ld1 {v7.4s}, [%[batch_offsets_ptr]]\n" - "ld1 {v4.4s}, [%[scaling_factors_ptr]]\n" - // Zero out zero point accumulators. - "dup v14.4s, wzr\n" - "dup v15.4s, wzr\n" - - // Load per channel scales if not null. 
- "cmp %w[is_channel_scale_nullptr], #0\n" - "bne 1f\n" - "ld1r {v16.4s}, [%[channel_scales_ptr]], #4\n" - "ld1r {v17.4s}, [%[channel_scales_ptr]]\n" - "fmul v16.4s, v16.4s, v4.4s\n" - "fmul v17.4s, v17.4s, v4.4s\n" - "b 2f\n" - "1:\n" - "mov v16.16b, v4.16b\n" - "mov v17.16b, v4.16b\n" - "2:\n" - "ld1 {v12.16b}, [%[mat_ptr0]], #16\n" - "ld1 {v8.16b}, [%[vec_ptr]], #16\n" - ".word 0x4f8ce100 // sdot v0.4s, v8.16b, v12.4b[0]\n" - "ld1 {v9.16b}, [%[vec_ptr]], #16\n" - ".word 0x4face121 // sdot v1.4s, v9.16b, v12.4b[1]\n" - "ld1 {v10.16b}, [%[vec_ptr]], #16\n" - ".word 0x4f8ce940 // sdot v0.4s, v10.16b, v12.4b[2]\n" - "ld1 {v11.16b}, [%[vec_ptr]], #16\n" - ".word 0x4face961 // sdot v1.4s, v11.16b, v12.4b[3]\n" - "ld1 {v13.16b}, [%[mat_ptr1]], #16\n" - ".word 0x4f8de102 // sdot v2.4s, v8.16b, v13.4b[0]\n" - ".word 0x4fade123 // sdot v3.4s, v9.16b, v13.4b[1]\n" - ".word 0x4f8de942 // sdot v2.4s, v10.16b, v13.4b[2]\n" - ".word 0x4fade963 // sdot v3.4s, v11.16b, v13.4b[3]\n" - "cmp %w[is_row_sums_nullptr], #1\n" - "bne 3f\n" - // Accumulate row_sums for zero point calculations. - "saddlp v12.8h, v12.16b\n" - "saddlp v13.8h, v13.16b\n" - "sadalp v14.4s, v12.8h\n" - "sadalp v15.4s, v13.8h\n" - "3:\n" - "cmp %[mat_ptr0], %[mat_ptr0_end]\n" - "bne 2b\n" - "add v0.4s, v0.4s, v1.4s\n" - "add v2.4s, v2.4s, v3.4s\n" - - "cmp %w[is_row_sums_nullptr], #1\n" - "bne 4f\n" - // Calculate zero point offsets. - "addv s14, v14.4s\n" - "addv s15, v15.4s\n" - "dup v14.4s, v14.s[0]\n" - "dup v15.4s, v15.s[0]\n" - "b 5f\n" - "4:\n" - "ld1r {v14.4s}, [%[row_sums_ptr]], #4\n" - "ld1r {v15.4s}, [%[row_sums_ptr]]\n" - "5:\n" - - "mul v14.4s, v14.4s, v7.4s\n" - "mul v15.4s, v15.4s, v7.4s\n" - "sub v0.4s, v0.4s, v14.4s\n" - "sub v2.4s, v2.4s, v15.4s\n" - - "scvtf v0.4s, v0.4s\n" - "scvtf v1.4s, v2.4s\n" - - // Multiply scale. - "fmul v0.4s, v16.4s, v0.4s\n" - "fmul v1.4s, v17.4s, v1.4s\n" - - "ld2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n" - "ld2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n" - "ld2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n" - "ld2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n" - "sub %[result_ptr], %[result_ptr], %[wide_rows], lsl #2\n" - "fadd v9.4s, v9.4s, v0.4s\n" - "fadd v10.4s, v10.4s, v1.4s\n" - "st2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n" - "st2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n" - "st2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n" - "st2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n" - : [mat_ptr0] "+r"(mat_ptr0), [mat_ptr1] "+r"(mat_ptr1), [vec_ptr] "+r"(vec_ptr), - [result_ptr] "+r"(result_ptr), [row_sums_ptr] "+r"(row_sums_ptr) - : [mat_ptr0_end] "r"(mat_ptr0_end), - [scaling_factors_ptr] "r"(scaling_factors_ptr), [wide_rows] "r"(wide_rows), - [channel_scales_ptr] "r"(channel_scales_ptr), - [batch_offsets_ptr] "r"(batch_offsets_ptr), - [is_channel_scale_nullptr] "r"(is_channel_scale_nullptr), - [is_row_sums_nullptr] "r"(is_row_sums_nullptr) - : "x0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", - "v12", "v13", "v14", "v15", "v16", "v17", "w0", "w1", "cc", "memory"); + asm volatile( + "dup v0.4s, wzr\n" + "dup v1.4s, wzr\n" + "dup v2.4s, wzr\n" + "dup v3.4s, wzr\n" + // Load zero points. + "ld1 {v7.4s}, [%[batch_offsets_ptr]]\n" + "ld1 {v4.4s}, [%[scaling_factors_ptr]]\n" + // Zero out zero point accumulators. + "dup v14.4s, wzr\n" + "dup v15.4s, wzr\n" + + // Load per channel scales if not null. 
+ "cmp %w[is_channel_scale_nullptr], #0\n" + "bne 1f\n" + "ld1r {v16.4s}, [%[channel_scales_ptr]], #4\n" + "ld1r {v17.4s}, [%[channel_scales_ptr]]\n" + "fmul v16.4s, v16.4s, v4.4s\n" + "fmul v17.4s, v17.4s, v4.4s\n" + "b 2f\n" + "1:\n" + "mov v16.16b, v4.16b\n" + "mov v17.16b, v4.16b\n" + "2:\n" + "ld1 {v12.16b}, [%[mat_ptr0]], #16\n" + "ld1 {v8.16b}, [%[vec_ptr]], #16\n" + ".word 0x4f8ce100 // sdot v0.4s, v8.16b, v12.4b[0]\n" + "ld1 {v9.16b}, [%[vec_ptr]], #16\n" + ".word 0x4face121 // sdot v1.4s, v9.16b, v12.4b[1]\n" + "ld1 {v10.16b}, [%[vec_ptr]], #16\n" + ".word 0x4f8ce940 // sdot v0.4s, v10.16b, v12.4b[2]\n" + "ld1 {v11.16b}, [%[vec_ptr]], #16\n" + ".word 0x4face961 // sdot v1.4s, v11.16b, v12.4b[3]\n" + "ld1 {v13.16b}, [%[mat_ptr1]], #16\n" + ".word 0x4f8de102 // sdot v2.4s, v8.16b, v13.4b[0]\n" + ".word 0x4fade123 // sdot v3.4s, v9.16b, v13.4b[1]\n" + ".word 0x4f8de942 // sdot v2.4s, v10.16b, v13.4b[2]\n" + ".word 0x4fade963 // sdot v3.4s, v11.16b, v13.4b[3]\n" + "cmp %w[is_row_sums_nullptr], #1\n" + "bne 3f\n" + // Accumulate row_sums for zero point calculations. + "saddlp v12.8h, v12.16b\n" + "saddlp v13.8h, v13.16b\n" + "sadalp v14.4s, v12.8h\n" + "sadalp v15.4s, v13.8h\n" + "3:\n" + "cmp %[mat_ptr0], %[mat_ptr0_end]\n" + "bne 2b\n" + "add v0.4s, v0.4s, v1.4s\n" + "add v2.4s, v2.4s, v3.4s\n" + + "cmp %w[is_row_sums_nullptr], #1\n" + "bne 4f\n" + // Calculate zero point offsets. + "addv s14, v14.4s\n" + "addv s15, v15.4s\n" + "dup v14.4s, v14.s[0]\n" + "dup v15.4s, v15.s[0]\n" + "b 5f\n" + "4:\n" + "ld1r {v14.4s}, [%[row_sums_ptr]], #4\n" + "ld1r {v15.4s}, [%[row_sums_ptr]]\n" + "5:\n" + + "mul v14.4s, v14.4s, v7.4s\n" + "mul v15.4s, v15.4s, v7.4s\n" + "sub v0.4s, v0.4s, v14.4s\n" + "sub v2.4s, v2.4s, v15.4s\n" + + "scvtf v0.4s, v0.4s\n" + "scvtf v1.4s, v2.4s\n" + + // Multiply scale. + "fmul v0.4s, v16.4s, v0.4s\n" + "fmul v1.4s, v17.4s, v1.4s\n" + + "ld2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n" + "ld2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n" + "ld2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n" + "ld2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n" + "sub %[result_ptr], %[result_ptr], %[wide_rows], lsl #2\n" + "fadd v9.4s, v9.4s, v0.4s\n" + "fadd v10.4s, v10.4s, v1.4s\n" + "st2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n" + "st2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n" + "st2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n" + "st2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n" + : [ mat_ptr0 ] "+r"(mat_ptr0), [ mat_ptr1 ] "+r"(mat_ptr1), [ vec_ptr ] "+r"(vec_ptr), + [ result_ptr ] "+r"(result_ptr), [ row_sums_ptr ] "+r"(row_sums_ptr) + : [ mat_ptr0_end ] "r"(mat_ptr0_end), [ scaling_factors_ptr ] "r"(scaling_factors_ptr), + [ wide_rows ] "r"(wide_rows), [ channel_scales_ptr ] "r"(channel_scales_ptr), + [ batch_offsets_ptr ] "r"(batch_offsets_ptr), + [ is_channel_scale_nullptr ] "r"(is_channel_scale_nullptr), + [ is_row_sums_nullptr ] "r"(is_row_sums_nullptr) + : "x0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", + "v13", "v14", "v15", "v16", "v17", "w0", "w1", "cc", "memory"); } } @@ -458,9 +458,9 @@ static void DotprodMatrixBatchFourVectorMultiplyAccumulate( // We don't use this kernel when n_batch = 1 because the baseline kernel // is fine for that case. 
inline void DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate( - const int8_t *__restrict__ matrix, const int m_rows, const int m_cols, const int8_t *vectors, - const float *scaling_factors, int n_batch, float *__restrict__ result, - const float *per_channel_scale, const int32_t *input_offset, int32_t *row_sums) + const int8_t *__restrict__ matrix, const int m_rows, const int m_cols, const int8_t *vectors, + const float *scaling_factors, int n_batch, float *__restrict__ result, + const float *per_channel_scale, const int32_t *input_offset, int32_t *row_sums) { const int kWeightsPerUint32 = 4; @@ -475,14 +475,14 @@ inline void DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate( void *padded_vectors_free; const int padded_vectors_size = batch_round_up * m_cols; int8_t *padded_vectors = reinterpret_cast( - aligned_alloc(kWeightsPerUint32, padded_vectors_size, &padded_vectors_free)); + aligned_alloc(kWeightsPerUint32, padded_vectors_size, &padded_vectors_free)); memset(padded_vectors, 0, padded_vectors_size); void *padded_result_free; const int result_size = n_batch * m_rows * sizeof(float); const int padded_result_size = batch_round_up * m_rows * sizeof(float); float *padded_result = reinterpret_cast( - aligned_alloc(kWeightsPerUint32, padded_result_size, &padded_result_free)); + aligned_alloc(kWeightsPerUint32, padded_result_size, &padded_result_free)); memcpy(padded_result, result, result_size); memset(reinterpret_cast(padded_result) + result_size, 0, padded_result_size - result_size); @@ -494,7 +494,7 @@ inline void DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate( void *padded_scaling_factors_free; const int padded_scaling_factors_size = batch_round_up * sizeof(float); float *padded_scaling_factors = reinterpret_cast( - aligned_alloc(kWeightsPerUint32, padded_scaling_factors_size, &padded_scaling_factors_free)); + aligned_alloc(kWeightsPerUint32, padded_scaling_factors_size, &padded_scaling_factors_free)); assert(static_cast(n_batch * sizeof(float)) <= padded_scaling_factors_size); assert(static_cast(batch_round_up * sizeof(float)) <= padded_scaling_factors_size); memset(padded_scaling_factors, 0, batch_round_up * sizeof(float)); @@ -505,7 +505,7 @@ inline void DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate( void *padded_input_offset_free; const int padded_input_offset_size = batch_round_up * sizeof(int32_t); int32_t *padded_input_offset = reinterpret_cast( - aligned_alloc(kWeightsPerUint32, padded_input_offset_size, &padded_input_offset_free)); + aligned_alloc(kWeightsPerUint32, padded_input_offset_size, &padded_input_offset_free)); assert(static_cast(n_batch * sizeof(int32_t)) <= padded_input_offset_size); assert(static_cast(batch_round_up * sizeof(int32_t)) <= padded_input_offset_size); memset(padded_input_offset, 0, batch_round_up * sizeof(int32_t)); @@ -513,8 +513,8 @@ inline void DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate( // Call the main kernel. 
DotprodMatrixBatchFourVectorMultiplyAccumulate( - matrix, m_rows, m_cols, padded_vectors, padded_scaling_factors, batch_round_up, - padded_result, per_channel_scale, padded_input_offset, row_sums); + matrix, m_rows, m_cols, padded_vectors, padded_scaling_factors, batch_round_up, padded_result, + per_channel_scale, padded_input_offset, row_sums); free(padded_input_offset_free); } @@ -533,13 +533,13 @@ inline void DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate( } inline void DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate( - const int8_t *__restrict__ matrix, const int m_rows, const int m_cols, const int8_t *vectors, - const float *scaling_factors, int n_batch, float *__restrict__ result) + const int8_t *__restrict__ matrix, const int m_rows, const int m_cols, const int8_t *vectors, + const float *scaling_factors, int n_batch, float *__restrict__ result) { DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate( - matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result, - /*per_channel_scale=*/nullptr, /*input_offset=*/nullptr, - /*row_sums=*/nullptr); + matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result, + /*per_channel_scale=*/nullptr, /*input_offset=*/nullptr, + /*row_sums=*/nullptr); } #endif // __aarch64__ @@ -736,7 +736,7 @@ inline void NeonSymmetricQuantizeFloats(const float *values, const int size, for (int i = postamble_start; i < size; ++i) { const int32_t quantized_value = - static_cast(std::round(scaling_factor_inv * values[i])); + static_cast(std::round(scaling_factor_inv * values[i])); quantized_values[i] = std::min(kScale, std::max(-kScale, quantized_value)); } } @@ -830,7 +830,7 @@ inline void NeonMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ m // Here the assumption is that each buffer is 4-byte aligned. Otherwise, // performance may suffer significantly. assert( // NOLINT - ((uintptr_t)(&row_ptr[col]) & (kWeightsPerUint32 - 1)) == 0); + ((uintptr_t)(&row_ptr[col]) & (kWeightsPerUint32 - 1)) == 0); const int8x16_t s1_8x16 = vld1q_s8((const int8_t *)(aligned_vec + col)); const int8x16_t s2_8x16 = vld1q_s8((const int8_t *)(row_ptr + col)); // Multiply the low bits (i.e. the lower 8 8bit numbers in the @@ -855,7 +855,7 @@ inline void NeonMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ m // Here the assumption is that each buffer is 4-bytes aligned. // Otherwise, performance may suffer significantly. 
assert( // NOLINT - ((uintptr_t)(&row_ptr[col]) & (kWeightsPerUint32 - 1)) == 0); + ((uintptr_t)(&row_ptr[col]) & (kWeightsPerUint32 - 1)) == 0); const int8x8_t s1_8x8 = vld1_s8((const int8_t *)(aligned_vec + col)); const int8x8_t s2_8x8 = vld1_s8((const int8_t *)(row_ptr + col)); const int16x8_t prod_16x8 = vmull_s8(s1_8x8, s2_8x8); @@ -952,7 +952,7 @@ inline void NeonMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ m const float32x4_t float_val1 = vcvtq_f32_s32(scratch_val1); const float32x4_t result0 = vmlaq_f32(vld1q_f32(result), float_val0, scaling_factor0); const float32x4_t result1 = - vmlaq_f32(vld1q_f32(result + 4 * result_stride), float_val1, scaling_factor1); + vmlaq_f32(vld1q_f32(result + 4 * result_stride), float_val1, scaling_factor1); vst1q_f32(result, result0); vst1q_f32(result + 4 * result_stride, result1); } diff --git a/compute/cker/include/cker/PortableTensorUtils.h b/compute/cker/include/cker/PortableTensorUtils.h index 3b3b27f..2a58a2e 100644 --- a/compute/cker/include/cker/PortableTensorUtils.h +++ b/compute/cker/include/cker/PortableTensorUtils.h @@ -138,7 +138,7 @@ inline void PortableSymmetricQuantizeFloats(const float *values, const int size, for (int i = 0; i < size; ++i) { const int32_t quantized_value = - static_cast(std::round(values[i] * scaling_factor_inv)); + static_cast(std::round(values[i] * scaling_factor_inv)); // Clamp: just in case some odd numeric offset. quantized_values[i] = std::min(kScale, std::max(-kScale, quantized_value)); } diff --git a/compute/cker/include/cker/Types.h b/compute/cker/include/cker/Types.h index acb6cac..10f3ecb 100644 --- a/compute/cker/include/cker/Types.h +++ b/compute/cker/include/cker/Types.h @@ -389,6 +389,11 @@ struct SpaceToDepthParams int32_t block_size; }; +struct LeakyReluParams +{ + float alpha; +}; + enum class Order { kColMajor, @@ -475,9 +480,9 @@ enum class QuantizationFlavor // (only those that need perchannel quantization do). template ::value - ? QuantizationFlavor::kFloatingPoint - : QuantizationFlavor::kIntegerWithUniformMultiplier> + std::is_floating_point::value + ? QuantizationFlavor::kFloatingPoint + : QuantizationFlavor::kIntegerWithUniformMultiplier> struct GemmParams { // Only for non-floating-point cases. The fixed-point part (i.e. the mantissa) @@ -504,12 +509,12 @@ struct GemmParams const AccumScalar *bias = nullptr; // min clamp bound of destination values. DstScalar clamp_min = std::is_floating_point::value - ? -std::numeric_limits::infinity() - : std::numeric_limits::lowest(); + ? -std::numeric_limits::infinity() + : std::numeric_limits::lowest(); // max clamp bound of destination values. DstScalar clamp_max = std::is_floating_point::value - ? std::numeric_limits::infinity() - : std::numeric_limits::max(); + ? std::numeric_limits::infinity() + : std::numeric_limits::max(); }; // Validates self-consistency of GemmParams. diff --git a/compute/cker/include/cker/Utils.h b/compute/cker/include/cker/Utils.h index 2abb998..f73c015 100644 --- a/compute/cker/include/cker/Utils.h +++ b/compute/cker/include/cker/Utils.h @@ -88,8 +88,8 @@ inline int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multip int left_shift = shift > 0 ? shift : 0; int right_shift = shift > 0 ? 
0 : -shift; return gemmlowp::RoundingDivideByPOT( - gemmlowp::SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier), - right_shift); + gemmlowp::SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier), + right_shift); } inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(int32_t x, int32_t quantized_multiplier, @@ -103,7 +103,7 @@ inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(int32_t x, int left_shift) { return gemmlowp::RoundingDivideByPOT( - gemmlowp::SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift); + gemmlowp::SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift); } inline int NodeOffset(int b, int h, int w, int height, int width) @@ -162,7 +162,7 @@ inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift, const F3 fixedpoint_input = F3::FromRaw(input >> 1); const F3 fixedpoint_half_input = SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input); const F3 fixedpoint_half_three = - GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5); + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5); // Newton-Raphson iteration // Naive unoptimized starting guess: x = 1 F3 x = F3::One(); @@ -173,7 +173,7 @@ inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift, x = Rescale<3>(fixedpoint_half_three * x - fixedpoint_half_input * x3); } const F0 fixedpoint_half_sqrt_2 = - GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.); + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.); x = x * fixedpoint_half_sqrt_2; *output_inv_sqrt = x.raw(); if (*output_shift < 0) @@ -429,7 +429,7 @@ template class SequentialTensorWriter { public: SequentialTensorWriter(const T *input_data, T *output_data) - : input_data_(input_data), output_ptr_(output_data) + : input_data_(input_data), output_ptr_(output_data) { } diff --git a/compute/cker/include/cker/eigen/EigenSupport.h b/compute/cker/include/cker/eigen/EigenSupport.h index 49c3421..e3b1099 100644 --- a/compute/cker/include/cker/eigen/EigenSupport.h +++ b/compute/cker/include/cker/eigen/EigenSupport.h @@ -39,17 +39,17 @@ namespace eigen_support // library. typedef Eigen::TensorMap, Eigen::Aligned> - EigenMatrix; + EigenMatrix; typedef Eigen::TensorMap, Eigen::Aligned> - ConstEigenMatrix; + ConstEigenMatrix; typedef Eigen::TensorMap, Eigen::Aligned> - EigenTensor; + EigenTensor; typedef Eigen::TensorMap, Eigen::Aligned> - ConstEigenTensor; + ConstEigenTensor; // Utility functions we need for the EigenTensor API. template struct MatMulConvFunctor diff --git a/compute/cker/include/cker/eigen/Utils.h b/compute/cker/include/cker/eigen/Utils.h index f9c7063..40cb854 100644 --- a/compute/cker/include/cker/eigen/Utils.h +++ b/compute/cker/include/cker/eigen/Utils.h @@ -36,9 +36,9 @@ namespace cker // Eigen::Map> template using VectorMap = typename std::conditional< - std::is_const::value, - Eigen::Map::type, Eigen::Dynamic, 1>>, - Eigen::Map>>::type; + std::is_const::value, + Eigen::Map::type, Eigen::Dynamic, 1>>, + Eigen::Map>>::type; template VectorMap MapAsVector(Scalar *data, const Shape &shape) { @@ -51,10 +51,10 @@ template VectorMap MapAsVector(Scalar *data, const Sha // above also applies here. 
template using MatrixMap = typename std::conditional< - std::is_const::value, - Eigen::Map::type, Eigen::Dynamic, - Eigen::Dynamic>>, - Eigen::Map>>::type; + std::is_const::value, + Eigen::Map< + const Eigen::Matrix::type, Eigen::Dynamic, Eigen::Dynamic>>, + Eigen::Map>>::type; template MatrixMap MapAsMatrixWithLastDimAsRows(Scalar *data, const Shape &shape) diff --git a/compute/cker/include/cker/eigen/eigen_convolution_helpers.h b/compute/cker/include/cker/eigen/eigen_convolution_helpers.h index dc3e255..9d4fd2e 100644 --- a/compute/cker/include/cker/eigen/eigen_convolution_helpers.h +++ b/compute/cker/include/cker/eigen/eigen_convolution_helpers.h @@ -49,20 +49,19 @@ class TensorEvaluatorHasPartialPacket public: template static auto functionExistsSfinae( - typename std::enable_if< - unpacket_traits::masked_load_available && - std::is_same< - PacketT, - decltype(std::declval().template partialPacket( - std::declval(), - std::declval::mask_t>()))>::value>::type *) - -> std::true_type; + typename std::enable_if< + unpacket_traits::masked_load_available && + std::is_same().template partialPacket( + std::declval(), + std::declval::mask_t>()))>::value>::type *) + -> std::true_type; template static auto functionExistsSfinae(...) -> std::false_type; typedef decltype( - functionExistsSfinae(nullptr)) status; + functionExistsSfinae(nullptr)) status; static constexpr bool value = status::value; }; @@ -71,9 +70,9 @@ public: // [from, to) range. If the mask bit is 1, element will be loaded/stored. template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE - typename std::enable_if::masked_load_available, - typename unpacket_traits::mask_t>::type - mask(int from, int to) + typename std::enable_if::masked_load_available, + typename unpacket_traits::mask_t>::type + mask(int from, int to) { const Index packet_size = internal::unpacket_traits::size; eigen_assert(0 <= from && to <= (packet_size + 1) && from < to); diff --git a/compute/cker/include/cker/eigen/eigen_spatial_convolutions-inl.h b/compute/cker/include/cker/eigen/eigen_spatial_convolutions-inl.h index 92e1614..c931ac5 100644 --- a/compute/cker/include/cker/eigen/eigen_spatial_convolutions-inl.h +++ b/compute/cker/include/cker/eigen/eigen_spatial_convolutions-inl.h @@ -62,30 +62,27 @@ template class TensorContractionInputMapper< - Scalar_, Index, Side, - TensorEvaluator< - const TensorReshapingOp>, - Device>, - nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> + Scalar_, Index, Side, + TensorEvaluator< + const TensorReshapingOp>, Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> { public: typedef Scalar_ Scalar; typedef TensorContractionInputMapper< - Scalar, Index, Side, - TensorEvaluator< - const TensorReshapingOp>, - Device>, - nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> - Self; + Scalar, Index, Side, + TensorEvaluator< + const TensorReshapingOp>, Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> + Self; typedef TensorContractionSubMapper< - Scalar, Index, Side, - TensorEvaluator< - const TensorReshapingOp>, - Device>, - nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> - SubMapper; + Scalar, Index, Side, + TensorEvaluator< + const TensorReshapingOp>, Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> + SubMapper; typedef SubMapper VectorMapper; typedef SubMapper 
LinearMapper; @@ -95,11 +92,11 @@ public: EIGEN_DEVICE_FUNC TensorContractionInputMapper( - const TensorEvaluator< - const TensorReshapingOp>, - Device> &tensor, - const nocontract_t &, const nocontract_t &, const contract_t &, const contract_t &) - : m_impl(tensor.impl().impl()) + const TensorEvaluator< + const TensorReshapingOp>, Device> + &tensor, + const nocontract_t &, const nocontract_t &, const contract_t &, const contract_t &) + : m_impl(tensor.impl().impl()) { Index patch_rows; Index patch_depth; @@ -167,7 +164,7 @@ public: EIGEN_DEVICE_FUNC TensorContractionInputMapper(const TensorContractionInputMapper &base_mapper) - : m_impl(base_mapper.m_impl) + : m_impl(base_mapper.m_impl) { m_patch_cols = base_mapper.m_patch_cols; m_num_patches = base_mapper.m_num_patches; @@ -280,11 +277,10 @@ public: private: friend class TensorContractionSubMapper< - Scalar, Index, Side, - TensorEvaluator< - const TensorReshapingOp>, - Device>, - nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>; + Scalar, Index, Side, + TensorEvaluator< + const TensorReshapingOp>, Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>; // Load coefficient from a patch specified by the "within patch offset" // (patchId) and the precomputed indices of the first element of the patch. @@ -298,14 +294,14 @@ private: const Index colOffset = patchOffset / m_fastColStride; const Index inputCol = colIndex + colOffset * m_in_col_strides; const Index origInputCol = (m_patch_col_inflate_strides == 1) - ? inputCol - : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0); + ? inputCol + : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0); const Index rowOffset = patchOffset - colOffset * m_colStride; const Index inputRow = rowIndex + rowOffset * m_in_row_strides; const Index origInputRow = (m_patch_row_inflate_strides == 1) - ? inputRow - : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0); + ? inputRow + : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0); if (origInputCol < 0 || origInputRow < 0 || origInputCol >= m_inputCols || origInputRow >= m_inputRows || (inputCol != origInputCol * m_patch_col_inflate_strides) || (inputRow != origInputRow * m_patch_row_inflate_strides)) @@ -314,7 +310,7 @@ private: } const Index depth = patchId - patchOffset * patchDepth(); const Index inputIndex = - depth + origInputRow * m_rowInputStride + origInputCol * m_colInputStride + otherIndex; + depth + origInputRow * m_rowInputStride + origInputCol * m_colInputStride + otherIndex; return m_impl.coeff(inputIndex); } @@ -338,7 +334,7 @@ private: } const Index depth = patchId - patchOffset * patchDepth(); const Index inputIndex = - depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex; + depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex; return m_impl.coeff(inputIndex); } @@ -390,7 +386,7 @@ private: // span[0] all the way upto (and including) span[1]. 
const Index depth = patchId - patchOffsets[0] * patchDepth(); const Index inputIndex = - depth + inputRows[0] * m_rowInputStride + inputCol * m_colInputStride + otherIndex; + depth + inputRows[0] * m_rowInputStride + inputCol * m_colInputStride + otherIndex; return m_impl.template partialPacket(inputIndex - span[0], mask(span[0], span[1] + 1)); } @@ -445,10 +441,10 @@ private: // Load partial packets and do bit-wise OR to generate required packet return internal::por( - loadPartialPacketStandard(rowIndex, colIndex, otherIndex, patchIds[0], spans[0], - patchOffsets2Cols[0], colOffsets[0]), - loadPartialPacketStandard(rowIndex, colIndex, otherIndex, patchIds[1], spans[1], - patchOffsets2Cols[1], colOffsets[1])); + loadPartialPacketStandard(rowIndex, colIndex, otherIndex, patchIds[0], spans[0], + patchOffsets2Cols[0], colOffsets[0]), + loadPartialPacketStandard(rowIndex, colIndex, otherIndex, patchIds[1], spans[1], + patchOffsets2Cols[1], colOffsets[1])); } // Helper function to load a packet that is present in a single columns. @@ -477,7 +473,7 @@ private: // no padding const Index depth = patchId - patchOffsets[0] * patchDepth(); const Index inputIndex = - depth + inputRows[0] * m_rowInputStride + inputCols[0] * m_colInputStride + otherIndex; + depth + inputRows[0] * m_rowInputStride + inputCols[0] * m_colInputStride + otherIndex; return m_impl.template packet(inputIndex); } return packetWithPossibleZero(patchId, rowIndex, colIndex, otherIndex); @@ -490,7 +486,7 @@ private: // load. template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename std::enable_if< - !TensorEvaluatorHasPartialPacket::value, PacketT>::type + !TensorEvaluatorHasPartialPacket::value, PacketT>::type loadPacketStandard(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const { const Index packetSize = internal::unpacket_traits::size; @@ -538,7 +534,7 @@ private: // packets. template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename std::enable_if< - TensorEvaluatorHasPartialPacket::value, PacketT>::type + TensorEvaluatorHasPartialPacket::value, PacketT>::type loadPacketStandard(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const { const Index packetSize = internal::unpacket_traits::size; @@ -604,7 +600,7 @@ private: // no padding const Index depth = patchId - patchOffset * patchDepth(); const Index inputIndex = - depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex; + depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex; return m_impl.template packet(inputIndex); } @@ -627,10 +623,10 @@ private: computeBaseIndices(Index patchIndex, Index &rowIndex, Index &colIndex, Index &otherIndex) const { const size_t NumInputDims = - array_size::Dimensions>::value; + array_size::Dimensions>::value; otherIndex = (NumInputDims == 3) ? 0 : patchIndex / m_fastNumPatches; const Index patch2DIndex = - (NumInputDims == 3) ? patchIndex : (patchIndex - otherIndex * m_num_patches); + (NumInputDims == 3) ? 
patchIndex : (patchIndex - otherIndex * m_num_patches); otherIndex *= m_patchInputStride; colIndex = patch2DIndex / m_fastOutputRows; rowIndex = patch2DIndex - colIndex * m_outputRows; @@ -689,31 +685,28 @@ template class TensorContractionSubMapper< - Scalar, Index, Side, - TensorEvaluator< - const TensorReshapingOp>, - Device>, - nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> + Scalar, Index, Side, + TensorEvaluator< + const TensorReshapingOp>, Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> { public: typedef typename packet_traits::type Packet; typedef typename packet_traits::half HalfPacket; typedef TensorContractionInputMapper< - Scalar, Index, Side, - TensorEvaluator< - const TensorReshapingOp>, - Device>, - nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> - ParentMapper; + Scalar, Index, Side, + TensorEvaluator< + const TensorReshapingOp>, Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> + ParentMapper; typedef TensorContractionSubMapper< - Scalar, Index, Side, - TensorEvaluator< - const TensorReshapingOp>, - Device>, - nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> - Self; + Scalar, Index, Side, + TensorEvaluator< + const TensorReshapingOp>, Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> + Self; typedef Self LinearMapper; @@ -722,16 +715,16 @@ public: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionSubMapper(const ParentMapper &base_mapper, Index vert_offset, Index horiz_offset) - : m_depth_offset(vert_offset), m_col_offset(horiz_offset), m_base_mapper(base_mapper) + : m_depth_offset(vert_offset), m_col_offset(horiz_offset), m_base_mapper(base_mapper) { m_base_mapper.computeBaseIndices(m_col_offset, m_rowIndex, m_colIndex, m_otherIndex); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionSubMapper(const Self &base_mapper, Index vert_offset, Index horiz_offset) - : m_depth_offset(vert_offset + base_mapper.m_depth_offset), - m_col_offset(horiz_offset + base_mapper.m_col_offset), - m_base_mapper(base_mapper.m_base_mapper) + : m_depth_offset(vert_offset + base_mapper.m_depth_offset), + m_col_offset(horiz_offset + base_mapper.m_col_offset), + m_base_mapper(base_mapper.m_base_mapper) { m_base_mapper.computeBaseIndices(m_col_offset, m_rowIndex, m_colIndex, m_otherIndex); } @@ -766,7 +759,7 @@ public: { typedef decltype(m_base_mapper.m_impl) TensorEvaluatorT; return m_base_mapper.template loadPacketStandard( - i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex); + i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex); } template EIGEN_DEVICE_FUNC bool aligned(Index) const { return false; } @@ -781,7 +774,7 @@ public: EIGEN_ALWAYS_INLINE Index maxCol(const Index peeled_k) const { const Index max_col = - (m_depth_offset + (peeled_k == 0 ? 0 : peeled_k - 1)) / fastPatchColStride(); + (m_depth_offset + (peeled_k == 0 ? 0 : peeled_k - 1)) / fastPatchColStride(); return std::min(1 + max_col, patchCols()); } @@ -789,8 +782,8 @@ public: EIGEN_ALWAYS_INLINE Index maxRow(const Index peeled_k, const Index col) const { const Index max_row = - (m_depth_offset + (peeled_k == 0 ? 0 : peeled_k - 1) - col * patchColStride()) / - fastPatchRowStride(); + (m_depth_offset + (peeled_k == 0 ? 
0 : peeled_k - 1) - col * patchColStride()) / + fastPatchRowStride(); return std::min(1 + max_row, patchRows()); } @@ -862,7 +855,7 @@ public: } template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename std::enable_if< - TensorEvaluatorHasPartialPacket::value, PacketT>::type + TensorEvaluatorHasPartialPacket::value, PacketT>::type partialPacketNoPadding(const Index depth, const Index baseIndex, Index num_coeffs) const { const Index inputIndex = depth + baseIndex; @@ -913,8 +906,8 @@ public: const Index input_row = m_rowIndex + row * m_base_mapper.m_in_row_strides; *orig_row = (m_base_mapper.m_patch_row_inflate_strides == 1) - ? input_row - : ((input_row >= 0) ? (input_row / m_base_mapper.m_fastInputRowStride) : 0); + ? input_row + : ((input_row >= 0) ? (input_row / m_base_mapper.m_fastInputRowStride) : 0); return (*orig_row < 0 || *orig_row >= m_base_mapper.m_inputRows) || (input_row != *orig_row * m_base_mapper.m_patch_row_inflate_strides); @@ -932,8 +925,8 @@ public: const Index input_col = m_colIndex + col * m_base_mapper.m_in_col_strides; *orig_col = (m_base_mapper.m_patch_col_inflate_strides == 1) - ? input_col - : ((input_col >= 0) ? (input_col / m_base_mapper.m_fastInputColStride) : 0); + ? input_col + : ((input_col >= 0) ? (input_col / m_base_mapper.m_fastInputColStride) : 0); return (*orig_col < 0 || *orig_col >= m_base_mapper.m_inputCols) || (input_col != *orig_col * m_base_mapper.m_patch_col_inflate_strides); @@ -1033,23 +1026,20 @@ template struct gemm_pack_rhs< - Scalar, Index, - TensorContractionSubMapper< - Scalar, Index, Rhs, - TensorEvaluator< - const TensorReshapingOp>, - Device>, - nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, - Alignment>, - nr, ColMajor, false, false> + Scalar, Index, + TensorContractionSubMapper< + Scalar, Index, Rhs, + TensorEvaluator< + const TensorReshapingOp>, Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>, + nr, ColMajor, false, false> { typedef TensorContractionSubMapper< - Scalar, Index, Rhs, - TensorEvaluator< - const TensorReshapingOp>, - Device>, - nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> - SubMapper; + Scalar, Index, Rhs, + TensorEvaluator< + const TensorReshapingOp>, Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> + SubMapper; typedef SubMapper DataMapper; typedef typename packet_traits::type Packet; @@ -1159,7 +1149,7 @@ struct gemm_pack_rhs< const Index idx3 = dm3.baseIndex(r, c); const Index start_depth = - ((c == start_col) && (r == start_row)) ? rhs.depthOffset() : 0; + ((c == start_col) && (r == start_row)) ? 
rhs.depthOffset() : 0; const Index max_depth = rhs.maxDepth(peeled_k - k, start_depth); eigen_assert((max_depth - start_depth) % packet_size == 0); @@ -1248,22 +1238,20 @@ template struct gemm_pack_rhs< - Scalar, Index, - TensorContractionSubMapper< - Scalar, Index, Rhs, - TensorEvaluator< - const TensorReshapingOp>, - Device>, - nocontract_t, contract_t, 2, inner_dim_contiguous, inner_dim_reordered, Alignment>, - nr, ColMajor, false, false> + Scalar, Index, + TensorContractionSubMapper< + Scalar, Index, Rhs, + TensorEvaluator< + const TensorReshapingOp>, Device>, + nocontract_t, contract_t, 2, inner_dim_contiguous, inner_dim_reordered, Alignment>, + nr, ColMajor, false, false> { typedef TensorContractionSubMapper< - Scalar, Index, Rhs, - TensorEvaluator< - const TensorReshapingOp>, - Device>, - nocontract_t, contract_t, 2, inner_dim_contiguous, inner_dim_reordered, Alignment> - SubMapper; + Scalar, Index, Rhs, + TensorEvaluator< + const TensorReshapingOp>, Device>, + nocontract_t, contract_t, 2, inner_dim_contiguous, inner_dim_reordered, Alignment> + SubMapper; typedef SubMapper DataMapper; typedef typename packet_traits::type Packet; @@ -1378,7 +1366,7 @@ struct gemm_pack_rhs< const Index idx3 = dm3.baseIndex(r, c); const Index start_depth = - ((c == start_col) && (r == start_row)) ? rhs.depthOffset() : 0; + ((c == start_col) && (r == start_row)) ? rhs.depthOffset() : 0; const Index max_depth = rhs.maxDepth(peeled_k - k, start_depth); eigen_assert((max_depth - start_depth) % packet_size == 0); @@ -1472,22 +1460,20 @@ template struct gemm_pack_rhs< - Scalar, Index, - TensorContractionSubMapper< - Scalar, Index, Rhs, - TensorEvaluator< - const TensorReshapingOp>, - Device>, - nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment>, - nr, ColMajor, false, false> + Scalar, Index, + TensorContractionSubMapper< + Scalar, Index, Rhs, + TensorEvaluator< + const TensorReshapingOp>, Device>, + nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment>, + nr, ColMajor, false, false> { typedef TensorContractionSubMapper< - Scalar, Index, Rhs, - TensorEvaluator< - const TensorReshapingOp>, - Device>, - nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment> - SubMapper; + Scalar, Index, Rhs, + TensorEvaluator< + const TensorReshapingOp>, Device>, + nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment> + SubMapper; typedef SubMapper DataMapper; EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE) @@ -1582,27 +1568,25 @@ struct gemm_pack_rhs< */ template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static const typename internal::conditional< - internal::traits::Layout == ColMajor, - TensorReshapingOp< - const DSizes::Index, - internal::traits::NumDimensions>, - const TensorContractionOp< - const array::Index>, 1>, - const TensorReshapingOp::Index, 2>, - const Kernel>, - const TensorReshapingOp::Index, 2>, - const TensorImagePatchOp>, - const OutputKernel>>, - TensorReshapingOp< - const DSizes::Index, - internal::traits::NumDimensions>, - const TensorContractionOp< - const array::Index>, 1>, - const TensorReshapingOp::Index, 2>, - const TensorImagePatchOp>, - const TensorReshapingOp::Index, 2>, - const Kernel>, - const OutputKernel>>>::type + internal::traits::Layout == ColMajor, + TensorReshapingOp< + const DSizes::Index, internal::traits::NumDimensions>, + const TensorContractionOp< + const array::Index>, 1>, + const TensorReshapingOp::Index, 2>, + const Kernel>, + const 
TensorReshapingOp::Index, 2>, + const TensorImagePatchOp>, + const OutputKernel>>, + TensorReshapingOp< + const DSizes::Index, internal::traits::NumDimensions>, + const TensorContractionOp< + const array::Index>, 1>, + const TensorReshapingOp::Index, 2>, + const TensorImagePatchOp>, + const TensorReshapingOp::Index, 2>, + const Kernel>, + const OutputKernel>>>::type SpatialConvolution(const Input &input, const Kernel &kernel, const Index row_stride = 1, const Index col_stride = 1, const PaddingType padding_type = PADDING_SAME, const Index row_in_stride = 1, const Index col_in_stride = 1, @@ -1612,11 +1596,11 @@ SpatialConvolution(const Input &input, const Kernel &kernel, const Index row_str typedef typename internal::traits::Index TensorIndex; TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex>> - in(input); + in(input); TensorRef< - Tensor::Scalar, internal::traits::NumDimensions, - internal::traits::Layout, TensorIndex>> - kern(kernel); + Tensor::Scalar, internal::traits::NumDimensions, + internal::traits::Layout, TensorIndex>> + kern(kernel); EIGEN_STATIC_ASSERT(internal::traits::Layout == internal::traits::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE) @@ -1735,46 +1719,46 @@ SpatialConvolution(const Input &input, const Kernel &kernel, const Index row_str } if (padding_explicit) { - return choose( - Cond::Layout == ColMajor>(), - kernel.reshape(kernel_dims) - .contract(input - .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride, - row_in_stride, col_in_stride, - /*row_inflate_stride=*/1, - /*col_inflate_stride=*/1, padding_top, - padding_bottom, padding_left, padding_right, - /*padding_value=*/0) - .reshape(pre_contract_dims), - contract_dims, output_kernel) - .reshape(post_contract_dims), - input - .extract_image_patches( - kernelRows, kernelCols, row_stride, col_stride, row_in_stride, col_in_stride, - /*row_inflate_stride=*/1, - /*col_inflate_stride=*/1, padding_top, padding_bottom, padding_left, padding_right, - /*padding_value=*/0) - .reshape(pre_contract_dims) - .contract(kernel.reshape(kernel_dims), contract_dims, output_kernel) - .reshape(post_contract_dims)); + return choose(Cond::Layout == ColMajor>(), + kernel.reshape(kernel_dims) + .contract(input + .extract_image_patches(kernelRows, kernelCols, row_stride, + col_stride, row_in_stride, col_in_stride, + /*row_inflate_stride=*/1, + /*col_inflate_stride=*/1, padding_top, + padding_bottom, padding_left, padding_right, + /*padding_value=*/0) + .reshape(pre_contract_dims), + contract_dims, output_kernel) + .reshape(post_contract_dims), + input + .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride, + row_in_stride, col_in_stride, + /*row_inflate_stride=*/1, + /*col_inflate_stride=*/1, padding_top, padding_bottom, + padding_left, padding_right, + /*padding_value=*/0) + .reshape(pre_contract_dims) + .contract(kernel.reshape(kernel_dims), contract_dims, output_kernel) + .reshape(post_contract_dims)); } else { return choose( - Cond::Layout == ColMajor>(), - kernel.reshape(kernel_dims) - .contract(input - .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride, - row_in_stride, col_in_stride, padding_type) - .reshape(pre_contract_dims), - contract_dims, output_kernel) - .reshape(post_contract_dims), - input - .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride, row_in_stride, - col_in_stride, padding_type) - .reshape(pre_contract_dims) - .contract(kernel.reshape(kernel_dims), contract_dims, output_kernel) - .reshape(post_contract_dims)); 
+ Cond::Layout == ColMajor>(), + kernel.reshape(kernel_dims) + .contract(input + .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride, + row_in_stride, col_in_stride, padding_type) + .reshape(pre_contract_dims), + contract_dims, output_kernel) + .reshape(post_contract_dims), + input + .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride, row_in_stride, + col_in_stride, padding_type) + .reshape(pre_contract_dims) + .contract(kernel.reshape(kernel_dims), contract_dims, output_kernel) + .reshape(post_contract_dims)); } } diff --git a/compute/cker/include/cker/operation/AveragePool.h b/compute/cker/include/cker/operation/AveragePool.h index 6149caf..a70e39c 100644 --- a/compute/cker/include/cker/operation/AveragePool.h +++ b/compute/cker/include/cker/operation/AveragePool.h @@ -73,10 +73,10 @@ void AveragePool(const PoolParams ¶ms, const Shape &input_shape, cons int hpad = h + params.padding_values.height; int wpad = w + params.padding_values.width; int h_start = - (hpad < params.filter_height) ? 0 : (hpad - params.filter_height) / stride_height + 1; + (hpad < params.filter_height) ? 0 : (hpad - params.filter_height) / stride_height + 1; int h_end = std::min(hpad / stride_height + 1, output_height); int w_start = - (wpad < params.filter_width) ? 0 : (wpad - params.filter_width) / stride_width + 1; + (wpad < params.filter_width) ? 0 : (wpad - params.filter_width) / stride_width + 1; int w_end = std::min(wpad / stride_width + 1, output_width); // compute elementwise sum for (int ph = h_start; ph < h_end; ++ph) @@ -146,11 +146,11 @@ inline void AveragePool16(const PoolParams ¶ms, const Shape &input_shape, const int filter_y_start = std::max(0, -in_y_origin); const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin); const int filter_count = - (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start); + (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start); memset(acc, 0, tranche_depth * sizeof(acc[0])); const uint8_t *input_ptr = - input_data + depth_base + - depth * (in_x_origin + input_width * (in_y_origin + input_height * batch)); + input_data + depth_base + + depth * (in_x_origin + input_width * (in_y_origin + input_height * batch)); for (int fy = filter_y_start; fy < filter_y_end; fy++) { const uint8_t *input_row_ptr = input_ptr + depth * (fy * input_width + filter_x_start); @@ -283,11 +283,11 @@ inline void AveragePool32(const PoolParams ¶ms, const Shape &input_shape, const int filter_y_start = std::max(0, -in_y_origin); const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin); const int filter_count = - (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start); + (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start); memset(acc, 0, tranche_depth * sizeof(acc[0])); const uint8_t *input_ptr = - input_data + depth_base + - depth * (in_x_origin + input_width * (in_y_origin + input_height * batch)); + input_data + depth_base + + depth * (in_x_origin + input_width * (in_y_origin + input_height * batch)); for (int fy = filter_y_start; fy < filter_y_end; fy++) { const uint8_t *input_row_ptr = input_ptr + depth * (fy * input_width + filter_x_start); diff --git a/compute/cker/include/cker/operation/BatchToSpaceND.h b/compute/cker/include/cker/operation/BatchToSpaceND.h index e33b2fb..980ad48 100644 --- a/compute/cker/include/cker/operation/BatchToSpaceND.h +++ b/compute/cker/include/cker/operation/BatchToSpaceND.h @@ -43,7 +43,7 @@ inline void GetIndexRange(int 
spatial_index_dim, int block_shape_dim, int input_ // Similarly, (*end_index) * block_shape_dim is rounded up too (note that // end_index is exclusive). *end_index = - std::min(input_dim, (output_dim - spatial_index_dim + block_shape_dim - 1) / block_shape_dim); + std::min(input_dim, (output_dim - spatial_index_dim + block_shape_dim - 1) / block_shape_dim); } template @@ -116,7 +116,7 @@ inline void BatchToSpaceND(const Shape &unextended_input1_shape, const T *input1 for (int in_w = in_w_start; in_w < in_w_end; ++in_w) { const int out_w = - in_w * block_shape_width + spatial_offset % block_shape_width - crops_left; + in_w * block_shape_width + spatial_offset % block_shape_width - crops_left; assert(out_w >= 0); assert(out_w < output_width); T *out = output_data + Offset(output_shape, out_batch, out_h, out_w, 0); diff --git a/compute/cker/include/cker/operation/BinaryArithmeticOps.h b/compute/cker/include/cker/operation/BinaryArithmeticOps.h index d9917a9..fe5f877 100644 --- a/compute/cker/include/cker/operation/BinaryArithmeticOps.h +++ b/compute/cker/include/cker/operation/BinaryArithmeticOps.h @@ -139,7 +139,7 @@ inline bool ProcessBroadcastShapes(const Shape &shape0, const Shape &shape1, // From this point it is assumed contractually that corresponding dimensions // in shape0 and shape1 are either (a) equal or (b) one or other equals 1. const bool swap_inputs = - params->broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast; + params->broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast; const Shape *shape_a = swap_inputs ? &extended_shape1 : &extended_shape0; const Shape *shape_b = swap_inputs ? &extended_shape0 : &extended_shape1; @@ -281,8 +281,8 @@ inline void BroadcastBinaryArithmeticOp(BinaryArithmeticOpParam ¶ms, const S break; case nnfw::cker::BinaryArithmeticOpType::MUL: optimized::BroadcastMulDispatchQuant8( - params, input1_shape, const_cast(input1_data), input2_shape, - const_cast(input2_data), output_shape, output_data); + params, input1_shape, const_cast(input1_data), input2_shape, + const_cast(input2_data), output_shape, output_data); break; case nnfw::cker::BinaryArithmeticOpType::DIV: case nnfw::cker::BinaryArithmeticOpType::POW: @@ -320,8 +320,8 @@ inline void BroadcastBinaryArithmeticOp(BinaryArithmeticOpParam ¶ms, const S break; case nnfw::cker::BinaryArithmeticOpType::POW: reference::BroadcastBinaryArithmeticOpSlow( - params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, - GetBinaryArtithmeticFn()); + params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, + GetBinaryArtithmeticFn()); break; default: assert(false); diff --git a/compute/cker/include/cker/operation/Common.h b/compute/cker/include/cker/operation/Common.h index d69b38a..24d4cc4 100644 --- a/compute/cker/include/cker/operation/Common.h +++ b/compute/cker/include/cker/operation/Common.h @@ -82,7 +82,7 @@ inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size, const for (; i < bias_size; i++) { array_ptr[i] = - ActivationFunctionWithMinMax(array_ptr[i] + bias_data[i], clamp_min, clamp_max); + ActivationFunctionWithMinMax(array_ptr[i] + bias_data[i], clamp_min, clamp_max); } } #else // not NEON @@ -91,7 +91,7 @@ inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size, const for (int i = 0; i < bias_size; i++) { array_data[array_offset + i] = ActivationFunctionWithMinMax( - array_data[array_offset + i] + bias_data[i], clamp_min, clamp_max); + array_data[array_offset 
+ i] + bias_data[i], clamp_min, clamp_max); } } #endif diff --git a/compute/cker/include/cker/operation/Comparison.h b/compute/cker/include/cker/operation/Comparison.h index 47eb603..ac6af84 100644 --- a/compute/cker/include/cker/operation/Comparison.h +++ b/compute/cker/include/cker/operation/Comparison.h @@ -42,7 +42,7 @@ inline void ComparisonImpl(const Shape &input1_shape, const T *input1_data, const Shape &output_shape, bool *output_data) { const int64_t flatsize = // number of data.... - MatchingFlatSize(input1_shape, input2_shape, output_shape); + MatchingFlatSize(input1_shape, input2_shape, output_shape); for (int64_t i = 0; i < flatsize; ++i) { output_data[i] = F(input1_data[i], input2_data[i]); @@ -79,9 +79,9 @@ inline void ComparisonWithScaling(ComparisonParams ¶ms, const Shape &input1_ const int32_t shifted_input1_val = input1_val * (1 << left_shift); const int32_t shifted_input2_val = input2_val * (1 << left_shift); const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input1_val, input1_multiplier, input1_shift); + shifted_input1_val, input1_multiplier, input1_shift); const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input2_val, input2_multiplier, input2_shift); + shifted_input2_val, input2_multiplier, input2_shift); output_data[i] = F(scaled_input1_val, scaled_input2_val); } } @@ -111,8 +111,8 @@ BroadcastComparison4DSlowImpl(const Shape &unextended_input1_shape, const T *inp for (int c = 0; c < output_shape.Dims(3); ++c) { output_data[Offset(output_shape, b, y, x, c)] = - F(input1_data[SubscriptToIndex(desc1, b, y, x, c)], - input2_data[SubscriptToIndex(desc2, b, y, x, c)]); + F(input1_data[SubscriptToIndex(desc1, b, y, x, c)], + input2_data[SubscriptToIndex(desc2, b, y, x, c)]); } } } @@ -159,15 +159,15 @@ inline void BroadcastComparison4DSlowWithScaling(ComparisonParams ¶ms, for (int c = 0; c < output_shape.Dims(3); ++c) { const int32_t input1_val = - input1_offset + input1_data[SubscriptToIndex(desc1, b, y, x, c)]; + input1_offset + input1_data[SubscriptToIndex(desc1, b, y, x, c)]; const int32_t input2_val = - input2_offset + input2_data[SubscriptToIndex(desc2, b, y, x, c)]; + input2_offset + input2_data[SubscriptToIndex(desc2, b, y, x, c)]; const int32_t shifted_input1_val = input1_val * (1 << left_shift); const int32_t shifted_input2_val = input2_val * (1 << left_shift); const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input1_val, input1_multiplier, input1_shift); + shifted_input1_val, input1_multiplier, input1_shift); const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input2_val, input2_multiplier, input2_shift); + shifted_input2_val, input2_multiplier, input2_shift); output_data[Offset(output_shape, b, y, x, c)] = F(scaled_input1_val, scaled_input2_val); } } @@ -175,55 +175,53 @@ inline void BroadcastComparison4DSlowWithScaling(ComparisonParams ¶ms, } } -#define TFLITE_COMPARISON_OP(name) \ - template \ - inline void name(const Shape &input1_shape, const T *input1_data, const Shape &input2_shape, \ - const T *input2_data, const Shape &output_shape, bool *output_data) \ - { \ - Comparison(input1_shape, input1_data, input2_shape, input2_data, output_shape, \ - output_data); \ - } \ - template \ - inline void name##NoScaling(const Shape &input1_shape, const T *input1_data, \ - const Shape &input2_shape, const T *input2_data, \ - const Shape &output_shape, bool *output_data) \ - { \ - 
ComparisonImpl(input1_shape, input1_data, input2_shape, input2_data, \ - output_shape, output_data); \ - } \ - template \ - inline void name##WithScaling(ComparisonParams ¶ms, const Shape &input1_shape, \ - const T *input1_data, const Shape &input2_shape, \ - const T *input2_data, const Shape &output_shape, \ - bool *output_data) \ - { \ - ComparisonWithScaling(params, input1_shape, input1_data, input2_shape, \ - input2_data, output_shape, output_data); \ - } \ - template \ - inline void Broadcast4DSlow##name##NoScaling(const Shape &input1_shape, const T *input1_data, \ - const Shape &input2_shape, const T *input2_data, \ - const Shape &output_shape, bool *output_data) \ - { \ - BroadcastComparison4DSlowImpl(input1_shape, input1_data, input2_shape, \ - input2_data, output_shape, output_data); \ - } \ - template \ - inline void Broadcast4DSlow##name(const Shape &input1_shape, const T *input1_data, \ - const Shape &input2_shape, const T *input2_data, \ - const Shape &output_shape, bool *output_data) \ - { \ - BroadcastComparison4DSlow(input1_shape, input1_data, input2_shape, input2_data, \ - output_shape, output_data); \ - } \ - template \ - inline void Broadcast4DSlow##name##WithScaling(ComparisonParams ¶ms, \ - const Shape &input1_shape, const T *input1_data, \ - const Shape &input2_shape, const T *input2_data, \ - const Shape &output_shape, bool *output_data) \ - { \ - BroadcastComparison4DSlowWithScaling( \ - params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data); \ +#define TFLITE_COMPARISON_OP(name) \ + template \ + inline void name(const Shape &input1_shape, const T *input1_data, const Shape &input2_shape, \ + const T *input2_data, const Shape &output_shape, bool *output_data) \ + { \ + Comparison(input1_shape, input1_data, input2_shape, input2_data, output_shape, \ + output_data); \ + } \ + template \ + inline void name##NoScaling(const Shape &input1_shape, const T *input1_data, \ + const Shape &input2_shape, const T *input2_data, \ + const Shape &output_shape, bool *output_data) \ + { \ + ComparisonImpl(input1_shape, input1_data, input2_shape, input2_data, \ + output_shape, output_data); \ + } \ + template \ + inline void name##WithScaling( \ + ComparisonParams ¶ms, const Shape &input1_shape, const T *input1_data, \ + const Shape &input2_shape, const T *input2_data, const Shape &output_shape, bool *output_data) \ + { \ + ComparisonWithScaling(params, input1_shape, input1_data, input2_shape, \ + input2_data, output_shape, output_data); \ + } \ + template \ + inline void Broadcast4DSlow##name##NoScaling(const Shape &input1_shape, const T *input1_data, \ + const Shape &input2_shape, const T *input2_data, \ + const Shape &output_shape, bool *output_data) \ + { \ + BroadcastComparison4DSlowImpl(input1_shape, input1_data, input2_shape, \ + input2_data, output_shape, output_data); \ + } \ + template \ + inline void Broadcast4DSlow##name(const Shape &input1_shape, const T *input1_data, \ + const Shape &input2_shape, const T *input2_data, \ + const Shape &output_shape, bool *output_data) \ + { \ + BroadcastComparison4DSlow(input1_shape, input1_data, input2_shape, input2_data, \ + output_shape, output_data); \ + } \ + template \ + inline void Broadcast4DSlow##name##WithScaling( \ + ComparisonParams ¶ms, const Shape &input1_shape, const T *input1_data, \ + const Shape &input2_shape, const T *input2_data, const Shape &output_shape, bool *output_data) \ + { \ + BroadcastComparison4DSlowWithScaling( \ + params, input1_shape, input1_data, input2_shape, 
input2_data, output_shape, output_data); \
  }

 TFLITE_COMPARISON_OP(Equal);
diff --git a/compute/cker/include/cker/operation/Concatenation.h b/compute/cker/include/cker/operation/Concatenation.h
index 394123e..9aaca00 100644
--- a/compute/cker/include/cker/operation/Concatenation.h
+++ b/compute/cker/include/cker/operation/Concatenation.h
@@ -142,7 +142,7 @@ inline void ConcatenationWithScaling(const ConcatenationParams &params,
     for (int j = 0; j < copy_size; ++j)
     {
       const int32_t value =
-        static_cast(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
+        static_cast(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
       output_ptr[j] = static_cast(std::max(std::min(255, value), 0));
     }
   }
diff --git a/compute/cker/include/cker/operation/DepthToSpace.h b/compute/cker/include/cker/operation/DepthToSpace.h
new file mode 100644
index 0000000..e57fef0
--- /dev/null
+++ b/compute/cker/include/cker/operation/DepthToSpace.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_DEPTH_TO_SPACE_H__
+#define __NNFW_CKER_DEPTH_TO_SPACE_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+template
+inline void DepthToSpace(const Shape &unextended_input_shape, const T *input_data,
+                         const Shape &unextended_output_shape, T *output_data, int32_t block_size)
+{
+  assert(unextended_input_shape.DimensionsCount() <= 4);
+  assert(unextended_output_shape.DimensionsCount() <= 4);
+  const Shape input_shape = Shape::ExtendedShape(4, unextended_input_shape);
+  const Shape output_shape = Shape::ExtendedShape(4, unextended_output_shape);
+
+  const int input_depth = input_shape.Dims(3);
+  const int input_width = input_shape.Dims(2);
+  const int input_height = input_shape.Dims(1);
+
+  const int output_depth = output_shape.Dims(3);
+  const int batch_size = output_shape.Dims(0);
+
+  // Number of contiguous values that we can copy in one iteration.
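// (For instance, with block_size = 2 and output_depth = 4, so input_depth = 16,
//  the stride below is 8: each memcpy moves two adjacent output pixels' worth of values.)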
+  const int stride = block_size * output_depth;
+
+  for (int batch = 0; batch < batch_size; ++batch)
+  {
+    for (int in_h = 0; in_h < input_height; ++in_h)
+    {
+      const T *input_ptr = input_data + Offset(input_shape, batch, in_h, 0, 0);
+      for (int offset_h = 0; offset_h < block_size; ++offset_h)
+      {
+        const T *src = input_ptr;
+        for (int in_w = 0; in_w < input_width; ++in_w)
+        {
+          memcpy(output_data, src, stride * sizeof(T));
+          output_data += stride;
+          src += input_depth;
+        }
+        input_ptr += stride;
+      }
+    }
+  }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_DEPTH_TO_SPACE_H__
diff --git a/compute/cker/include/cker/operation/DepthwiseConv.h b/compute/cker/include/cker/operation/DepthwiseConv.h
index 814a9e0..436ddd8 100644
--- a/compute/cker/include/cker/operation/DepthwiseConv.h
+++ b/compute/cker/include/cker/operation/DepthwiseConv.h
@@ -22,143 +22,159 @@
 #include "cker/Types.h"
 #include "cker/Utils.h"
 #include "cker/neon/neon_check.h"
+#include "cker/operation/optimized/DepthwiseConvFloat.h"
 #include "cker/operation/optimized/DepthwiseConvUint8.h"
+#include "cker/CpuBackendThreadpool.h"

 namespace nnfw
 {
 namespace cker
 {

-inline void DepthwiseConv(const DepthwiseConvParams &params, const Shape &input_shape,
-                          const uint8_t *input_data, const Shape &filter_shape,
-                          const uint8_t *filter_data, const Shape &bias_shape,
-                          const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data)
+// TODO(luwa): add multithread to per-channel depthwise_conv
+// DepthwiseConv can run with multi threads on the dim specified by thread_dim.
+// Each thread processes output elements on dim, thread_dim, in the range of
+// [thread_start, thread_end).
+// For example, assume thread_start = 2, thread_end = 6, and thread_dim = 1, it
+// means that it will calculate DepthwiseConv for output_data[:, 2:5, :, :].
+template struct DepthwiseConvWorkerTask : cpu_backend_threadpool::Task
 {
-  const int depth_multiplier = params.depth_multiplier;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  assert(dilation_width_factor >= 1);
-  assert(dilation_height_factor >= 1);
-  UNUSED_RELEASE(dilation_width_factor);
-  UNUSED_RELEASE(dilation_height_factor);
-  assert(input_shape.DimensionsCount() == 4);
-  assert(filter_shape.DimensionsCount() == 4);
-  assert(output_shape.DimensionsCount() == 4);
-  assert(output_activation_min <= output_activation_max);
-  UNUSED_RELEASE(output_activation_min);
-  UNUSED_RELEASE(output_activation_max);
-  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
-  const int input_depth = input_shape.Dims(3);
-  assert(output_depth == input_depth * depth_multiplier);
-  assert(bias_shape.FlatSize() == output_depth);
-  UNUSED_RELEASE(input_depth);
-  UNUSED_RELEASE(output_depth);
-  UNUSED_RELEASE(depth_multiplier);
-
-// Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on
-// Jetson TX-2. This compiler does not support the offsetof() macro.
-#if defined(__aarch64__) -// TODO Use below codes - -// const int stride_width = params.stride_width; -// const int stride_height = params.stride_height; -// const int pad_width = params.padding_values.width; -// const int pad_height = params.padding_values.height; -// const int output_shift = params.output_shift; -// -// // Call kernel optimized for depthwise convolutions using 3x3 filters if -// // parameters are supported. -// if (Fast3x3FilterKernelSupported( -// input_shape, filter_shape, stride_width, stride_height, -// dilation_width_factor, dilation_height_factor, pad_width, pad_height, -// depth_multiplier, output_shape, output_shift)) { -// DepthwiseConv3x3Filter(params, input_shape, input_data, filter_shape, -// filter_data, bias_shape, bias_data, output_shape, -// output_data); -// return; -// } -#endif - - optimized::DepthwiseConvGeneral(params, input_shape, input_data, filter_shape, filter_data, - bias_shape, bias_data, output_shape, output_data); + DepthwiseConvWorkerTask(const DepthwiseConvParams ¶ms, const Shape &input_shape, + const T *input_data, const Shape &filter_shape, const T *filter_data, + const Shape &bias_shape, const TS *bias_data, const Shape &output_shape, + T *output_data, int thread_start, int thread_end, int thread_dim) + : params_(params), input_shape_(input_shape), input_data_(input_data), + filter_shape_(filter_shape), filter_data_(filter_data), bias_shape_(bias_shape), + bias_data_(bias_data), output_shape_(output_shape), output_data_(output_data), + thread_start_(thread_start), thread_end_(thread_end), thread_dim_(thread_dim) + { + } + + void Run() override + { + optimized::DepthwiseConvImpl(params_, input_shape_, input_data_, filter_shape_, filter_data_, + bias_shape_, bias_data_, output_shape_, output_data_, + thread_start_, thread_end_, thread_dim_); + } + +private: + const DepthwiseConvParams ¶ms_; + const Shape &input_shape_; + const T *input_data_; + const Shape &filter_shape_; + const T *filter_data_; + const Shape &bias_shape_; + const TS *bias_data_; + const Shape &output_shape_; + T *output_data_; + // const CpuFlags& cpu_flags_; + int thread_start_; + int thread_end_; + int thread_dim_; +}; + +inline int HowManyConvThreads(const Shape &output_shape, const Shape &filter_shape) +{ + // How many scalar multiplications are needed to make it worth using one + // more thread + static constexpr int kMinMulPerThread = 1 << 13; // 8k + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int num_muls = output_shape.FlatSize() * filter_height * filter_width; + // Try to avoid real runtime divisions if possible by dividing by a + // compile-time constant. + int thread_count = std::max(1, num_muls / kMinMulPerThread); + return thread_count; +} + +inline bool MultithreadAlongBatches(int thread_count, int batches) +{ + assert(thread_count >= 2); + // If there are fewer batch entries than the number of threads we want to use, + // then better do intra-batch-entry multithreading. 
+ if (batches < thread_count) + { + return false; + } + // If there are at least 2 batch entries to be handed to each thread, then + // it's safe to proceed with batch-wise multithreading: each thread will have + // approximately equal number of batch entries to handle, so the load + // balancing will be reasonable, and the amount to which the load is not + // perfectly balanced will be offset by the inherent advantages of + // batch-wise multithreading (each thread is more efficient thanks to working + // on larger buffers with less boundary-handling overhead). + if (batches >= 2 * thread_count) + { + return true; + } + // In the limit case were there are at least 1 but not much more than 1 + // batch entries per thread, it may be a good idea to do per-batch + // multithreading if the number of batch entries is a multiple of the number + // of threads, so that each thread will have the same number of batch entries + // to process. + return ((batches % thread_count) == 0); } +template inline void DepthwiseConv(const DepthwiseConvParams ¶ms, const Shape &input_shape, - const float *input_data, const Shape &filter_shape, - const float *filter_data, const Shape &bias_shape, const float *bias_data, - const Shape &output_shape, float *output_data) + const T *input_data, const Shape &filter_shape, const T *filter_data, + const Shape &bias_shape, const TS *bias_data, const Shape &output_shape, + T *output_data, ruy::Context *ruy_context) { - const int stride_width = params.stride_width; - const int stride_height = params.stride_height; - const int dilation_width_factor = params.dilation_width_factor; - const int dilation_height_factor = params.dilation_height_factor; - const int pad_width = params.padding_values.width; - const int pad_height = params.padding_values.height; - const int depth_multiplier = params.depth_multiplier; - const float output_activation_min = params.float_activation_min; - const float output_activation_max = params.float_activation_max; assert(input_shape.DimensionsCount() == 4); assert(filter_shape.DimensionsCount() == 4); assert(output_shape.DimensionsCount() == 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int input_depth = input_shape.Dims(3); - const int filter_height = filter_shape.Dims(1); - const int filter_width = filter_shape.Dims(2); + int thread_count = HowManyConvThreads(output_shape, filter_shape); + + // NOTE Borrow RuyContext to get max_num_threads setting + // TODO Define and use max_num_threads for CPU backend + const auto max_threads = (ruy_context == nullptr) ? 1 : ruy_context->max_num_threads(); + + thread_count = std::max(1, std::min(thread_count, max_threads)); + // Cap the number of threads to 2 for float path to avoid regression in + // performance (b/132294857). 
+ if (std::is_floating_point::value) + { + thread_count = std::min(thread_count, 2); + } + + const int output_batches = output_shape.Dims(0); const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - assert(output_depth == input_depth * depth_multiplier); - assert(bias_shape.FlatSize() == output_depth); - UNUSED_RELEASE(output_depth); - UNUSED_RELEASE(bias_shape); - for (int b = 0; b < batches; ++b) + if (thread_count == 1) + { + optimized::DepthwiseConvImpl(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, 0, output_height, + 1); + return; + } + + int thread_dim, thread_dim_size; + if (MultithreadAlongBatches(thread_count, output_batches)) + { + thread_dim = 0; + thread_dim_size = output_batches; + } + else + { + thread_dim = 1; + thread_dim_size = output_height; + } + + std::vector> tasks; + // TODO(b/131746020) don't create new heap allocations every time. + // At least we make it a single heap allocation by using reserve(). + tasks.reserve(thread_count); + int thread_start = 0; + for (int i = 0; i < thread_count; ++i) { - for (int out_y = 0; out_y < output_height; ++out_y) - { - for (int out_x = 0; out_x < output_width; ++out_x) - { - for (int ic = 0; ic < input_depth; ++ic) - { - for (int m = 0; m < depth_multiplier; m++) - { - const int oc = m + ic * depth_multiplier; - const int in_x_origin = (out_x * stride_width) - pad_width; - const int in_y_origin = (out_y * stride_height) - pad_height; - float total = 0.f; - for (int filter_y = 0; filter_y < filter_height; ++filter_y) - { - for (int filter_x = 0; filter_x < filter_width; ++filter_x) - { - const int in_x = in_x_origin + dilation_width_factor * filter_x; - const int in_y = in_y_origin + dilation_height_factor * filter_y; - // If the location is outside the bounds of the input image, - // use zero as a default value. - if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) - { - float input_value = input_data[Offset(input_shape, b, in_y, in_x, ic)]; - float filter_value = filter_data[Offset(filter_shape, 0, filter_y, filter_x, oc)]; - total += (input_value * filter_value); - } - } - } - float bias_value = 0.0f; - if (bias_data) - { - bias_value = bias_data[oc]; - } - output_data[Offset(output_shape, b, out_y, out_x, oc)] = ActivationFunctionWithMinMax( - total + bias_value, output_activation_min, output_activation_max); - } - } - } - } + int thread_end = thread_start + (thread_dim_size - thread_start) / (thread_count - i); + tasks.emplace_back(params, input_shape, input_data, filter_shape, filter_data, bias_shape, + bias_data, output_shape, output_data, thread_start, thread_end, thread_dim); + thread_start = thread_end; } + cpu_backend_threadpool::Execute(tasks.size(), tasks.data(), ruy_context); } } // namespace cker diff --git a/compute/cker/include/cker/operation/ELU.h b/compute/cker/include/cker/operation/ELU.h new file mode 100644 index 0000000..6bdd7c6 --- /dev/null +++ b/compute/cker/include/cker/operation/ELU.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_CKER_ELU_H__ +#define __NNFW_CKER_ELU_H__ + +#include "cker/Shape.h" + +#include + +namespace nnfw +{ +namespace cker +{ + +inline void ELU(const Shape &input_shape, const float *input_data, const Shape &output_shape, + float *output_data) +{ + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + const float val = input_data[i]; + output_data[i] = val < 0.0 ? std::exp(val) - 1 : val; + } +} + +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_ELU_H__ diff --git a/compute/cker/include/cker/operation/Einsum.h b/compute/cker/include/cker/operation/Einsum.h index 3d1837f..13fccfd 100644 --- a/compute/cker/include/cker/operation/Einsum.h +++ b/compute/cker/include/cker/operation/Einsum.h @@ -394,8 +394,8 @@ private: for (int label = 0; label < num_labels; ++label) { bool removed = (_output_label_counts[label] == 0); - bool unique = num_inputs == 1 || _input_label_counts[0][label] == 0 || - _input_label_counts[1][label] == 0; + bool unique = + num_inputs == 1 || _input_label_counts[0][label] == 0 || _input_label_counts[1][label] == 0; _label_types[label] = getDimensionType(removed, unique); } } @@ -483,8 +483,8 @@ private: if (inputs[i].shape.DimensionsCount() + 1 < (int32_t)labels->size()) { throw std::runtime_error{"Expected input " + std::to_string(i) + " to have rank at least " + - std::to_string(labels->size() - 1) + " but got: " + - std::to_string(inputs[i].shape.DimensionsCount())}; + std::to_string(labels->size() - 1) + + " but got: " + std::to_string(inputs[i].shape.DimensionsCount())}; } int ellipsis_axis = -1; const int num_bcast_dims = inputs[i].shape.DimensionsCount() - labels->size() + 1; @@ -511,7 +511,7 @@ private: } std::vector::iterator it_input = - std::find(_input_has_ellipsis.begin(), _input_has_ellipsis.end(), true); + std::find(_input_has_ellipsis.begin(), _input_has_ellipsis.end(), true); if (it_input == _input_has_ellipsis.end() && !_output_has_ellipsis) { return; @@ -645,11 +645,11 @@ private: // Reduce along the last axis (i.e axis 1) of the rank-2 Tensor. const int32_t output_size = - reshape[kBroadcasting] * reshape[kBatch] * reshape[kFree] * reshape[kContract]; + reshape[kBroadcasting] * reshape[kBatch] * reshape[kFree] * reshape[kContract]; functor::ReduceFunctor::Reduce( - device, output->shaped({output_size}), - input_deduped.shaped({output_size, reshape[kReduce]}), Eigen::array({1}), - Reducer()); + device, output->shaped({output_size}), + input_deduped.shaped({output_size, reshape[kReduce]}), Eigen::array({1}), + Reducer()); } bool shouldSwapFreeAndContract(const Labels &labels, @@ -779,7 +779,7 @@ private: { const int32_t count = label_counts[label]; const int current_axis = - should_inflate ? strided_shape_dims.size() : inflated_shape_dims.size(); + should_inflate ? 
strided_shape_dims.size() : inflated_shape_dims.size(); const int32_t dim = input.shape.Dims(current_axis); strided_shape_dims.push_back(dim); inflated_shape_dims.insert(inflated_shape_dims.end(), count, dim); @@ -879,7 +879,7 @@ private: for (size_t i = 0; i < inputs.size(); ++i) { const int32_t free_axis = - inputs[i].shape.DimensionsCount() - (swap_free_and_contract[i] ? 1 : 2); + inputs[i].shape.DimensionsCount() - (swap_free_and_contract[i] ? 1 : 2); output_shape.SetDim(i + old_output_shape.DimensionsCount(), inputs[i].shape.Dims(free_axis)); } bool adj_x = swap_free_and_contract[0]; diff --git a/compute/cker/include/cker/operation/Elementwise.h b/compute/cker/include/cker/operation/Elementwise.h index 9d080d8..0e980f1 100644 --- a/compute/cker/include/cker/operation/Elementwise.h +++ b/compute/cker/include/cker/operation/Elementwise.h @@ -98,6 +98,28 @@ inline void Floor(const Shape &input_shape, const float *input_data, const Shape } } +inline void Sqrt(const Shape &input_shape, const float *input_data, const Shape &output_shape, + float *output_data) +{ + const int flat_size = MatchingFlatSize(input_shape, output_shape); + + for (int i = 0; i < flat_size; i++) + { + output_data[i] = std::sqrt(input_data[i]); + } +} + +inline void Square(const Shape &input_shape, const float *input_data, const Shape &output_shape, + float *output_data) +{ + const int flat_size = MatchingFlatSize(input_shape, output_shape); + + for (int i = 0; i < flat_size; i++) + { + output_data[i] = input_data[i] * input_data[i]; + } +} + } // namespace cker } // namespace nnfw diff --git a/compute/cker/include/cker/operation/Fill.h b/compute/cker/include/cker/operation/Fill.h index 14daf98..d657acc 100644 --- a/compute/cker/include/cker/operation/Fill.h +++ b/compute/cker/include/cker/operation/Fill.h @@ -24,27 +24,12 @@ namespace nnfw { namespace cker { -template -inline void Fill(const Shape &input_shape, int *input_data, const T value_data, - const Shape &output_shape, T output_data) +template inline void Fill(const T value_data, const Shape &output_shape, T output_data) { - int input_size = input_shape.FlatSize(); - int output_size = 1; - for (int i = 0; i < input_size; i++) + int output_size = output_shape.FlatSize(); + for (int i = 0; i < output_size; i++) { - output_size *= input_data[i]; - } - - if (output_size == output_shape.FlatSize()) - { - for (int i = 0; i < output_size; i++) - { - output_data[i] = *value_data; - } - } - else - { - throw std::runtime_error("Cker Fill.h: output's size is not matched inferred size of output"); + output_data[i] = *value_data; } } diff --git a/compute/cker/include/cker/operation/FullyConnected.h b/compute/cker/include/cker/operation/FullyConnected.h index 9585324..b7d27e8 100644 --- a/compute/cker/include/cker/operation/FullyConnected.h +++ b/compute/cker/include/cker/operation/FullyConnected.h @@ -117,7 +117,7 @@ inline void FullyConnected(const FullyConnectedParams ¶ms, const Shape &inpu const int filter_dim_count = filter_shape.DimensionsCount(); const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); const int output_depth = - MatchingDim(filter_shape, filter_dim_count - 2, output_shape, output_dim_count - 1); + MatchingDim(filter_shape, filter_dim_count - 2, output_shape, output_dim_count - 1); const int accum_depth = filter_shape.Dims(filter_dim_count - 1); for (int b = 0; b < batches; ++b) { @@ -229,7 +229,7 @@ inline void FullyConnectedSparseWeightRandom(const FullyConnectedParams ¶ms, const int weights_dims_count = 
weights_shape.DimensionsCount(); const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1); const int output_depth = - MatchingDim(weights_shape, weights_dims_count - 2, output_shape, output_dims_count - 1); + MatchingDim(weights_shape, weights_dims_count - 2, output_shape, output_dims_count - 1); const int accum_depth = weights_shape.Dims(weights_dims_count - 1); UNUSED_RELEASE(bias_shape); @@ -249,7 +249,7 @@ inline void FullyConnectedSparseWeightRandom(const FullyConnectedParams ¶ms, { int idx_1 = w1_indices[pw1]; output_data[b * output_depth + idx_0] += - weights_data[pw1] * input_data[b * accum_depth + idx_1]; + weights_data[pw1] * input_data[b * accum_depth + idx_1]; } } } diff --git a/compute/cker/include/cker/operation/FullyConnectedSparse16x1.h b/compute/cker/include/cker/operation/FullyConnectedSparse16x1.h index 28ae7a3..df397f7 100644 --- a/compute/cker/include/cker/operation/FullyConnectedSparse16x1.h +++ b/compute/cker/include/cker/operation/FullyConnectedSparse16x1.h @@ -70,7 +70,7 @@ inline void FullyConnectedSparseWeight16x1(const FullyConnectedParams ¶ms, const int weights_dims_count = weights_shape.DimensionsCount(); const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1); const int output_depth = - MatchingDim(weights_shape, weights_dims_count - 2, output_shape, output_dims_count - 1); + MatchingDim(weights_shape, weights_dims_count - 2, output_shape, output_dims_count - 1); const int accum_depth = weights_shape.Dims(weights_dims_count - 1); UNUSED_RELEASE(bias_shape); diff --git a/compute/cker/include/cker/operation/FusedBatchNorm.h b/compute/cker/include/cker/operation/FusedBatchNorm.h index d17a579..8a97d84 100644 --- a/compute/cker/include/cker/operation/FusedBatchNorm.h +++ b/compute/cker/include/cker/operation/FusedBatchNorm.h @@ -105,7 +105,7 @@ public: float rest_size_inv = static_cast(1.0f / static_cast(rest_size)); // This adjustment is for Bessel's correction float rest_size_adjust = - static_cast(rest_size) / static_cast(rest_size_minus_one); + static_cast(rest_size) / static_cast(rest_size_minus_one); Eigen::Tensor batch_mean(depth); Eigen::Tensor batch_variance(depth); @@ -117,12 +117,12 @@ public: batch_variance.device(d) = x_centered.square().sum(reduce_dims) * rest_size_inv; auto scaling_factor = ((batch_variance + param.epsilon).rsqrt() * scale) - .eval() - .reshape(one_by_depth) - .broadcast(bcast_spec); + .eval() + .reshape(one_by_depth) + .broadcast(bcast_spec); auto x_scaled = x_centered * scaling_factor; auto x_shifted = - (x_scaled + offset.reshape(one_by_depth).broadcast(bcast_spec)).template cast(); + (x_scaled + offset.reshape(one_by_depth).broadcast(bcast_spec)).template cast(); UNUSED_RELEASE(rest_size_adjust); diff --git a/compute/cker/include/cker/operation/Helper/BCast.h b/compute/cker/include/cker/operation/Helper/BCast.h index a0abf29..211db98 100644 --- a/compute/cker/include/cker/operation/Helper/BCast.h +++ b/compute/cker/include/cker/operation/Helper/BCast.h @@ -22,7 +22,7 @@ * ToDo : This file will be moved into upper folder when integrate with other * custom operations. * And It should merged with EinsumHelper's BCast. 
-**/ + **/ #include "cker/Shape.h" #include "cker/eigen/EigenSupport.h" @@ -393,7 +393,7 @@ public: BCast(const Vec &x, const Vec &y, const bool fewer_dims_optimization = true, const bool return_flattened_batch_indices = false) - : BCastList<2>({x, y}, fewer_dims_optimization, return_flattened_batch_indices) + : BCastList<2>({x, y}, fewer_dims_optimization, return_flattened_batch_indices) { } diff --git a/compute/cker/include/cker/operation/Helper/RandomDistributions.h b/compute/cker/include/cker/operation/Helper/RandomDistributions.h index baeafd7..cbebff1 100644 --- a/compute/cker/include/cker/operation/Helper/RandomDistributions.h +++ b/compute/cker/include/cker/operation/Helper/RandomDistributions.h @@ -168,7 +168,7 @@ public: // Must have lo < hi UniformDistribution(int32_t lo, int32_t hi) - : lo_(lo), range_(static_cast(hi) - static_cast(lo)) + : lo_(lo), range_(static_cast(hi) - static_cast(lo)) { } @@ -207,7 +207,7 @@ public: // Must have lo < hi UniformDistribution(int64_t lo, int64_t hi) - : lo_(lo), range_(static_cast(hi) - static_cast(lo)) + : lo_(lo), range_(static_cast(hi) - static_cast(lo)) { } @@ -291,22 +291,22 @@ public: template class UniformFullIntDistribution - : public UniformFullIntDistribution32 + : public UniformFullIntDistribution32 { }; template class UniformFullIntDistribution - : public UniformFullIntDistribution32 + : public UniformFullIntDistribution32 { }; template class UniformFullIntDistribution - : public UniformFullIntDistribution64 + : public UniformFullIntDistribution64 { }; template class UniformFullIntDistribution - : public UniformFullIntDistribution64 + : public UniformFullIntDistribution64 { }; @@ -324,7 +324,7 @@ public: PHILOX_DEVICE_INLINE explicit SingleSampleAdapter(Generator *gen) - : generator_(gen), used_result_index_(Generator::kResultElementCount) + : generator_(gen), used_result_index_(Generator::kResultElementCount) { } @@ -615,8 +615,8 @@ class TruncatedNormalDistribution public: // The number of elements that will be returned. static constexpr int kResultElementCount = (SingleSampleGenerator::kNativeElementCount > 1) - ? SingleSampleGenerator::kNativeElementCount / 2 - : 1; + ? SingleSampleGenerator::kNativeElementCount / 2 + : 1; // Cost of generation of a single element (in cycles). static constexpr int kElementCost = 90; // Indicate that this distribution may take variable number of samples diff --git a/compute/cker/include/cker/operation/Helper/RandomOpCpu.h b/compute/cker/include/cker/operation/Helper/RandomOpCpu.h index 85d2677..6e9ffbd 100644 --- a/compute/cker/include/cker/operation/Helper/RandomOpCpu.h +++ b/compute/cker/include/cker/operation/Helper/RandomOpCpu.h @@ -109,7 +109,7 @@ template struct FillPhiloxRandomTask { const int kGroupSize = Distribution::kResultElementCount; static const int kGeneratorSkipPerOutputGroup = - kGroupSize * kReservedSamplesPerOutput / PhiloxRandom::kResultElementCount; + kGroupSize * kReservedSamplesPerOutput / PhiloxRandom::kResultElementCount; int64_t offset = 0; diff --git a/compute/cker/include/cker/operation/Helper/Tensor.h b/compute/cker/include/cker/operation/Helper/Tensor.h index e6ac008..ec29a15 100644 --- a/compute/cker/include/cker/operation/Helper/Tensor.h +++ b/compute/cker/include/cker/operation/Helper/Tensor.h @@ -29,58 +29,58 @@ template str { // Rank- tensor of scalar type T. typedef Eigen::TensorMap, Eigen::Aligned> - Tensor; + Tensor; typedef Eigen::TensorMap, Eigen::Aligned> - ConstTensor; + ConstTensor; // Unaligned Rank- tensor of scalar type T. 
typedef Eigen::TensorMap> UnalignedTensor; typedef Eigen::TensorMap> - UnalignedConstTensor; + UnalignedConstTensor; typedef Eigen::TensorMap, Eigen::Aligned> - Tensor32Bit; + Tensor32Bit; // Scalar tensor (implemented as a rank-0 tensor) of scalar type T. typedef Eigen::TensorMap, Eigen::RowMajor, IndexType>, Eigen::Aligned> - Scalar; + Scalar; typedef Eigen::TensorMap< - Eigen::TensorFixedSize, Eigen::RowMajor, IndexType>, Eigen::Aligned> - ConstScalar; + Eigen::TensorFixedSize, Eigen::RowMajor, IndexType>, Eigen::Aligned> + ConstScalar; // Unaligned Scalar tensor of scalar type T. typedef Eigen::TensorMap, Eigen::RowMajor, IndexType>> - UnalignedScalar; + UnalignedScalar; typedef Eigen::TensorMap< - Eigen::TensorFixedSize, Eigen::RowMajor, IndexType>> - UnalignedConstScalar; + Eigen::TensorFixedSize, Eigen::RowMajor, IndexType>> + UnalignedConstScalar; // Rank-1 tensor (vector) of scalar type T. typedef Eigen::TensorMap, Eigen::Aligned> Flat; typedef Eigen::TensorMap, Eigen::Aligned> - ConstFlat; + ConstFlat; typedef Eigen::TensorMap, Eigen::Aligned> Vec; typedef Eigen::TensorMap, Eigen::Aligned> - ConstVec; + ConstVec; // Unaligned Rank-1 tensor (vector) of scalar type T. typedef Eigen::TensorMap> UnalignedFlat; typedef Eigen::TensorMap> - UnalignedConstFlat; + UnalignedConstFlat; typedef Eigen::TensorMap> UnalignedVec; typedef Eigen::TensorMap> UnalignedConstVec; // Rank-2 tensor (matrix) of scalar type T. typedef Eigen::TensorMap, Eigen::Aligned> Matrix; typedef Eigen::TensorMap, Eigen::Aligned> - ConstMatrix; + ConstMatrix; // Unaligned Rank-2 tensor (matrix) of scalar type T. typedef Eigen::TensorMap> UnalignedMatrix; typedef Eigen::TensorMap> - UnalignedConstMatrix; + UnalignedConstMatrix; }; typedef typename TTypes::Tensor32Bit::Index Index32; diff --git a/compute/cker/include/cker/operation/InstanceNorm.h b/compute/cker/include/cker/operation/InstanceNorm.h index 6445e8a..8fa8b03 100644 --- a/compute/cker/include/cker/operation/InstanceNorm.h +++ b/compute/cker/include/cker/operation/InstanceNorm.h @@ -78,8 +78,8 @@ inline void InstanceNorm(const InstanceNormParams ¶ms, const Shape &input_sh double input_value = input_data[Offset(output_shape, batch, height, width, channel)]; double output_value = input_value * a + b; output_data[Offset(output_shape, batch, height, width, channel)] = - ActivationFunctionWithMinMax((float)output_value, output_activation_min, - output_activation_max); + ActivationFunctionWithMinMax((float)output_value, output_activation_min, + output_activation_max); } } } diff --git a/compute/cker/include/cker/operation/L2Normalize.h b/compute/cker/include/cker/operation/L2Normalize.h index a0075c3..c1fca91 100644 --- a/compute/cker/include/cker/operation/L2Normalize.h +++ b/compute/cker/include/cker/operation/L2Normalize.h @@ -77,7 +77,7 @@ void L2NormalizeQuant8(L2NormParams ¶ms, const Shape &input_shape, const uin { int32_t diff = *input_data - input_zero_point; int32_t rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp( - 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift); + 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift); int32_t unclamped_output_val = 128 + rescaled_diff; int32_t output_val = std::min(static_cast(255), std::max(static_cast(0), unclamped_output_val)); diff --git a/compute/cker/include/cker/operation/LSTM.h b/compute/cker/include/cker/operation/LSTM.h index 27beaae..a8f1f8c 100644 --- a/compute/cker/include/cker/operation/LSTM.h +++ b/compute/cker/include/cker/operation/LSTM.h @@ -283,23 +283,23 @@ void 
CalculateLstmOutputFloat(int n_batch, int n_cell, int n_output, const float // contiguous, and we manually loop over the batched outputs. // LINT.IfChange inline void LstmStepFloat( - const float *input_ptr, const float *input_to_input_weights_ptr, - const float *input_to_forget_weights_ptr, const float *input_to_cell_weights_ptr, - const float *input_to_output_weights_ptr, const float *aux_input_ptr, - const float *aux_input_to_input_weights_ptr, const float *aux_input_to_forget_weights_ptr, - const float *aux_input_to_cell_weights_ptr, const float *aux_input_to_output_weights_ptr, - const float *recurrent_to_input_weights_ptr, const float *recurrent_to_forget_weights_ptr, - const float *recurrent_to_cell_weights_ptr, const float *recurrent_to_output_weights_ptr, - const float *cell_to_input_weights_ptr, const float *cell_to_forget_weights_ptr, - const float *cell_to_output_weights_ptr, const float *input_layer_norm_coefficients_ptr, - const float *forget_layer_norm_coefficients_ptr, const float *cell_layer_norm_coefficients_ptr, - const float *output_layer_norm_coefficients_ptr, const float *input_gate_bias_ptr, - const float *forget_gate_bias_ptr, const float *cell_gate_bias_ptr, - const float *output_gate_bias_ptr, const float *projection_weights_ptr, - const float *projection_bias_ptr, const LSTMParams *params, int n_batch, int n_cell, - int n_input, int n_aux_input, int n_output, int output_batch_leading_dim, - float *output_state_ptr, float *cell_state_ptr, float *scratch0, float *scratch1, - float *scratch2, float *scratch3, float *output_ptr) + const float *input_ptr, const float *input_to_input_weights_ptr, + const float *input_to_forget_weights_ptr, const float *input_to_cell_weights_ptr, + const float *input_to_output_weights_ptr, const float *aux_input_ptr, + const float *aux_input_to_input_weights_ptr, const float *aux_input_to_forget_weights_ptr, + const float *aux_input_to_cell_weights_ptr, const float *aux_input_to_output_weights_ptr, + const float *recurrent_to_input_weights_ptr, const float *recurrent_to_forget_weights_ptr, + const float *recurrent_to_cell_weights_ptr, const float *recurrent_to_output_weights_ptr, + const float *cell_to_input_weights_ptr, const float *cell_to_forget_weights_ptr, + const float *cell_to_output_weights_ptr, const float *input_layer_norm_coefficients_ptr, + const float *forget_layer_norm_coefficients_ptr, const float *cell_layer_norm_coefficients_ptr, + const float *output_layer_norm_coefficients_ptr, const float *input_gate_bias_ptr, + const float *forget_gate_bias_ptr, const float *cell_gate_bias_ptr, + const float *output_gate_bias_ptr, const float *projection_weights_ptr, + const float *projection_bias_ptr, const LSTMParams *params, int n_batch, int n_cell, int n_input, + int n_aux_input, int n_output, int output_batch_leading_dim, float *output_state_ptr, + float *cell_state_ptr, float *scratch0, float *scratch1, float *scratch2, float *scratch3, + float *output_ptr) { // Since we have already checked that weights are all there or none, we can // check the existence of only one to the get the condition. @@ -314,7 +314,7 @@ inline void LstmStepFloat( // Check if inputs are all zeros so we can skip some computations. const bool is_input_all_zeros = IsZeroVector(input_ptr, n_batch * n_input); const bool is_aux_input_all_zeros = - (aux_input_ptr == nullptr || IsZeroVector(aux_input_ptr, n_batch * n_aux_input)); + (aux_input_ptr == nullptr || IsZeroVector(aux_input_ptr, n_batch * n_aux_input)); if (!use_cifg) { // Calculate the input gate. 
(If not CIFG.) @@ -336,11 +336,11 @@ inline void LstmStepFloat( forget_gate_scratch, is_input_all_zeros, is_aux_input_all_zeros); // Calculate the cell update gate. CalculateLstmGateFloat( - input_ptr, input_to_cell_weights_ptr, aux_input_ptr, aux_input_to_cell_weights_ptr, - output_state_ptr, recurrent_to_cell_weights_ptr, /*cell_state=*/nullptr, - /*cell_to_gate_weights=*/nullptr, cell_layer_norm_coefficients_ptr, cell_gate_bias_ptr, - n_batch, n_input, n_aux_input, n_output, n_cell, params->activation, cell_gate_scratch, - is_input_all_zeros, is_aux_input_all_zeros); + input_ptr, input_to_cell_weights_ptr, aux_input_ptr, aux_input_to_cell_weights_ptr, + output_state_ptr, recurrent_to_cell_weights_ptr, /*cell_state=*/nullptr, + /*cell_to_gate_weights=*/nullptr, cell_layer_norm_coefficients_ptr, cell_gate_bias_ptr, n_batch, + n_input, n_aux_input, n_output, n_cell, params->activation, cell_gate_scratch, + is_input_all_zeros, is_aux_input_all_zeros); // Update the cell state. UpdateLstmCellFloat(n_batch, n_cell, cell_state_ptr, input_gate_scratch, forget_gate_scratch, cell_gate_scratch, use_cifg, params->cell_clip); diff --git a/compute/cker/include/cker/operation/LeakyReLU.h b/compute/cker/include/cker/operation/LeakyReLU.h new file mode 100644 index 0000000..e12d01b --- /dev/null +++ b/compute/cker/include/cker/operation/LeakyReLU.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_CKER_LEKAY_RELU_H__ +#define __NNFW_CKER_LEKAY_RELU_H__ + +#include "cker/Shape.h" +#include "cker/Types.h" + +#include + +namespace nnfw +{ +namespace cker +{ + +inline void LeakyReLU(const LeakyReluParams ¶ms, const Shape &input_shape, + const float *input_data, const Shape &output_shape, float *output_data) +{ + const int flat_size = MatchingFlatSize(input_shape, output_shape); + + for (int i = 0; i < flat_size; i++) + { + const float val = input_data[i]; + // Note that alpha might be > 1 or < 0, so we don't use std::max here. + output_data[i] = val > 0 ? 
val : val * params.alpha; + } +} + +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_RELU_H__ diff --git a/compute/cker/include/cker/operation/LogSoftMax.h b/compute/cker/include/cker/operation/LogSoftMax.h index 326a44f..eb7bdd9 100644 --- a/compute/cker/include/cker/operation/LogSoftMax.h +++ b/compute/cker/include/cker/operation/LogSoftMax.h @@ -71,7 +71,7 @@ inline void LogSoftmax(const SoftmaxParams ¶ms, const Shape &input_shape, for (int c = 0; c < depth; ++c) { output_data[(i * depth + c) * inner_size + j] = - (input_data[(i * depth + c) * inner_size + j] - max) * beta - log_sum; + (input_data[(i * depth + c) * inner_size + j] - max) * beta - log_sum; } } } @@ -124,10 +124,10 @@ inline void LogSoftmax(const SoftmaxParams ¶ms, float input_scale, const Sha for (int c = 0; c < depth; ++c) { const float log_prob = - scale * input_data[(i * depth + c) * inner_size] * beta - precomputed; + scale * input_data[(i * depth + c) * inner_size] * beta - precomputed; const int32_t prob_quantized = std::rint(log_prob) + params.zero_point; output_data[(i * depth + c) * inner_size] = - static_cast(std::max(std::min(clamp_max, prob_quantized), clamp_min)); + static_cast(std::max(std::min(clamp_max, prob_quantized), clamp_min)); } } } diff --git a/compute/cker/include/cker/operation/LogicalAnd.h b/compute/cker/include/cker/operation/LogicalAnd.h new file mode 100644 index 0000000..e877f5f --- /dev/null +++ b/compute/cker/include/cker/operation/LogicalAnd.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_CKER_LOGICAL_AND_H__ +#define __NNFW_CKER_LOGICAL_AND_H__ + +#include "cker/Shape.h" +#include "cker/Utils.h" + +namespace nnfw +{ +namespace cker +{ + +template +inline void LogicalAndBroadcast(const Shape &unextended_input1_shape, const T *input1_data, + const Shape &unextended_input2_shape, const T *input2_data, + const Shape &unextended_output_shape, T *output_data) +{ + assert(unextended_input1_shape.DimensionsCount() <= 4); + assert(unextended_input2_shape.DimensionsCount() <= 4); + assert(unextended_output_shape.DimensionsCount() <= 4); + const Shape output_shape = Shape::ExtendedShape(4, unextended_output_shape); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1, + &desc2); + + for (int b = 0; b < output_shape.Dims(0); ++b) + { + for (int y = 0; y < output_shape.Dims(1); ++y) + { + for (int x = 0; x < output_shape.Dims(2); ++x) + { + for (int c = 0; c < output_shape.Dims(3); ++c) + { + auto out_idx = Offset(output_shape, b, y, x, c); + auto in1_idx = SubscriptToIndex(desc1, b, y, x, c); + auto in2_idx = SubscriptToIndex(desc2, b, y, x, c); + auto in1_val = input1_data[in1_idx]; + auto in2_val = input2_data[in2_idx]; + output_data[out_idx] = in1_val && in2_val; + } + } + } + } +} + +template +inline void LogicalAndElementwise(const Shape &shape, const T *input1_data, const T *input2_data, + T *output_data) +{ + + int num_elements = shape.FlatSize(); + + for (int t = 0; t < num_elements; t++) + { + output_data[t] = input1_data[t] && input2_data[t]; + } +} + +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_LOGICAL_AND_H__ diff --git a/compute/cker/include/cker/operation/MatrixBandPart.h b/compute/cker/include/cker/operation/MatrixBandPart.h index 5674ff3..ef28684 100644 --- a/compute/cker/include/cker/operation/MatrixBandPart.h +++ b/compute/cker/include/cker/operation/MatrixBandPart.h @@ -43,11 +43,11 @@ void MatrixBandPart(const T num_lower_diags, const T num_upper_diags, const Shap if (!(num_lower_diags <= row_num)) throw std::runtime_error( - "MatrixBandPart : num_lower must be negative or less or equal to number of rows"); + "MatrixBandPart : num_lower must be negative or less or equal to number of rows"); if (!(num_upper_diags <= col_num)) throw std::runtime_error( - "MatrixBandPart : num_upper must be negative or less or equal to number of columns"); + "MatrixBandPart : num_upper must be negative or less or equal to number of columns"); std::fill(output_data, output_data + output_shape.FlatSize(), 0); // output matrix init @@ -60,9 +60,10 @@ void MatrixBandPart(const T num_lower_diags, const T num_upper_diags, const Shap auto input = input_data + (batch * row_num * col_num + row * col_num); const T band_start = - num_lower_diags < 0 ? 0 : std::min(col_num, std::max(T{0}, row - num_lower_diags)); - const T band_end = num_upper_diags < 0 ? col_num : std::min(static_cast(col_num), - row + num_upper_diags + 1); + num_lower_diags < 0 ? 0 : std::min(col_num, std::max(T{0}, row - num_lower_diags)); + const T band_end = num_upper_diags < 0 + ? 
col_num + : std::min(static_cast(col_num), row + num_upper_diags + 1); for (T band_idx = band_start; band_idx < band_end; band_idx++) { diff --git a/compute/cker/include/cker/operation/MaxPool.h b/compute/cker/include/cker/operation/MaxPool.h index ea3fcac..5dc84d3 100644 --- a/compute/cker/include/cker/operation/MaxPool.h +++ b/compute/cker/include/cker/operation/MaxPool.h @@ -67,10 +67,10 @@ void MaxPool(const PoolParams ¶ms, const Shape &input_shape, const fl int hpad = h + params.padding_values.height; int wpad = w + params.padding_values.width; int h_start = - (hpad < params.filter_height) ? 0 : (hpad - params.filter_height) / stride_height + 1; + (hpad < params.filter_height) ? 0 : (hpad - params.filter_height) / stride_height + 1; int h_end = std::min(hpad / stride_height + 1, output_height); int w_start = - (wpad < params.filter_width) ? 0 : (wpad - params.filter_width) / stride_width + 1; + (wpad < params.filter_width) ? 0 : (wpad - params.filter_width) / stride_width + 1; int w_end = std::min(wpad / stride_width + 1, output_width); // compute elementwise sum for (int ph = h_start; ph < h_end; ++ph) @@ -79,8 +79,8 @@ void MaxPool(const PoolParams ¶ms, const Shape &input_shape, const fl { int out_offset = NodeOffset(b, ph, pw, output_height, output_width); out_mat.col(out_offset) = - out_mat.col(out_offset) - .cwiseMax(in_mat.col(NodeOffset(b, h, w, input_height, input_width))); + out_mat.col(out_offset) + .cwiseMax(in_mat.col(NodeOffset(b, h, w, input_height, input_width))); } } } @@ -139,8 +139,8 @@ void MaxPool(const PoolParams ¶ms, const Shape &input_shape, const const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin); memset(acc, 0, tranche_depth * sizeof(acc[0])); const uint8_t *input_ptr = - input_data + depth_base + - depth * (in_x_origin + input_width * (in_y_origin + input_height * batch)); + input_data + depth_base + + depth * (in_x_origin + input_width * (in_y_origin + input_height * batch)); for (int fy = filter_y_start; fy < filter_y_end; fy++) { const uint8_t *input_row_ptr = input_ptr + depth * (fy * input_width + filter_x_start); diff --git a/compute/cker/include/cker/operation/OneHot.h b/compute/cker/include/cker/operation/OneHot.h index c0dbc6d..ddc27b4 100644 --- a/compute/cker/include/cker/operation/OneHot.h +++ b/compute/cker/include/cker/operation/OneHot.h @@ -55,7 +55,7 @@ void OneHot(const int32_t depth, const T on_value, const T off_value, int32_t ax for (int k = 0; k < suffix_dim_size; ++k, ++output_data) { *output_data = - static_cast(indices_data[i * suffix_dim_size + k]) == j ? on_value : off_value; + static_cast(indices_data[i * suffix_dim_size + k]) == j ? on_value : off_value; } } } diff --git a/compute/cker/include/cker/operation/Range.h b/compute/cker/include/cker/operation/Range.h index 5c3a773..d6ccc68 100644 --- a/compute/cker/include/cker/operation/Range.h +++ b/compute/cker/include/cker/operation/Range.h @@ -35,8 +35,8 @@ template inline int GetSize(T start, T limit, T delta) } int size = (std::is_integral::value - ? ((std::abs(limit - start) + std::abs(delta) - 1) / std::abs(delta)) - : std::ceil(std::abs((limit - start) / delta))); + ? 
((std::abs(limit - start) + std::abs(delta) - 1) / std::abs(delta)) + : std::ceil(std::abs((limit - start) / delta))); return size; } diff --git a/compute/cker/include/cker/operation/Reduce.h b/compute/cker/include/cker/operation/Reduce.h index 2b2e8d3..dbf9381 100644 --- a/compute/cker/include/cker/operation/Reduce.h +++ b/compute/cker/include/cker/operation/Reduce.h @@ -50,7 +50,7 @@ inline void OptimizedReduceSum(const float *input_data, const Shape &input_shape { int r_idx = 0; float tmp_data[4] = { - 0, + 0, }; float32x4_t tmp_data_32x4 = vld1q_f32(tmp_data); for (; r_idx <= reduce_size - 32; r_idx += 32) @@ -143,7 +143,7 @@ inline bool ReduceImpl(const In *input_data, const Shape &input_shape, const Sha { size_t input_offset = ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr); size_t output_offset = - ReducedOutputOffset(input_num_dims, input_dims, input_iter, num_axis, axis); + ReducedOutputOffset(input_num_dims, input_dims, input_iter, num_axis, axis); output_data[output_offset] = reducer(output_data[output_offset], input_data[input_offset]); } while (NextIndex(input_num_dims, input_dims, input_iter)); return true; @@ -319,7 +319,7 @@ public: for (size_t idx = 0; idx < num_outputs; ++idx) { const U value = - static_cast(std::round(temp_sum[idx] * scale + bias)) + output_zero_point; + static_cast(std::round(temp_sum[idx] * scale + bias)) + output_zero_point; output_data[idx] = static_cast(value); } } @@ -329,7 +329,7 @@ public: for (size_t idx = 0; idx < num_outputs; ++idx) { float float_mean = - static_cast(temp_sum[idx]) / static_cast(num_elements_in_axis); + static_cast(temp_sum[idx]) / static_cast(num_elements_in_axis); float result = std::min(std::round(float_mean * scale + bias) + output_zero_point, static_cast(std::numeric_limits::max())); result = std::max(result, static_cast(std::numeric_limits::min())); diff --git a/compute/cker/include/cker/operation/ReduceMean.h b/compute/cker/include/cker/operation/ReduceMean.h index 2e4fc62..924e850 100644 --- a/compute/cker/include/cker/operation/ReduceMean.h +++ b/compute/cker/include/cker/operation/ReduceMean.h @@ -72,9 +72,9 @@ inline bool ReduceMeanImpl(const In *input_data, const Shape &input_shape, const { size_t input_offset = ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr); size_t output_offset = - ReducedOutputOffset(input_num_dims, input_dims, input_iter, num_axis, axis); + ReducedOutputOffset(input_num_dims, input_dims, input_iter, num_axis, axis); output_data[output_offset] = - reducer(output_data[output_offset], input_data[input_offset], normalizer); + reducer(output_data[output_offset], input_data[input_offset], normalizer); } while (NextIndex(input_num_dims, input_dims, input_iter)); return true; } @@ -102,7 +102,7 @@ inline size_t ReduceSumQuantImpl(const In *input_data, const Shape &input_shape, { size_t input_offset = ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr); size_t output_offset = - ReducedOutputOffset(input_num_dims, input_dims, input_iter, num_axis, axis); + ReducedOutputOffset(input_num_dims, input_dims, input_iter, num_axis, axis); temp_sum[output_offset] = reducer(temp_sum[output_offset], input_data[input_offset]); } while (NextIndex(input_num_dims, input_dims, input_iter)); return normalizer; @@ -185,8 +185,8 @@ public: } size_t normalizer = - ReduceSumQuantImpl(input_data, input_shape, resolved_axis_data(), num_resolved_axis, - temp_index_data(), reducer, _temp_sum.data()); + ReduceSumQuantImpl(input_data, input_shape, 
resolved_axis_data(), num_resolved_axis, + temp_index_data(), reducer, _temp_sum.data()); if (num_outputs > 0) { float scale = input_scale / output_scale; @@ -231,6 +231,37 @@ void MeanQ8Asymm(const Shape &input_shape, const In *input_data, float input_sca sum_reducer); } +template +void MeanAxis1And2(const Shape &input_shape, const In *input_data, const Shape &output_shape, + Out *output_data) +{ + UNUSED_RELEASE(output_shape); + assert(input_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + + const int output_batch = output_shape.Dims(0); + const int output_depth = output_shape.Dims(3); + + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + + for (int out_b = 0; out_b < output_batch; ++out_b) + { + for (int out_d = 0; out_d < output_depth; ++out_d) + { + float value = 0; + for (int in_h = 0; in_h < input_height; ++in_h) + { + for (int in_w = 0; in_w < input_width; ++in_w) + { + value += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)]; + } + } + output_data[Offset(output_shape, out_b, 0, 0, out_d)] = value / (input_width * input_height); + } + } +} + } // namespace cker } // namespace nnfw diff --git a/compute/cker/include/cker/operation/ResizeBilinear.h b/compute/cker/include/cker/operation/ResizeBilinear.h index 7fc1e91..8d9a749 100644 --- a/compute/cker/include/cker/operation/ResizeBilinear.h +++ b/compute/cker/include/cker/operation/ResizeBilinear.h @@ -62,7 +62,7 @@ inline void ResizeBilinearKernel2x2(int32_t x0, int32_t x1, int32_t y0, int32_t // Bottom right corner. output_data[output_offset + output_x_offset + output_y_offset] = - (output + ((x1y0 + x1y1) / 2)) / 2; + (output + ((x1y0 + x1y1) / 2)) / 2; } } @@ -192,8 +192,8 @@ inline void ResizeBilinearGenericSmallChannel(int32_t batches, int32_t input_hei &x1); int32_t input_offset[4] = { - Offset(input_shape, b, y0, x0, 0), Offset(input_shape, b, y0, x1, 0), - Offset(input_shape, b, y1, x0, 0), Offset(input_shape, b, y1, x1, 0)}; + Offset(input_shape, b, y0, x0, 0), Offset(input_shape, b, y0, x1, 0), + Offset(input_shape, b, y1, x0, 0), Offset(input_shape, b, y1, x1, 0)}; float scale[4] = {(1 - (input_y - y0)) * (1 - (input_x - x0)), (1 - (input_y - y0)) * (input_x - x0), (input_y - y0) * (1 - (input_x - x0)), (input_y - y0) * (input_x - x0)}; @@ -202,8 +202,8 @@ inline void ResizeBilinearGenericSmallChannel(int32_t batches, int32_t input_hei { const T *input_ptr = &input_data[d]; *output_ptr++ = static_cast( - input_ptr[input_offset[0]] * scale[0] + input_ptr[input_offset[1]] * scale[1] + - input_ptr[input_offset[2]] * scale[2] + input_ptr[input_offset[3]] * scale[3]); + input_ptr[input_offset[0]] * scale[0] + input_ptr[input_offset[1]] * scale[1] + + input_ptr[input_offset[2]] * scale[2] + input_ptr[input_offset[3]] * scale[3]); } } } @@ -253,16 +253,16 @@ void ResizeBilinear(ResizeBilinearParams ¶ms, const Shape &input_shape, int32_t depth = MatchingDim(input_shape, 3, output_shape, 3); float height_scale = (params.align_corners && params.output_height > 1) - ? (static_cast(input_height - 1) / (params.output_height - 1)) - : (static_cast(input_height) / params.output_height); + ? (static_cast(input_height - 1) / (params.output_height - 1)) + : (static_cast(input_height) / params.output_height); float width_scale = (params.align_corners && params.output_width > 1) - ? (static_cast(input_width - 1) / (params.output_width - 1)) - : (static_cast(input_width) / params.output_width); + ? 
(static_cast(input_width - 1) / (params.output_width - 1)) + : (static_cast(input_width) / params.output_width); ResizeBilinearGenericSmallChannel( - batches, input_height, input_width, depth, params.output_height, params.output_width, - height_scale, width_scale, input_shape, input_data, output_data, params.half_pixel_centers); + batches, input_height, input_width, depth, params.output_height, params.output_width, + height_scale, width_scale, input_shape, input_data, output_data, params.half_pixel_centers); } } // namespace cker } // namespace nnfw diff --git a/compute/cker/include/cker/operation/Select.h b/compute/cker/include/cker/operation/Select.h index ab2de94..644fe0a 100644 --- a/compute/cker/include/cker/operation/Select.h +++ b/compute/cker/include/cker/operation/Select.h @@ -34,7 +34,7 @@ void Select(const Shape &input_condition_shape, const D *input_condition_data, const T *input_y_data, const Shape &output_shape, T *output_data) { const int64_t flatsize = - MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape); + MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape); for (int64_t i = 0; i < flatsize; ++i) { output_data[i] = (input_condition_data[i] != 0) ? input_x_data[i] : input_y_data[i]; @@ -101,7 +101,7 @@ void BroadcastSelect4DSlow(const Shape &input_condition_shape, const D *input_co const int x_index = SubscriptToIndex(desc_x, b, y, x, c); const int y_index = SubscriptToIndex(desc_y, b, y, x, c); output_data[Offset(extended_output_shape, b, y, x, c)] = - input_condition_data[condition_index] ? input_x_data[x_index] : input_y_data[y_index]; + input_condition_data[condition_index] ? input_x_data[x_index] : input_y_data[y_index]; } } } diff --git a/compute/cker/include/cker/operation/Slice.h b/compute/cker/include/cker/operation/Slice.h index a072cff..ef97fd5 100644 --- a/compute/cker/include/cker/operation/Slice.h +++ b/compute/cker/include/cker/operation/Slice.h @@ -43,16 +43,16 @@ inline void Slice(const SliceParams &op_params, const Shape &input_shape, : start_b + op_params.size[0]; const int start_h = begin_count < 3 ? 0 : op_params.begin[begin_count - 3]; const int stop_h = (size_count < 3 || op_params.size[size_count - 3] == -1) - ? input_shape.Dims(1) - : start_h + op_params.size[size_count - 3]; + ? input_shape.Dims(1) + : start_h + op_params.size[size_count - 3]; const int start_w = begin_count < 2 ? 0 : op_params.begin[begin_count - 2]; const int stop_w = (size_count < 2 || op_params.size[size_count - 2] == -1) - ? input_shape.Dims(2) - : start_w + op_params.size[size_count - 2]; + ? input_shape.Dims(2) + : start_w + op_params.size[size_count - 2]; const int start_d = begin_count < 1 ? 0 : op_params.begin[begin_count - 1]; const int stop_d = (size_count < 1 || op_params.size[size_count - 1] == -1) - ? input_shape.Dims(3) - : start_d + op_params.size[size_count - 1]; + ? 
input_shape.Dims(3) + : start_d + op_params.size[size_count - 1]; for (int in_b = start_b; in_b < stop_b; ++in_b) { diff --git a/compute/cker/include/cker/operation/SoftMax.h b/compute/cker/include/cker/operation/SoftMax.h index 0e0f364..620c1f9 100644 --- a/compute/cker/include/cker/operation/SoftMax.h +++ b/compute/cker/include/cker/operation/SoftMax.h @@ -65,7 +65,7 @@ inline void Softmax(const SoftmaxParams ¶ms, const Shape &input_shape, const for (int c = 0; c < depth; ++c) { output_data[i * depth + c] = - std::exp((input_data[i * depth + c] - max) * static_cast(params.beta)) / sum; + std::exp((input_data[i * depth + c] - max) * static_cast(params.beta)) / sum; } } } @@ -163,11 +163,11 @@ inline void Softmax(const SoftmaxParams ¶ms, const Shape &input_shape, if (input_diff >= diff_min) { const int32_t input_diff_rescaled = MultiplyByQuantizedMultiplierGreaterThanOne( - input_diff, input_beta_multiplier, input_beta_left_shift); + input_diff, input_beta_multiplier, input_beta_left_shift); const FixedPointScaledDiff scaled_diff_f8 = - FixedPointScaledDiff::FromRaw(input_diff_rescaled); + FixedPointScaledDiff::FromRaw(input_diff_rescaled); sum_of_exps = sum_of_exps + gemmlowp::Rescale( - exp_on_negative_values(scaled_diff_f8)); + exp_on_negative_values(scaled_diff_f8)); } } @@ -178,11 +178,11 @@ inline void Softmax(const SoftmaxParams ¶ms, const Shape &input_shape, // no later adjustment will be needed. int num_bits_over_unit = kAccumulationIntegerBits - headroom_plus_one; int32_t shifted_sum_minus_one = - static_cast((static_cast(fixed_sum_of_exps) << headroom_plus_one) - - (static_cast(1) << 31)); + static_cast((static_cast(fixed_sum_of_exps) << headroom_plus_one) - + (static_cast(1) << 31)); FixedPoint0 shifted_scale = - one_over_one_plus_x_for_x_in_0_1(FixedPoint0::FromRaw(shifted_sum_minus_one)); + one_over_one_plus_x_for_x_in_0_1(FixedPoint0::FromRaw(shifted_sum_minus_one)); for (int c = 0; c < depth; ++c) { @@ -190,16 +190,16 @@ inline void Softmax(const SoftmaxParams ¶ms, const Shape &input_shape, if (input_diff >= diff_min) { const int32_t input_diff_rescaled = MultiplyByQuantizedMultiplierGreaterThanOne( - input_diff, input_beta_multiplier, input_beta_left_shift); + input_diff, input_beta_multiplier, input_beta_left_shift); const FixedPointScaledDiff scaled_diff_f8 = - FixedPointScaledDiff::FromRaw(input_diff_rescaled); + FixedPointScaledDiff::FromRaw(input_diff_rescaled); FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8); int32_t unsat_output = gemmlowp::RoundingDivideByPOT((shifted_scale * exp_in_0).raw(), num_bits_over_unit + 31 - 8); output_data[i * depth + c] = static_cast( - std::max(std::min(unsat_output, static_cast(255)), static_cast(0))); + std::max(std::min(unsat_output, static_cast(255)), static_cast(0))); } else { diff --git a/compute/cker/include/cker/operation/SpaceToBatchND.h b/compute/cker/include/cker/operation/SpaceToBatchND.h index feeb358..aff36e2 100644 --- a/compute/cker/include/cker/operation/SpaceToBatchND.h +++ b/compute/cker/include/cker/operation/SpaceToBatchND.h @@ -79,9 +79,9 @@ inline void SpaceToBatchND(const SpaceToBatchParams ¶ms, const Shape &unexte else { const T *in = - input_data + Offset(input_shape, input_batch, - (out_h * block_shape_height + shift_h) - padding_top, - (out_w * block_shape_width + shift_w) - padding_left, 0); + input_data + Offset(input_shape, input_batch, + (out_h * block_shape_height + shift_h) - padding_top, + (out_w * block_shape_width + shift_w) - padding_left, 0); memcpy(out, in, depth * sizeof(T)); } } 
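A minimal standalone sketch of the thread-partitioning heuristics introduced in the DepthwiseConv.h hunk above, assuming the same split formula; the function names mirror the patch (HowManyConvThreads, the [thread_start, thread_end) ranges handed to DepthwiseConvWorkerTask), but this file and the shapes in main() are illustrative only and not part of the patched sources.

// depthwise_conv_split_sketch.cc -- illustration of the threading heuristics above.
#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

// Mirrors HowManyConvThreads() from the DepthwiseConv.h hunk: request roughly one
// thread per 8k scalar multiplications of output work.
int HowManyConvThreadsSketch(int output_flat_size, int filter_height, int filter_width)
{
  constexpr int kMinMulPerThread = 1 << 13; // 8k
  const int num_muls = output_flat_size * filter_height * filter_width;
  return std::max(1, num_muls / kMinMulPerThread);
}

// Mirrors the loop that builds DepthwiseConvWorkerTask objects: split
// thread_dim_size elements into thread_count contiguous [start, end) ranges.
std::vector<std::pair<int, int>> SplitRanges(int thread_dim_size, int thread_count)
{
  std::vector<std::pair<int, int>> ranges;
  int thread_start = 0;
  for (int i = 0; i < thread_count; ++i)
  {
    const int thread_end = thread_start + (thread_dim_size - thread_start) / (thread_count - i);
    ranges.emplace_back(thread_start, thread_end);
    thread_start = thread_end;
  }
  return ranges;
}

int main()
{
  // Hypothetical 1x32x32x8 output with a 3x3 filter: 8192 * 9 multiplications,
  // i.e. 9 suggested threads before the ruy max_num_threads / float-path caps.
  const int thread_count = HowManyConvThreadsSketch(1 * 32 * 32 * 8, 3, 3);
  std::printf("suggested threads: %d\n", thread_count);

  // Splitting 10 output rows over 3 threads yields [0,3) [3,6) [6,10).
  for (const auto &r : SplitRanges(/*thread_dim_size=*/10, /*thread_count=*/3))
    std::printf("[%d, %d)\n", r.first, r.second);
  return 0;
}

One thread per ~8k multiplications keeps each task large enough to amortize threadpool dispatch, and advancing by (remaining / threads_left) keeps the range sizes within one element of each other, so the load stays balanced whether the split runs along the batch or the height dimension.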
diff --git a/compute/cker/include/cker/operation/StatelessRandomUniform.h b/compute/cker/include/cker/operation/StatelessRandomUniform.h index d5952ae..cdd812a 100644 --- a/compute/cker/include/cker/operation/StatelessRandomUniform.h +++ b/compute/cker/include/cker/operation/StatelessRandomUniform.h @@ -95,7 +95,7 @@ inline void StatelessRandomUniform(const Shape &shape_shape, const int *shape_da GenerateKey(seed_t, &key, &counter); Fill>( - random::PhiloxRandom(counter, key), &output_t); + random::PhiloxRandom(counter, key), &output_t); } } // namespace cker } // namespace nnfw diff --git a/compute/cker/include/cker/operation/Tile.h b/compute/cker/include/cker/operation/Tile.h index 1dcdd9b..4243346 100644 --- a/compute/cker/include/cker/operation/Tile.h +++ b/compute/cker/include/cker/operation/Tile.h @@ -55,7 +55,7 @@ std::pair TileOneDimension(const Shape &in_dimensions, const T *in_dat { int stride_size = 0, tiled_stride_size = 0; std::tie(stride_size, tiled_stride_size) = - TileOneDimension(in_dimensions, copy_from_data, multipliers, copy_to_data, dimension + 1); + TileOneDimension(in_dimensions, copy_from_data, multipliers, copy_to_data, dimension + 1); copy_from_data += stride_size; copy_to_data += tiled_stride_size; total_stride_size += stride_size; diff --git a/compute/cker/include/cker/operation/Transpose.h b/compute/cker/include/cker/operation/Transpose.h index 9d8cd34..62eb432 100644 --- a/compute/cker/include/cker/operation/Transpose.h +++ b/compute/cker/include/cker/operation/Transpose.h @@ -555,9 +555,9 @@ void Transpose(const TransposeParams &unshrunk_params, const Shape &unshrunk_inp const int total_size = shrunk_input_shape.FlatSize(); const int non_flatten_size = - Flatten(shrunk_input_shape, shrunk_output_shape, shrunk_params, + Flatten(shrunk_input_shape, shrunk_output_shape, shrunk_params, - &non_flatten_input_shape, &non_flatten_output_shape, &non_flatten_params); + &non_flatten_input_shape, &non_flatten_output_shape, &non_flatten_params); assert(non_flatten_params.perm[0] != 0); for (int i = 0; i < total_size; i += non_flatten_size) diff --git a/compute/cker/include/cker/operation/TransposeConv.h b/compute/cker/include/cker/operation/TransposeConv.h index 7db3a11..d41f860 100644 --- a/compute/cker/include/cker/operation/TransposeConv.h +++ b/compute/cker/include/cker/operation/TransposeConv.h @@ -90,11 +90,11 @@ inline void TransposeConv(const TransposeConvParams ¶ms, const Shape &input_ (out_y < output_height)) { float input_value = - input_data[Offset(input_shape, batch, in_y, in_x, in_channel)]; - float filter_value = filter_data[Offset(filter_shape, out_channel, filter_y, - filter_x, in_channel)]; + input_data[Offset(input_shape, batch, in_y, in_x, in_channel)]; + float filter_value = + filter_data[Offset(filter_shape, out_channel, filter_y, filter_x, in_channel)]; output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] += - input_value * filter_value; + input_value * filter_value; } } } diff --git a/compute/cker/include/cker/operation/optimized/BinaryArithmeticOps.h b/compute/cker/include/cker/operation/optimized/BinaryArithmeticOps.h index 912b01a..8c1d31b 100644 --- a/compute/cker/include/cker/operation/optimized/BinaryArithmeticOps.h +++ b/compute/cker/include/cker/operation/optimized/BinaryArithmeticOps.h @@ -130,12 +130,12 @@ inline int32_t quant8_sum(const BinaryArithmeticOpParam ¶ms, const uint8_t i const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); const int32_t shifted_input2_val = input2_val * (1 << 
params.left_shift); const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input1_val, params.input1_multiplier, params.input1_shift); + shifted_input1_val, params.input1_multiplier, params.input1_shift); const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input2_val, params.input2_multiplier, params.input2_shift); + shifted_input2_val, params.input2_multiplier, params.input2_shift); const int32_t raw_sum = scaled_input1_val + scaled_input2_val; const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp( - raw_sum, params.output_multiplier, params.output_shift) + + raw_sum, params.output_multiplier, params.output_shift) + params.output_offset; const int32_t clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output)); @@ -192,9 +192,9 @@ inline void AddElementwiseQuant8(int size, const BinaryArithmeticOpParam ¶ms const int16x4_t s1_narrowed = vmovn_s32(s1); const int16x4_t s2_narrowed = vmovn_s32(s2); const int16x8_t s = - vaddq_s16(vcombine_s16(s1_narrowed, s2_narrowed), vdupq_n_s16(params.output_offset)); - const uint8x8_t clamped = vmax_u8(output_activation_min_vector, - vmin_u8(output_activation_max_vector, vqmovun_s16(s))); + vaddq_s16(vcombine_s16(s1_narrowed, s2_narrowed), vdupq_n_s16(params.output_offset)); + const uint8x8_t clamped = + vmax_u8(output_activation_min_vector, vmin_u8(output_activation_max_vector, vqmovun_s16(s))); vst1_u8(output_data + i, clamped); } #endif // NEON @@ -205,12 +205,12 @@ inline void AddElementwiseQuant8(int size, const BinaryArithmeticOpParam ¶ms const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input1_val, params.input1_multiplier, params.input1_shift); + shifted_input1_val, params.input1_multiplier, params.input1_shift); const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input2_val, params.input2_multiplier, params.input2_shift); + shifted_input2_val, params.input2_multiplier, params.input2_shift); const int32_t raw_sum = scaled_input1_val + scaled_input2_val; const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp( - raw_sum, params.output_multiplier, params.output_shift) + + raw_sum, params.output_multiplier, params.output_shift) + params.output_offset; const int32_t clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output)); @@ -387,7 +387,7 @@ inline void BinaryOpElementwise(int size, const BinaryArithmeticOpParam ¶ms, auto a2 = vld1q_f32(input2_data + i); auto x = OPERATOR::calculate(a1, a2); // vaddq auto x_clamped = - ACTIVATION::applyCeiling(ACTIVATION::applyFloor(x, activation_min), activation_max); + ACTIVATION::applyCeiling(ACTIVATION::applyFloor(x, activation_min), activation_max); vst1q_f32(output_data + i, x_clamped); } #endif // USE_NEON @@ -395,7 +395,7 @@ inline void BinaryOpElementwise(int size, const BinaryArithmeticOpParam ¶ms, { auto x = OPERATOR::calculate(input1_data[i], input2_data[i]); output_data[i] = ACTIVATION::applyCeiling( - ACTIVATION::applyFloor(x, params.float_activation_min), params.float_activation_max); + ACTIVATION::applyFloor(x, params.float_activation_min), params.float_activation_max); } } @@ -441,7 +441,7 @@ inline void BinaryOpScalarBroadcast(int size, const 
BinaryArithmeticOpParam &par auto a2 = vld1q_f32(input2_data + i); auto x = OPERATOR::calculate(broadcast_value_dup, a2); auto x_clamped = - ACTIVATION::applyCeiling(ACTIVATION::applyFloor(x, activation_min), activation_max); + ACTIVATION::applyCeiling(ACTIVATION::applyFloor(x, activation_min), activation_max); vst1q_f32(output_data + i, x_clamped); } #endif // USE_NEON @@ -449,13 +449,13 @@ inline void BinaryOpScalarBroadcast(int size, const BinaryArithmeticOpParam &par { auto x = OPERATOR::calculate(broadcast_value, input2_data[i]); output_data[i] = ACTIVATION::applyCeiling( - ACTIVATION::applyFloor(x, params.float_activation_min), params.float_activation_max); + ACTIVATION::applyFloor(x, params.float_activation_min), params.float_activation_max); } } using BinaryOpImplFloatFuncs = - std::pair; + std::pair; template inline BinaryOpImplFloatFuncs @@ -514,23 +514,22 @@ inline void BroadcastAddDispatchQuant8(const BinaryArithmeticOpParam ¶ms, if (params.broadcast_category == BroadcastableOpCategory::kGenericBroadcast) { const std::function - fn = [](const BinaryArithmeticOpParam ¶ms, const uint8_t &a, - const uint8_t &b) -> uint8_t { + fn = + [](const BinaryArithmeticOpParam ¶ms, const uint8_t &a, const uint8_t &b) -> uint8_t { return static_cast(quant8_sum(params, a, b)); }; - reference::BroadcastBinaryArithmeticOpSlowQuant8(params, input1_shape, input1_data, - input2_shape, input2_data, output_shape, - output_data, fn); + reference::BroadcastBinaryArithmeticOpSlowQuant8( + params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, fn); } else { BinaryBroadcastFiveFold( - params, params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast, - input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, - static_cast(AddElementwiseQuant8), - static_cast(AddScalarBroadcastQuant8)); + params, params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast, + input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, + static_cast(AddElementwiseQuant8), + static_cast(AddScalarBroadcastQuant8)); } } @@ -542,7 +541,7 @@ inline void BroadcastAddDispatch(const BinaryArithmeticOpParam ¶ms, const Sh if (params.broadcast_category == BroadcastableOpCategory::kGenericBroadcast) { const std::function fn = - [](const float &a, const float &b) -> float { return a + b; }; + [](const float &a, const float &b) -> float { return a + b; }; reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, fn); } @@ -550,10 +549,10 @@ inline void BroadcastAddDispatch(const BinaryArithmeticOpParam ¶ms, const Sh { auto implFuncs = getBinaryOpWithActivationImplFloat(params); - BinaryBroadcastFiveFold(params, params.broadcast_category == - BroadcastableOpCategory::kSecondInputBroadcastsFast, - input1_shape, input1_data, input2_shape, input2_data, output_shape, - output_data, implFuncs.first, implFuncs.second); + BinaryBroadcastFiveFold( + params, params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast, + input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, + implFuncs.first, implFuncs.second); } } @@ -580,14 +579,14 @@ inline void BroadcastSubDispatch(const BinaryArithmeticOpParam ¶ms, const Sh else if (params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast) { auto implFuncs = - getBinaryOpWithActivationImplFloat>(params); + 
getBinaryOpWithActivationImplFloat>(params); BinaryBroadcastFiveFold(params, true, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, implFuncs.first, implFuncs.second); } else { const std::function fn = - [](const float &a, const float &b) -> float { return a - b; }; + [](const float &a, const float &b) -> float { return a - b; }; reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, fn); } @@ -599,11 +598,11 @@ inline int32_t quant8_mul(const BinaryArithmeticOpParam ¶ms, const uint8_t i const int32_t input1_val = params.input1_offset + input1_data; const int32_t input2_val = params.input2_offset + input2_data; const int32_t unclamped_result = - params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val, - params.output_multiplier, - params.output_shift); + params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val, + params.output_multiplier, + params.output_shift); const int32_t clamped_output = std::min( - params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result)); + params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result)); return clamped_output; } @@ -652,8 +651,8 @@ inline void MulElementwiseQuant8(int size, const BinaryArithmeticOpParam ¶ms const auto p1_narrowed = vqmovn_s32(p1); const auto p2_narrowed = vqmovn_s32(p2); const auto p = vaddq_s16(vcombine_s16(p1_narrowed, p2_narrowed), output_offset_vector); - const auto clamped = vmax_u8(output_activation_min_vector, - vmin_u8(output_activation_max_vector, vqmovun_s16(p))); + const auto clamped = + vmax_u8(output_activation_min_vector, vmin_u8(output_activation_max_vector, vqmovun_s16(p))); vst1_u8(output_data + i, clamped); } #endif // NEON @@ -663,12 +662,11 @@ inline void MulElementwiseQuant8(int size, const BinaryArithmeticOpParam ¶ms const int32_t input1_val = params.input1_offset + input1_data[i]; const int32_t input2_val = params.input2_offset + input2_data[i]; const int32_t unclamped_result = - params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val, - params.output_multiplier, - params.output_shift); - const int32_t clamped_output = - std::min(params.quantized_activation_max, - std::max(params.quantized_activation_min, unclamped_result)); + params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val, + params.output_multiplier, + params.output_shift); + const int32_t clamped_output = std::min( + params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result)); output_data[i] = static_cast(clamped_output); } } @@ -711,22 +709,21 @@ inline void BroadcastMulDispatchQuant8(const BinaryArithmeticOpParam ¶ms, if (params.broadcast_category == BroadcastableOpCategory::kGenericBroadcast) { const std::function - fn = [](const BinaryArithmeticOpParam ¶ms, const uint8_t &a, - const uint8_t &b) -> uint8_t { + fn = + [](const BinaryArithmeticOpParam ¶ms, const uint8_t &a, const uint8_t &b) -> uint8_t { return static_cast(quant8_mul(params, a, b)); }; - reference::BroadcastBinaryArithmeticOpSlowQuant8(params, input1_shape, input1_data, - input2_shape, input2_data, output_shape, - output_data, fn); + reference::BroadcastBinaryArithmeticOpSlowQuant8( + params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, fn); return; } BinaryBroadcastFiveFold( - params, params.broadcast_category == 
BroadcastableOpCategory::kSecondInputBroadcastsFast, - input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, - static_cast(MulElementwiseQuant8), - static_cast(MulSimpleBroadcastQuant8)); + params, params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast, + input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, + static_cast(MulElementwiseQuant8), + static_cast(MulSimpleBroadcastQuant8)); } inline void BroadcastMulDispatch(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, @@ -738,16 +735,16 @@ inline void BroadcastMulDispatch(const BinaryArithmeticOpParam ¶ms, const Sh { // TODO: Use GetBinaryArithmeticFn const std::function fn = - [](const float &a, const float &b) -> float { return a * b; }; + [](const float &a, const float &b) -> float { return a * b; }; reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, fn); return; } auto implFuncs = getBinaryOpWithActivationImplFloat(params); - BinaryBroadcastFiveFold(params, params.broadcast_category == - BroadcastableOpCategory::kSecondInputBroadcastsFast, - input1_shape, input1_data, input2_shape, input2_data, output_shape, - output_data, implFuncs.first, implFuncs.second); + BinaryBroadcastFiveFold( + params, params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast, + input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, + implFuncs.first, implFuncs.second); } inline void Div(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, @@ -760,7 +757,7 @@ inline void Div(const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape (*implFuncs.first)(flat_size, params, input1_data, input2_data, output_data); #else const std::function fn = - [](const float &a, const float &b) -> float { return a / b; }; + [](const float &a, const float &b) -> float { return a / b; }; reference::BinaryArithmeticOp(params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, fn); #endif // __aarch64__ @@ -781,7 +778,7 @@ inline void BroadcastDivDispatch(const BinaryArithmeticOpParam ¶ms, const Sh else if (params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast) { auto implFuncs = - getBinaryOpWithActivationImplFloat>(params); + getBinaryOpWithActivationImplFloat>(params); BinaryBroadcastFiveFold(params, true, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, implFuncs.first, implFuncs.second); } @@ -789,7 +786,7 @@ inline void BroadcastDivDispatch(const BinaryArithmeticOpParam ¶ms, const Sh #endif // __aarch64__ { const std::function fn = - [](const float &a, const float &b) -> float { return a / b; }; + [](const float &a, const float &b) -> float { return a / b; }; reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, fn); } diff --git a/compute/cker/include/cker/operation/optimized/Conv.h b/compute/cker/include/cker/operation/optimized/Conv.h index 0f62014..26fc443 100644 --- a/compute/cker/include/cker/operation/optimized/Conv.h +++ b/compute/cker/include/cker/operation/optimized/Conv.h @@ -48,7 +48,7 @@ struct GemmlowpOutputPipeline typedef std::tuple, gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent, gemmlowp::OutputStageClamp, gemmlowp::OutputStageSaturatingCastToUint8> - Pipeline; + Pipeline; static Pipeline MakeExp(const int32_t *bias_data, int output_rows, int32_t 
output_offset, int32_t output_multiplier, int output_left_shift, int32_t output_activation_min, int32_t output_activation_max) @@ -106,7 +106,7 @@ inline void Conv(const ConvParams ¶ms, const Shape &input_shape, const uint8 const int filter_height = filter_shape.Dims(1); const bool need_dilated_im2col = dilation_width_factor != 1 || dilation_height_factor != 1; const bool need_im2col = - stride_width != 1 || stride_height != 1 || filter_width != 1 || filter_height != 1; + stride_width != 1 || stride_height != 1 || filter_width != 1 || filter_height != 1; if (need_dilated_im2col) { assert(im2col_data); @@ -141,7 +141,7 @@ inline void Conv(const ConvParams ¶ms, const Shape &input_shape, const uint8 // the other calls commented out. This is a partial rollback of cl/196819423. // const int gemm_input_cols = FlatSizeSkipDim(*gemm_input_shape, 3); const int gemm_input_cols = - gemm_input_shape->Dims(0) * gemm_input_shape->Dims(1) * gemm_input_shape->Dims(2); + gemm_input_shape->Dims(0) * gemm_input_shape->Dims(1) * gemm_input_shape->Dims(2); const int filter_rows = filter_shape.Dims(0); // See b/79927784. // const int filter_cols = FlatSizeSkipDim(filter_shape, 0); @@ -156,17 +156,17 @@ inline void Conv(const ConvParams ¶ms, const Shape &input_shape, const uint8 assert(bias_shape.FlatSize() == output_rows); UNUSED_RELEASE(bias_shape); gemmlowp::MatrixMap filter_matrix( - filter_data, filter_rows, filter_cols); + filter_data, filter_rows, filter_cols); gemmlowp::MatrixMap input_matrix( - gemm_input_data, gemm_input_rows, gemm_input_cols); + gemm_input_data, gemm_input_rows, gemm_input_cols); gemmlowp::MatrixMap output_matrix(output_data, output_rows, output_cols); const auto &output_pipeline = - GemmlowpOutputPipeline::MakeExp(bias_data, output_rows, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max); + GemmlowpOutputPipeline::MakeExp(bias_data, output_rows, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max); gemmlowp::GemmWithOutputPipeline( - gemm_context, filter_matrix, input_matrix, &output_matrix, filter_offset, input_offset, - output_pipeline); + gemm_context, filter_matrix, input_matrix, &output_matrix, filter_offset, input_offset, + output_pipeline); } } // namespace optimized @@ -202,10 +202,10 @@ public: T *output_data, int output_height, int output_width) { const bool is_1x1_kernel = - (filter_height == 1 && filter_width == 1 && stride_rows == 1 && stride_cols == 1); + (filter_height == 1 && filter_width == 1 && stride_rows == 1 && stride_cols == 1); const bool is_same_height_width = - (filter_height == input_height && filter_width == input_width && pad_width == 0 && - pad_height == 0); + (filter_height == input_height && filter_width == input_width && pad_width == 0 && + pad_height == 0); if (is_1x1_kernel || is_same_height_width) { // is_1x1_kernel: For 1x1 kernel, the 2D convolution is reduced to matrix multiplication. diff --git a/compute/cker/include/cker/operation/optimized/DepthwiseConvFloat.h b/compute/cker/include/cker/operation/optimized/DepthwiseConvFloat.h new file mode 100644 index 0000000..d439793 --- /dev/null +++ b/compute/cker/include/cker/operation/optimized/DepthwiseConvFloat.h @@ -0,0 +1,1250 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_CKER_OPTIMIZED_DEPTHWISE_CONV_FLOAT_H__ +#define __NNFW_CKER_OPTIMIZED_DEPTHWISE_CONV_FLOAT_H__ + +#include "cker/Shape.h" +#include "cker/Types.h" +#include "cker/Utils.h" +#include "cker/neon/neon_check.h" + +namespace nnfw +{ +namespace cker +{ +namespace optimized +{ + +// Implementation of float DepthwiseConv + +template +struct FloatDepthwiseConvKernel +{ +}; + +#ifdef USE_NEON + +template <> struct FloatDepthwiseConvKernel +{ + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float *input_ptr, int input_ptr_increment, const float *filter_ptr, + float *acc_buffer_ptr) + { + (void)input_depth; + (void)depth_multiplier; + (void)input_ptr_increment; + // Load the filters + float32x4_t filter[2]; + for (int i = 0; i < 2; i++) + { + filter[i] = vld1q_f32(filter_ptr + 4 * i); + } + int outp = 0; + // Handle 2 output pixels at a time. + for (; outp <= num_output_pixels - 2; outp += 2) + { + // Load the inputs + float32x4_t input[4]; + for (int i = 0; i < 4; i++) + { + input[i] = vld1q_f32(input_ptr + 4 * i); + } + input_ptr += 16; + // Load the accumulators from acc_buffer + float32x4_t acc[4]; + for (int i = 0; i < 4; i++) + { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + acc[0] = vmlaq_f32(acc[0], input[0], filter[0]); + acc[1] = vmlaq_f32(acc[1], input[1], filter[1]); + acc[2] = vmlaq_f32(acc[2], input[2], filter[0]); + acc[3] = vmlaq_f32(acc[3], input[3], filter[1]); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) + { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + // Handle one output pixel at a time. + for (; outp < num_output_pixels; outp++) + { + // Load the inputs + float32x4_t input[2]; + for (int i = 0; i < 2; i++) + { + input[i] = vld1q_f32(input_ptr + 4 * i); + } + input_ptr += 8; + // Load the accumulators from acc_buffer + float32x4_t acc[2]; + for (int i = 0; i < 2; i++) + { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + for (int i = 0; i < 2; i++) + { + acc[i] = vmlaq_f32(acc[i], input[i], filter[i]); + } + // Store the accumulators back to acc_buffer + for (int i = 0; i < 2; i++) + { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 8; + } + } +}; + +template <> struct FloatDepthwiseConvKernel +{ + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float *input_ptr, int input_ptr_increment, const float *filter_ptr, + float *acc_buffer_ptr) + { + (void)input_depth; + (void)depth_multiplier; + (void)input_ptr_increment; + + const float32x2_t filters = vld1_f32(filter_ptr); + const float32x4_t filters_dup2 = vcombine_f32(filters, filters); + int outp = 0; + // Handle 8 output pixels at a time. 
+ for (; outp <= num_output_pixels - 8; outp += 8) + { + // Load the inputs + float32x4_t input[4]; + for (int i = 0; i < 4; i++) + { + input[i] = vld1q_f32(input_ptr + 4 * i); + } + input_ptr += 16; + // Load the accumulators from acc_buffer + float32x4_t acc[4]; + for (int i = 0; i < 4; i++) + { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + for (int i = 0; i < 4; i++) + { + acc[i] = vmlaq_f32(acc[i], input[i], filters_dup2); + } + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) + { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + // Handle 4 output pixels at a time. + for (; outp <= num_output_pixels - 4; outp += 4) + { + // Load the inputs + float32x4_t input[2]; + for (int i = 0; i < 2; i++) + { + input[i] = vld1q_f32(input_ptr + 4 * i); + } + input_ptr += 8; + // Load the accumulators from acc_buffer + float32x4_t acc[2]; + for (int i = 0; i < 2; i++) + { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + for (int i = 0; i < 2; i++) + { + acc[i] = vmlaq_f32(acc[i], input[i], filters_dup2); + } + // Store the accumulators back to acc_buffer + for (int i = 0; i < 2; i++) + { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 8; + } + // Handle 2 output pixels at a time. + for (; outp <= num_output_pixels - 2; outp += 2) + { + // Load the inputs + const float32x4_t input = vld1q_f32(input_ptr); + input_ptr += 4; + // Load the accumulators from acc_buffer + float32x4_t acc = vld1q_f32(acc_buffer_ptr); + // Multiply-accumulate + acc = vmlaq_f32(acc, input, filters_dup2); + // Store the accumulators back to acc_buffer + vst1q_f32(acc_buffer_ptr, acc); + acc_buffer_ptr += 4; + } + // Handle 1 output pixel at a time + for (; outp < num_output_pixels; outp++) + { + // Load the inputs + const float32x2_t input = vld1_f32(input_ptr); + input_ptr += 2; + // Load the accumulators from acc_buffer + float32x2_t acc = vld1_f32(acc_buffer_ptr); + // Multiply-accumulate + acc = vmla_f32(acc, input, filters); + // Store the accumulators back to acc_buffer + vst1_f32(acc_buffer_ptr, acc); + acc_buffer_ptr += 2; + } + } +}; + +template <> struct FloatDepthwiseConvKernel +{ + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float *input_ptr, int input_ptr_increment, const float *filter_ptr, + float *acc_buffer_ptr) + { + (void)depth_multiplier; + + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) + { + const float *local_filter_ptr = filter_ptr; + const float *local_input_ptr = input_ptr; + int ic = 0; + // Handle 16 input channels at a time. 
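
// The NEON specializations in this file all share the same control pattern:
// an unrolling ladder that consumes the widest chunk that still fits, then
// falls through to narrower chunks and finally to a scalar remainder loop.
// A minimal portable sketch of that pattern, using the hypothetical helpers
// accumulate4/accumulate1 (not taken from this patch):
#include <cstddef>

inline void accumulate1(const float *in, const float *flt, float *acc) { *acc += *in * *flt; }

inline void accumulate4(const float *in, const float *flt, float *acc)
{
  // Stands in for a vld1q_f32 / vmlaq_f32 / vst1q_f32 triple.
  for (int k = 0; k < 4; ++k)
    acc[k] += in[k] * flt[k];
}

inline void accumulate_row(std::size_t n, const float *in, const float *flt, float *acc)
{
  std::size_t i = 0;
  // Widest step first: 8 values per iteration (two 4-wide ops).
  for (; i + 8 <= n; i += 8)
  {
    accumulate4(in + i, flt + i, acc + i);
    accumulate4(in + i + 4, flt + i + 4, acc + i + 4);
  }
  // Then 4 values per iteration.
  for (; i + 4 <= n; i += 4)
    accumulate4(in + i, flt + i, acc + i);
  // Scalar remainder, exactly like the "one at a time" loops in these kernels.
  for (; i < n; ++i)
    accumulate1(in + i, flt + i, acc + i);
}
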
+ for (; ic <= input_depth - 16; ic += 16) + { + // Load the filters + float32x4_t filter_0 = vld1q_f32(local_filter_ptr + 4 * 0); + float32x4_t filter_1 = vld1q_f32(local_filter_ptr + 4 * 1); + float32x4_t filter_2 = vld1q_f32(local_filter_ptr + 4 * 2); + float32x4_t filter_3 = vld1q_f32(local_filter_ptr + 4 * 3); + local_filter_ptr += 16; + // Load the inputs + float32x4_t input_0 = vld1q_f32(local_input_ptr + 4 * 0); + float32x4_t input_1 = vld1q_f32(local_input_ptr + 4 * 1); + float32x4_t input_2 = vld1q_f32(local_input_ptr + 4 * 2); + float32x4_t input_3 = vld1q_f32(local_input_ptr + 4 * 3); + local_input_ptr += 16; + // Load the accumulators from acc_buffer + float32x4_t acc_0 = vld1q_f32(acc_buffer_ptr + 4 * 0); + float32x4_t acc_1 = vld1q_f32(acc_buffer_ptr + 4 * 1); + float32x4_t acc_2 = vld1q_f32(acc_buffer_ptr + 4 * 2); + float32x4_t acc_3 = vld1q_f32(acc_buffer_ptr + 4 * 3); + // Multiply-accumulate + acc_0 = vmlaq_f32(acc_0, input_0, filter_0); + acc_1 = vmlaq_f32(acc_1, input_1, filter_1); + acc_2 = vmlaq_f32(acc_2, input_2, filter_2); + acc_3 = vmlaq_f32(acc_3, input_3, filter_3); + // Store the accumulators back to acc_buffer + vst1q_f32(acc_buffer_ptr + 4 * 0, acc_0); + vst1q_f32(acc_buffer_ptr + 4 * 1, acc_1); + vst1q_f32(acc_buffer_ptr + 4 * 2, acc_2); + vst1q_f32(acc_buffer_ptr + 4 * 3, acc_3); + acc_buffer_ptr += 16; + } + // Handle 4 input channels at a time. + for (; ic <= input_depth - 4; ic += 4) + { + // Load the filters + float32x4_t filter; + filter = vld1q_f32(local_filter_ptr); + local_filter_ptr += 4; + // Load the inputs + float32x4_t input; + input = vld1q_f32(local_input_ptr); + local_input_ptr += 4; + // Load the accumulators from acc_buffer + float32x4_t acc; + acc = vld1q_f32(acc_buffer_ptr); + // Multiply-accumulate + acc = vmlaq_f32(acc, input, filter); + // Store the accumulators back to acc_buffer + vst1q_f32(acc_buffer_ptr, acc); + acc_buffer_ptr += 4; + } + // Handle one input channel at a time. + for (; ic < input_depth; ic++) + { + const float input_val = *local_input_ptr++; + const float filter_val = *local_filter_ptr++; + *acc_buffer_ptr++ += filter_val * input_val; + } + input_ptr += input_ptr_increment; + } + } +}; + +template <> struct FloatDepthwiseConvKernel +{ + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float *input_ptr, int input_ptr_increment, const float *filter_ptr, + float *acc_buffer_ptr) + { + (void)depth_multiplier; + + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) + { + const float *local_filter_ptr = filter_ptr; + const float *local_input_ptr = input_ptr; + int ic = 0; + // Handle 2 input channels at a time. 
+ for (; ic <= input_depth - 2; ic += 2) + { + // Load the filters + float32x4_t filter[4]; + for (int i = 0; i < 4; i++) + { + filter[i] = vld1q_f32(local_filter_ptr + 4 * i); + } + local_filter_ptr += 16; + // Load the inputs + const float32x2_t input = vld1_f32(local_input_ptr); + local_input_ptr += 2; + // Load the accumulators from acc_buffer + float32x4_t acc[4]; + for (int i = 0; i < 4; i++) + { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + acc[0] = vmlaq_lane_f32(acc[0], filter[0], input, 0); + acc[1] = vmlaq_lane_f32(acc[1], filter[1], input, 0); + acc[2] = vmlaq_lane_f32(acc[2], filter[2], input, 1); + acc[3] = vmlaq_lane_f32(acc[3], filter[3], input, 1); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) + { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + // Handle one input channel at a time. + for (; ic < input_depth; ic++) + { + // Load the filters + float32x4_t filter[2]; + for (int i = 0; i < 2; i++) + { + filter[i] = vld1q_f32(local_filter_ptr + 4 * i); + } + local_filter_ptr += 8; + // Load the inputs + const float input_val = *local_input_ptr++; + // Load the accumulators from acc_buffer + float32x4_t acc[2]; + for (int i = 0; i < 2; i++) + { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + for (int i = 0; i < 2; i++) + { + acc[i] = vmlaq_n_f32(acc[i], filter[i], input_val); + } + // Store the accumulators back to acc_buffer + for (int i = 0; i < 2; i++) + { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 8; + } + input_ptr += input_ptr_increment; + } + } +}; + +// Note this implementation is very slow for input_depths < 8 +// (e.g. comparable to reference implementation) see, specializations for +// input_depth=3 below. +template <> struct FloatDepthwiseConvKernel +{ + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float *input_ptr, int input_ptr_increment, const float *filter_ptr, + float *acc_buffer_ptr) + { + (void)depth_multiplier; + + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) + { + const float *local_filter_ptr = filter_ptr; + const float *local_input_ptr = input_ptr; + int ic = 0; + // Handle 8 input channels at a time. + for (; ic <= input_depth - 8; ic += 8) + { + // Load the filters + float32x4_t filter[4]; + for (int i = 0; i < 4; i++) + { + filter[i] = vld1q_f32(local_filter_ptr + 4 * i); + } + local_filter_ptr += 16; + // Load the inputs + float32x4x2_t input_dup2[2]; + for (int i = 0; i < 2; i++) + { + const float32x4_t input = vld1q_f32(local_input_ptr + 4 * i); + input_dup2[i] = vzipq_f32(input, input); + } + local_input_ptr += 8; + // Load the accumulators from acc_buffer + float32x4_t acc[4]; + for (int i = 0; i < 4; i++) + { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + acc[0] = vmlaq_f32(acc[0], filter[0], input_dup2[0].val[0]); + acc[1] = vmlaq_f32(acc[1], filter[1], input_dup2[0].val[1]); + acc[2] = vmlaq_f32(acc[2], filter[2], input_dup2[1].val[0]); + acc[3] = vmlaq_f32(acc[3], filter[3], input_dup2[1].val[1]); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) + { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + // Handle 4 input channels at a time. 
+ for (; ic <= input_depth - 4; ic += 4) + { + // Load the filters + float32x2_t filter[4]; + for (int i = 0; i < 4; i++) + { + filter[i] = vld1_f32(local_filter_ptr + 2 * i); + } + local_filter_ptr += 8; + // Load the inputs + const float32x4_t input = vld1q_f32(local_input_ptr); + local_input_ptr += 4; + // Load the accumulators from acc_buffer + float32x2_t acc[4]; + for (int i = 0; i < 4; i++) + { + acc[i] = vld1_f32(acc_buffer_ptr + 2 * i); + } + // Multiply-accumulate + acc[0] = vmla_lane_f32(acc[0], filter[0], vget_low_f32(input), 0); + acc[1] = vmla_lane_f32(acc[1], filter[1], vget_low_f32(input), 1); + acc[2] = vmla_lane_f32(acc[2], filter[2], vget_high_f32(input), 0); + acc[3] = vmla_lane_f32(acc[3], filter[3], vget_high_f32(input), 1); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) + { + vst1_f32(acc_buffer_ptr + 2 * i, acc[i]); + } + acc_buffer_ptr += 8; + } + // Handle 2 input channels at a time. + for (; ic <= input_depth - 2; ic += 2) + { + // Load the filters + const float32x4_t filter = vld1q_f32(local_filter_ptr); + local_filter_ptr += 4; + // Load the inputs + const float32x2_t input = vld1_f32(local_input_ptr); + local_input_ptr += 2; + // Load the accumulators from acc_buffer + float32x2_t acc[2]; + for (int i = 0; i < 2; i++) + { + acc[i] = vld1_f32(acc_buffer_ptr + 2 * i); + } + // Multiply-accumulate + acc[0] = vmla_lane_f32(acc[0], vget_low_f32(filter), input, 0); + acc[1] = vmla_lane_f32(acc[1], vget_high_f32(filter), input, 1); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 2; i++) + { + vst1_f32(acc_buffer_ptr + 2 * i, acc[i]); + } + acc_buffer_ptr += 4; + } + // Handle one input channel at a time. + for (; ic < input_depth; ic++) + { + // Load the inputs + const float input_val = *local_input_ptr++; + // Multiply-accumulate + for (int i = 0; i < 2; i++) + { + acc_buffer_ptr[i] += local_filter_ptr[i] * input_val; + } + local_filter_ptr += 2; + acc_buffer_ptr += 2; + } + input_ptr += input_ptr_increment; + } + } +}; + +template <> struct FloatDepthwiseConvKernel +{ + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float *input_ptr, int input_ptr_increment, const float *filter_ptr, + float *acc_buffer_ptr) + { + (void)input_depth; + (void)depth_multiplier; + + // Load the filters + float32x2_t filter[3]; + for (int i = 0; i < 3; i++) + { + filter[i] = vld1_f32(filter_ptr + 2 * i); + } + // Handle one output pixel at a time. 
+ for (int outp = 0; outp < num_output_pixels; outp++) + { + const float32x2_t input01 = vld1_f32(input_ptr); + const float32x2_t input2 = vld1_dup_f32(input_ptr + 2); + // Load the accumulators from acc_buffer + float32x2_t acc[3]; + for (int i = 0; i < 3; i++) + { + acc[i] = vld1_f32(acc_buffer_ptr + 2 * i); + } + // Multiply-accumulate for each input channel there 2 outputs + acc[0] = vmla_lane_f32(acc[0], filter[0], input01, 0); + acc[1] = vmla_lane_f32(acc[1], filter[1], input01, 1); + acc[2] = vmla_lane_f32(acc[2], filter[2], input2, 0); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 3; i++) + { + vst1_f32(acc_buffer_ptr + 2 * i, acc[i]); + } + acc_buffer_ptr += 6; + input_ptr += input_ptr_increment; + } + } +}; + +template <> struct FloatDepthwiseConvKernel +{ + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float *input_ptr, int input_ptr_increment, const float *filter_ptr, + float *acc_buffer_ptr) + { + (void)input_depth; + (void)depth_multiplier; + + // Load the filters + float32x4_t filter[3]; + for (int i = 0; i < 3; i++) + { + filter[i] = vld1q_f32(filter_ptr + 4 * i); + } + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) + { + // NOTE: we only want 3 values, so we read it as two ops where + // the second op just duplicates the lane + const float32x2_t input01 = vld1_f32(input_ptr); + const float32x2_t input2 = vld1_dup_f32(input_ptr + 2); + // Load the accumulators from acc_buffer + float32x4_t acc[3]; + for (int i = 0; i < 3; i++) + { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate all outputs. + acc[0] = vmlaq_lane_f32(acc[0], filter[0], input01, 0); + acc[1] = vmlaq_lane_f32(acc[1], filter[1], input01, 1); + acc[2] = vmlaq_lane_f32(acc[2], filter[2], input2, 0); + // Store the accumulators back to acc_buffer + for (int i = 0; i < 3; i++) + { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 12; + input_ptr += input_ptr_increment; + } + } +}; + +template <> struct FloatDepthwiseConvKernel +{ + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float *input_ptr, int input_ptr_increment, const float *filter_ptr, + float *acc_buffer_ptr) + { + (void)input_depth; + (void)depth_multiplier; + + // Load the filters + float32x4_t filter[2]; + for (int i = 0; i < 2; i++) + { + filter[i] = vld1q_f32(filter_ptr + 4 * i); + } + // Handle one output pixel at a time. 
+ for (int outp = 0; outp < num_output_pixels; outp++) + { + // Load the inputs + const float input_val = *input_ptr; + input_ptr += input_ptr_increment; + // Load the accumulators from acc_buffer + float32x4_t acc[2]; + for (int i = 0; i < 2; i++) + { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + for (int i = 0; i < 2; i++) + { + acc[i] = vmlaq_n_f32(acc[i], filter[i], input_val); + } + // Store the accumulators back to acc_buffer + for (int i = 0; i < 2; i++) + { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 8; + } + } +}; + +template <> struct FloatDepthwiseConvKernel +{ + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float *input_ptr, int input_ptr_increment, const float *filter_ptr, + float *acc_buffer_ptr) + { + (void)input_depth; + (void)depth_multiplier; + + // Load the filters + float32x4_t filter_0 = vld1q_f32(filter_ptr + 4 * 0); + float32x4_t filter_1 = vld1q_f32(filter_ptr + 4 * 1); + float32x4_t filter_2 = vld1q_f32(filter_ptr + 4 * 2); + float32x4_t filter_3 = vld1q_f32(filter_ptr + 4 * 3); + float32x4_t filter_4 = vld1q_f32(filter_ptr + 4 * 4); + float32x4_t filter_5 = vld1q_f32(filter_ptr + 4 * 5); + float32x4_t filter_6 = vld1q_f32(filter_ptr + 4 * 6); + float32x4_t filter_7 = vld1q_f32(filter_ptr + 4 * 7); + + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) + { + // Load the inputs + const float input_val = *input_ptr; + input_ptr += input_ptr_increment; + // Load the accumulators from acc_buffer + float32x4_t acc_0 = vld1q_f32(acc_buffer_ptr + 4 * 0); + float32x4_t acc_1 = vld1q_f32(acc_buffer_ptr + 4 * 1); + float32x4_t acc_2 = vld1q_f32(acc_buffer_ptr + 4 * 2); + float32x4_t acc_3 = vld1q_f32(acc_buffer_ptr + 4 * 3); + float32x4_t acc_4 = vld1q_f32(acc_buffer_ptr + 4 * 4); + float32x4_t acc_5 = vld1q_f32(acc_buffer_ptr + 4 * 5); + float32x4_t acc_6 = vld1q_f32(acc_buffer_ptr + 4 * 6); + float32x4_t acc_7 = vld1q_f32(acc_buffer_ptr + 4 * 7); + // Multiply-accumulate + acc_0 = vmlaq_n_f32(acc_0, filter_0, input_val); + acc_1 = vmlaq_n_f32(acc_1, filter_1, input_val); + acc_2 = vmlaq_n_f32(acc_2, filter_2, input_val); + acc_3 = vmlaq_n_f32(acc_3, filter_3, input_val); + acc_4 = vmlaq_n_f32(acc_4, filter_4, input_val); + acc_5 = vmlaq_n_f32(acc_5, filter_5, input_val); + acc_6 = vmlaq_n_f32(acc_6, filter_6, input_val); + acc_7 = vmlaq_n_f32(acc_7, filter_7, input_val); + // Store the accumulators back to acc_buffer + vst1q_f32(acc_buffer_ptr + 4 * 0, acc_0); + vst1q_f32(acc_buffer_ptr + 4 * 1, acc_1); + vst1q_f32(acc_buffer_ptr + 4 * 2, acc_2); + vst1q_f32(acc_buffer_ptr + 4 * 3, acc_3); + vst1q_f32(acc_buffer_ptr + 4 * 4, acc_4); + vst1q_f32(acc_buffer_ptr + 4 * 5, acc_5); + vst1q_f32(acc_buffer_ptr + 4 * 6, acc_6); + vst1q_f32(acc_buffer_ptr + 4 * 7, acc_7); + acc_buffer_ptr += 32; + } + } +}; + +template <> struct FloatDepthwiseConvKernel +{ + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float *input_ptr, int input_ptr_increment, const float *filter_ptr, + float *acc_buffer_ptr) + { + (void)input_depth; + (void)depth_multiplier; + + // Load the filters + float32x4_t filter_0 = vld1q_f32(filter_ptr + 4 * 0); + float32x4_t filter_1 = vld1q_f32(filter_ptr + 4 * 1); + float32x4_t filter_2 = vld1q_f32(filter_ptr + 4 * 2); + float32x4_t filter_3 = vld1q_f32(filter_ptr + 4 * 3); + float32x4_t filter_4 = vld1q_f32(filter_ptr + 4 * 4); + + // Handle one output pixel at a time. 
+ for (int outp = 0; outp < num_output_pixels; outp++) + { + // Load the inputs + const float input_val = *input_ptr; + input_ptr += input_ptr_increment; + // Load the accumulators from acc_buffer + float32x4_t acc_0 = vld1q_f32(acc_buffer_ptr + 4 * 0); + float32x4_t acc_1 = vld1q_f32(acc_buffer_ptr + 4 * 1); + float32x4_t acc_2 = vld1q_f32(acc_buffer_ptr + 4 * 2); + float32x4_t acc_3 = vld1q_f32(acc_buffer_ptr + 4 * 3); + float32x4_t acc_4 = vld1q_f32(acc_buffer_ptr + 4 * 4); + // Multiply-accumulate + acc_0 = vmlaq_n_f32(acc_0, filter_0, input_val); + acc_1 = vmlaq_n_f32(acc_1, filter_1, input_val); + acc_2 = vmlaq_n_f32(acc_2, filter_2, input_val); + acc_3 = vmlaq_n_f32(acc_3, filter_3, input_val); + acc_4 = vmlaq_n_f32(acc_4, filter_4, input_val); + // Store the accumulators back to acc_buffer + vst1q_f32(acc_buffer_ptr + 4 * 0, acc_0); + vst1q_f32(acc_buffer_ptr + 4 * 1, acc_1); + vst1q_f32(acc_buffer_ptr + 4 * 2, acc_2); + vst1q_f32(acc_buffer_ptr + 4 * 3, acc_3); + vst1q_f32(acc_buffer_ptr + 4 * 4, acc_4); + acc_buffer_ptr += 20; + } + } +}; + +template <> struct FloatDepthwiseConvKernel +{ + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float *input_ptr, int input_ptr_increment, const float *filter_ptr, + float *acc_buffer_ptr) + { + (void)depth_multiplier; + + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) + { + const float *local_filter_ptr = filter_ptr; + const float *local_input_ptr = input_ptr; + for (int ic = 0; ic < input_depth; ic++) + { + // Load the filters + float32x4_t filter[4]; + for (int i = 0; i < 4; i++) + { + filter[i] = vld1q_f32(local_filter_ptr + 4 * i); + } + local_filter_ptr += 16; + // Load the inputs + const float input_val = *local_input_ptr++; + // Load the accumulators from acc_buffer + float32x4_t acc[4]; + for (int i = 0; i < 4; i++) + { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + for (int i = 0; i < 4; i++) + { + acc[i] = vmlaq_n_f32(acc[i], filter[i], input_val); + } + // Store the accumulators back to acc_buffer + for (int i = 0; i < 4; i++) + { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 16; + } + input_ptr += input_ptr_increment; + } + } +}; + +template <> struct FloatDepthwiseConvKernel +{ + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float *input_ptr, int input_ptr_increment, const float *filter_ptr, + float *acc_buffer_ptr) + { + (void)input_depth; + (void)depth_multiplier; + + // Load the filters + float32x4_t filter[2]; + for (int i = 0; i < 2; i++) + { + filter[i] = vld1q_f32(filter_ptr + 4 * i); + } + // Handle one output pixel at a time. 
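
// Every FloatDepthwiseConvKernel specialization above computes the same
// result with different vector layouts: for each output pixel, each input
// channel is multiplied against depth_multiplier filter values and added into
// acc_buffer at index ic * depth_multiplier + m. A portable reference sketch
// of that contract (illustrative only, not part of the patch):
inline void DepthwiseConvKernelReference(int num_output_pixels, int input_depth,
                                         int depth_multiplier, const float *input_ptr,
                                         int input_ptr_increment, const float *filter_ptr,
                                         float *acc_buffer_ptr)
{
  for (int outp = 0; outp < num_output_pixels; ++outp)
  {
    const float *filter = filter_ptr;
    const float *input = input_ptr;
    for (int ic = 0; ic < input_depth; ++ic)
    {
      const float input_val = *input++;
      for (int m = 0; m < depth_multiplier; ++m)
      {
        // One output channel per (input channel, multiplier) pair.
        *acc_buffer_ptr++ += *filter++ * input_val;
      }
    }
    input_ptr += input_ptr_increment;
  }
}
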
+ for (int outp = 0; outp < num_output_pixels; outp++) + { + // Load the inputs + float32x4_t input[2]; + for (int i = 0; i < 2; i++) + { + input[i] = vld1q_f32(input_ptr + 4 * i); + } + // Load the accumulators from acc_buffer + float32x4_t acc[2]; + for (int i = 0; i < 2; i++) + { + acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i); + } + // Multiply-accumulate + for (int i = 0; i < 2; i++) + { + acc[i] = vmlaq_f32(acc[i], input[i], filter[i]); + } + // Store the accumulators back to acc_buffer + for (int i = 0; i < 2; i++) + { + vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]); + } + acc_buffer_ptr += 8; + input_ptr += input_ptr_increment; + } + } +}; + +template <> struct FloatDepthwiseConvKernel +{ + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float *input_ptr, int input_ptr_increment, const float *filter_ptr, + float *acc_buffer_ptr) + { + (void)input_depth; + (void)depth_multiplier; + + float32x2_t filter = vld1_f32(filter_ptr); + float32x4_t filter_x4 = vcombine_f32(filter, filter); + int outp = 0; + + // Handle two output pixels at a time. + for (; outp <= num_output_pixels - 2; outp += 2) + { + // Load the inputs + float32x2_t input_1 = vld1_f32(input_ptr); + input_ptr += input_ptr_increment; + float32x2_t input_2 = vld1_f32(input_ptr); + input_ptr += input_ptr_increment; + float32x4_t input = vcombine_f32(input_1, input_2); + + // Load the accumulators from acc_buffer + float32x4_t acc = vld1q_f32(acc_buffer_ptr); + + // Multiply-accumulate + acc = vmlaq_f32(acc, input, filter_x4); + + // Store the accumulators back to acc_buffer + vst1q_f32(acc_buffer_ptr, acc); + acc_buffer_ptr += 4; + } + // Handle one output pixel at a time. + for (; outp < num_output_pixels; outp++) + { + // Load the inputs + float32x2_t input = vld1_f32(input_ptr); + input_ptr += input_ptr_increment; + + // Load the accumulators from acc_buffer + float32x2_t acc = vld1_f32(acc_buffer_ptr); + + // Multiply-accumulate + acc = vmla_f32(acc, input, filter); + + // Store the accumulators back to acc_buffer + vst1_f32(acc_buffer_ptr, acc); + acc_buffer_ptr += 2; + } + } +}; + +template <> struct FloatDepthwiseConvKernel +{ + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float *input_ptr, int input_ptr_increment, const float *filter_ptr, + float *acc_buffer_ptr) + { + (void)input_depth; + (void)depth_multiplier; + + float32x4_t filter = vld1q_f32(filter_ptr); + + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) + { + // Load the inputs + float32x4_t input = vld1q_f32(input_ptr); + // Load the accumulators from acc_buffer + float32x4_t acc = vld1q_f32(acc_buffer_ptr); + // Multiply-accumulate + acc = vmlaq_f32(acc, input, filter); + // Store the accumulators back to acc_buffer + vst1q_f32(acc_buffer_ptr, acc); + acc_buffer_ptr += 4; + input_ptr += input_ptr_increment; + } + } +}; +#endif + +// Accumulates the effect of one row of the filter, on a segment of one row +// of the output, accessing the corresponding one row of the input. +template +void FloatDepthwiseConvAccumRow(int stride, int dilation_factor, int input_depth, int input_width, + const float *input_data, int pad_width, int depth_multiplier, + int filter_width, const float *filter_data, int out_x_buffer_start, + int out_x_buffer_end, int output_depth, float *acc_buffer) +{ + // Sanity check parameters. 
This is important in particular to ensure + // that we keep the number of template instantiations minimal, so we don't + // increase binary size unnecessarily. + static_assert(kFixedDepthMultiplier || !kFixedInputDepth, ""); + static_assert(kFixedInputDepth || kAllowStrided, ""); + assert(stride == 1 || kAllowStrided); + if (kFixedInputDepth) + { + assert(input_depth == kFixedInputDepth); + } + if (kFixedDepthMultiplier) + { + assert(depth_multiplier == kFixedDepthMultiplier); + } + assert(output_depth == input_depth * depth_multiplier); + const int input_ptr_increment = stride * input_depth; + const float *filter_base_ptr = filter_data; + for (int filter_x = 0; filter_x < filter_width; ++filter_x) + { + // For the current (filter_x, filter_y) point in the filter, + // compute the boundaries of the corresponding output row segment. + int out_x_loop_start_unclamped = 0; + int out_x_loop_end_unclamped = 0; + if (kAllowStrided) + { + if (stride == 2) + { + out_x_loop_start_unclamped = (pad_width - dilation_factor * filter_x + 1) / 2; + out_x_loop_end_unclamped = (pad_width + input_width - dilation_factor * filter_x + 1) / 2; + } + else if (stride == 4) + { + out_x_loop_start_unclamped = (pad_width - dilation_factor * filter_x + 3) / 4; + out_x_loop_end_unclamped = (pad_width + input_width - dilation_factor * filter_x + 3) / 4; + } + else + { + out_x_loop_start_unclamped = (pad_width - dilation_factor * filter_x + stride - 1) / stride; + out_x_loop_end_unclamped = + (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride; + } + } + else + { + out_x_loop_start_unclamped = pad_width - dilation_factor * filter_x; + out_x_loop_end_unclamped = pad_width + input_width - dilation_factor * filter_x; + } + // The kernel will have to iterate on the segment of the + // output row that starts at out_x_loop_start and out_x_loop_end. + const int out_x_loop_start = std::max(out_x_buffer_start, out_x_loop_start_unclamped); + const int out_x_loop_end = std::min(out_x_buffer_end, out_x_loop_end_unclamped); + + float *acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth; + const int in_x_origin = (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x; + const float *input_ptr = input_data + in_x_origin * input_depth; + const int num_output_pixels = out_x_loop_end - out_x_loop_start; + FloatDepthwiseConvKernel::Run( + num_output_pixels, input_depth, depth_multiplier, input_ptr, input_ptr_increment, + filter_base_ptr, acc_buffer_ptr); + filter_base_ptr += output_depth; + } +} + +// generic fallback of FloatDepthwiseConvAccumRow, portable, non-templatized. 
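
// The out_x_loop_start/end arithmetic above is a ceiling division that keeps
// in_x = out_x * stride - pad_width + dilation_factor * filter_x inside
// [0, input_width). A small self-checking sketch of the general formula used
// in the else branch (illustrative only):
constexpr int OutXLoopStart(int pad_width, int dilation_factor, int filter_x, int stride)
{
  return (pad_width - dilation_factor * filter_x + stride - 1) / stride;
}
constexpr int OutXLoopEnd(int pad_width, int input_width, int dilation_factor, int filter_x,
                          int stride)
{
  return (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride;
}
// stride = 2, pad_width = 1, input_width = 5, dilation = 1:
//   filter_x = 0 -> in_x = 2 * out_x - 1, valid for out_x in [1, 3)
//   filter_x = 2 -> in_x = 2 * out_x + 1, valid for out_x in [0, 2)
static_assert(OutXLoopStart(1, 1, 0, 2) == 1 && OutXLoopEnd(1, 5, 1, 0, 2) == 3, "");
static_assert(OutXLoopStart(1, 1, 2, 2) == 0 && OutXLoopEnd(1, 5, 1, 2, 2) == 2, "");
// The caller then clamps these bounds to [out_x_buffer_start, out_x_buffer_end).
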
+inline void FloatDepthwiseConvAccumRowGeneric(int stride, int dilation_factor, int input_depth, + int input_width, const float *input_data, + int pad_width, int depth_multiplier, int filter_width, + const float *filter_data, int out_x_buffer_start, + int out_x_buffer_end, int output_depth, + float *acc_buffer) +{ + const float *filter_base_ptr = filter_data; + for (int filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int out_x_loop_start = + std::max(out_x_buffer_start, (pad_width - dilation_factor * filter_x + stride - 1) / stride); + const int out_x_loop_end = + std::min(out_x_buffer_end, + (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride); + + float *acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth; + const int in_x_origin = (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x; + const float *input_ptr = input_data + in_x_origin * input_depth; + const int input_ptr_increment = (stride - 1) * input_depth; + for (int out_x = out_x_loop_start; out_x < out_x_loop_end; out_x++) + { + const float *filter_ptr = filter_base_ptr; + for (int ic = 0; ic < input_depth; ++ic) + { + const float input_val = *input_ptr++; + for (int m = 0; m < depth_multiplier; m++) + { + const float filter_val = *filter_ptr++; + *acc_buffer_ptr++ += filter_val * input_val; + } + } + input_ptr += input_ptr_increment; + } + filter_base_ptr += output_depth; + } +} + +// Initializes the accumulator buffer with bias values. +inline void DepthwiseConvInitAccBuffer(int num_output_pixels, int output_depth, + const float *bias_data, float *acc_buffer) +{ + // TODO(benoitjacob): This might need optimized specializations + // for small output_depth values, if that ever becomes an important + // case (like it was for some quantized DepthwiseConv cases). + for (int i = 0; i < num_output_pixels; i++) + { + memcpy(acc_buffer + i * output_depth, bias_data, sizeof(acc_buffer[0]) * output_depth); + } +} + +// DepthwiseConv can run with multi threads on the dim specified by thread_dim. +// Each thread processes output elements on dim, thread_dim, in the range of +// [thread_start, thread_end). +// For example, assume thread_start = 2, thread_end = 6, and thread_dim = 1, it +// means that it will calculate DepthwiseConv for output_data[:, 2:5, :, :]. 
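
// The comment above describes the threading contract of DepthwiseConvImpl
// below. A sketch of how a caller might split the row axis (thread_dim == 1)
// across workers; the even split and std::thread usage are assumptions of
// this example, not something the patch prescribes:
#include <algorithm>
#include <thread>
#include <vector>

template <typename Fn> // Fn is invoked as fn(thread_start, thread_end)
void RunRowsInParallel(int output_height, int num_threads, Fn fn)
{
  std::vector<std::thread> workers;
  const int rows_per_thread = (output_height + num_threads - 1) / num_threads;
  for (int t = 0; t < num_threads; ++t)
  {
    const int thread_start = t * rows_per_thread;
    const int thread_end = std::min(output_height, thread_start + rows_per_thread);
    if (thread_start >= thread_end)
      break;
    // Each worker computes output rows [thread_start, thread_end).
    workers.emplace_back([=] { fn(thread_start, thread_end); });
  }
  for (auto &w : workers)
    w.join();
}
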
+inline void DepthwiseConvImpl(const DepthwiseConvParams ¶ms, const Shape &input_shape, + const float *input_data, const Shape &filter_shape, + const float *filter_data, const Shape &bias_shape, + const float *bias_data, const Shape &output_shape, float *output_data, + int thread_start, int thread_end, int thread_dim) +{ + UNUSED_RELEASE(bias_shape); + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int depth_multiplier = params.depth_multiplier; + const float output_activation_min = params.float_activation_min; + const float output_activation_max = params.float_activation_max; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + assert(input_shape.DimensionsCount() == 4); + assert(filter_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + assert(thread_dim == 0 || thread_dim == 1); + + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int input_depth = input_shape.Dims(3); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + assert(output_depth == input_depth * depth_multiplier); + assert(bias_shape.FlatSize() == output_depth); + + static const int kAccBufferMaxSize = 4832; + float acc_buffer[kAccBufferMaxSize]; + assert(kAccBufferMaxSize >= output_depth); + const int kOutputPixelsInAccBuffer = kAccBufferMaxSize / output_depth; + const int kAccBufferActualSize = kOutputPixelsInAccBuffer * output_depth; + assert(kOutputPixelsInAccBuffer * output_depth <= kAccBufferActualSize); + assert(kAccBufferActualSize <= kAccBufferMaxSize); + assert(kOutputPixelsInAccBuffer >= 1); + + UNUSED_RELEASE(kAccBufferActualSize); + + // row_accum_func will point to the core accumulation function to be used + // for this DepthwiseConv op. + using row_accum_func_t = decltype(&FloatDepthwiseConvAccumRowGeneric); + row_accum_func_t row_accum_func = nullptr; + +#define TFMINI_USE_DEPTHWISECONV_KERNEL(ALLOW_STRIDED, FIXED_INPUT_DEPTH, FIXED_DEPTH_MULTIPLIER) \ + if (!row_accum_func && (stride_width == 1 || ALLOW_STRIDED) && \ + (input_depth == FIXED_INPUT_DEPTH || FIXED_INPUT_DEPTH == 0) && \ + depth_multiplier == FIXED_DEPTH_MULTIPLIER) \ + { \ + row_accum_func = \ + FloatDepthwiseConvAccumRow; \ + } + +#ifdef USE_NEON + // We go over our list of kernels by decreasing order of preference + // for the cases where multiple kernels could apply. + + // Start with the fastest kernels: AllowStrided=false, fixed input depth. + + TFMINI_USE_DEPTHWISECONV_KERNEL(false, 8, 1) + TFMINI_USE_DEPTHWISECONV_KERNEL(false, 2, 1) + + // Next come the strided kernels: AllowStrided=true, fixed input depth. + // They are a bit less efficient, but allow stride!=1. 
+ + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 8, 1) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 8) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 20) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 32) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 2, 1) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 3, 2) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 3, 4) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 4, 1) + + // Finally, the kernels allowing a variable input depth, + // these are the least efficient but most general kernels. + + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 1) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 2) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 8) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 16) + +#endif // USE_NEON + +#undef TFMINI_USE_DEPTHWISECONV_KERNEL + + // No matching fast kernel found, use slow fallback. + if (!row_accum_func) + { + row_accum_func = FloatDepthwiseConvAccumRowGeneric; + } + + const int input_height_stride = input_shape.Dims(3) * input_shape.Dims(2); + const int input_batch_stride = input_height_stride * input_shape.Dims(1); + const int filter_height_stride = filter_shape.Dims(3) * filter_shape.Dims(2); + + // Now that we have determined row_accum_func, we can start work. + int batch_start = 0; + int batch_end = batches; + int row_start = 0; + int row_end = output_height; + int output_ptr_offset = 0; + + switch (thread_dim) + { + case 0: + // Multithread along with the batch axis + assert(thread_start >= 0); + assert(thread_end <= batches); + batch_start = thread_start; + batch_end = thread_end; + output_ptr_offset = batch_start * FlatSizeSkipDim(output_shape, 0); + break; + case 1: + // Multithread along with the row axis + assert(thread_start >= 0); + assert(thread_end <= output_height); + row_start = thread_start; + row_end = thread_end; + output_ptr_offset = row_start * output_width * output_depth; + break; + } + + float *output_ptr = output_data + output_ptr_offset; + int batch_step = (output_height + row_start - row_end) * output_width * output_depth; + + for (int b = batch_start; b < batch_end; ++b) + { + for (int out_y = row_start; out_y < row_end; ++out_y) + { + const int in_y_origin = (out_y * stride_height) - pad_height; + const int filter_y_start = + std::max(0, (-in_y_origin + dilation_height_factor - 1) / dilation_height_factor); + const int filter_y_end = + std::min(filter_height, (input_height - in_y_origin + dilation_height_factor - 1) / + dilation_height_factor); + for (int out_x_buffer_start = 0; out_x_buffer_start < output_width; + out_x_buffer_start += kOutputPixelsInAccBuffer) + { + const int out_x_buffer_end = + std::min(output_width, out_x_buffer_start + kOutputPixelsInAccBuffer); + // We call a 'pixel' a group of activation that share all but the + // 'depth'/'channel' coordinate. num_output_pixels is the number of + // output pixels that we will accumulate in this loop iteration. + const int num_output_pixels = out_x_buffer_end - out_x_buffer_start; + // Initialize our local accumulator with the bias values, so we don't + // have to add them later. + DepthwiseConvInitAccBuffer(num_output_pixels, output_depth, bias_data, acc_buffer); + // Accumulation loop. Most of the time should be spent in here. 
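
// Around this point each out_x_buffer tile is processed the same way: the
// accumulators are seeded with the bias, every valid filter row is
// accumulated, and the result is clamped to the activation range and stored.
// With kAccBufferMaxSize = 4832 and, say, output_depth = 96, one tile covers
// 4832 / 96 = 50 output pixels. A scalar sketch of that per-tile sequence,
// with a hypothetical accumulate_rows callback standing in for the
// row_accum_func loop (illustrative only):
#include <algorithm>
#include <cstddef>
#include <cstring>
#include <functional>
#include <vector>

inline void ProcessTile(int num_output_pixels, int output_depth, const float *bias_data,
                        float activation_min, float activation_max,
                        const std::function<void(float *)> &accumulate_rows, float *output_ptr)
{
  std::vector<float> acc(static_cast<std::size_t>(num_output_pixels) * output_depth);
  // Seed with the bias so it does not have to be added later.
  for (int p = 0; p < num_output_pixels; ++p)
    std::memcpy(&acc[static_cast<std::size_t>(p) * output_depth], bias_data,
                sizeof(float) * output_depth);
  // One call per valid filter_y row in the real loop.
  accumulate_rows(acc.data());
  // Clamp to the activation range and store.
  for (std::size_t i = 0; i < acc.size(); ++i)
    output_ptr[i] = std::max(activation_min, std::min(activation_max, acc[i]));
}
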
+ for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y) + { + const int in_y = in_y_origin + dilation_height_factor * filter_y; + row_accum_func(stride_width, dilation_width_factor, input_depth, input_width, + input_data + in_y * input_height_stride + b * input_batch_stride, + pad_width, depth_multiplier, filter_width, + filter_data + filter_y * filter_height_stride, out_x_buffer_start, + out_x_buffer_end, output_depth, acc_buffer); + } + // Finished accumulating. Now store to destination. + const int num_output_values = output_depth * num_output_pixels; + int i = 0; +// TODO(benoitjacob) optimized code goes here +#ifdef USE_NEON + // Handle 16 values at a time + for (; i <= num_output_values - 16; i += 16) + { + float32x4_t acc[4]; + for (int k = 0; k < 4; k++) + { + acc[k] = vld1q_f32(acc_buffer + i + 4 * k); + } + for (int k = 0; k < 4; k++) + { + acc[k] = vmaxq_f32(vdupq_n_f32(output_activation_min), + vminq_f32(vdupq_n_f32(output_activation_max), acc[k])); + } + for (int k = 0; k < 4; k++) + { + vst1q_f32(output_ptr + 4 * k, acc[k]); + } + output_ptr += 16; + } + // Handle 4 values at a time + for (; i <= num_output_values - 4; i += 4) + { + float32x4_t acc = vld1q_f32(acc_buffer + i); + + acc = vmaxq_f32(vdupq_n_f32(output_activation_min), + vminq_f32(vdupq_n_f32(output_activation_max), acc)); + + vst1q_f32(output_ptr, acc); + output_ptr += 4; + } +#endif + // Handle leftover values, one by one. This is very slow. + for (; i < num_output_values; i++) + { + float acc = acc_buffer[i]; + acc = std::max(output_activation_min, std::min(output_activation_max, acc)); + + *output_ptr++ = acc; + } + } + } + output_ptr += batch_step; + } +} + +} // nnfw +} // cker +} // optimized + +#endif diff --git a/compute/cker/include/cker/operation/optimized/DepthwiseConvUint8.h b/compute/cker/include/cker/operation/optimized/DepthwiseConvUint8.h index d383b12..5ca56fd 100644 --- a/compute/cker/include/cker/operation/optimized/DepthwiseConvUint8.h +++ b/compute/cker/include/cker/operation/optimized/DepthwiseConvUint8.h @@ -32,6 +32,8 @@ namespace cker { namespace optimized { +namespace depthwise_conv +{ // Implementation of quantized DepthwiseConv @@ -44,8 +46,8 @@ struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -57,7 +59,7 @@ template <> struct QuantizedDepthwiseConvKernel for (int i = 0; i < 2; i++) { filter[i] = - vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(filter_u8.val[i])), vdupq_n_s16(filter_offset)); + vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(filter_u8.val[i])), vdupq_n_s16(filter_offset)); } // Handle one output pixel at a time. 
for (int outp = 0; outp < num_output_pixels; outp++) @@ -80,9 +82,9 @@ template <> struct QuantizedDepthwiseConvKernel for (int i = 0; i < 2; i++) { acc[0].val[i] = - vmlal_s16(acc[0].val[i], vget_low_s16(filter[i]), vget_low_s16(input_dup2.val[i])); + vmlal_s16(acc[0].val[i], vget_low_s16(filter[i]), vget_low_s16(input_dup2.val[i])); acc[1].val[i] = - vmlal_s16(acc[1].val[i], vget_high_s16(filter[i]), vget_high_s16(input_dup2.val[i])); + vmlal_s16(acc[1].val[i], vget_high_s16(filter[i]), vget_high_s16(input_dup2.val[i])); } // Store the accumulators back to acc_buffer for (int i = 0; i < 2; i++) @@ -98,8 +100,8 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -174,8 +176,8 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -206,9 +208,9 @@ template <> struct QuantizedDepthwiseConvKernel for (int i = 0; i < 2; i++) { acc[2 * i + 0] = - vmlal_s16(acc[2 * i + 0], vget_low_s16(filter), vget_low_s16(input_dup2.val[i])); + vmlal_s16(acc[2 * i + 0], vget_low_s16(filter), vget_low_s16(input_dup2.val[i])); acc[2 * i + 1] = - vmlal_s16(acc[2 * i + 1], vget_high_s16(filter), vget_high_s16(input_dup2.val[i])); + vmlal_s16(acc[2 * i + 1], vget_high_s16(filter), vget_high_s16(input_dup2.val[i])); } // Store the accumulators back to acc_buffer for (int i = 0; i < 4; i++) @@ -253,8 +255,8 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -338,8 +340,8 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -409,8 +411,8 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t 
*input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -534,8 +536,8 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -600,8 +602,8 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -703,8 +705,8 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -778,8 +780,8 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -864,8 +866,8 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -873,7 +875,7 @@ template <> struct QuantizedDepthwiseConvKernel // We will do that by register-level table-look-up using VTBL instructions. // Here we prepare the registers containing the table-lookup indices. 
static const uint8_t dup3_indices_array[3][8] = { - {0, 0, 0, 1, 1, 1, 2, 2}, {2, 3, 3, 3, 4, 4, 4, 5}, {5, 5, 6, 6, 6, 7, 7, 7}}; + {0, 0, 0, 1, 1, 1, 2, 2}, {2, 3, 3, 3, 4, 4, 4, 5}, {5, 5, 6, 6, 6, 7, 7, 7}}; uint8x8_t dup3_indices[3]; for (int i = 0; i < 3; i++) { @@ -928,9 +930,9 @@ template <> struct QuantizedDepthwiseConvKernel for (int j = 0; j < 3; j++) { acc[0].val[j] = - vmlal_s16(acc[0].val[j], vget_low_s16(input_dup3[j]), vget_low_s16(filter[j])); + vmlal_s16(acc[0].val[j], vget_low_s16(input_dup3[j]), vget_low_s16(filter[j])); acc[1].val[j] = - vmlal_s16(acc[1].val[j], vget_high_s16(input_dup3[j]), vget_high_s16(filter[j])); + vmlal_s16(acc[1].val[j], vget_high_s16(input_dup3[j]), vget_high_s16(filter[j])); } // Store the accumulators back to acc_buffer for (int i = 0; i < 2; i++) @@ -944,10 +946,10 @@ template <> struct QuantizedDepthwiseConvKernel // Handle one input channel at a time. for (; ic < input_depth; ic++) { - const uint16_t input_val = *local_input_ptr++ + input_offset; + const int16_t input_val = *local_input_ptr++ + input_offset; for (int i = 0; i < 3; i++) { - const uint16_t filter_val = local_filter_ptr[i] + filter_offset; + const int16_t filter_val = local_filter_ptr[i] + filter_offset; *acc_buffer_ptr++ += static_cast(filter_val) * input_val; } local_filter_ptr += 3; @@ -960,8 +962,8 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -1002,9 +1004,9 @@ template <> struct QuantizedDepthwiseConvKernel for (int j = 0; j < 2; j++) { acc[0].val[j] = - vmlal_s16(acc[0].val[j], vget_low_s16(filter[j]), vget_low_s16(input_dup2.val[j])); + vmlal_s16(acc[0].val[j], vget_low_s16(filter[j]), vget_low_s16(input_dup2.val[j])); acc[1].val[j] = - vmlal_s16(acc[1].val[j], vget_high_s16(filter[j]), vget_high_s16(input_dup2.val[j])); + vmlal_s16(acc[1].val[j], vget_high_s16(filter[j]), vget_high_s16(input_dup2.val[j])); } // Store the accumulators back to acc_buffer. for (int i = 0; i < 2; i++) @@ -1018,10 +1020,10 @@ template <> struct QuantizedDepthwiseConvKernel for (; ic < input_depth; ic++) { // Load the inputs. 
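The dup3_indices_array above encodes, for depth multiplier 3, how eight input bytes are expanded to twenty-four so that each value lines up with three consecutive filter channels; VTBL then performs that expansion in registers. A portable sketch of the same expansion (Dup3 is a hypothetical helper, not part of the header):

#include <cstdint>
#include <cstdio>

// Portable model of the 8 -> 24 byte expansion that the VTBL index tables encode:
// each input byte is repeated 3 times so it pairs with 3 consecutive filter channels.
void Dup3(const uint8_t in[8], uint8_t out[24])
{
  static const uint8_t dup3_indices[3][8] = {
    {0, 0, 0, 1, 1, 1, 2, 2}, {2, 3, 3, 3, 4, 4, 4, 5}, {5, 5, 6, 6, 6, 7, 7, 7}};
  for (int reg = 0; reg < 3; ++reg)
    for (int lane = 0; lane < 8; ++lane)
      out[reg * 8 + lane] = in[dup3_indices[reg][lane]];
}

int main()
{
  const uint8_t in[8] = {10, 11, 12, 13, 14, 15, 16, 17};
  uint8_t out[24];
  Dup3(in, out);
  for (int i = 0; i < 24; ++i)
    std::printf("%d ", out[i]); // 10 10 10 11 11 11 12 12 12 ...
  std::printf("\n");
  return 0;
}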
- const uint16_t input_val = *local_input_ptr++ + input_offset; + const int16_t input_val = *local_input_ptr++ + input_offset; for (int i = 0; i < 2; i++) { - const uint16_t filter_val = local_filter_ptr[i] + filter_offset; + const int16_t filter_val = local_filter_ptr[i] + filter_offset; *acc_buffer_ptr++ += static_cast(filter_val) * input_val; } local_filter_ptr += 2; @@ -1034,8 +1036,8 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -1112,8 +1114,8 @@ template <> struct QuantizedDepthwiseConvKernel // Handle one input channel at a time. for (; ic < input_depth; ic++) { - const uint16_t input_val = *local_input_ptr++ + input_offset; - const uint16_t filter_val = *local_filter_ptr++ + filter_offset; + const int16_t input_val = *local_input_ptr++ + input_offset; + const int16_t filter_val = *local_filter_ptr++ + filter_offset; *acc_buffer_ptr++ += static_cast(filter_val) * input_val; } input_ptr += input_ptr_increment; @@ -1124,8 +1126,8 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -1174,7 +1176,7 @@ template <> struct QuantizedDepthwiseConvKernel { acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], vget_low_s16(input[i]), vget_low_s16(filter[i])); acc[2 * i + 1] = - vmlal_s16(acc[2 * i + 1], vget_high_s16(input[i]), vget_high_s16(filter[i])); + vmlal_s16(acc[2 * i + 1], vget_high_s16(input[i]), vget_high_s16(filter[i])); } // Store the accumulators back to acc_buffer for (int i = 0; i < 4; i++) @@ -1189,8 +1191,8 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -1228,8 +1230,8 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -1253,7 +1255,7 @@ template <> struct QuantizedDepthwiseConvKernel { 
uint8_t input_u8 = *input_ptr; input_ptr += input_ptr_increment; - uint16_t input = static_cast(input_u8 + input_offset); + int16_t input = static_cast(input_u8) + input_offset; // Load the accumulators from acc_buffer int32x4_t acc[4]; for (int i = 0; i < 4; i++) @@ -1279,8 +1281,8 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -1302,7 +1304,7 @@ template <> struct QuantizedDepthwiseConvKernel { uint8_t input_u8 = *input_ptr; input_ptr += input_ptr_increment; - uint16_t input = static_cast(input_u8 + input_offset); + int16_t input = static_cast(input_u8) + input_offset; // Load the accumulators from acc_buffer int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0); int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1); @@ -1338,8 +1340,8 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -1363,7 +1365,7 @@ template <> struct QuantizedDepthwiseConvKernel { uint8_t input_u8 = *input_ptr; input_ptr += input_ptr_increment; - uint16_t input = static_cast(input_u8 + input_offset); + int16_t input = static_cast(input_u8) + input_offset; // Load the accumulators from acc_buffer int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0); int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1); @@ -1390,21 +1392,21 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; // Load the filters, add filter_offset. const uint8x8_t filter_u8 = vld1_u8(filter_ptr); const int16x8_t filter = - vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(filter_u8)), vdupq_n_s16(filter_offset)); + vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(filter_u8)), vdupq_n_s16(filter_offset)); // Handle one output pixel at a time. 
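These specializations lean on vmlal_s16, the widening multiply-accumulate: four int16 lanes from the input and filter are multiplied and the int32 products are added into the accumulator lanes, so 16-bit products cannot overflow before accumulation. A scalar model of that operation (illustration only, not the intrinsic itself):

#include <cstdint>
#include <cstdio>

// Scalar model of vmlal_s16: acc[lane] += (int32)a[lane] * (int32)b[lane] for 4 lanes.
// The widening to 32 bits happens before the addition, so int16 products cannot overflow.
void MlalS16(int32_t acc[4], const int16_t a[4], const int16_t b[4])
{
  for (int lane = 0; lane < 4; ++lane)
    acc[lane] += static_cast<int32_t>(a[lane]) * static_cast<int32_t>(b[lane]);
}

int main()
{
  int32_t acc[4] = {1, 1, 1, 1};
  const int16_t a[4] = {-32768, 300, -7, 0};
  const int16_t b[4] = {2, -300, -7, 12345};
  MlalS16(acc, a, b);
  for (int lane = 0; lane < 4; ++lane)
    std::printf("%d\n", acc[lane]); // -65535, -89999, 50, 1
  return 0;
}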
for (int outp = 0; outp < num_output_pixels; outp++) { uint8_t input_u8 = *input_ptr; input_ptr += input_ptr_increment; - uint16_t input = static_cast(input_u8 + input_offset); + int16_t input = static_cast(input_u8) + input_offset; // Load the accumulators from acc_buffer int32x4_t acc[2]; for (int i = 0; i < 2; i++) @@ -1427,8 +1429,8 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -1455,7 +1457,7 @@ template <> struct QuantizedDepthwiseConvKernel input_u16 = vset_lane_u16((reinterpret_cast(input_ptr))[0], input_u16, 1); input_ptr += input_ptr_increment; const int16x4_t input_s16 = - vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u16(input_u16)))); + vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u16(input_u16)))); const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset)); // Multiply-accumulate. @@ -1490,8 +1492,8 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -1555,8 +1557,8 @@ template <> struct QuantizedDepthwiseConvKernel template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, - const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment, - const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr) + const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment, + const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr) { (void)input_depth; (void)depth_multiplier; @@ -1652,9 +1654,9 @@ void QuantizedDepthwiseConvAccumRow(int stride, int dilation_factor, int input_d else { out_x_loop_start_unclampled = - (pad_width - dilation_factor * filter_x + stride - 1) / stride; + (pad_width - dilation_factor * filter_x + stride - 1) / stride; out_x_loop_end_unclampled = - (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride; + (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride; } } else @@ -1672,8 +1674,8 @@ void QuantizedDepthwiseConvAccumRow(int stride, int dilation_factor, int input_d const uint8_t *input_ptr = input_data + in_x_origin * input_depth; const int num_output_pixels = out_x_loop_end - out_x_loop_start; QuantizedDepthwiseConvKernel::Run( - num_output_pixels, input_depth, depth_multiplier, input_ptr, input_offset, - input_ptr_increment, filter_base_ptr, filter_offset, acc_buffer_ptr); + num_output_pixels, input_depth, depth_multiplier, input_ptr, input_offset, + input_ptr_increment, filter_base_ptr, filter_offset, acc_buffer_ptr); filter_base_ptr += output_depth; } } @@ -1690,11 +1692,11 @@ inline void 
QuantizedDepthwiseConvAccumRowGeneric(int stride, int dilation_facto const uint8_t *filter_base_ptr = filter_data; for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - const int out_x_loop_start = std::max( - out_x_buffer_start, (pad_width - dilation_factor * filter_x + stride - 1) / stride); + const int out_x_loop_start = + std::max(out_x_buffer_start, (pad_width - dilation_factor * filter_x + stride - 1) / stride); const int out_x_loop_end = - std::min(out_x_buffer_end, - (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride); + std::min(out_x_buffer_end, + (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride); int32_t *acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth; const int in_x_origin = (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x; @@ -1813,7 +1815,8 @@ inline void DepthwiseConvGeneral(const DepthwiseConvParams ¶ms, const Shape const uint8_t *input_data, const Shape &filter_shape, const uint8_t *filter_data, const Shape &bias_shape, const int32_t *bias_data, const Shape &output_shape, - uint8_t *output_data) + uint8_t *output_data, int thread_start, int thread_end, + int thread_dim) { (void)bias_shape; const int stride_width = params.stride_width; @@ -1852,6 +1855,8 @@ inline void DepthwiseConvGeneral(const DepthwiseConvParams ¶ms, const Shape assert(kOutputPixelsInAccBuffer * output_depth <= kAccBufferActualSize); assert(kAccBufferActualSize <= kAccBufferMaxSize); assert(kOutputPixelsInAccBuffer >= 1); + assert(thread_dim == 0 || thread_dim == 1); + UNUSED_RELEASE(kAccBufferActualSize); // row_accum_func will point to the core accumulation function to be used @@ -1865,7 +1870,7 @@ inline void DepthwiseConvGeneral(const DepthwiseConvParams ¶ms, const Shape depth_multiplier == FIXED_DEPTH_MULTIPLIER) \ { \ row_accum_func = \ - QuantizedDepthwiseConvAccumRow; \ + QuantizedDepthwiseConvAccumRow; \ } #ifdef USE_NEON @@ -1919,22 +1924,49 @@ inline void DepthwiseConvGeneral(const DepthwiseConvParams ¶ms, const Shape const int filter_height_stride = filter_shape.Dims(3) * filter_shape.Dims(2); // Now that we have determined row_accum_func, we can start work. 
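The out_x_loop_start / out_x_loop_end expressions above clamp the output-x range to the positions whose sampled input column lands inside the image; the (x + stride - 1) / stride form is an integer ceiling division, valid while the numerator is non-negative. A small standalone sketch of that bound computation (CeilDiv and OutXRange are illustrative names, not part of the header):

#include <algorithm>
#include <cassert>
#include <cstdio>

// Ceiling division for non-negative numerators, as used in the loop-bound math.
inline int CeilDiv(int num, int den)
{
  assert(num >= 0 && den > 0);
  return (num + den - 1) / den;
}

// First/last output x (exclusive end) whose input column
//   in_x = out_x * stride - pad_width + dilation_factor * filter_x
// lies inside [0, input_width), before clamping to the current output buffer.
void OutXRange(int stride, int dilation_factor, int pad_width, int input_width, int filter_x,
               int *out_x_start, int *out_x_end)
{
  *out_x_start = CeilDiv(std::max(0, pad_width - dilation_factor * filter_x), stride);
  *out_x_end = CeilDiv(std::max(0, pad_width + input_width - dilation_factor * filter_x), stride);
}

int main()
{
  int start = 0, end = 0;
  // stride 2, dilation 1, pad 1, input width 7, filter tap 0:
  // in_x = 2 * out_x - 1, valid for out_x in [1, 4).
  OutXRange(2, 1, 1, 7, 0, &start, &end);
  std::printf("[%d, %d)\n", start, end); // [1, 4)
  return 0;
}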
- uint8_t *output_ptr = output_data; - for (int b = 0; b < batches; ++b) + int batch_start = 0; + int batch_end = batches; + int row_start = 0; + int row_end = output_height; + int output_ptr_offset = 0; + + switch (thread_dim) + { + case 0: + // Multithread along with the batch axis + assert(thread_start >= 0); + assert(thread_end <= batches); + batch_start = thread_start; + batch_end = thread_end; + output_ptr_offset = batch_start * FlatSizeSkipDim(output_shape, 0); + break; + case 1: + // Multithread along with the row axis + assert(thread_start >= 0); + assert(thread_end <= output_height); + row_start = thread_start; + row_end = thread_end; + output_ptr_offset = row_start * output_width * output_depth; + break; + } + + uint8_t *output_ptr = output_data + output_ptr_offset; + int batch_step = (output_height + row_start - row_end) * output_width * output_depth; + for (int b = batch_start; b < batch_end; ++b) { - for (int out_y = 0; out_y < output_height; ++out_y) + for (int out_y = row_start; out_y < row_end; ++out_y) { const int in_y_origin = (out_y * stride_height) - pad_height; const int filter_y_start = - std::max(0, (-in_y_origin + dilation_height_factor - 1) / dilation_height_factor); + std::max(0, (-in_y_origin + dilation_height_factor - 1) / dilation_height_factor); const int filter_y_end = - std::min(filter_height, (input_height - in_y_origin + dilation_height_factor - 1) / - dilation_height_factor); + std::min(filter_height, (input_height - in_y_origin + dilation_height_factor - 1) / + dilation_height_factor); for (int out_x_buffer_start = 0; out_x_buffer_start < output_width; out_x_buffer_start += kOutputPixelsInAccBuffer) { const int out_x_buffer_end = - std::min(output_width, out_x_buffer_start + kOutputPixelsInAccBuffer); + std::min(output_width, out_x_buffer_start + kOutputPixelsInAccBuffer); // We call a 'pixel' a group of activation that share all but the // 'depth'/'channel' coordinate. num_output_pixels is the number of // output pixels that we will accumulate in this loop iteration. @@ -1952,7 +1984,7 @@ inline void DepthwiseConvGeneral(const DepthwiseConvParams ¶ms, const Shape filter_data + filter_y * filter_height_stride, filter_offset, out_x_buffer_start, out_x_buffer_end, output_depth, acc_buffer); } - // Finished accumulating int32 values. Now need to convert them to + // Finished accumulating int32_t values. Now need to convert them to // the final 8bit form and store them. 
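DepthwiseConvGeneral now computes only a [thread_start, thread_end) slice, either along the batch axis (thread_dim == 0) or along the output-row axis (thread_dim == 1), with output_ptr_offset and batch_step keeping the writes aligned with that slice. One plausible way a caller could split the row axis across workers is sketched below; EvenSplit is a hypothetical helper, not part of this header:

#include <cstdio>

// Hypothetical helper: split [0, total) into roughly equal [start, end) slices,
// one per thread, matching the thread_start/thread_end/thread_dim convention above.
void EvenSplit(int total, int num_threads, int thread_index, int *start, int *end)
{
  const int base = total / num_threads;
  const int remainder = total % num_threads;
  // The first `remainder` threads take one extra element.
  *start = thread_index * base + (thread_index < remainder ? thread_index : remainder);
  *end = *start + base + (thread_index < remainder ? 1 : 0);
}

int main()
{
  const int output_height = 10, num_threads = 3;
  for (int t = 0; t < num_threads; ++t)
  {
    int start = 0, end = 0;
    EvenSplit(output_height, num_threads, t, &start, &end);
    // Each worker would then call DepthwiseConvGeneral(..., start, end, /*thread_dim=*/1).
    std::printf("thread %d: rows [%d, %d)\n", t, start, end);
  }
  return 0;
}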
const int num_output_values = output_depth * num_output_pixels; int i = 0; @@ -2113,9 +2145,111 @@ inline void DepthwiseConvGeneral(const DepthwiseConvParams ¶ms, const Shape } } } + output_ptr += batch_step; } } +} // namespace depthwise_conv + +// template +inline void DepthwiseConvWithRounding(const DepthwiseConvParams ¶ms, const Shape &input_shape, + const uint8_t *input_data, const Shape &filter_shape, + const uint8_t *filter_data, const Shape &bias_shape, + const int32_t *bias_data, const Shape &output_shape, + uint8_t *output_data, int thread_start, int thread_end, + int thread_dim) +{ + const int depth_multiplier = params.depth_multiplier; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + assert(dilation_width_factor >= 1); + assert(dilation_height_factor >= 1); + assert(input_shape.DimensionsCount() == 4); + assert(filter_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + assert(output_activation_min <= output_activation_max); + const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); + const int input_depth = input_shape.Dims(3); + assert(output_depth == input_depth * depth_multiplier); + assert(bias_shape.FlatSize() == output_depth); + + UNUSED_RELEASE(depth_multiplier); + UNUSED_RELEASE(output_activation_min); + UNUSED_RELEASE(output_activation_max); + UNUSED_RELEASE(dilation_width_factor); + UNUSED_RELEASE(dilation_height_factor); + UNUSED_RELEASE(output_depth); + UNUSED_RELEASE(input_depth); + +// Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on +// Jetson TX-2. This compiler does not support the offsetof() macro. +#if defined(__aarch64__) && !defined(GOOGLE_L4T) +// TODO Use below codes +// // Dispatch to dot-product 3x3 kernels when supported. +// +// ruy::Context *ruy_context = cpu_backend_context->ruy_context(); +// const bool has_dot_product_instructions = +// ruy_context != nullptr && +// (ruy_context->GetRuntimeEnabledPaths() & ruy::Path::kNeonDotprod) != ruy::Path::kNone; +// if (has_dot_product_instructions) +// { +// using optimized_ops::depthwise_conv::DotProduct3x3KernelType; +// DotProduct3x3KernelType kernel_type = +// optimized_ops::depthwise_conv::CategorizeDotProductKernel( +// input_shape, filter_shape, params); +// if (kernel_type != DotProduct3x3KernelType::kNone) +// { +// optimized_ops::depthwise_conv::DepthwiseConvDotProduct3x3< +// DepthwiseConvImplementation::kUseNeon3x3DotProduct>(params, input_shape, input_data, +// filter_shape, filter_data, +// bias_shape, +// bias_data, output_shape, +// output_data); +// return; +// } +// } +// +// // Dispatch to non-dot-product 3x3 kernels when supported. +// +// const int stride_width = params.stride_width; +// const int stride_height = params.stride_height; +// const int pad_width = params.padding_values.width; +// const int pad_height = params.padding_values.height; +// const int output_shift = params.output_shift; +// +// // Call kernel optimized for depthwise convolutions using 3x3 filters if +// // parameters are supported. 
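The asserted parameters above are passed to UNUSED_RELEASE, defined as (void)(a) (see the Shape header added later in this patch), so that when NDEBUG strips the asserts the otherwise-unread locals and parameters do not trigger unused-variable warnings. A minimal illustration of the pattern:

#include <cassert>

#define UNUSED_RELEASE(a) (void)(a)

int DividePositive(int num, int den)
{
  const bool valid = (num >= 0 && den > 0);
  assert(valid);
  // In release builds (NDEBUG) the assert disappears; the cast-to-void
  // keeps `valid` from being reported as an unused variable.
  UNUSED_RELEASE(valid);
  return num / den;
}

int main() { return DividePositive(9, 3) == 3 ? 0 : 1; }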
+// if (depthwise_conv::Fast3x3FilterKernelSupported(input_shape, filter_shape, stride_width, +// stride_height, dilation_width_factor, +// dilation_height_factor, pad_width, pad_height, +// depth_multiplier, output_shape, output_shift)) +// { +// depthwise_conv::DepthwiseConv3x3Filter( +// params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data, +// output_shape, output_data, thread_start, thread_end, thread_dim); +// return; +// } +#endif + + depthwise_conv::DepthwiseConvGeneral(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, + thread_start, thread_end, thread_dim); +} + +inline void DepthwiseConvImpl(const DepthwiseConvParams ¶ms, const Shape &input_shape, + const uint8_t *input_data, const Shape &filter_shape, + const uint8_t *filter_data, const Shape &bias_shape, + const int32_t *bias_data, const Shape &output_shape, + uint8_t *output_data, int thread_start, int thread_end, + int thread_dim) +{ + return DepthwiseConvWithRounding(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, thread_start, + thread_end, thread_dim); +} + } // namespace optimized } // namespace cker } // namespace nnfw diff --git a/compute/cker/include/cker/operation/optimized/OptimizedUtils.h b/compute/cker/include/cker/operation/optimized/OptimizedUtils.h index ae1f9e7..f5edc94 100644 --- a/compute/cker/include/cker/operation/optimized/OptimizedUtils.h +++ b/compute/cker/include/cker/operation/optimized/OptimizedUtils.h @@ -111,7 +111,7 @@ inline void ExtractPatchIntoBufferColumn(const Shape &input_shape, int w, int h, { const int bottom_row_elements = (bottom_padding * kwidth * in_depth); const int bottom_start = - output_row_offset + ((top_padding + (ih_end - ih_start)) * kwidth * in_depth); + output_row_offset + ((top_padding + (ih_end - ih_start)) * kwidth * in_depth); memset(conv_buffer_data + bottom_start, zero_byte, (bottom_row_elements * sizeof(T))); } } @@ -159,7 +159,7 @@ void DilatedIm2col(const ConvParams ¶ms, const Shape &input_shape, const T * for (int batch = 0; batch < batches; ++batch) { const T zero_byte = - zero_bytes_len > 1 ? static_cast(zero_bytes[batch]) : static_cast(zero_bytes[0]); + zero_bytes_len > 1 ? 
static_cast(zero_bytes[batch]) : static_cast(zero_bytes[0]); for (int out_y = 0; out_y < output_height; ++out_y) { for (int out_x = 0; out_x < output_width; ++out_x) diff --git a/compute/cker/include/cker/operation/reference/BatchMatMul.h b/compute/cker/include/cker/operation/reference/BatchMatMul.h index e8ffd40..1b3020d 100644 --- a/compute/cker/include/cker/operation/reference/BatchMatMul.h +++ b/compute/cker/include/cker/operation/reference/BatchMatMul.h @@ -87,9 +87,8 @@ inline void BatchMatMul(const Shape &lhs_shape, const float *lhs_data, const Sha { const float *lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2; const float *rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2; - float *out_ptr = - output_data + - ((b0 * batch_dim1 * batch_dim2) + b1 * batch_dim2 + b2) * lhs_rows * rhs_cols; + float *out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) + b1 * batch_dim2 + b2) * + lhs_rows * rhs_cols; for (int j = 0; j < rhs_cols; ++j) { for (int i = 0; i < lhs_rows; ++i) diff --git a/compute/cker/include/cker/operation/reference/BinaryArithmeticOps.h b/compute/cker/include/cker/operation/reference/BinaryArithmeticOps.h index f7e3924..93cb21e 100644 --- a/compute/cker/include/cker/operation/reference/BinaryArithmeticOps.h +++ b/compute/cker/include/cker/operation/reference/BinaryArithmeticOps.h @@ -56,17 +56,16 @@ inline void BinaryArithmeticOp(const BinaryArithmeticOpParam ¶ms, const Shap const int size = MatchingElementsSize(input1_shape, input2_shape, output_shape); for (int i = 0; i < size; i++) { - output_data[i] = - ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]), - params.float_activation_min, params.float_activation_max); + output_data[i] = ActivationFunctionWithMinMax( + fn(input1_data[i], input2_data[i]), params.float_activation_min, params.float_activation_max); } } template inline void BroadcastBinaryArithmeticOpSlowQuant8( - const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, const T *input1_data, - const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data, - const std::function &fn) + const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, const T *input1_data, + const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data, + const std::function &fn) { NdArrayDesc<4> desc1; NdArrayDesc<4> desc2; @@ -100,10 +99,10 @@ inline void BroadcastBinaryArithmeticOpSlowQuant8( for (int c = 0; c < extended_output_shape.Dims(3); ++c) { output_data[Offset(extended_output_shape, b, y, x, c)] = - ActivationFunctionWithMinMax( - fn(params, input1_data[SubscriptToIndex(desc1, b, y, x, c)], - input2_data[SubscriptToIndex(desc2, b, y, x, c)]), - params.quantized_activation_min, params.quantized_activation_max); + ActivationFunctionWithMinMax( + fn(params, input1_data[SubscriptToIndex(desc1, b, y, x, c)], + input2_data[SubscriptToIndex(desc2, b, y, x, c)]), + params.quantized_activation_min, params.quantized_activation_max); } } } @@ -143,9 +142,9 @@ inline void BroadcastBinaryArithmeticOpSlow(const BinaryArithmeticOpParam ¶m for (int c = 0; c < extended_output_shape.Dims(3); ++c) { output_data[Offset(extended_output_shape, b, y, x, c)] = ActivationFunctionWithMinMax( - fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)], - input2_data[SubscriptToIndex(desc2, b, y, x, c)]), - params.quantized_activation_min, params.quantized_activation_max); + fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)], + input2_data[SubscriptToIndex(desc2, b, y, x, c)]), + params.quantized_activation_min, params.quantized_activation_max); } } } @@ 
-154,9 +153,9 @@ inline void BroadcastBinaryArithmeticOpSlow(const BinaryArithmeticOpParam ¶m template <> inline void BroadcastBinaryArithmeticOpSlow( - const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, const float *input1_data, - const Shape &input2_shape, const float *input2_data, const Shape &output_shape, - float *output_data, const std::function &fn) + const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, const float *input1_data, + const Shape &input2_shape, const float *input2_data, const Shape &output_shape, + float *output_data, const std::function &fn) { NdArrayDesc<4> desc1; NdArrayDesc<4> desc2; @@ -171,10 +170,10 @@ inline void BroadcastBinaryArithmeticOpSlow( { for (int c = 0; c < extended_output_shape.Dims(3); ++c) { - output_data[Offset(extended_output_shape, b, y, x, c)] = ActivationFunctionWithMinMax( - fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)], - input2_data[SubscriptToIndex(desc2, b, y, x, c)]), - params.float_activation_min, params.float_activation_max); + output_data[Offset(extended_output_shape, b, y, x, c)] = + ActivationFunctionWithMinMax(fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)], + input2_data[SubscriptToIndex(desc2, b, y, x, c)]), + params.float_activation_min, params.float_activation_max); } } } diff --git a/compute/cker/include/cker/operation/reference/Conv.h b/compute/cker/include/cker/operation/reference/Conv.h index 86e8b51..43a5bf2 100644 --- a/compute/cker/include/cker/operation/reference/Conv.h +++ b/compute/cker/include/cker/operation/reference/Conv.h @@ -98,8 +98,8 @@ inline void Conv(const ConvParams ¶ms, const Shape &input_shape, const float bias_value = bias_data[out_channel]; } output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = - ActivationFunctionWithMinMax(total + bias_value, output_activation_min, - output_activation_max); + ActivationFunctionWithMinMax(total + bias_value, output_activation_min, + output_activation_max); } } } @@ -183,7 +183,7 @@ inline void Conv(const ConvParams ¶ms, const Shape &input_shape, const uint8 acc = std::max(acc, output_activation_min); acc = std::min(acc, output_activation_max); output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = - static_cast(acc); + static_cast(acc); } } } diff --git a/compute/cker/include/cker/ruy/RuySupport.h b/compute/cker/include/cker/ruy/RuySupport.h index 7b4ff20..62eeaf6 100644 --- a/compute/cker/include/cker/ruy/RuySupport.h +++ b/compute/cker/include/cker/ruy/RuySupport.h @@ -52,7 +52,7 @@ void MakeRuyMatrix(const MatrixParams ¶ms, DataPointer data_ptr, ruy::Matrix *dst, bool use_caching = false) { ruy::Order ruy_order = - params.order == Order::kColMajor ? ruy::Order::kColMajor : ruy::Order::kRowMajor; + params.order == Order::kColMajor ? ruy::Order::kColMajor : ruy::Order::kRowMajor; ruy::MakeSimpleLayout(params.rows, params.cols, ruy_order, dst->mutable_layout()); // Note that ruy::Matrix::data is a ConstCheckingPtr, not a plain pointer. // It does care whether we assign to it a Scalar* or a const Scalar*. 
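The reference kernels above address NHWC tensors through a single flattened index: Offset(shape, b, y, x, c) evaluates to ((b * H + y) * W + x) * C + c, so the innermost channel index is contiguous in memory. A standalone sketch of that row-major indexing (FlatOffset is an illustrative name):

#include <cassert>
#include <cstdio>

// Row-major (NHWC) flattened index, matching Offset(shape, i0, i1, i2, i3):
// ((i0 * dims[1] + i1) * dims[2] + i2) * dims[3] + i3.
inline int FlatOffset(const int dims[4], int i0, int i1, int i2, int i3)
{
  assert(i0 >= 0 && i0 < dims[0] && i1 >= 0 && i1 < dims[1]);
  assert(i2 >= 0 && i2 < dims[2] && i3 >= 0 && i3 < dims[3]);
  return ((i0 * dims[1] + i1) * dims[2] + i2) * dims[3] + i3;
}

int main()
{
  const int dims[4] = {2, 3, 4, 5}; // N, H, W, C
  // Stepping the channel index moves by 1; W by C; H by W*C; N by H*W*C.
  std::printf("%d %d %d %d\n", FlatOffset(dims, 0, 0, 0, 1), FlatOffset(dims, 0, 0, 1, 0),
              FlatOffset(dims, 0, 1, 0, 0), FlatOffset(dims, 1, 0, 0, 0)); // 1 5 20 60
  return 0;
}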
diff --git a/compute/ruy/CMakeLists.txt b/compute/ruy/CMakeLists.txt new file mode 100644 index 0000000..d98ee1c --- /dev/null +++ b/compute/ruy/CMakeLists.txt @@ -0,0 +1,11 @@ +nnfw_find_package(Ruy REQUIRED) + +add_library(nnfw_lib_ruy INTERFACE) +target_link_libraries(nnfw_lib_ruy INTERFACE ruy) +target_link_libraries(nnfw_lib_ruy INTERFACE ruy_instrumentation) +target_compile_definitions(nnfw_lib_ruy INTERFACE USE_RUY_GEMV) +if(PROFILE_RUY) + target_link_libraries(nnfw_lib_ruy INTERFACE ruy_profiler) +endif(PROFILE_RUY) + +target_include_directories(nnfw_lib_ruy INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include) diff --git a/compute/ruy/include/ruy/NeonTensorUtils.h b/compute/ruy/include/ruy/NeonTensorUtils.h new file mode 100644 index 0000000..fb8b0a3 --- /dev/null +++ b/compute/ruy/include/ruy/NeonTensorUtils.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_RUY_NEON_TENSOR_UTILS_H__ +#define __NNFW_RUY_NEON_TENSOR_UTILS_H__ + +#include "ruy/neon/neon_check.h" + +#ifdef USE_NEON + +#define kFloatWeightsPerNeonLane 4 + +namespace nnfw +{ +namespace ruy +{ + +inline bool NeonIsZeroVector(const float *vector, int v_size) +{ + // If v_size is not divisible by kFloatWeightsPerNeonLane, we cannot + // use the main vectorized loop, and we need to process sequentially. + // postamble_start shows the start index where this should happen. + const int postamble_start = v_size - (v_size & (kFloatWeightsPerNeonLane - 1)); + + const float32x4_t zero_x4_float = vmovq_n_f32(0.0f); + for (int v = 0; v < postamble_start; v += kFloatWeightsPerNeonLane) + { + const float32x4_t i_x4_float = vld1q_f32(vector + v); + uint32x4_t cmp_result = vceqq_f32(i_x4_float, zero_x4_float); + if (vgetq_lane_u32(cmp_result, 0) == 0) + return false; + if (vgetq_lane_u32(cmp_result, 1) == 0) + return false; + if (vgetq_lane_u32(cmp_result, 2) == 0) + return false; + if (vgetq_lane_u32(cmp_result, 3) == 0) + return false; + } + + // Postamble loop + for (int v = postamble_start; v < v_size; ++v) + { + if (vector[v] != 0.0) + return false; + } + return true; +} + +} // namespace ruy +} // namespace nnfw + +#endif // USE_NEON + +#endif // __NNFW_RUY_NEON_TENSOR_UTILS_H__ diff --git a/compute/ruy/include/ruy/PortableTensorUtils.h b/compute/ruy/include/ruy/PortableTensorUtils.h new file mode 100644 index 0000000..2d2c36c --- /dev/null +++ b/compute/ruy/include/ruy/PortableTensorUtils.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_RUY_PORTABLE_TENSOR_UTILS_H__ +#define __NNFW_RUY_PORTABLE_TENSOR_UTILS_H__ + +namespace nnfw +{ +namespace ruy +{ + +inline bool PortableIsZeroVector(const float *vector, int v_size) +{ + for (int i = 0; i < v_size; ++i) + { + if (*vector++ != 0.0f) + return false; + } + return true; +} + +} // namespace ruy +} // namespace nnfw + +#endif // __NNFW_RUY_PORTABLE_TENSOR_UTILS_H__ diff --git a/compute/ruy/include/ruy/RuySupport.h b/compute/ruy/include/ruy/RuySupport.h new file mode 100644 index 0000000..7086a96 --- /dev/null +++ b/compute/ruy/include/ruy/RuySupport.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_RUY_RUY_SUPPORT_H__ +#define __NNFW_RUY_RUY_SUPPORT_H__ + +#include +#include +#include +#include +#include "Types.h" + +namespace nnfw +{ +namespace ruy +{ +namespace ruy_support +{ + +inline ::ruy::CachePolicy ToRuyCachePolicy(CachePolicy cache_policy) +{ + switch (cache_policy) + { + case CachePolicy::kNeverCache: + return ::ruy::CachePolicy::kNeverCache; + case CachePolicy::kCacheIfLargeSpeedup: + return ::ruy::CachePolicy::kCacheIfLargeSpeedup; + case CachePolicy::kAlwaysCache: + return ::ruy::CachePolicy::kAlwaysCache; + default: + assert(false); + return ::ruy::CachePolicy::kNeverCache; + } +} + +template +void MakeRuyMatrix(const MatrixParams ¶ms, DataPointer data_ptr, + ::ruy::Matrix *dst, bool use_caching = false) +{ + ::ruy::Order ruy_order = + params.order == Order::kColMajor ? ::ruy::Order::kColMajor : ::ruy::Order::kRowMajor; + ::ruy::MakeSimpleLayout(params.rows, params.cols, ruy_order, dst->mutable_layout()); + // Note that ruy::Matrix::data is a ConstCheckingPtr, not a plain pointer. + // It does care whether we assign to it a Scalar* or a const Scalar*. + dst->set_data(data_ptr); + dst->set_zero_point(params.zero_point); + if (use_caching) + { + dst->set_cache_policy(ToRuyCachePolicy(params.cache_policy)); + } +} + +template +void MakeRuyMulParams(const GemmParamsType ¶ms, RuySpecType *ruy_mul_params) +{ + // This validation has already been performed by the Gemm API entry point, + // but it doesn't hurt to test specifically this again here, where it's + // being used. 
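NeonIsZeroVector above rounds the vector length down to a multiple of the NEON lane count with v_size - (v_size & (kFloatWeightsPerNeonLane - 1)) and leaves the tail to a scalar postamble, returning early as soon as any element is nonzero; PortableIsZeroVector is the fallback selected by NEON_OR_PORTABLE. A portable sketch of the same main-loop/postamble split (illustration only, not part of either header):

#include <cstdio>

// Portable model of the main-loop/postamble split used by NeonIsZeroVector.
bool IsZeroVectorSketch(const float *vector, int v_size)
{
  constexpr int kLanes = 4; // kFloatWeightsPerNeonLane
  // Round v_size down to a multiple of kLanes (kLanes is a power of two).
  const int postamble_start = v_size - (v_size & (kLanes - 1));
  for (int v = 0; v < postamble_start; v += kLanes)
  {
    // The NEON version checks all four lanes of vceqq_f32 here.
    for (int lane = 0; lane < kLanes; ++lane)
      if (vector[v + lane] != 0.0f)
        return false;
  }
  for (int v = postamble_start; v < v_size; ++v) // scalar postamble
    if (vector[v] != 0.0f)
      return false;
  return true;
}

int main()
{
  const float a[6] = {0, 0, 0, 0, 0, 0};
  const float b[6] = {0, 0, 0, 0, 0, 1};
  std::printf("%d %d\n", IsZeroVectorSketch(a, 6), IsZeroVectorSketch(b, 6)); // 1 0
  return 0;
}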
+ ValidateGemmParams(params); + + ruy_mul_params->set_multiplier_fixedpoint(params.multiplier_fixedpoint); + ruy_mul_params->set_multiplier_exponent(params.multiplier_exponent); + ruy_mul_params->set_multiplier_fixedpoint_perchannel(params.multiplier_fixedpoint_perchannel); + ruy_mul_params->set_multiplier_exponent_perchannel(params.multiplier_exponent_perchannel); + ruy_mul_params->set_bias(params.bias); + ruy_mul_params->set_clamp_min(params.clamp_min); + ruy_mul_params->set_clamp_max(params.clamp_max); +} + +} // namespace ruy_support +} // namespace ruy +} // namespace nnfw + +#endif // __NNFW_RUY_RUY_SUPPORT_H__ diff --git a/compute/ruy/include/ruy/Shape.h b/compute/ruy/include/ruy/Shape.h new file mode 100644 index 0000000..981c5b4 --- /dev/null +++ b/compute/ruy/include/ruy/Shape.h @@ -0,0 +1,354 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_RUY_SHAPE_H__ +#define __NNFW_RUY_SHAPE_H__ + +#include +#include +#include +#include + +#define UNUSED_RELEASE(a) (void)(a) + +namespace nnfw +{ +namespace ruy +{ + +class Shape +{ +public: + // Shapes with dimensions up to 5 are stored directly in the structure, while + // larger shapes are separately allocated. + static constexpr int kMaxSmallSize = 5; + + Shape &operator=(Shape const &) = delete; + + Shape() : _size(0) {} + + explicit Shape(int dimensions_count) : _size(dimensions_count) + { + if (dimensions_count > kMaxSmallSize) + { + _dims_pointer = new int32_t[dimensions_count]; + } + } + + Shape(int shape_size, int32_t value) : _size(0) + { + Resize(shape_size); + for (int i = 0; i < shape_size; ++i) + { + SetDim(i, value); + } + } + + Shape(int dimensions_count, const int32_t *dims_data) : _size(0) + { + ReplaceWith(dimensions_count, dims_data); + } + + Shape(const std::initializer_list init_list) : _size(0) { BuildFrom(init_list); } + + // Avoid using this constructor. We should be able to delete it when C++17 + // rolls out. + Shape(Shape const &other) : _size(other.DimensionsCount()) + { + if (_size > kMaxSmallSize) + { + _dims_pointer = new int32_t[_size]; + } + std::memcpy(DimsData(), other.DimsData(), sizeof(int32_t) * _size); + } + + bool operator==(const Shape &comp) const + { + return this->_size == comp._size && + std::memcmp(DimsData(), comp.DimsData(), _size * sizeof(int32_t)) == 0; + } + + ~Shape() + { + if (_size > kMaxSmallSize) + { + delete[] _dims_pointer; + } + } + + inline int32_t DimensionsCount() const { return _size; } + inline int32_t Dims(int i) const + { + assert(i >= 0); + assert(i < _size); + return _size > kMaxSmallSize ? _dims_pointer[i] : _dims[i]; + } + inline void SetDim(int i, int32_t val) + { + assert(i >= 0); + assert(i < _size); + if (_size > kMaxSmallSize) + { + _dims_pointer[i] = val; + } + else + { + _dims[i] = val; + } + } + + inline int32_t *DimsData() { return _size > kMaxSmallSize ? 
_dims_pointer : _dims; } + inline const int32_t *DimsData() const { return _size > kMaxSmallSize ? _dims_pointer : _dims; } + // The caller must ensure that the shape is no bigger than 4-D. + inline const int32_t *DimsDataUpTo4D() const { return _dims; } + + inline void Resize(int dimensions_count) + { + if (_size > kMaxSmallSize) + { + delete[] _dims_pointer; + } + _size = dimensions_count; + if (dimensions_count > kMaxSmallSize) + { + _dims_pointer = new int32_t[dimensions_count]; + } + } + + inline void ReplaceWith(int dimensions_count, const int32_t *dims_data) + { + Resize(dimensions_count); + int32_t *dst_dims = DimsData(); + std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32_t)); + } + + inline void ReplaceWith(const Shape &other) + { + ReplaceWith(other.DimensionsCount(), other.DimsData()); + } + + inline void ReplaceWith(Shape &&other) + { + Resize(0); + std::swap(_size, other._size); + if (_size <= kMaxSmallSize) + std::copy(other._dims, other._dims + kMaxSmallSize, _dims); + else + _dims_pointer = other._dims_pointer; + } + + template inline void BuildFrom(const T &src_iterable) + { + const int dimensions_count = std::distance(src_iterable.begin(), src_iterable.end()); + Resize(dimensions_count); + int32_t *data = DimsData(); + for (auto it : src_iterable) + { + *data = it; + ++data; + } + } + + // This will probably be factored out. Old code made substantial use of 4-D + // shapes, and so this function is used to extend smaller shapes. Note that + // (a) as Dims<4>-dependent code is eliminated, the reliance on this should be + // reduced, and (b) some kernels are stricly 4-D, but then the shapes of their + // inputs should already be 4-D, so this function should not be needed. + inline static Shape ExtendedShape(int new_shape_size, const Shape &shape) + { + return Shape(new_shape_size, shape, 1); + } + + inline void BuildFrom(const std::initializer_list init_list) + { + BuildFrom>(init_list); + } + + // Returns the total count of elements, that is the size when flattened into a + // vector. + inline int FlatSize() const + { + int buffer_size = 1; + const int *dims_data = DimsData(); + for (int i = 0; i < _size; i++) + { + const int dim = dims_data[i]; + assert(dim >= 1); + buffer_size *= dim; + } + return buffer_size; + } + + bool operator!=(const Shape &comp) const { return !((*this) == comp); } + +private: + // For use only by ExtendedShape(), written to guarantee (return-value) copy + // elision in C++17. + // This creates a shape padded to the desired size with the specified value. + Shape(int new_shape_size, const Shape &shape, int pad_value) : _size(0) + { + assert(new_shape_size >= shape.DimensionsCount()); + assert(new_shape_size <= kMaxSmallSize); + Resize(new_shape_size); + const int size_increase = new_shape_size - shape.DimensionsCount(); + for (int i = 0; i < size_increase; ++i) + { + SetDim(i, pad_value); + } + std::memcpy(DimsData() + size_increase, shape.DimsData(), + sizeof(int32_t) * shape.DimensionsCount()); + } + + int32_t _size; + union { + int32_t _dims[kMaxSmallSize]; + int32_t *_dims_pointer{nullptr}; + }; +}; + +inline int MatchingDim(const Shape &shape1, int index1, const Shape &shape2, int index2) +{ + UNUSED_RELEASE(shape2); + UNUSED_RELEASE(index2); + assert(shape1.Dims(index1) == shape2.Dims(index2)); + return shape1.Dims(index1); +} + +template +int MatchingDim(const Shape &shape1, int index1, const Shape &shape2, int index2, Args... 
args) +{ + assert(shape1.Dims(index1) == shape2.Dims(index2)); + UNUSED_RELEASE(shape2); + UNUSED_RELEASE(index2); + return MatchingDim(shape1, index1, args...); +} + +inline Shape GetShape(const std::vector &data) { return Shape(data.size(), data.data()); } + +inline int Offset(const Shape &shape, int i0, int i1, int i2, int i3) +{ + assert(shape.DimensionsCount() == 4); + const int *dims_data = shape.DimsDataUpTo4D(); + assert(i0 >= 0 && i0 < dims_data[0]); + assert(i1 >= 0 && i1 < dims_data[1]); + assert(i2 >= 0 && i2 < dims_data[2]); + assert(i3 >= 0 && i3 < dims_data[3]); + return ((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3; +} + +inline int Offset(const Shape &shape, int *index) +{ + return Offset(shape, index[0], index[1], index[2], index[3]); +} + +inline int FlatSizeSkipDim(const Shape &shape, int skip_dim) +{ + const int dims_count = shape.DimensionsCount(); + assert(skip_dim >= 0 && skip_dim < dims_count); + const auto *dims_data = shape.DimsData(); + int flat_size = 1; + for (int i = 0; i < dims_count; ++i) + { + flat_size *= (i == skip_dim) ? 1 : dims_data[i]; + } + return flat_size; +} + +// Flat size calculation, checking that dimensions match with one or more other +// arrays. +template inline bool checkMatching(const Shape &shape, Ts... check_shapes) +{ + const Shape check_shapes_array[sizeof...(Ts)] = {std::forward(check_shapes)...}; + for (const auto &check_shape : check_shapes_array) + { + // Check matching of shapes except the case of that two shapes can be scalar + if (shape.DimensionsCount() > 1 || check_shape.DimensionsCount() > 1 || shape.FlatSize() != 1 || + check_shape.FlatSize() != 1) + { + if (shape.DimensionsCount() != check_shape.DimensionsCount()) + { + return false; + } + for (int i = 0; i < shape.DimensionsCount(); ++i) + { + if (shape.Dims(i) != check_shape.Dims(i)) + { + return false; + } + } + } + } + return true; +} + +struct UNUSED_ALL +{ + template UNUSED_ALL(Args const &...) {} +}; +template inline int MatchingFlatSize(const Shape &shape, Ts... 
check_shapes) +{ + UNUSED_ALL{check_shapes...}; + assert(checkMatching(shape, std::forward(check_shapes)...)); + return shape.FlatSize(); +} + +inline int MatchingFlatSizeSkipDim(const Shape &shape, int skip_dim, const Shape &check_shape_0) +{ + UNUSED_RELEASE(check_shape_0); + const int dims_count = shape.DimensionsCount(); + for (int i = 0; i < dims_count; ++i) + { + if (i != skip_dim) + { + assert(shape.Dims(i) == check_shape_0.Dims(i)); + } + } + return FlatSizeSkipDim(shape, skip_dim); +} + +inline int MatchingFlatSizeSkipDim(const Shape &shape, int skip_dim, const Shape &check_shape_0, + const Shape &check_shape_1) +{ + UNUSED_RELEASE(check_shape_0); + const int dims_count = shape.DimensionsCount(); + for (int i = 0; i < dims_count; ++i) + { + if (i != skip_dim) + { + assert(shape.Dims(i) == check_shape_0.Dims(i)); + } + } + return MatchingFlatSizeSkipDim(shape, skip_dim, check_shape_1); +} + +inline int MatchingElementsSize(const Shape &shape, const Shape &check_shape_0, + const Shape &check_shape_1) +{ + const int size_1 = shape.FlatSize(); + const int size_2 = check_shape_0.FlatSize(); + const int size_3 = check_shape_1.FlatSize(); + assert(size_1 == size_2); + assert(size_2 == size_3); + UNUSED_RELEASE(size_2); + UNUSED_RELEASE(size_3); + return size_1; +} + +} // namespace ruy +} // namespace nnfw + +#endif // __NNFW_RUY_SHAPE_H__ diff --git a/compute/ruy/include/ruy/TensorUtils.h b/compute/ruy/include/ruy/TensorUtils.h new file mode 100644 index 0000000..149037c --- /dev/null +++ b/compute/ruy/include/ruy/TensorUtils.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_RUY_TENSOR_UTILS_H__ +#define __NNFW_RUY_TENSOR_UTILS_H__ + +#include "ruy/PortableTensorUtils.h" +#include "ruy/NeonTensorUtils.h" + +namespace nnfw +{ +namespace ruy +{ + +inline bool IsZeroVector(const float *vector, int v_size) +{ + return NEON_OR_PORTABLE(IsZeroVector, vector, v_size); +} + +} // namespace ruy +} // namespace nnfw + +#endif // __NNFW_RUY_TENSOR_UTILS_H__ diff --git a/compute/ruy/include/ruy/Types.h b/compute/ruy/include/ruy/Types.h new file mode 100644 index 0000000..b19b597 --- /dev/null +++ b/compute/ruy/include/ruy/Types.h @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_RUY_TYPES_H__ +#define __NNFW_RUY_TYPES_H__ + +#include +#include +#include +#include +#include +#include "Shape.h" + +namespace nnfw +{ +namespace ruy +{ + +enum class FusedActivationFunctionType +{ + kNone = 0, + kRelu6 = 1, + kRelu1 = 2, + kRelu = 3, + kTanh = 4, + kSigmoid = 6, +}; + +enum class PaddingType +{ + kNone = 0, + kSame = 1, + kValid = 2, +}; + +struct PaddingValues +{ + int16_t width; + int16_t height; +}; + +struct ConvParams +{ + PaddingType padding_type; + PaddingValues padding_values; + // TODO(starka): This was just "stride", so check that width+height is OK. + int16_t stride_width; + int16_t stride_height; + int16_t dilation_width_factor; + int16_t dilation_height_factor; + // uint8_t inference params. + // TODO(b/65838351): Use smaller types if appropriate. + int32_t input_offset; + int32_t weights_offset; + int32_t output_offset; + int32_t output_multiplier; + int output_shift; + // uint8_t, etc, activation params. + int32_t quantized_activation_min; + int32_t quantized_activation_max; + // float activation params. + float float_activation_min; + float float_activation_max; + bool is_replaced_weights{false}; +}; + +struct FullyConnectedParams +{ + FusedActivationFunctionType activation{FusedActivationFunctionType::kNone}; + // uint8 inference params. + // TODO(b/65838351): Use smaller types if appropriate. + int32_t input_offset; + int32_t weights_offset; + float weights_scale; + int32_t output_offset; + int32_t output_multiplier; + int output_shift; + // uint8, etc, activation params. + int32_t quantized_activation_min; + int32_t quantized_activation_max; + // float activation params - no one use this params, but ruy might use them later. + float float_activation_min; + float float_activation_max; + // Mark the operands as cacheable if they are unchanging, e.g. weights. + bool lhs_cacheable; + bool rhs_cacheable; + // FullyConnectedWeightsFormat weights_format; +}; + +enum class Order +{ + kColMajor, + kRowMajor +}; + +enum class CachePolicy : std::uint8_t +{ + kNeverCache, + kCacheIfLargeSpeedup, + kAlwaysCache, +}; + +// MatrixParams encapsulates the parameters that Gemm needs about each +// matrix, besides the buffer data pointer. +// Compare to ruy::Matrix, which also encapsulates the data pointer. +// Rationale for leaving the data pointer out of here: doing so +// requires complicated const-correctness mechanics. See +// ruy::ConstCheckingPtr. +template struct MatrixParams +{ + // Storage layout order. For now we only do plain linear non-strided + // layout. It would be easy to support a stride if needed. + Order order = Order::kColMajor; + // Number of rows of the matrix. + int rows = 0; + // Number of columns of the matrix. + int cols = 0; + // The zero_point, i.e. which Scalar value is to be interpreted as zero. + // When Scalar is floating-point, this must be 0. + Scalar zero_point = 0; + // When the data pointed to by this matrix is constant data, so that it is + // valid to assume that equality of pointers implies equality of data, + // a CachePolicy may be used instead of the default kNeverCache, + // which will enable ruy to take advantage of this constancy of the data to + // cache the packing work, which can be a large speedup in matrix*vector + // and other narrow shapes. + CachePolicy cache_policy = CachePolicy::kNeverCache; +}; + +// Enumeration of broad categories of Gemm. 
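A caller describes each GEMM operand with a MatrixParams: storage order, rows, cols, zero point, and a cache policy that stays kNeverCache except for constant data such as weights, where DefaultCachePolicy(true) (defined later in this header) requests caching of the packing work. A usage sketch follows; the local enum and struct definitions are simplified stand-ins so the snippet compiles on its own:

#include <cstdint>
#include <cstdio>

// Simplified stand-ins mirroring the declarations above, so the sketch is self-contained.
enum class Order { kColMajor, kRowMajor };
enum class CachePolicy : std::uint8_t { kNeverCache, kCacheIfLargeSpeedup, kAlwaysCache };
template <typename Scalar> struct MatrixParams
{
  Order order = Order::kColMajor;
  int rows = 0;
  int cols = 0;
  Scalar zero_point = 0;
  CachePolicy cache_policy = CachePolicy::kNeverCache;
};
inline CachePolicy DefaultCachePolicy(bool is_constant_data)
{
  return is_constant_data ? CachePolicy::kCacheIfLargeSpeedup : CachePolicy::kNeverCache;
}

int main()
{
  // Constant uint8 weights of a 64x128 fully connected layer, zero point 128.
  MatrixParams<std::uint8_t> lhs_params;
  lhs_params.order = Order::kRowMajor;
  lhs_params.rows = 64;
  lhs_params.cols = 128;
  lhs_params.zero_point = 128;
  lhs_params.cache_policy = DefaultCachePolicy(/*is_constant_data=*/true);
  std::printf("cacheable: %d\n", lhs_params.cache_policy == CachePolicy::kCacheIfLargeSpeedup);
  return 0;
}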
+// +// The primary reason for this to exist is to allow Gemm to compile +// only uniform-quantized or only per-channel-quantized code paths. +// This is unneeded with ruy as the back-end, as this is only a runtime +// difference in ruy, but with gemmlowp these really are separate code +// paths and templatizing in a QuantizationFlavor is necessary to avoid +// compiling unused gemmlowp code. Indeed, TFLite currently uses +// uint8 with uniform quantization and int8 with per-channel quantization, +// and does not use uint8 with per-channel. We want to avoid compiling +// the gemmlowp uint8 per-channel path when gemmlowp is the back-end. +// +// It's possible to drop this in the future if gemmlowp goes away and no +// other then-relevant backend library handles quantized paths in a way that +// requires knowing this at compile-time. +enum class QuantizationFlavor +{ + // Floating-point Gemm: the accumulators are not multiplied by any + // 'multiplier'. + kFloatingPoint, + // Quantized Gemm using a single multiplier for all accumulators. + kIntegerWithUniformMultiplier, + // Quantized Gemm using a separate multipliers for accumulators of each + // row of the destination matrix. This is what is called 'per-channel' + // in GemmParams. Here we use the more specific 'per-row' terminology + // to allow for the possibility of 'per-column' in the future, and to + // allow for that to be a separate code path in some back-end such as + // gemmlowp. + kIntegerWithPerRowMultiplier +}; + +// Additional parameters that Gemm needs, beyond what falls into +// the MatrixParams that it takes. Compare to ruy::Spec. +// +// Decoupling AccumScalar from DstScalar (rather than deducing it from that) +// is useful future-proofing. Think of a float16 path using float32 accum. +// +// QuantizationFlavor is passed here even though it's technically not used +// in this class. This is so that we retain the ability in the future to +// specialize this class for quantization flavor, and this allows for +// Gemm to be templatized in quantization_flavor via the GemmParams that it +// takes, allowing for automatic template parameter deduction to take place, +// so that most call sites don't need to specify a QuantizationFlavor +// (only those that need perchannel quantization do). +template ::value + ? QuantizationFlavor::kFloatingPoint + : QuantizationFlavor::kIntegerWithUniformMultiplier> +struct GemmParams +{ + // Only for non-floating-point cases. The fixed-point part (i.e. the mantissa) + // of the multiplier by which accumulators are multiplied before being casted + // to the destination type. + AccumScalar multiplier_fixedpoint = 0; + // Only for non-floating-point cases. The exponent part of the aforementioned + // multiplier. + int multiplier_exponent = 0; + // Per-channel variant of multiplier_fixedpoint. If not nullptr, this must + // point to a buffer of as many values as there are rows in the destination + // matrix. Each row of the destination matrix will use the corresponding + // buffer element instead of multiplier_fixedpoint. + const AccumScalar *multiplier_fixedpoint_perchannel = nullptr; + // Per-channel variant of multiplier_exponent. If not nullptr, this must + // point to a buffer of as many values as there are rows in the destination + // matrix. Each row of the destination matrix will use the corresponding + // buffer element instead of multiplier_exponent. + // + // Either none or both of multiplier_exponent_perchannel and + // multiplier_fixedpoint_perchannel must be nullptr. 
+  const int *multiplier_exponent_perchannel = nullptr;
+  // The bias vector data, if not null.
+  const AccumScalar *bias = nullptr;
+  // min clamp bound of destination values.
+  DstScalar clamp_min = std::is_floating_point<DstScalar>::value
+                          ? -std::numeric_limits<DstScalar>::infinity()
+                          : std::numeric_limits<DstScalar>::lowest();
+  // max clamp bound of destination values.
+  DstScalar clamp_max = std::is_floating_point<DstScalar>::value
+                          ? std::numeric_limits<DstScalar>::infinity()
+                          : std::numeric_limits<DstScalar>::max();
+};
+
+// Validates self-consistency of GemmParams.
+template <typename AccumScalar, typename DstScalar, QuantizationFlavor quantization_flavor>
+void ValidateGemmParams(const GemmParams<AccumScalar, DstScalar, quantization_flavor> &params)
+{
+  // Guard consistency of the quantized multiplier fields.
+  if (quantization_flavor == QuantizationFlavor::kFloatingPoint)
+  {
+    assert(!params.multiplier_fixedpoint);
+    assert(!params.multiplier_exponent);
+    assert(!params.multiplier_fixedpoint_perchannel);
+    assert(!params.multiplier_exponent_perchannel);
+  }
+  else if (quantization_flavor == QuantizationFlavor::kIntegerWithUniformMultiplier &&
+           !std::is_same<DstScalar, int32_t>::value)
+  {
+    assert(params.multiplier_fixedpoint);
+    // Nothing to check about multiplier_exponent
+    assert(!params.multiplier_fixedpoint_perchannel);
+    assert(!params.multiplier_exponent_perchannel);
+  }
+  else if (quantization_flavor == QuantizationFlavor::kIntegerWithPerRowMultiplier &&
+           !std::is_same<DstScalar, int32_t>::value)
+  {
+    assert(!params.multiplier_fixedpoint);
+    assert(!params.multiplier_exponent);
+    assert(params.multiplier_fixedpoint_perchannel);
+    assert(params.multiplier_exponent_perchannel);
+  }
+  else
+  {
+    // For the get raw accumulator case, we should make sure none of the
+    // quantization params are set.
+    assert(!params.multiplier_fixedpoint);
+    assert(!params.multiplier_exponent);
+    assert(!params.multiplier_fixedpoint_perchannel);
+    assert(!params.multiplier_exponent_perchannel);
+  }
+  UNUSED_RELEASE(params);
+}
+
+inline CachePolicy DefaultCachePolicy(bool is_constant_data)
+{
+  return is_constant_data ? CachePolicy::kCacheIfLargeSpeedup : CachePolicy::kNeverCache;
+}
+
+} // namespace ruy
+} // namespace nnfw
+
+#endif // __NNFW_RUY_TYPES_H__
diff --git a/compute/ruy/include/ruy/Utils.h b/compute/ruy/include/ruy/Utils.h
new file mode 100644
index 0000000..50205ab
--- /dev/null
+++ b/compute/ruy/include/ruy/Utils.h
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
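For orientation, a minimal usage sketch for the parameter structs defined in Types.h above; the shapes, the fused-ReLU clamp, and the helper function name are illustrative assumptions, not code from this patch:

```cpp
#include <limits>
#include "ruy/Types.h"

// Fill MatrixParams/GemmParams for a float GEMM with constant weights.
inline void ExampleFloatGemmParams()
{
  nnfw::ruy::MatrixParams<float> lhs;
  lhs.order = nnfw::ruy::Order::kRowMajor;
  lhs.rows = 16; // e.g. output channels
  lhs.cols = 64; // e.g. accumulation depth
  lhs.cache_policy = nnfw::ruy::DefaultCachePolicy(/*is_constant_data=*/true);

  nnfw::ruy::GemmParams<float, float> gemm;
  gemm.clamp_min = 0.0f; // fused ReLU lower bound
  gemm.clamp_max = std::numeric_limits<float>::infinity();
  // Float flavor: no quantized multipliers may be set.
  nnfw::ruy::ValidateGemmParams(gemm);
}
```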
+ */ + +#ifndef __NNFW_RUY_UTILS_H__ +#define __NNFW_RUY_UTILS_H__ + +#include "Types.h" +#include "Shape.h" + +#include + +namespace nnfw +{ +namespace ruy +{ +template +inline void ExtractPatchIntoBufferColumn(const Shape &input_shape, int w, int h, int b, int kheight, + int kwidth, int stride_width, int stride_height, + int pad_width, int pad_height, int in_width, int in_height, + int in_depth, int single_buffer_length, int buffer_id, + const T *in_data, T *conv_buffer_data, uint8_t zero_byte) +{ + assert(input_shape.DimensionsCount() == 4); + // This chunk of code reshapes all the inputs corresponding to + // output (b, h, w) to a column vector in conv_buffer(:, buffer_id). + const int kwidth_times_indepth = kwidth * in_depth; + const int inwidth_times_indepth = in_width * in_depth; + const int ih_ungated_start = h * stride_height - pad_height; + const int ih_ungated_end = (ih_ungated_start + kheight); + const int ih_end = std::min(ih_ungated_end, in_height); + const int iw_ungated_start = w * stride_width - pad_width; + const int iw_ungated_end = (iw_ungated_start + kwidth); + const int iw_end = std::min(iw_ungated_end, in_width); + // If the patch is off the edge of the input image, skip writing those rows + // and columns from the patch into the output array. + const int h_offset = std::max(0, -ih_ungated_start); + const int w_offset = std::max(0, -iw_ungated_start); + const int ih_start = std::max(0, ih_ungated_start); + const int iw_start = std::max(0, iw_ungated_start); + const int single_row_num = std::min(kwidth - w_offset, in_width - iw_start) * in_depth; + const int output_row_offset = (buffer_id * single_buffer_length); + int out_offset = output_row_offset + (h_offset * kwidth + w_offset) * in_depth; + int in_offset = Offset(input_shape, b, ih_start, iw_start, 0); + + // Express all of the calculations as padding around the input patch. + const int top_padding = h_offset; + const int bottom_padding = (ih_ungated_end - ih_end); + const int left_padding = w_offset; + const int right_padding = (iw_ungated_end - iw_end); + assert(single_row_num == ((kwidth - (left_padding + right_padding)) * in_depth)); + + // Write out zeroes to the elements representing the top rows of the input + // patch that are off the edge of the input image. + if (top_padding > 0) + { + const int top_row_elements = (top_padding * kwidth * in_depth); + memset(conv_buffer_data + output_row_offset, zero_byte, (top_row_elements * sizeof(T))); + } + + // If the patch is on the interior of the input image horizontally, just copy + // over the rows sequentially, otherwise add zero padding at the start or end. 
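+  // For a patch that hangs over an edge, the column written below looks like:
+  //   [ top_padding rows of zeroes ]
+  //   [ left_padding*in_depth zeroes | copied input pixels | right_padding*in_depth zeroes ]
+  //   [ bottom_padding rows of zeroes ]
+  // where every row of the column holds kwidth * in_depth values.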
+ if ((left_padding == 0) && (right_padding == 0)) + { + for (int ih = ih_start; ih < ih_end; ++ih) + { + memcpy(conv_buffer_data + out_offset, in_data + in_offset, single_row_num * sizeof(T)); + out_offset += kwidth_times_indepth; + in_offset += inwidth_times_indepth; + } + } + else + { + for (int ih = ih_start; ih < ih_end; ++ih) + { + if (left_padding > 0) + { + const int left_start = (out_offset - (left_padding * in_depth)); + memset(conv_buffer_data + left_start, zero_byte, (left_padding * in_depth * sizeof(T))); + } + memcpy(conv_buffer_data + out_offset, in_data + in_offset, single_row_num * sizeof(T)); + if (right_padding > 0) + { + const int right_start = (out_offset + single_row_num); + memset(conv_buffer_data + right_start, zero_byte, (right_padding * in_depth * sizeof(T))); + } + out_offset += kwidth_times_indepth; + in_offset += inwidth_times_indepth; + } + } + + // If the bottom of the patch falls off the input image, pad the values + // representing those input rows with zeroes. + if (bottom_padding > 0) + { + const int bottom_row_elements = (bottom_padding * kwidth * in_depth); + const int bottom_start = + output_row_offset + ((top_padding + (ih_end - ih_start)) * kwidth * in_depth); + memset(conv_buffer_data + bottom_start, zero_byte, (bottom_row_elements * sizeof(T))); + } +} + +// Supports per-batch zero_byte for per-batch asymmetric quantized inputs. +template +void DilatedIm2col(const ConvParams ¶ms, const Shape &input_shape, const T *input_data, + const Shape &filter_shape, const Shape &output_shape, T *im2col_data, + const int32_t *zero_bytes, const int zero_bytes_len) +{ + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + assert(input_shape.DimensionsCount() == 4); + assert(filter_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + + // For dilated convolution, the input pixels are not contiguous therefore we + // can't use the same optimizations as Im2Col(). Though note this code would + // work fine for the non-dilated case too (though likely a bit slower). + assert(dilation_width_factor != 1 || dilation_height_factor != 1); + assert(im2col_data); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + MatchingDim(output_shape, 3, filter_shape, 0); + + // Construct the MxN sized im2col matrix. + // The rows M, are sub-ordered B x H x W + const Shape row_shape({1, batches, output_height, output_width}); + // The columns, N, are sub-ordered Kh x Kw x Din + const Shape col_shape({1, filter_height, filter_width, input_depth}); + // Use dimensions M and N to construct dims for indexing directly into im2col + const Shape im2col_shape({1, 1, row_shape.FlatSize(), col_shape.FlatSize()}); + + // Loop through the output rows (B x H x W) + for (int batch = 0; batch < batches; ++batch) + { + const T zero_byte = + zero_bytes_len > 1 ? 
static_cast<T>(zero_bytes[batch]) : static_cast<T>(zero_bytes[0]);
+    for (int out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (int out_x = 0; out_x < output_width; ++out_x)
+      {
+        // Each im2col row is an output pixel. Arrange the input data in this
+        // row in an order we can conveniently multiply with the filter data.
+        int row_offset = Offset(row_shape, 0, batch, out_y, out_x);
+        const int in_x_origin = (out_x * stride_width) - pad_width;
+        const int in_y_origin = (out_y * stride_height) - pad_height;
+        // Loop through all the pixels of the filter (Kh x Kw)
+        for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+        {
+          const int in_y = in_y_origin + dilation_height_factor * filter_y;
+          if ((in_y >= 0) && (in_y < input_height))
+          {
+            // Filter row is within the input data.
+            // Loop through all the filter pixels in this row.
+            for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+            {
+              const int in_x = in_x_origin + dilation_width_factor * filter_x;
+              int col_offset = Offset(col_shape, 0, filter_y, filter_x, 0);
+              T *dst = im2col_data + Offset(im2col_shape, 0, 0, row_offset, col_offset);
+              if ((in_x >= 0) && (in_x < input_width))
+              {
+                // Filter pixel is within the input, copy the input data.
+                T const *src = input_data + Offset(input_shape, batch, in_y, in_x, 0);
+                memcpy(dst, src, input_depth * sizeof(T));
+              }
+              else
+              {
+                // Filter pixel is outside the input, zero it out.
+                memset(dst, zero_byte, input_depth * sizeof(T));
+              }
+            }
+          }
+          else
+          {
+            // Filter row is outside the input, zero out the entire filter row.
+            int col_offset = Offset(col_shape, 0, filter_y, 0, 0);
+            T *dst = im2col_data + Offset(im2col_shape, 0, 0, row_offset, col_offset);
+            memset(dst, zero_byte, filter_width * input_depth * sizeof(T));
+          }
+        }
+      }
+    }
+  }
+}
+
+template <typename T>
+void DilatedIm2col(const ConvParams &params, uint8_t zero_byte, const Shape &input_shape,
+                   const T *input_data, const Shape &filter_shape, const Shape &output_shape,
+                   T *im2col_data)
+{
+  const int32_t zero_point = static_cast<int32_t>(zero_byte);
+  DilatedIm2col(params, input_shape, input_data, filter_shape, output_shape, im2col_data,
+                &zero_point, 1);
+}
+
+template <typename T>
+void Im2col(const ConvParams &params, int kheight, int kwidth, uint8_t zero_byte,
+            const Shape &input_shape, const T *input_data, const Shape &output_shape,
+            T *output_data)
+{
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  assert(input_shape.DimensionsCount() == 4);
+  assert(output_shape.DimensionsCount() == 4);
+
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_depth = input_shape.Dims(3);
+  const int input_width = input_shape.Dims(2);
+  const int input_height = input_shape.Dims(1);
+  const int output_depth = output_shape.Dims(3);
+  const int output_width = output_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+
+  int buffer_id = 0;
+  // Loop over the output nodes.
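+  // The loop below writes one column per output position, so the buffer must
+  // hold batches * output_height * output_width columns of
+  // kheight * kwidth * input_depth (== output_depth here) elements each.
+  // For example, a 1x112x112 output with a 3x3 kernel over 32 input channels
+  // needs 1 * 112 * 112 * 3 * 3 * 32 elements (about 14.5 MB as float).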
+ for (int b = 0; b < batches; ++b) + { + for (int h = 0; h < output_height; ++h) + { + for (int w = 0; w < output_width; ++w) + { + ExtractPatchIntoBufferColumn(input_shape, w, h, b, kheight, kwidth, stride_width, + stride_height, pad_width, pad_height, input_width, + input_height, input_depth, output_depth, buffer_id, input_data, + output_data, zero_byte); + ++buffer_id; + } + } + } +} + +} // namespace ruy +} // namespace nnfw + +#endif // __NNFW_RUY_UTILS_H__ diff --git a/compute/ruy/include/ruy/neon/neon_check.h b/compute/ruy/include/ruy/neon/neon_check.h new file mode 100644 index 0000000..08394f2 --- /dev/null +++ b/compute/ruy/include/ruy/neon/neon_check.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_RUY_NEON_CHECK_H__ +#define __NNFW_RUY_NEON_CHECK_H__ + +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#define USE_NEON +#include +#endif + +// Disable X86_NEON +// #if defined __GNUC__ && defined __SSE4_1__ && !defined TF_LITE_DISABLE_X86_NEON +#if 0 +#define USE_NEON +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#pragma GCC diagnostic ignored "-Wattributes" +#pragma GCC diagnostic ignored "-Wnarrowing" +#pragma GCC diagnostic ignored "-Wsequence-point" +#include "NEON_2_SSE.h" +#pragma GCC diagnostic pop +#endif + +// NEON_OR_PORTABLE(SomeFunc, args) calls NeonSomeFunc(args) if USE_NEON is +// defined, PortableSomeFunc(args) otherwise. +#ifdef USE_NEON +// Always use Neon code +#define NEON_OR_PORTABLE(funcname, ...) Neon##funcname(__VA_ARGS__) + +#else +// No NEON available: Use Portable code +#define NEON_OR_PORTABLE(funcname, ...) Portable##funcname(__VA_ARGS__) + +#endif // defined(USE_NEON) + +#endif // __NNFW_RUY_NEON_CHECK_H__ diff --git a/compute/ruy/include/ruy/operation/Conv.h b/compute/ruy/include/ruy/operation/Conv.h new file mode 100644 index 0000000..2b9c8c3 --- /dev/null +++ b/compute/ruy/include/ruy/operation/Conv.h @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
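As a usage sketch for the NEON_OR_PORTABLE macro from neon_check.h above; the Scale kernels are hypothetical stand-ins for a real portable/NEON implementation pair, not functions added by this patch:

```cpp
#include <cstddef>
#include "ruy/neon/neon_check.h"

// Hypothetical portable and NEON variants of one kernel.
inline void PortableScale(const float *in, float *out, std::size_t n, float s)
{
  for (std::size_t i = 0; i < n; ++i)
    out[i] = in[i] * s;
}

inline void NeonScale(const float *in, float *out, std::size_t n, float s)
{
  // A real version would use NEON intrinsics; reuse the portable loop so the
  // sketch stays self-contained.
  PortableScale(in, out, n, s);
}

inline void Scale(const float *in, float *out, std::size_t n, float s)
{
  // Expands to NeonScale(...) when USE_NEON is defined, PortableScale(...) otherwise.
  NEON_OR_PORTABLE(Scale, in, out, n, s);
}
```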
+ */
+
+#ifndef __NNFW_RUY_CONV_H__
+#define __NNFW_RUY_CONV_H__
+
+#include "ruy/Types.h"
+#include "ruy/Shape.h"
+#include "ruy/Utils.h"
+#include "ruy/RuySupport.h"
+
+#include
+#include
+#include
+#include
+
+namespace nnfw
+{
+namespace ruy
+{
+
+class Conv
+{
+public:
+  Conv() : _im2col_shape(4), _need_im2col(false), _prepared(false) {}
+
+  void prepare(const Shape &input_shape, const Shape &kernel_shape, const Shape &output_shape,
+               uint32_t stride_width, uint32_t stride_height, uint32_t dilation_width_factor,
+               uint32_t dilation_height_factor)
+  {
+    if (!_prepared)
+    {
+      IsRequiredIm2col(input_shape, kernel_shape, output_shape, stride_width, stride_height,
+                       dilation_width_factor, dilation_height_factor);
+      _prepared = true;
+    }
+  }
+
+  void operator()(const ConvParams &params, const Shape &input_shape, const float *input_data,
+                  const Shape &filter_shape, const float *filter_data, const Shape &bias_shape,
+                  const float *bias_data, const Shape &output_shape, float *output_data,
+                  ::ruy::Context *ruy_context)
+  {
+    if (!_prepared)
+    {
+      // This means that input or output are dynamic or filter is not constant
+      IsRequiredIm2col(input_shape, filter_shape, output_shape, params.stride_width,
+                       params.stride_height, params.dilation_width_factor,
+                       params.dilation_height_factor);
+      _prepared = true;
+    }
+
+    int im2col_size = _need_im2col ? _im2col_shape.FlatSize() : 0;
+
+    // Use heap if size is larger than 8MB
+    if (im2col_size > 2 * 1024 * 1024)
+    {
+      std::unique_ptr<float[]> im2col_data = std::make_unique<float[]>(im2col_size);
+      ConvFloat(params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data,
+                output_shape, output_data, _im2col_shape, im2col_data.get(), ruy_context);
+    }
+    else if (im2col_size > 0)
+    {
+      float im2col_data[im2col_size];
+      ConvFloat(params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data,
+                output_shape, output_data, _im2col_shape, im2col_data, ruy_context);
+    }
+    else
+    {
+      ConvFloat(params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data,
+                output_shape, output_data, _im2col_shape, nullptr, ruy_context);
+    }
+  }
+
+private:
+  void ConvFloat(const ConvParams &params, const Shape &input_shape, const float *input_data,
+                 const Shape &filter_shape, const float *filter_data, const Shape &bias_shape,
+                 const float *bias_data, const Shape &output_shape, float *output_data,
+                 const Shape &im2col_shape, float *im2col_data, ::ruy::Context *ruy_context)
+  {
+    UNUSED_RELEASE(bias_shape);
+    const int stride_width = params.stride_width;
+    const int stride_height = params.stride_height;
+    const int dilation_width_factor = params.dilation_width_factor;
+    const int dilation_height_factor = params.dilation_height_factor;
+    const float output_activation_min = params.float_activation_min;
+    const float output_activation_max = params.float_activation_max;
+    assert(input_shape.DimensionsCount() == 4);
+    assert(filter_shape.DimensionsCount() == 4);
+    assert(output_shape.DimensionsCount() == 4);
+
+    // NB: the float 0.0f value is represented by all zero bytes.
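+    // In GEMM terms, the code below computes
+    //   output[n x m] = filter[n x k] * im2col[k x m]
+    // with n = output channels, k = filter_height * filter_width * input_depth,
+    // and m = batch * output_height * output_width.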
+    const uint8_t float_zero_byte = 0x00;
+    const float *gemm_input_data = nullptr;
+    const Shape *gemm_input_shape = nullptr;
+    const int filter_width = filter_shape.Dims(2);
+    const int filter_height = filter_shape.Dims(1);
+    const bool need_dilated_im2col = dilation_width_factor != 1 || dilation_height_factor != 1;
+    const bool need_im2col =
+      stride_width != 1 || stride_height != 1 || filter_width != 1 || filter_height != 1;
+    if (need_dilated_im2col)
+    {
+      DilatedIm2col(params, float_zero_byte, input_shape, input_data, filter_shape, output_shape,
+                    im2col_data);
+      gemm_input_data = im2col_data;
+      gemm_input_shape = &im2col_shape;
+    }
+    else if (need_im2col)
+    {
+      assert(im2col_data);
+      Im2col(params, filter_height, filter_width, float_zero_byte, input_shape, input_data,
+             im2col_shape, im2col_data);
+      gemm_input_data = im2col_data;
+      gemm_input_shape = &im2col_shape;
+    }
+    else
+    {
+      // TODO(aselle): We need to make sure to not send im2col if it is not
+      // needed.
+      assert(!im2col_data);
+      gemm_input_data = input_data;
+      gemm_input_shape = &input_shape;
+    }
+
+    const int gemm_input_dims = gemm_input_shape->DimensionsCount();
+    int m = FlatSizeSkipDim(*gemm_input_shape, gemm_input_dims - 1);
+    int n = output_shape.Dims(3);
+    int k = gemm_input_shape->Dims(gemm_input_dims - 1);
+
+    // When an optimized CBLAS implementation is not available, fall back
+    // to using cpu_backend_gemm.
+    MatrixParams<float> lhs_params;
+    lhs_params.order = Order::kRowMajor;
+    lhs_params.rows = n;
+    lhs_params.cols = k;
+    MatrixParams<float> rhs_params;
+    rhs_params.order = Order::kColMajor;
+    rhs_params.rows = k;
+    rhs_params.cols = m;
+    MatrixParams<float> dst_params;
+    dst_params.order = Order::kColMajor;
+    dst_params.rows = n;
+    dst_params.cols = m;
+    GemmParams<float, float> gemm_params;
+    gemm_params.bias = bias_data;
+    gemm_params.clamp_min = output_activation_min;
+    gemm_params.clamp_max = output_activation_max;
+
+    // Below code is from tflite::cpu_backend_gemm::detail::GemmImplUsingRuy
+    ::ruy::Matrix<float> ruy_lhs;
+    ::ruy::Matrix<float> ruy_rhs;
+    ::ruy::Matrix<float> ruy_dst;
+    // Note that cache is always enabled for input and weight tensors
+    ruy_support::MakeRuyMatrix(lhs_params, filter_data, &ruy_lhs, true);
+    ruy_support::MakeRuyMatrix(rhs_params, gemm_input_data, &ruy_rhs, true);
+    ruy_support::MakeRuyMatrix(dst_params, output_data, &ruy_dst);
+
+    ::ruy::BasicSpec<float, float> ruy_mul_params;
+    ruy_support::MakeRuyMulParams(gemm_params, &ruy_mul_params);
+
+    ::ruy::Mul(ruy_lhs, ruy_rhs, ruy_mul_params, ruy_context, &ruy_dst);
+  }
+
+  void IsRequiredIm2col(const Shape &input_shape, const Shape &kernel_shape,
+                        const Shape &output_shape, uint32_t stride_width, uint32_t stride_height,
+                        uint32_t dilation_width_factor, uint32_t dilation_height_factor)
+  {
+    const bool need_dilated_im2col = dilation_width_factor != 1 || dilation_height_factor != 1;
+    const bool need_non_dilated_im2col = stride_width != 1 || stride_height != 1 ||
+                                         kernel_shape.Dims(1) != 1 || kernel_shape.Dims(2) != 1;
+
+    _need_im2col = need_dilated_im2col || need_non_dilated_im2col;
+
+    if (_need_im2col)
+    {
+      _im2col_shape.SetDim(0, output_shape.Dims(0));
+      _im2col_shape.SetDim(1, output_shape.Dims(1));
+      _im2col_shape.SetDim(2, output_shape.Dims(2));
+      _im2col_shape.SetDim(3, input_shape.Dims(3) * kernel_shape.Dims(1) * kernel_shape.Dims(2));
+    }
+  }
+
+private:
+  Shape _im2col_shape;
+  bool _need_im2col;
+  bool _prepared;
+};
+} // namespace ruy
+} // namespace nnfw
+
+#endif // __NNFW_RUY_CONV_H__
diff --git a/compute/ruy/include/ruy/operation/FullyConnected.h
b/compute/ruy/include/ruy/operation/FullyConnected.h new file mode 100644 index 0000000..59facdb --- /dev/null +++ b/compute/ruy/include/ruy/operation/FullyConnected.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_RUY_FULLY_CONNECTED_H__ +#define __NNFW_RUY_FULLY_CONNECTED_H__ + +#include "ruy/Shape.h" +#include "ruy/Types.h" +#include "ruy/Utils.h" +#include "ruy/RuySupport.h" + +#include +#include + +namespace nnfw +{ +namespace ruy +{ + +inline void FullyConnected(const FullyConnectedParams ¶ms, const Shape &input_shape, + const float *input_data, const Shape &weights_shape, + const float *weights_data, const Shape &, + const float *optional_bias_data, const Shape &output_shape, + float *output_data, ::ruy::Context *ruy_context) +{ + const int dims_count = weights_shape.DimensionsCount(); + const int input_rows = weights_shape.Dims(dims_count - 1); + MatrixParams rhs_params; + rhs_params.order = Order::kColMajor; + rhs_params.rows = input_rows; + rhs_params.cols = input_shape.FlatSize() / input_rows; + rhs_params.cache_policy = DefaultCachePolicy(params.rhs_cacheable); + assert(input_shape.FlatSize() == (rhs_params.rows * rhs_params.cols)); + MatrixParams lhs_params; + lhs_params.order = Order::kRowMajor; + lhs_params.cols = weights_shape.Dims(dims_count - 1); + lhs_params.rows = FlatSizeSkipDim(weights_shape, dims_count - 1); + lhs_params.cache_policy = DefaultCachePolicy(params.lhs_cacheable); + MatrixParams dst_params; + dst_params.order = Order::kColMajor; + dst_params.rows = output_shape.Dims(output_shape.DimensionsCount() - 1); + dst_params.cols = FlatSizeSkipDim(output_shape, output_shape.DimensionsCount() - 1); + GemmParams gemm_params; + gemm_params.bias = optional_bias_data; + gemm_params.clamp_min = params.float_activation_min; + gemm_params.clamp_max = params.float_activation_max; + + // Below code was copied from tflite::cpu_backend_gemm::detail::GemmImplUsingRuy + ::ruy::Matrix ruy_lhs; + ::ruy::Matrix ruy_rhs; + ::ruy::Matrix ruy_dst; + // Note that cache is always enabled for input and weight tensors + ruy_support::MakeRuyMatrix(lhs_params, weights_data, &ruy_lhs, true); + ruy_support::MakeRuyMatrix(rhs_params, input_data, &ruy_rhs, true); + ruy_support::MakeRuyMatrix(dst_params, output_data, &ruy_dst); + + ::ruy::BasicSpec ruy_mul_params; + ruy_support::MakeRuyMulParams(gemm_params, &ruy_mul_params); + + ::ruy::Mul(ruy_lhs, ruy_rhs, ruy_mul_params, ruy_context, &ruy_dst); +} + +} // namespace ruy +} // namespace nnfw + +#endif // __NNFW_RUY_FULLY_CONNECTED_H__ diff --git a/compute/test/cker/Range.cc b/compute/test/cker/Range.cc index 55f4fcf..e5fe480 100644 --- a/compute/test/cker/Range.cc +++ b/compute/test/cker/Range.cc @@ -48,9 +48,7 @@ TEST(CKer_Operation, Range) const float start = 3; const float limit = 1; const float delta = -0.5; - std::vector expected = { - 3, 2.5, 2, 1.5, 
- }; + std::vector expected = {3, 2.5, 2, 1.5}; std::vector actual(expected.size()); nnfw::cker::Range(&start, &limit, &delta, actual.data()); diff --git a/docs/conf.py b/docs/conf.py index 1185bcf..68b7d06 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,7 +21,7 @@ copyright = '2020, Samsung Research & contributors' author = 'Samsung Research & contributors' # The full version, including alpha/beta/rc tags -release = '1.11.1' +release = '1.12.0' # -- General configuration --------------------------------------------------- diff --git a/docs/howto/how-to-add-a-new-operation.md b/docs/howto/how-to-add-a-new-operation.md index 8ea7014..241ba6c 100644 --- a/docs/howto/how-to-add-a-new-operation.md +++ b/docs/howto/how-to-add-a-new-operation.md @@ -6,4 +6,4 @@ ## Runtime -- [How to introduce a new operatoin into runtime](how-to-introduce-a-new-operation-into-runtime.md) +- [How to introduce a new operation into runtime](how-to-introduce-a-new-operation-into-runtime.md) diff --git a/docs/howto/how-to-introduce-a-new-operation-into-runtime.md b/docs/howto/how-to-introduce-a-new-operation-into-runtime.md index f8fc020..9ab4987 100644 --- a/docs/howto/how-to-introduce-a-new-operation-into-runtime.md +++ b/docs/howto/how-to-introduce-a-new-operation-into-runtime.md @@ -24,7 +24,6 @@ onert support the operation. - [acl_cl](#acl_cl-1) - [acl_neon](#acl_neon-1) - [cpu](#cpu-1) - - [TensorRegister (in some cases)](#tensorregister-in-some-cases) - [ConstantInitializer (in some cases)](#constantinitializer-in-some-cases) - [cpu](#cpu-2) - [Samples (to be updated)](#samples-to-be-updated) @@ -420,51 +419,28 @@ void visit(const ir::operation::Select &) override; ```cpp void KernelGenerator::visit(const ir::operation::Select &node) { - const auto output_index{node.getOutputs().at(ir::operation::Select::Output::OUTPUT)}; - const auto cond_index{node.getInputs().at(ir::operation::Select::Input::COND)}; - const auto input1_index{node.getInputs().at(ir::operation::Select::Input::INPUT1)}; - const auto input2_index{node.getInputs().at(ir::operation::Select::Input::INPUT2)}; - - const auto output_backend_descr = ::onert::backend::cpu::kernel::getTensorDescriptor( - _ctx.at(output_index), _current_op_seq_layout); - const auto cond_backend_descr = ::onert::backend::cpu::kernel::getTensorDescriptor( - _ctx.at(cond_index), _current_op_seq_layout); - const auto input1_backend_descr = ::onert::backend::cpu::kernel::getTensorDescriptor( - _ctx.at(input1_index), _current_op_seq_layout); - const auto input2_backend_descr = ::onert::backend::cpu::kernel::getTensorDescriptor( - _ctx.at(input2_index), _current_op_seq_layout); + const auto output_index{node.getOutputs().at(0)}; + const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)}; + const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)}; + const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)}; - auto output_alloc = _tensor_builder->at(output_index).get(); - auto cond_alloc = _tensor_builder->at(cond_index).get(); - auto input1_alloc = _tensor_builder->at(input1_index).get(); - auto input2_alloc = _tensor_builder->at(input2_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index); + auto condition_tensor = _tensor_reg->getPortableTensor(condition_index); + auto true_tensor = _tensor_reg->getPortableTensor(true_index); + auto false_tensor = _tensor_reg->getPortableTensor(false_index); - auto fn = 
std::make_unique<::onert::backend::cpu::kernel::SelectLayer>(); + auto fn = std::make_unique(); - fn->configure(cond_alloc->buffer(), cond_backend_descr, input1_alloc->buffer(), - input1_backend_descr, input2_alloc->buffer(), input2_backend_descr, - output_alloc->buffer(), output_backend_descr); + fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor); - _execution_builder->append(std::move(fn)); + _return_fn = std::move(fn); } ``` -### TensorRegister (in some cases) - -This component registers tensors. Most tensors will be automatically registered internally. There -are some exceptions, however, where additional implementations are required. It is the case when a -tensor is treated unusually in its backend. - -The kernel of some operation has weights in `HWIO` as layout(data format) in case of that input's -layout is `NHWC`. And, for `NCHW`, weights is `OIHW`. But TFLite model has weigths, `OHWI` for -`NHWC` and `OIHW` for `NCHW`. Therefore, to register the appropriate tensor on the backend, you have -to implement it additionally. - ### ConstantInitializer (in some cases) This component registers function initializing constant tensors and initialize constant tensor -layer. This is similar to TensorRegister. Most tensors will be automatically registered internally. -And there are some exceptions. +layer. Most tensors will be automatically registered internally. And there are some exceptions. #### cpu diff --git a/docs/howto/how-to-use-specific-backend.md b/docs/howto/how-to-use-specific-backend.md new file mode 100644 index 0000000..32e1b83 --- /dev/null +++ b/docs/howto/how-to-use-specific-backend.md @@ -0,0 +1,40 @@ +# How to Use Specific Backend during Inference + +ONE runtime has many ways to use specific backend during inference + +## Using NNFW API + +### [nnfw_set_available_backends](https://github.com/Samsung/ONE/blob/c46ddc04abdb58323fbd38389e6927f003bfaea1/runtime/onert/api/include/nnfw.h#L458) +- Multiple backends can be set and they must be separated by a semicolon (ex: "acl_cl;cpu"). +- For each backend string, `libbackend_{backend}.so` will be dynamically loaded during nnfw_prepare. +- Among the multiple backends, the 1st element is used as the default backend. + +### [nnfw_set_op_backend](https://github.com/Samsung/ONE/blob/c46ddc04abdb58323fbd38389e6927f003bfaea1/runtime/onert/api/include/nnfw.h#L476) +- The backend for op has higher priority than available backends specified by nnfw_set_available_backends. + +## Using Environment Variable + +### 1. BACKENDS +- Same as `nnfw_set_available_backends` +- Example +```bash +BACKENDS=cpu ./Product/out/bin/nnpackage_run ... +``` + +### 2. OP_BACKEND_[OP_TYPE] +- Same as `nnfw_set_op_backend` +- Set backend for specific operator type +- Example + - Execute `Conv2D` operator on ruy backend and others on cpu backend +```bash +OP_BACKEND_Conv2D=ruy BACKENDS="cpu;ruy" ./Product/out/bin/nnpackage_run ... +``` + +### 3. OP_BACKEND_MAP +- Set backend for specific operator by its index +- Format : `=;=...` +- Example + - Execute `operator 10` on `acl_cl` backend and others on `acl_neon` backend +```bash +OP_BACKEND_MAP="10=acl_cl" BACKENDS="acl_neon;acl_cl" ./Product/out/bin/nnpackage_run ... 
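+# The same selection can also be made programmatically with the NNFW API
+# functions linked above (arguments shown are illustrative):
+#   nnfw_set_available_backends(session, "acl_neon;acl_cl");
+#   nnfw_set_op_backend(session, "Conv2D", "acl_cl");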
+``` diff --git a/docs/howto/index.rst b/docs/howto/index.rst index c84902a..faeedbf 100644 --- a/docs/howto/index.rst +++ b/docs/howto/index.rst @@ -10,19 +10,22 @@ How To :maxdepth: 2 :caption: Contents: - ./how-to-add-a-new-operation.md ./how-to-build-compiler.md ./how-to-build-package.md ./how-to-build-runtime.md ./how-to-build-runtime-tizen-gbs-rpi4.md ./how-to-build-runtime-using-prebuilt-docker-image.md - ./how-to-cross-build-runtime-for-arm.md ./how-to-cross-build-runtime-for-aarch64.md ./how-to-cross-build-runtime-for-android.md - ./how-to-contribute.md - ./how-to-make-an-application-with-runtime.md - ./how-to-remote-debugging-with-visual-studio-code.md + ./how-to-cross-build-runtime-for-arm.md ./how-to-run-package.md + ./how-to-make-an-application-with-runtime.md ./how-to-use-api.md - ./how-to-use-nnfw-api.md ./how-to-use-nnapi-binding.md + ./how-to-use-nnfw-api.md + ./how-to-use-specific-backend.md + ./how-to-contribute.md + ./how-to-remote-debugging-with-visual-studio-code.md + ./how-to-add-a-new-operation.md + ./how-to-introduce-a-new-operation-into-compiler.md + ./how-to-introduce-a-new-operation-into-runtime.md diff --git a/docs/release/1.10/index.rst b/docs/release/1.10/index.rst new file mode 100644 index 0000000..bc415fb --- /dev/null +++ b/docs/release/1.10/index.rst @@ -0,0 +1,13 @@ +.. ONE documentation master file, created by + sphinx-quickstart on Thu May 14 18:13:12 2020. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +1.0 +=== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + ./release-note-1.10.0.md diff --git a/docs/release/1.11/index.rst b/docs/release/1.11/index.rst new file mode 100644 index 0000000..2e4544a --- /dev/null +++ b/docs/release/1.11/index.rst @@ -0,0 +1,13 @@ +.. ONE documentation master file, created by + sphinx-quickstart on Thu May 14 18:13:12 2020. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +1.0 +=== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + ./release-note-1.11.0.md diff --git a/docs/release/1.11/release-note-1.11.1.md b/docs/release/1.11/release-note-1.11.1.md deleted file mode 100644 index 9efedf6..0000000 --- a/docs/release/1.11/release-note-1.11.1.md +++ /dev/null @@ -1,7 +0,0 @@ -# Release Note 1.11.1 - -## ONE Runtime - -### Hot Fixes - -- Fix segfault due to the wrong BCQGather DynamicShapeInferer's behavior diff --git a/docs/release/1.12/index.rst b/docs/release/1.12/index.rst new file mode 100644 index 0000000..68b4c73 --- /dev/null +++ b/docs/release/1.12/index.rst @@ -0,0 +1,13 @@ +.. ONE documentation master file, created by + sphinx-quickstart on Thu May 14 18:13:12 2020. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +1.0 +=== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + ./release-note-1.12.0.md diff --git a/docs/release/1.12/release-note-1.12.0.md b/docs/release/1.12/release-note-1.12.0.md new file mode 100644 index 0000000..1f13bc4 --- /dev/null +++ b/docs/release/1.12/release-note-1.12.0.md @@ -0,0 +1,28 @@ +# Release Note 1.12.0 + +## ONE Compiler + +### Compiler Frontend + +- Add optimization pass: ReplaceMulAddWithDepthwiseConvPass, SubstitutePackToReshape, RemoveRedundantTranspose, ShuffleWeightTo16x1Float32Pass +- Add quantization for InstanceNorm. +- Fix bug of `one-import-bcq` command for `--v1`, `--v2` arguments. 
+- Fix FuseBCQPass to work with inter-subgraphs in the model file and minor BCQ related optimizations. + +## ONE Runtime + +### Runtime backend operation supports more operations and types + +- CPU backend + - Concat: int8 + - DepthToSpace: float, uint8, int8 + - LeakyRelu: float +- ACL-CL backend + - ArgMin: float, uint8, int8 +- ACL-NEON backend + - ArgMax: int8 + - ArgMin: float, uint8, int8 + +### nnpackage defines configuration file + +- Allow users to set configuration variable via conf file. For more information, See [nnpackage spec](../../../nnpackage/spec) diff --git a/docs/release/1.5/index.rst b/docs/release/1.5/index.rst new file mode 100644 index 0000000..0764bf2 --- /dev/null +++ b/docs/release/1.5/index.rst @@ -0,0 +1,13 @@ +.. ONE documentation master file, created by + sphinx-quickstart on Thu May 14 18:13:12 2020. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +1.0 +=== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + ./release-note-1.5.0.md diff --git a/docs/release/1.6/index.rst b/docs/release/1.6/index.rst new file mode 100644 index 0000000..79389cf --- /dev/null +++ b/docs/release/1.6/index.rst @@ -0,0 +1,13 @@ +.. ONE documentation master file, created by + sphinx-quickstart on Thu May 14 18:13:12 2020. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +1.0 +=== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + ./release-note-1.6.0.md diff --git a/docs/release/1.7/index.rst b/docs/release/1.7/index.rst new file mode 100644 index 0000000..65a839f --- /dev/null +++ b/docs/release/1.7/index.rst @@ -0,0 +1,13 @@ +.. ONE documentation master file, created by + sphinx-quickstart on Thu May 14 18:13:12 2020. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +1.0 +=== + +.. 
toctree:: + :maxdepth: 2 + :caption: Contents: + + ./release-note-1.7.0.md diff --git a/docs/release/1.7/release-note-1.7.0.md b/docs/release/1.7/release-note-1.7.0.md new file mode 100644 index 0000000..c1a4f50 --- /dev/null +++ b/docs/release/1.7/release-note-1.7.0.md @@ -0,0 +1,46 @@ +## Feature Highlights + +- **ONE** Compiler + - Compiler supports more operations + - New command line interface for user interface consistancy +- **ONE** Runtime + - Runtime CPU backend supports more operations + - Runtime CPU backend supports more quant8 operations + - API changes + - New optimization + +## ONE Compiler + +### Compiler supports more operations + +- MatrixDiag, MatrixSetDiag, ReverseSequence, ReverseV2, SegmentSum, SelectV2, SparseToDense, Where + +### New command line interface for user interface consistancy + +- one-import: imports conventional model files to circle + - one-import-tf: imports TensorFlow model to circle + - one-import-tflite: imports TensorFlow lite model to circle +- one-optimize: circle optimize command +- one-quantize: circle quantize command + - supports float32 to uint8, layer wise (for Conv series) +- one-pack: package command +- one-prepare-venv: prepares python virtual environment for importing TensorFlow model +- one-codegen: backend(if available) code generator + +## ONE Runtime + +### Runtime CPU backend supports more operations + +- LogSoftmax, SpaceToBatchND + +### Runtime CPU backend supports more quant8 operations + +- Logistic, Mul, Tanh, SpaceToBatchND, Transpose, Sub, Max, Min, Less, Greater, GreaterEqual, LessEqual, Equal, NotEqual + +### API changes + +- Introduce basic asynchronous execution API + +### New optimization + +- Remove dynamic tensor overhead from static models diff --git a/docs/release/1.8/index.rst b/docs/release/1.8/index.rst new file mode 100644 index 0000000..4dc1d5b --- /dev/null +++ b/docs/release/1.8/index.rst @@ -0,0 +1,13 @@ +.. ONE documentation master file, created by + sphinx-quickstart on Thu May 14 18:13:12 2020. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +1.0 +=== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + ./release-note-1.8.0.md diff --git a/docs/release/1.9/index.rst b/docs/release/1.9/index.rst new file mode 100644 index 0000000..d77012c --- /dev/null +++ b/docs/release/1.9/index.rst @@ -0,0 +1,14 @@ +.. ONE documentation master file, created by + sphinx-quickstart on Thu May 14 18:13:12 2020. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +1.0 +=== + +.. 
toctree:: + :maxdepth: 2 + :caption: Contents: + + ./release-note-1.9.0.md + ./release-note-1.9.1.md diff --git a/docs/release/index.rst b/docs/release/index.rst index bb542bc..1a5a780 100644 --- a/docs/release/index.rst +++ b/docs/release/index.rst @@ -15,3 +15,11 @@ Release ./1.2/index ./1.3/index ./1.4/index + ./1.5/index + ./1.6/index + ./1.7/index + ./1.8/index + ./1.9/index + ./1.10/index + ./1.11/index + ./1.12/index diff --git a/docs/runtime/index.rst b/docs/runtime/index.rst index d44f822..e80dfc8 100644 --- a/docs/runtime/index.rst +++ b/docs/runtime/index.rst @@ -12,8 +12,9 @@ Runtime ./api.md ./core.md - ./compute.md + ./controlflow-operations.md ./executors.md - ./backend-api.md ./heterogeneous-execution.md - ./controlflow-operations.md + ./backend-api.md + ./compute.md + ./supported-operations-backend.md diff --git a/docs/runtime/supported-operations-backend.md b/docs/runtime/supported-operations-backend.md index bcc6355..04ece97 100644 --- a/docs/runtime/supported-operations-backend.md +++ b/docs/runtime/supported-operations-backend.md @@ -1,6 +1,6 @@ # Supported Operations and backend -As of 2020-11-10 +As of 2020-12-07 ### Raw-data format (float32, int32, boolean, etc) @@ -10,7 +10,7 @@ Abs | O | O | O Add | O | O | O AddN | O | | ArgMax | O | O | O -ArgMin | O | | +ArgMin | O | O | O AvgPool2D | O | O | O BatchMatmul | O | | BatchToSpaceND | O | O | O @@ -19,7 +19,7 @@ Concat | O | O | O Conv2D | O | O | O Cos | O | | Custom | O | | -DepthToSpace | | O | O +DepthToSpace | O | O | O DepthwiseConv2D | O | O | O Div | O | O | O EmbeddingLookup | | O | O @@ -37,7 +37,7 @@ If | O | | InstanceNormalize | | O | O L2Normalization | O | O | O L2Pool | | O | O -LeakyRelu | | O | O +LeakyRelu | O | O | O Less | O | O | O LessEqual | O | O | O LocalResponseNormalize | | O | O @@ -89,6 +89,7 @@ SpaceToDepth | O | O | O Split | O | O | O SplitV | O | | Sqrt | O | O | O +Square | O | | | SquaredDifference | O | O | O Squeeze | O | O | O StridedSlice | O | O | O @@ -110,14 +111,14 @@ Operation | CPU | ACL-CL | ACL-NEON -- | -- | -- | -- Add | O | O | O ArgMax | O | O | O -ArgMin | O | | +ArgMin | O | O | O AvgPool2D | O | O | O BatchToSpaceND | O | O | O Cast | O | O | Concat | O | O | O Conv2D | O | O | O Custom | O | | -DepthToSpace | | O | O +DepthToSpace | O | O | O DepthwiseConv2D | O | O | O Dequantize | O | O | O EmbeddingLookup | | O | O @@ -170,6 +171,12 @@ Unpack(Unstack) | | O | O ### Quantization format (int8) +Operation | CPU | ACL-CL | ACL-NEON +-- | -- | -- | -- +ArgMax | O | O | O +ArgMin | O | O | O +Concat | O | | +DepthToSpace | O | | Dequantize | O | | Rank | O | | Shape | O | | diff --git a/infra/cmake/packages/Fp16SourceConfig.cmake b/infra/cmake/packages/Fp16SourceConfig.cmake new file mode 100644 index 0000000..3623fd2 --- /dev/null +++ b/infra/cmake/packages/Fp16SourceConfig.cmake @@ -0,0 +1,21 @@ +function(_Fp16Source_import) + if(NOT ${DOWNLOAD_FP16}) + set(Fp16Source_FOUND FALSE PARENT_SCOPE) + return() + endif(NOT ${DOWNLOAD_FP16}) + + nnas_include(ExternalSourceTools) + nnas_include(OptionTools) + + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + # fp16 commit in xnnpack 8b283aa30a31 + envoption(FP16_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/FP16/archive/3c54eacb74f6f5e39077300c5564156c424d77ba.tar.gz) + ExternalSource_Download(FP16 + DIRNAME FP16 + URL ${FP16_URL}) + + set(Fp16Source_DIR ${FP16_SOURCE_DIR} PARENT_SCOPE) + set(Fp16Source_FOUND TRUE PARENT_SCOPE) +endfunction(_Fp16Source_import) + +_Fp16Source_import() diff --git 
a/infra/cmake/packages/FxdivSourceConfig.cmake b/infra/cmake/packages/FxdivSourceConfig.cmake new file mode 100644 index 0000000..4427bf2 --- /dev/null +++ b/infra/cmake/packages/FxdivSourceConfig.cmake @@ -0,0 +1,21 @@ +function(_FxdivSource_import) + if(NOT ${DOWNLOAD_FXDIV}) + set(FxdivSource_FOUND FALSE PARENT_SCOPE) + return() + endif(NOT ${DOWNLOAD_FXDIV}) + + nnas_include(ExternalSourceTools) + nnas_include(OptionTools) + + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + # fxdiv commit in xnnpack 8b283aa30a31 + envoption(FXDIV_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/FXdiv/archive/f8c5354679ec2597792bc70a9e06eff50c508b9a.tar.gz) + ExternalSource_Download(FXDIV + DIRNAME FXDIV + URL ${FXDIV_URL}) + + set(FxdivSource_DIR ${FXDIV_SOURCE_DIR} PARENT_SCOPE) + set(FxdivSource_FOUND TRUE PARENT_SCOPE) +endfunction(_FxdivSource_import) + +_FxdivSource_import() diff --git a/infra/cmake/packages/PsimdSourceConfig.cmake b/infra/cmake/packages/PsimdSourceConfig.cmake new file mode 100644 index 0000000..1da5cdc --- /dev/null +++ b/infra/cmake/packages/PsimdSourceConfig.cmake @@ -0,0 +1,21 @@ +function(_PsimdSource_import) + if(NOT ${DOWNLOAD_PSIMD}) + set(PsimdSource_FOUND FALSE PARENT_SCOPE) + return() + endif(NOT ${DOWNLOAD_PSIMD}) + + nnas_include(ExternalSourceTools) + nnas_include(OptionTools) + + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + # psimd commit in xnnpack 8b283aa30a31 + envoption(PSIMD_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.tar.gz) + ExternalSource_Download(PSIMD + DIRNAME PSIMD + URL ${PSIMD_URL}) + + set(PsimdSource_DIR ${PSIMD_SOURCE_DIR} PARENT_SCOPE) + set(PsimdSource_FOUND TRUE PARENT_SCOPE) +endfunction(_PsimdSource_import) + +_PsimdSource_import() diff --git a/infra/cmake/packages/PthreadpoolSourceConfig.cmake b/infra/cmake/packages/PthreadpoolSourceConfig.cmake new file mode 100644 index 0000000..4e1910a --- /dev/null +++ b/infra/cmake/packages/PthreadpoolSourceConfig.cmake @@ -0,0 +1,21 @@ +function(_PthreadpoolSource_import) + if(NOT ${DOWNLOAD_PTHREADPOOL}) + set(PthreadpoolSource_FOUND FALSE PARENT_SCOPE) + return() + endif(NOT ${DOWNLOAD_PTHREADPOOL}) + + nnas_include(ExternalSourceTools) + nnas_include(OptionTools) + + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + # pthreadpool commit in xnnpack 8b283aa30a31 + envoption(PTHREADPOOL_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/pthreadpool/archive/029c88620802e1361ccf41d1970bd5b07fd6b7bb.tar.gz) + ExternalSource_Download(PTHREADPOOL + DIRNAME PTHREADPOOL + URL ${PTHREADPOOL_URL}) + + set(PthreadpoolSource_DIR ${PTHREADPOOL_SOURCE_DIR} PARENT_SCOPE) + set(PthreadpoolSource_FOUND TRUE PARENT_SCOPE) +endfunction(_PthreadpoolSource_import) + +_PthreadpoolSource_import() diff --git a/infra/cmake/packages/XnnpackSourceConfig.cmake b/infra/cmake/packages/XnnpackSourceConfig.cmake new file mode 100644 index 0000000..36a9204 --- /dev/null +++ b/infra/cmake/packages/XnnpackSourceConfig.cmake @@ -0,0 +1,21 @@ +function(_XnnpackSource_import) + if(NOT ${DOWNLOAD_XNNPACK}) + set(XnnpackSource_FOUND FALSE PARENT_SCOPE) + return() + endif(NOT ${DOWNLOAD_XNNPACK}) + + nnas_include(ExternalSourceTools) + nnas_include(OptionTools) + + envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com") + # xnnpack commit in tflite v2.3 + envoption(XNNPACK_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/XNNPACK/archive/8b283aa30a3186c6e640aed520543e9c067132d.tar.gz) + ExternalSource_Download(XNNPACK + DIRNAME XNNPACK + URL ${XNNPACK_URL}) + + 
set(XnnpackSource_DIR ${XNNPACK_SOURCE_DIR} PARENT_SCOPE) + set(XnnpackSource_FOUND TRUE PARENT_SCOPE) +endfunction(_XnnpackSource_import) + +_XnnpackSource_import() diff --git a/infra/command/format b/infra/command/format index 7f37e06..c57e6dc 100644 --- a/infra/command/format +++ b/infra/command/format @@ -132,8 +132,6 @@ function check_cpp_files() { fi CLANG_FORMAT_CANDIDATES+=("clang-format-3.9") - CLANG_FORMAT_CANDIDATES+=("clang-format") - for CLANG_FORMAT_CANDIDATE in ${CLANG_FORMAT_CANDIDATES[@]}; do if command_exists ${CLANG_FORMAT_CANDIDATE} ; then CLANG_FORMAT="${CLANG_FORMAT_CANDIDATE}" @@ -142,14 +140,29 @@ function check_cpp_files() { done if [[ -z ${CLANG_FORMAT} ]]; then - echo "[ERROR] clang-format is unavailable" + echo "[ERROR] clang-format-3.9 is unavailable" echo - echo "Please install clang-format before running format check" + echo " Please install clang-format-3.9 before running format check" exit 1 fi + # Migration to clang-format-8 + # TODO Remove this after migration to clang-format-8 + CLANG_FORMAT_8="clang-format-8" + if ! command_exists $CLANG_FORMAT_8_CANDIDATE; then + echo "[ERROR] clang-format-8 is unavailable" + echo + echo " Please install clang-format-8 before running format check" + echo " (or use latest docker image if you are using docker for format check)" + exit 1 + fi + for DIR_CLANG_FORMAT_8 in $(git ls-files -co --exclude-standard '*/.clang-format'); do + DIRECTORIES_USE_CLANG_FORMAT_8+=($(dirname "${DIR_CLANG_FORMAT_8}")) + done + # Check c++ files FILES_TO_CHECK_CPP=() + FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8=() for f in ${FILES_TO_CHECK[@]}; do # Manually ignore style checking if [[ ${f} == +(*/NeuralNetworks.h|*/NeuralNetworksExtensions.h) ]]; then @@ -158,13 +171,28 @@ function check_cpp_files() { # File extension to check if [[ ${f} == +(*.h|*.hpp|*.cpp|*.cc|*.c|*.cl) ]]; then - FILES_TO_CHECK_CPP+=("${f}") + + # Check clang-format-8 target files first + # TODO Remove this after migration to clang-format-8 + FOUND_CLANG_8=0 + for USE_CLANG_FORMAT_8 in ${DIRECTORIES_USE_CLANG_FORMAT_8[@]}; do + if [[ $f = $USE_CLANG_FORMAT_8* ]]; then + FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8+=("$f") + FOUND_CLANG_8=1 + break + fi + done + + if [[ $FOUND_CLANG_8 -ne 1 ]]; then + FILES_TO_CHECK_CPP+=("${f}") + fi fi done # Skip by '.FORMATDENY' file for s in ${DIRECTORIES_NOT_TO_BE_TESTED[@]}; do FILES_TO_CHECK_CPP=(${FILES_TO_CHECK_CPP[*]/$s*/}) + FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8=(${FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8[*]/$s*/}) done if [[ ${#FILES_TO_CHECK_CPP} -ne 0 ]]; then @@ -174,6 +202,16 @@ function check_cpp_files() { INVALID_EXIT=${EXIT_CODE} fi fi + + # Check by clang-format-8 + # TODO Remove this after migration to clang-format-8 + if [[ ${#FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8} -ne 0 ]]; then + ${CLANG_FORMAT_8} -i ${FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8[@]} + EXIT_CODE=$? 
+ if [[ ${EXIT_CODE} -ne 0 ]]; then + INVALID_EXIT=${EXIT_CODE} + fi + fi } function check_python_files() { diff --git a/infra/docker/bionic/Dockerfile b/infra/docker/bionic/Dockerfile index 6a5f64a..15a91d7 100644 --- a/infra/docker/bionic/Dockerfile +++ b/infra/docker/bionic/Dockerfile @@ -30,7 +30,9 @@ RUN apt-get update && apt-get -qqy install libboost-all-dev libgflags-dev libgoo RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler # Additonal tools -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qqy install doxygen graphviz wget zip unzip clang-format-3.9 python3 python3-pip python3-venv hdf5-tools pylint +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive \ + apt-get -qqy install doxygen graphviz wget zip unzip clang-format-3.9 clang-format-8 python3 python3-pip python3-venv hdf5-tools pylint curl RUN pip3 install --upgrade pip RUN pip3 install yapf==0.22.0 numpy @@ -49,9 +51,15 @@ RUN update-alternatives --install /usr/bin/aarch64-linux-gnu-gcc aarch64-linux-g --slave /usr/bin/aarch64-linux-gnu-g++ aarch64-linux-gnu-g++ /usr/bin/aarch64-linux-gnu-g++-8 \ --slave /usr/bin/aarch64-linux-gnu-gcov aarch64-linux-gnu-gcov /usr/bin/aarch64-linux-gnu-gcov-8 -# Install lcov 1.13-4 for gcc-8 support (installed lcov 1.13-3 can't support gcc-8) -RUN wget http://launchpadlibrarian.net/370213541/lcov_1.13-4_all.deb -RUN dpkg -i lcov_1.13-4_all.deb +# Install lcov 1.14-2 for gcc-8 support +# Default version lcov 1.13-3 can't support gcc-8 +# lcov 1.13-4 with gcc-8 have bug: reports no coverage for class declaration +WORKDIR /root/lcov +RUN wget http://archive.ubuntu.com/ubuntu/pool/universe/l/lcov/lcov_1.14-2_all.deb +RUN apt-get update && apt-get -qqy install libperlio-gzip-perl libjson-perl +RUN dpkg -i lcov_1.14-2_all.deb +WORKDIR /root +RUN rm -rf /root/lcov # Build and install google test static libraries WORKDIR /root/gtest diff --git a/infra/docker/focal/Dockerfile b/infra/docker/focal/Dockerfile index 7f5a1b9..cccf304 100644 --- a/infra/docker/focal/Dockerfile +++ b/infra/docker/focal/Dockerfile @@ -29,7 +29,9 @@ RUN apt-get update && apt-get -qqy install libboost-all-dev libgflags-dev libgoo RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler # Additonal tools (except clang-format-3.9) -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qqy install doxygen graphviz wget zip unzip python3 python3-pip python3-venv hdf5-tools pylint +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive \ + apt-get -qqy install doxygen graphviz wget zip unzip clang-format-8 python3 python3-pip python3-venv hdf5-tools pylint curl RUN pip3 install --upgrade pip RUN pip3 install yapf==0.22.0 numpy diff --git a/infra/docker/xenial/Dockerfile b/infra/docker/xenial/Dockerfile index 052cc4f..ae3c464 100644 --- a/infra/docker/xenial/Dockerfile +++ b/infra/docker/xenial/Dockerfile @@ -19,7 +19,8 @@ RUN apt-get update && apt-get -qqy install libboost-all-dev libgflags-dev libgoo RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler # Additonal tools -RUN apt-get update && apt-get -qqy install doxygen graphviz wget unzip clang-format-3.9 python3 python3-pip python3-venv hdf5-tools pylint +RUN apt-get update && \ + apt-get -qqy install doxygen graphviz wget unzip clang-format-3.9 clang-format-8 python3 python3-pip python3-venv hdf5-tools pylint curl RUN pip3 install --upgrade pip RUN pip3 install yapf==0.22.0 numpy diff --git a/infra/nnfw/cmake/CfgOptionFlags.cmake b/infra/nnfw/cmake/CfgOptionFlags.cmake index 
450aa21..f6ad0ca 100644 --- a/infra/nnfw/cmake/CfgOptionFlags.cmake +++ b/infra/nnfw/cmake/CfgOptionFlags.cmake @@ -15,12 +15,7 @@ option(ENABLE_COVERAGE "Build for coverage test" OFF) option(BUILD_EXT_MULTITHREAD "Build external build using multi thread" ON) option(BUILD_ONERT "Build onert" ON) option(BUILD_LOGGING "Build logging runtime" ON) -CMAKE_DEPENDENT_OPTION(BUILD_RUNTIME_NNAPI_TEST "Build Runtime NN API Generated Test" - # Set BUILD_RUNTIME_NNAPI_TEST as ON - # if CMAKE_COMPILER_IS_GNUCC AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.2 - ON "CMAKE_COMPILER_IS_GNUCC;NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.2" - # Otherwise set BUILD_RUNTIME_NNAPI_TEST as OFF - OFF) +option(BUILD_RUNTIME_NNAPI_TEST "Build Runtime NN API Generated Test" ON) option(BUILD_RUNTIME_NNFW_API_TEST "Build Runtime NNFW API Tests" ON) option(BUILD_TFLITE_RUN "Build tflite-run" ON) option(BUILD_TFLITE_VANILLA_RUN "Build tflite-vanilla-run" OFF) @@ -53,7 +48,6 @@ option(BUILD_MLAPSE "Build mlapse benchmark toolkit" OFF) # option(BUILD_KBENCHMARK "Build kernel benchmark tool" OFF) option(BUILD_OPENCL_TOOL "Build OpenCL tool" OFF) -option(BUILD_NNAPI_QUICKCHECK "Build NN API Quickcheck tools" OFF) option(BUILD_TFLITE_ACCURACY "Build tflite accuracy tool" OFF) # # Default external libraries source download and build configuration @@ -78,6 +72,17 @@ option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" ON) option(BUILD_RUY "Build ruy library from the downloaded source" ON) option(BUILD_CPUINFO "Build cpuinfo library from the downloaded source" ON) option(PROFILE_RUY "Enable ruy library profiling" OFF) +option(DOWNLOAD_XNNPACK "Download xnnpack source" ON) +option(BUILD_XNNPACK "Build xnnpack library from the downloaded source" ON) +option(DOWNLOAD_PTHREADPOOL "Download pthreadpool source" ON) +option(BUILD_PTHREADPOOL "Build pthreadpool library from the source" ON) +option(DOWNLOAD_PSIMD "Download psimd source" ON) +option(BUILD_PSIMD "Build psimd library from the source" ON) +option(DOWNLOAD_FP16 "Download fp16 source" ON) +option(BUILD_FP16 "Build fp16 library from the source" ON) +option(DOWNLOAD_FXDIV "Download fxdiv source" ON) +option(BUILD_FXDIV "Build fxdiv library from the source" ON) + # ## Default sample build configuration diff --git a/infra/nnfw/cmake/options/options_aarch64-android.cmake b/infra/nnfw/cmake/options/options_aarch64-android.cmake index d8eceef..9332f52 100644 --- a/infra/nnfw/cmake/options/options_aarch64-android.cmake +++ b/infra/nnfw/cmake/options/options_aarch64-android.cmake @@ -3,15 +3,10 @@ # NOTE BUILD_ANDROID_TFLITE(JNI lib) is disabled due to BuiltinOpResolver issue. 
# tensorflow-lite does not build BuiltinOpResolver but JNI lib need it # Related Issue : #1403 -option(BUILD_ANDROID_TFLITE "Enable android support for TensorFlow Lite" ON) +option(BUILD_ANDROID_TFLITE "Enable android support for TensorFlow Lite" OFF) option(BUILD_ANDROID_BENCHMARK_APP "Enable Android Benchmark App" ON) option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF) # Need boost library option(DOWNLOAD_BOOST "Download boost source" ON) option(BUILD_BOOST "Build boost source" ON) -option(BUILD_RUNTIME_NNAPI_TEST "Build Runtime NN API Generated Test" OFF) -option(BUILD_NNAPI_TEST "Build nnapi_test" OFF) -option(BUILD_NNPACKAGE_RUN "Build nnpackge_run" ON) -option(BUILD_TFLITE_RUN "Build tflite-run" ON) -option(BUILD_TFLITE_LOADER_TEST_TOOL "Build tflite loader testing tool" OFF) option(BUILD_LOGGING "Build logging runtime" OFF) diff --git a/infra/nnfw/cmake/options/options_x86_64-darwin.cmake b/infra/nnfw/cmake/options/options_x86_64-darwin.cmake index 97642e6..135cfbf 100644 --- a/infra/nnfw/cmake/options/options_x86_64-darwin.cmake +++ b/infra/nnfw/cmake/options/options_x86_64-darwin.cmake @@ -3,3 +3,4 @@ # option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF) option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF) +option(BUILD_XNNPACK "Build XNNPACK" OFF) diff --git a/infra/nnfw/cmake/options/options_x86_64-linux.cmake b/infra/nnfw/cmake/options/options_x86_64-linux.cmake index 97642e6..135cfbf 100644 --- a/infra/nnfw/cmake/options/options_x86_64-linux.cmake +++ b/infra/nnfw/cmake/options/options_x86_64-linux.cmake @@ -3,3 +3,4 @@ # option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF) option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF) +option(BUILD_XNNPACK "Build XNNPACK" OFF) diff --git a/infra/nnfw/cmake/options/options_x86_64-tizen.cmake b/infra/nnfw/cmake/options/options_x86_64-tizen.cmake index bf8b280..1e83e4e 100644 --- a/infra/nnfw/cmake/options/options_x86_64-tizen.cmake +++ b/infra/nnfw/cmake/options/options_x86_64-tizen.cmake @@ -8,3 +8,5 @@ option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF) option(BUILD_LOGGING "Build logging runtime" OFF) option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF) option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF) + +option(BUILD_XNNPACK "Build XNNPACK" OFF) diff --git a/infra/nnfw/cmake/packages/CpuInfoConfig.cmake b/infra/nnfw/cmake/packages/CpuInfoConfig.cmake index 408cf85..99ee795 100644 --- a/infra/nnfw/cmake/packages/CpuInfoConfig.cmake +++ b/infra/nnfw/cmake/packages/CpuInfoConfig.cmake @@ -20,6 +20,8 @@ function(_CpuInfo_Build) set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE BOOL "Build cpuinfo micro-benchmarks") add_extdirectory("${CpuInfoSource_DIR}" cpuinfo EXCLUDE_FROM_ALL) set_target_properties(cpuinfo PROPERTIES POSITION_INDEPENDENT_CODE ON) + # Suppress warnings generated by clog + set_target_properties(clog PROPERTIES COMPILE_FLAGS "-Wno-unused-result") set(CpuInfoSource_DIR ${CpuInfoSource_DIR} PARENT_SCOPE) set(CpuInfo_FOUND TRUE PARENT_SCOPE) endfunction(_CpuInfo_Build) diff --git a/infra/nnfw/cmake/packages/Fp16Config.cmake b/infra/nnfw/cmake/packages/Fp16Config.cmake new file mode 100644 index 0000000..6c31613 --- /dev/null +++ b/infra/nnfw/cmake/packages/Fp16Config.cmake @@ -0,0 +1,30 @@ +function(_Fp16_Build) + nnas_find_package(Fp16Source QUIET) + + # NOTE This line prevents multiple definitions of target + if(TARGET fp16) + set(Fp16Source_DIR ${Fp16Source_DIR} PARENT_SCOPE) + set(Fp16_FOUND 
TRUE PARENT_SCOPE) + return() + endif(TARGET fp16) + + if(NOT Fp16Source_FOUND) + message(STATUS "FP16: Source not found") + set(Fp16_FOUND FALSE PARENT_SCOPE) + return() + endif(NOT Fp16Source_FOUND) + + set(FP16_BUILD_TESTS OFF CACHE BOOL "Build FP16 unit tests") + set(FP16_BUILD_BENCHMARKS OFF CACHE BOOL "Build FP16 micro-benchmarks") + nnas_find_package(PsimdSource) + set(PSIMD_SOURCE_DIR ${PsimdSource_DIR} CACHE STRING "String to disable download PSIMD on fp16") + add_extdirectory("${Fp16Source_DIR}" FP16 EXCLUDE_FROM_ALL) + set(Fp16Source_DIR ${Fp16Source_DIR} PARENT_SCOPE) + set(Fp16_FOUND TRUE PARENT_SCOPE) +endfunction(_Fp16_Build) + +if(BUILD_FP16) + _Fp16_Build() +else() + set(Fp16_FOUND FALSE) +endif() diff --git a/infra/nnfw/cmake/packages/FxdivConfig.cmake b/infra/nnfw/cmake/packages/FxdivConfig.cmake new file mode 100644 index 0000000..6f268ae --- /dev/null +++ b/infra/nnfw/cmake/packages/FxdivConfig.cmake @@ -0,0 +1,29 @@ +function(_Fxdiv_Build) + nnas_find_package(FxdivSource QUIET) + + # NOTE This line prevents multiple definitions of target + if(TARGET fxdiv) + set(FxdivSource_DIR ${FxdivSource_DIR} PARENT_SCOPE) + set(Fxdiv_FOUND TRUE PARENT_SCOPE) + return() + endif(TARGET fxdiv) + + if(NOT FxdivSource_FOUND) + message(STATUS "FXDIV: Source not found") + set(Fxdiv_FOUND FALSE PARENT_SCOPE) + return() + endif(NOT FxdivSource_FOUND) + + set(FXDIV_BUILD_TESTS OFF CACHE BOOL "Build FXdiv unit tests") + set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "Build FXdiv micro-benchmarks") + + add_extdirectory("${FxdivSource_DIR}" FXDIV EXCLUDE_FROM_ALL) + set(FxdivSource_DIR ${FxdivSource_DIR} PARENT_SCOPE) + set(Fxdiv_FOUND TRUE PARENT_SCOPE) +endfunction(_Fxdiv_Build) + +if(BUILD_FXDIV) + _Fxdiv_Build() +else() + set(Fxdiv_FOUND FALSE) +endif() diff --git a/infra/nnfw/cmake/packages/PsimdConfig.cmake b/infra/nnfw/cmake/packages/PsimdConfig.cmake new file mode 100644 index 0000000..a3587b6 --- /dev/null +++ b/infra/nnfw/cmake/packages/PsimdConfig.cmake @@ -0,0 +1,26 @@ +function(_Psimd_Build) + nnas_find_package(PsimdSource QUIET) + + # NOTE This line prevents multiple definitions of target + if(TARGET psimd) + set(PsimdSource_DIR ${PsimdSource_DIR} PARENT_SCOPE) + set(Psimd_FOUND TRUE PARENT_SCOPE) + return() + endif(TARGET psimd) + + if(NOT PsimdSource_FOUND) + message(STATUS "PSIMD: Source not found") + set(Psimd_FOUND FALSE PARENT_SCOPE) + return() + endif(NOT PsimdSource_FOUND) + + add_extdirectory("${PsimdSource_DIR}" PSIMD EXCLUDE_FROM_ALL) + set(PsimdSource_DIR ${PsimdSource_DIR} PARENT_SCOPE) + set(Psimd_FOUND TRUE PARENT_SCOPE) +endfunction(_Psimd_Build) + +if(BUILD_PSIMD) + _Psimd_Build() +else() + set(Psimd_FOUND FALSE) +endif() diff --git a/infra/nnfw/cmake/packages/PthreadpoolConfig.cmake b/infra/nnfw/cmake/packages/PthreadpoolConfig.cmake new file mode 100644 index 0000000..6283826 --- /dev/null +++ b/infra/nnfw/cmake/packages/PthreadpoolConfig.cmake @@ -0,0 +1,35 @@ +function(_Pthreadpool_Build) + nnas_find_package(PthreadpoolSource QUIET) + + # NOTE This line prevents multiple definitions of target + if(TARGET pthreadpool) + set(PthreadpoolSource_DIR ${PthreadpoolSource_DIR} PARENT_SCOPE) + set(Pthreadpool_FOUND TRUE PARENT_SCOPE) + return() + endif(TARGET pthreadpool) + + if(NOT PthreadpoolSource_FOUND) + message(STATUS "PTHREADPOOL: Source not found") + set(Pthreadpool_FOUND FALSE PARENT_SCOPE) + return() + endif(NOT PthreadpoolSource_FOUND) + + SET(PTHREADPOOL_BUILD_TESTS OFF CACHE BOOL "Build pthreadpool unit tests") + SET(PTHREADPOOL_BUILD_BENCHMARKS OFF CACHE 
BOOL "Build pthreadpool micro-benchmarks") + + nnas_find_package(FxdivSource) + set(FXDIV_SOURCE_DIR ${FxdivSource_DIR} CACHE STRING "String to disable download FXDIV") + + add_extdirectory("${PthreadpoolSource_DIR}" PTHREADPOOL EXCLUDE_FROM_ALL) + set_target_properties(pthreadpool PROPERTIES POSITION_INDEPENDENT_CODE ON) + # Suppress warnings generated by pthreadpool + set_target_properties(pthreadpool PROPERTIES COMPILE_FLAGS "-Wno-deprecated-declarations") + set(PthreadpoolSource_DIR ${PthreadpoolSource_DIR} PARENT_SCOPE) + set(Pthreadpool_FOUND TRUE PARENT_SCOPE) +endfunction(_Pthreadpool_Build) + +if(BUILD_PTHREADPOOL) + _Pthreadpool_Build() +else() + set(Pthreadpool_FOUND FALSE) +endif() diff --git a/infra/nnfw/cmake/packages/XnnpackConfig.cmake b/infra/nnfw/cmake/packages/XnnpackConfig.cmake new file mode 100644 index 0000000..191a28f --- /dev/null +++ b/infra/nnfw/cmake/packages/XnnpackConfig.cmake @@ -0,0 +1,38 @@ +function(_Xnnpack_Build) + nnas_find_package(XnnpackSource QUIET) + nnfw_find_package(Fxdiv QUIET) + nnfw_find_package(CpuInfo QUIET) + nnfw_find_package(Pthreadpool QUIET) + nnfw_find_package(Psimd QUIET) + nnfw_find_package(Fp16 QUIET) + + # NOTE This line prevents multiple definitions of cpuinfo target + if(TARGET XNNPACK) + set(XnnpackSource_DIR ${XnnpackSource_DIR} PARENT_SCOPE) + set(Xnnpack_FOUND TRUE PARENT_SCOPE) + return() + endif(TARGET XNNPACK) + + if(NOT XnnpackSource_FOUND) + message(STATUS "XNNPACK: Source not found") + set(Xnnpack_FOUND FALSE PARENT_SCOPE) + return() + endif(NOT XnnpackSource_FOUND) + + set(XNNPACK_BUILD_TESTS OFF CACHE BOOL "Build XNNPACK unit tests") + set(XNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "Build XNNPACK benchmarks") + set(XNNPACK_USE_SYSTEM_LIBS ON CACHE BOOL "Use system-provided dependency libraries") + + add_extdirectory("${XnnpackSource_DIR}" XNNPACK EXCLUDE_FROM_ALL) + set_target_properties(XNNPACK PROPERTIES POSITION_INDEPENDENT_CODE ON) + # Suppress warnings generated by xnnpack + set_target_properties(XNNPACK PROPERTIES COMPILE_FLAGS "-Wno-deprecated-declarations") + set(XnnpackSource_DIR ${XnnpackSource_DIR} PARENT_SCOPE) + set(Xnnpack_FOUND TRUE PARENT_SCOPE) +endfunction(_Xnnpack_Build) + +if(BUILD_XNNPACK) + _Xnnpack_Build() +else(BUILD_XNNPACK) + set(Xnnpack_FOUND FALSE) +endif(BUILD_XNNPACK) diff --git a/infra/nnfw/command/build b/infra/nnfw/command/build index b0301d2..4a3601e 100644 --- a/infra/nnfw/command/build +++ b/infra/nnfw/command/build @@ -8,4 +8,4 @@ if [[ ! -d "${BUILD_PATH}" ]]; then fi cd ${BUILD_PATH} -make "$@" +cmake --build . 
-- "$@" diff --git a/infra/scripts/build_android_runtime_release.sh b/infra/scripts/build_android_runtime_release.sh index fe933c6..c9a3b1b 100755 --- a/infra/scripts/build_android_runtime_release.sh +++ b/infra/scripts/build_android_runtime_release.sh @@ -18,4 +18,5 @@ fi export TARGET_OS=android export CROSS_BUILD=1 -make -f Makefile.template +export BUILD_TYPE=release +make -f Makefile.template install diff --git a/infra/scripts/docker_build_cross_aarch64_runtime.sh b/infra/scripts/docker_build_cross_aarch64_runtime.sh index 011d14c..607526b 100755 --- a/infra/scripts/docker_build_cross_aarch64_runtime.sh +++ b/infra/scripts/docker_build_cross_aarch64_runtime.sh @@ -22,6 +22,8 @@ else fi # docker image name +# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial" +# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic" if [[ -z $DOCKER_IMAGE_NAME ]]; then echo "It will use default docker image name" fi diff --git a/infra/scripts/docker_build_cross_arm_runtime.sh b/infra/scripts/docker_build_cross_arm_runtime.sh index 551fb57..07b5ca4 100755 --- a/infra/scripts/docker_build_cross_arm_runtime.sh +++ b/infra/scripts/docker_build_cross_arm_runtime.sh @@ -22,6 +22,8 @@ else fi # docker image name +# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial" +# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic" if [[ -z $DOCKER_IMAGE_NAME ]]; then echo "It will use default docker image name" fi diff --git a/infra/scripts/docker_build_cross_arm_runtime_release.sh b/infra/scripts/docker_build_cross_arm_runtime_release.sh index 876f318..8d04438 100755 --- a/infra/scripts/docker_build_cross_arm_runtime_release.sh +++ b/infra/scripts/docker_build_cross_arm_runtime_release.sh @@ -22,6 +22,8 @@ else fi # docker image name +# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial" +# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic" if [[ -z $DOCKER_IMAGE_NAME ]]; then echo "It will use default docker image name" fi diff --git a/infra/scripts/docker_build_cross_coverage.sh b/infra/scripts/docker_build_cross_coverage.sh index f42251b..e03ea75 100755 --- a/infra/scripts/docker_build_cross_coverage.sh +++ b/infra/scripts/docker_build_cross_coverage.sh @@ -22,6 +22,8 @@ else fi # docker image name +# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial" +# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic" if [[ -z $DOCKER_IMAGE_NAME ]]; then echo "It will use default docker image name" fi diff --git a/infra/scripts/docker_build_nncc.sh b/infra/scripts/docker_build_nncc.sh index 5fd49a4..e65feb5 100755 --- a/infra/scripts/docker_build_nncc.sh +++ b/infra/scripts/docker_build_nncc.sh @@ -35,6 +35,8 @@ if [ -d $ONNXRUNTIME_PREFIX ]; then fi # docker image name +# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial" +# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic" if [[ -z $DOCKER_IMAGE_NAME ]]; then echo "It will use default docker image name" fi diff --git a/infra/scripts/docker_build_test_x64.sh b/infra/scripts/docker_build_test_x64.sh index 16fcf3f..0d2395b 100755 --- a/infra/scripts/docker_build_test_x64.sh +++ b/infra/scripts/docker_build_test_x64.sh @@ -14,6 +14,8 @@ else fi # docker image name +# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial" +# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic" if [[ -z $DOCKER_IMAGE_NAME ]]; then echo "It will use default docker image name" fi diff --git a/infra/scripts/docker_build_tizen_cross.sh 
b/infra/scripts/docker_build_tizen_cross.sh index ee0f183..9a8378f 100755 --- a/infra/scripts/docker_build_tizen_cross.sh +++ b/infra/scripts/docker_build_tizen_cross.sh @@ -22,6 +22,8 @@ else fi # docker image name +# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial" +# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic" if [[ -z $DOCKER_IMAGE_NAME ]]; then echo "It will use default docker image name" fi diff --git a/infra/scripts/docker_collect_nnpkg_resources.sh b/infra/scripts/docker_collect_nnpkg_resources.sh index 55adaa1..ef6212a 100755 --- a/infra/scripts/docker_collect_nnpkg_resources.sh +++ b/infra/scripts/docker_collect_nnpkg_resources.sh @@ -40,6 +40,8 @@ if [ -d $ONNXRUNTIME_PREFIX ]; then fi # docker image name +# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial" +# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic" if [[ -z $DOCKER_IMAGE_NAME ]]; then echo "It will use default docker image name" fi diff --git a/infra/scripts/docker_coverage_report.sh b/infra/scripts/docker_coverage_report.sh index 677462d..f0de1de 100755 --- a/infra/scripts/docker_coverage_report.sh +++ b/infra/scripts/docker_coverage_report.sh @@ -8,6 +8,8 @@ CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" ROOT_PATH="$CURRENT_PATH/../../" # docker image name +# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial" +# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic" if [[ -z $DOCKER_IMAGE_NAME ]]; then echo "It will use default docker image name" fi diff --git a/infra/scripts/test_ubuntu_runtime_mixed.sh b/infra/scripts/test_ubuntu_runtime_mixed.sh index 40f59eb..6eab90c 100755 --- a/infra/scripts/test_ubuntu_runtime_mixed.sh +++ b/infra/scripts/test_ubuntu_runtime_mixed.sh @@ -58,5 +58,6 @@ export OP_BACKEND_Conv2D="cpu" export OP_BACKEND_MaxPool2D="acl_cl" export OP_BACKEND_AvgPool2D="acl_neon" export ACL_LAYOUT="NCHW" +export RUY_THREADS=4 NNAPIGTest "acl_cl;acl_neon;cpu" "Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}.union" "report/mixed" TFLiteModelVerification "acl_cl;acl_neon;cpu" "${TESTLIST_PREFIX}.intersect.txt" "report/mixed" diff --git a/nnpackage/examples/one_op_in_tflite/metadata/MANIFEST b/nnpackage/examples/one_op_in_tflite/metadata/MANIFEST index 1d96cce..3ed12f9 100644 --- a/nnpackage/examples/one_op_in_tflite/metadata/MANIFEST +++ b/nnpackage/examples/one_op_in_tflite/metadata/MANIFEST @@ -1,7 +1,8 @@ { "major-version" : "1", - "minor-version" : "0", + "minor-version" : "1", "patch-version" : "0", + "configs" : [ "config.cfg" ], "models" : [ "add.tflite" ], "model-types" : [ "tflite" ] } diff --git a/nnpackage/examples/one_op_in_tflite/metadata/config.cfg b/nnpackage/examples/one_op_in_tflite/metadata/config.cfg new file mode 100644 index 0000000..776fa70 --- /dev/null +++ b/nnpackage/examples/one_op_in_tflite/metadata/config.cfg @@ -0,0 +1 @@ +BACKENDS="cpu" diff --git a/nnpackage/spec/10_packaging_and_manifest.md b/nnpackage/spec/10_packaging_and_manifest.md index d4e6ec8..4dc3de8 100644 --- a/nnpackage/spec/10_packaging_and_manifest.md +++ b/nnpackage/spec/10_packaging_and_manifest.md @@ -18,11 +18,13 @@ For `model` and `custom_op`, see [20_model_and_operators.md](20_model_and_operat nnpackage ├── custom_op ├── metadata -│   └── MANIFEST +│   ├── MANIFEST +│   └── config.cfg └── mymodel.model ``` - `mymodel.model` is a model file that has computation graph and weights. +- `config.cfg` is a configuration file that has parameters to configure onert. 
- `metadata` is a directory that contains all metadata including `MANIFEST`. - `MANIFEST` is a collection of attributes about this package. - `custom_op` is a directory that contains implementation objects. @@ -61,6 +63,11 @@ For detail, see [semantic versioning 2.0.0](https://semver.org/) `patch-version` is the patch version of `nnpackage`. +#### configs + +`configs` is an array of configuration file names placed in `metadata` folder. This can be empty or +attribute itself can be omitted. As of now we only support only one item. + #### models `models` is an array of path to model files, which is relative path from top level directory of this package. @@ -84,9 +91,25 @@ Here is an example of `MANIFEST`. ``` { "major-version" : "1", - "minor-version" : "0", + "minor-version" : "1", "patch-version" : "0", + "configs" : [ "model.cfg" ], "models" : [ "mymodel.model", "yourmodel.model" ], "model-types" : [ "tflite", "circle" ] } ``` + +## 5. Configuration file + +Configuration file is a human readable plain text file having one `key=value` in each line. +- `#` is used as comment and will be ignored afterwards. +- all leading and trailing white spaces will be ignored in both `key` and `value`. + +For example +``` +BACKENDS=cpu +# leading/trailing space is ignored + EXCUTOR=Linear # some comment +``` + +Refer `runtime/onert/core/include/util/Config.lst` file for more information of `key`. diff --git a/packaging/FP16.tar.gz b/packaging/FP16.tar.gz new file mode 100644 index 0000000..ebd2764 Binary files /dev/null and b/packaging/FP16.tar.gz differ diff --git a/packaging/FXDIV.tar.gz b/packaging/FXDIV.tar.gz new file mode 100644 index 0000000..7c1b825 Binary files /dev/null and b/packaging/FXDIV.tar.gz differ diff --git a/packaging/PSIMD.tar.gz b/packaging/PSIMD.tar.gz new file mode 100644 index 0000000..3ae8924 Binary files /dev/null and b/packaging/PSIMD.tar.gz differ diff --git a/packaging/PTHREADPOOL.tar.gz b/packaging/PTHREADPOOL.tar.gz new file mode 100644 index 0000000..6cf42c0 Binary files /dev/null and b/packaging/PTHREADPOOL.tar.gz differ diff --git a/packaging/XNNPACK.tar.gz b/packaging/XNNPACK.tar.gz new file mode 100644 index 0000000..d770c2c Binary files /dev/null and b/packaging/XNNPACK.tar.gz differ diff --git a/packaging/nnfw.spec b/packaging/nnfw.spec index 18150f3..028d88b 100644 --- a/packaging/nnfw.spec +++ b/packaging/nnfw.spec @@ -1,6 +1,6 @@ Name: nnfw Summary: nnfw -Version: 1.11.1 +Version: 1.12.0 Release: 1 Group: Development License: Apache-2.0 and MIT and BSD-2-Clause @@ -13,6 +13,11 @@ Source1003: eigen.tar.gz Source1004: gemmlowp.tar.gz Source1005: ruy.tar.gz Source1006: cpuinfo.tar.gz +Source1007: XNNPACK.tar.gz +Source1008: FXDIV.tar.gz +Source1009: PTHREADPOOL.tar.gz +Source1010: PSIMD.tar.gz +Source1011: FP16.tar.gz Source2001: nnfw.pc.in Source2002: nnfw-plugin.pc.in @@ -116,6 +121,11 @@ tar -xf %{SOURCE1003} -C ./externals tar -xf %{SOURCE1004} -C ./externals tar -xf %{SOURCE1005} -C ./externals tar -xf %{SOURCE1006} -C ./externals +tar -xf %{SOURCE1007} -C ./externals +tar -xf %{SOURCE1008} -C ./externals +tar -xf %{SOURCE1009} -C ./externals +tar -xf %{SOURCE1010} -C ./externals +tar -xf %{SOURCE1011} -C ./externals %build %ifarch arm armv7l aarch64 x86_64 diff --git a/res/CircleRecipes/InstanceNorm_001/test.recipe b/res/CircleRecipes/InstanceNorm_001/test.recipe new file mode 100644 index 0000000..ec647c3 --- /dev/null +++ b/res/CircleRecipes/InstanceNorm_001/test.recipe @@ -0,0 +1,47 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 
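[Editor's note on the configuration-file format added in the packaging spec above] To make the `key=value` rules from "5. Configuration file" concrete, the sketch below reads a `config.cfg` into a key/value map: everything from `#` to the end of a line is dropped as a comment, leading and trailing whitespace is trimmed from both key and value, and lines without `=` are skipped. This is a hypothetical illustration of the documented format only, not onert's actual loader; the supported keys are the ones listed in the `runtime/onert/core/include/util/Config.lst` file referenced above.

```
// Minimal sketch of the documented config.cfg rules (assumption: not onert's real parser).
#include <fstream>
#include <map>
#include <string>

static std::string trim(const std::string &s)
{
  const auto b = s.find_first_not_of(" \t\r");
  if (b == std::string::npos)
    return "";
  const auto e = s.find_last_not_of(" \t\r");
  return s.substr(b, e - b + 1);
}

std::map<std::string, std::string> load_config(const std::string &path)
{
  std::map<std::string, std::string> config;
  std::ifstream in(path);
  std::string line;
  while (std::getline(in, line))
  {
    line = line.substr(0, line.find('#')); // '#' starts a comment; ignore the rest of the line
    const auto eq = line.find('=');
    if (eq == std::string::npos)
      continue; // not a key=value line
    const auto key = trim(line.substr(0, eq));
    const auto value = trim(line.substr(eq + 1));
    if (!key.empty())
      config[key] = value; // e.g. config["BACKENDS"] == "cpu" for the example package above
  }
  return config;
}
```

With the example `config.cfg` shown earlier (`BACKENDS="cpu"` or `BACKENDS=cpu`), this would yield a single entry keyed by `BACKENDS`; since the MANIFEST's `configs` array currently supports only one item, a runtime would typically load exactly one such file per package.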
dim: 4 } +} +operand { + name: "gamma" + type: FLOAT32 + shape { dim: 4 } + filler { + tag: "explicit" + arg: "0.0123" + arg: "-0.3324" + arg: "0.2324" + arg: "-3.3360" + } +} +operand { + name: "beta" + type: FLOAT32 + shape { dim: 4 } + filler { + tag: "explicit" + arg: "0.7023" + arg: "-0.3092" + arg: "0.7552" + arg: "0.2729" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 4 } +} +operation { + type: "InstanceNorm" + input: "ifm" + input: "gamma" + input: "beta" + output: "ofm" + instance_norm_options { + epsilon: 0.001 + activation: NONE + } +} +input: "ifm" +output: "ofm" diff --git a/res/CircleRecipes/InstanceNorm_001/test.reverse b/res/CircleRecipes/InstanceNorm_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Mean_U8_dynamic_000/test.recipe b/res/TensorFlowLiteRecipes/Mean_U8_dynamic_000/test.recipe new file mode 100644 index 0000000..bed2563 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Mean_U8_dynamic_000/test.recipe @@ -0,0 +1,31 @@ +operand { + name: "ifm" + type: UINT8 + shape { dim: 1 dim: 8 dim: 8 dim: 4 } + quant { min: -128 max: 127 scale: 1 zero_point: 128 } + shape_signature { dim: -1 dim: 8 dim: 8 dim: 4 } +} +operand { + name: "reduction_indices" + type: INT32 + shape { dim: 2 } + filler { tag: "explicit" arg: "1" arg: "2" } +} +operand { + name: "ofm" + type: UINT8 + shape { dim: 1 dim: 1 dim: 1 dim: 4 } + quant { min: -256 max: 254 scale: 2 zero_point: 128 } + shape_signature { dim: -1 dim: 1 dim: 1 dim: 4 } +} +operation { + type: "Mean" + mean_options { + keep_dims: true + } + input: "ifm" + input: "reduction_indices" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Mean_U8_dynamic_000/test.reverse b/res/TensorFlowLiteRecipes/Mean_U8_dynamic_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Mean_dynamic_000/test.recipe b/res/TensorFlowLiteRecipes/Mean_dynamic_000/test.recipe new file mode 100644 index 0000000..a098c62 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Mean_dynamic_000/test.recipe @@ -0,0 +1,29 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 8 dim: 4 } + shape_signature { dim: -1 dim: 8 dim: 8 dim: 4 } +} +operand { + name: "reduction_indices" + type: INT32 + shape { dim: 1 } + filler { tag: "explicit" arg: "-1" } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 8 dim: 1 } + shape_signature { dim: -1 dim: 8 dim: 8 dim: 1 } +} +operation { + type: "Mean" + mean_options { + keep_dims: true + } + input: "ifm" + input: "reduction_indices" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Mean_dynamic_000/test.reverse b/res/TensorFlowLiteRecipes/Mean_dynamic_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Mean_dynamic_001/test.recipe b/res/TensorFlowLiteRecipes/Mean_dynamic_001/test.recipe new file mode 100644 index 0000000..bd1a462 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Mean_dynamic_001/test.recipe @@ -0,0 +1,29 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 4 } + shape_signature { dim: -1 dim: 3 dim: 4 } +} +operand { + name: "reduction_indices" + type: INT32 + shape { dim: 1 } + filler { tag: "explicit" arg: "1" } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 4 } + shape_signature { dim: -1 dim: 4 } +} +operation { + type: "Mean" + mean_options { + keep_dims: false + } + input: "ifm" + input: 
"reduction_indices" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Mean_dynamic_001/test.reverse b/res/TensorFlowLiteRecipes/Mean_dynamic_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/ReLU6_dynamic_000/test.recipe b/res/TensorFlowLiteRecipes/ReLU6_dynamic_000/test.recipe new file mode 100644 index 0000000..e6dee0e --- /dev/null +++ b/res/TensorFlowLiteRecipes/ReLU6_dynamic_000/test.recipe @@ -0,0 +1,19 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } + shape_signature { dim: -1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } + shape_signature { dim: -1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "ReLU6" + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/ReLU6_dynamic_000/test.reverse b/res/TensorFlowLiteRecipes/ReLU6_dynamic_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/ReLUN1To1_dynamic_000/test.recipe b/res/TensorFlowLiteRecipes/ReLUN1To1_dynamic_000/test.recipe new file mode 100644 index 0000000..21c237f --- /dev/null +++ b/res/TensorFlowLiteRecipes/ReLUN1To1_dynamic_000/test.recipe @@ -0,0 +1,19 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } + shape_signature { dim: -1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } + shape_signature { dim: -1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "ReLUN1To1" + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/ReLUN1To1_dynamic_000/test.reverse b/res/TensorFlowLiteRecipes/ReLUN1To1_dynamic_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/ReLU_dynamic_000/test.recipe b/res/TensorFlowLiteRecipes/ReLU_dynamic_000/test.recipe new file mode 100644 index 0000000..fa4293e --- /dev/null +++ b/res/TensorFlowLiteRecipes/ReLU_dynamic_000/test.recipe @@ -0,0 +1,19 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } + shape_signature { dim: -1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } + shape_signature { dim: -1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "ReLU" + input: "ifm" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/ReLU_dynamic_000/test.reverse b/res/TensorFlowLiteRecipes/ReLU_dynamic_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/ReduceAny_dynamic_000/test.recipe b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_000/test.recipe new file mode 100644 index 0000000..427bd05 --- /dev/null +++ b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_000/test.recipe @@ -0,0 +1,31 @@ +operand { + name: "ifm" + type: BOOL + shape { dim: 1 dim: 3 dim: 4 } + shape_signature { dim: -1 dim: 3 dim: 4 } +} +operand { + name: "reduction_indices" + type: INT32 + shape { dim: 3 } + filler { + tag: "explicit" + arg: "0" arg: "1" arg: "2" + } +} +operand { + name: "ofm" + type: BOOL + shape { } +} +operation { + type: "ReduceAny" + reduce_any_options { + keep_dims: false + } + input: "ifm" + input: "reduction_indices" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/ReduceAny_dynamic_000/test.reverse b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_000/test.reverse new file 
mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/ReduceAny_dynamic_001/test.recipe b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_001/test.recipe new file mode 100644 index 0000000..9c3a5e8 --- /dev/null +++ b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_001/test.recipe @@ -0,0 +1,32 @@ +operand { + name: "ifm" + type: BOOL + shape { dim: 1 dim: 3 dim: 4 } + shape_signature { dim: -1 dim: 3 dim: 4 } +} +operand { + name: "reduction_indices" + type: INT32 + shape { dim: 1 } + filler { + tag: "explicit" + arg: "1" + } +} +operand { + name: "ofm" + type: BOOL + shape { dim: 1 dim: 4 } + shape_signature { dim: -1 dim: 4 } +} +operation { + type: "ReduceAny" + reduce_any_options { + keep_dims: false + } + input: "ifm" + input: "reduction_indices" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/ReduceAny_dynamic_001/test.reverse b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/ReduceAny_dynamic_002/test.recipe b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_002/test.recipe new file mode 100644 index 0000000..109a3cb --- /dev/null +++ b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_002/test.recipe @@ -0,0 +1,31 @@ +operand { + name: "ifm" + type: BOOL + shape { dim: 1 dim: 3 dim: 4 } + shape_signature { dim: -1 dim: 3 dim: 4 } +} +operand { + name: "reduction_indices" + type: INT32 + shape { dim: 3 } + filler { + tag: "explicit" + arg: "0" arg: "1" arg: "2" + } +} +operand { + name: "ofm" + type: BOOL + shape { dim: 1 dim: 1 dim: 1 } +} +operation { + type: "ReduceAny" + reduce_any_options { + keep_dims: true + } + input: "ifm" + input: "reduction_indices" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/ReduceAny_dynamic_002/test.reverse b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_002/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/ReduceAny_dynamic_003/test.recipe b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_003/test.recipe new file mode 100644 index 0000000..1355f2b --- /dev/null +++ b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_003/test.recipe @@ -0,0 +1,31 @@ +operand { + name: "ifm" + type: BOOL + shape { dim: 2 dim: 1 dim: 4 } + shape_signature { dim: 2 dim: -1 dim: 4 } +} +operand { + name: "reduction_indices" + type: INT32 + shape { dim: 1 } + filler { + tag: "explicit" + arg: "1" + } +} +operand { + name: "ofm" + type: BOOL + shape { dim: 2 dim: 1 dim: 4 } +} +operation { + type: "ReduceAny" + reduce_any_options { + keep_dims: true + } + input: "ifm" + input: "reduction_indices" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/ReduceAny_dynamic_003/test.reverse b/res/TensorFlowLiteRecipes/ReduceAny_dynamic_003/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/ReduceMax_dynamic_000/test.recipe b/res/TensorFlowLiteRecipes/ReduceMax_dynamic_000/test.recipe new file mode 100644 index 0000000..01669be --- /dev/null +++ b/res/TensorFlowLiteRecipes/ReduceMax_dynamic_000/test.recipe @@ -0,0 +1,29 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 8 dim: 4 } + shape_signature { dim: -1 dim: 8 dim: 8 dim: 4 } +} +operand { + name: "axis" + type: INT32 + shape { dim: 1 } + filler { tag: "explicit" arg: "-1" } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 8 dim: 1 } + shape_signature { dim: -1 dim: 8 dim: 8 dim: 1 } +} 
+operation { + type: "ReduceMax" + reduce_max_options { + keep_dims: true + } + input: "ifm" + input: "axis" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/ReduceMax_dynamic_000/test.reverse b/res/TensorFlowLiteRecipes/ReduceMax_dynamic_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/ReduceMin_dynamic_000/test.recipe b/res/TensorFlowLiteRecipes/ReduceMin_dynamic_000/test.recipe new file mode 100644 index 0000000..50603ba --- /dev/null +++ b/res/TensorFlowLiteRecipes/ReduceMin_dynamic_000/test.recipe @@ -0,0 +1,29 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 8 dim: 4 } + shape_signature { dim: -1 dim: 8 dim: 8 dim: 4 } +} +operand { + name: "axis" + type: INT32 + shape { dim: 1 } + filler { tag: "explicit" arg: "-1" } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 8 dim: 1 } + shape_signature { dim: -1 dim: 8 dim: 8 dim: 1 } +} +operation { + type: "ReduceMin" + reduce_min_options { + keep_dims: true + } + input: "ifm" + input: "axis" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/ReduceMin_dynamic_000/test.reverse b/res/TensorFlowLiteRecipes/ReduceMin_dynamic_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/ReduceProd_dynamic_000/test.recipe b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_000/test.recipe new file mode 100644 index 0000000..e81db67 --- /dev/null +++ b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_000/test.recipe @@ -0,0 +1,31 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 4 } + shape_signature { dim: -1 dim: 3 dim: 4 } +} +operand { + name: "reduction_indices" + type: INT32 + shape { dim: 3 } + filler { + tag: "explicit" + arg: "0" arg: "1" arg: "2" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { } +} +operation { + type: "ReduceProd" + reduce_prod_options { + keep_dims: false + } + input: "ifm" + input: "reduction_indices" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/ReduceProd_dynamic_000/test.reverse b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/ReduceProd_dynamic_001/test.recipe b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_001/test.recipe new file mode 100644 index 0000000..f2811b3 --- /dev/null +++ b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_001/test.recipe @@ -0,0 +1,32 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 4 } + shape_signature { dim: -1 dim: 3 dim: 4 } +} +operand { + name: "reduction_indices" + type: INT32 + shape { dim: 1 } + filler { + tag: "explicit" + arg: "1" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 4 } + shape_signature { dim: -1 dim: 4 } +} +operation { + type: "ReduceProd" + reduce_prod_options { + keep_dims: false + } + input: "ifm" + input: "reduction_indices" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/ReduceProd_dynamic_001/test.reverse b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_001/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/ReduceProd_dynamic_002/test.recipe b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_002/test.recipe new file mode 100644 index 0000000..c1e14c5 --- /dev/null +++ b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_002/test.recipe @@ -0,0 +1,31 @@ +operand { + name: 
"ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 4 } + shape_signature { dim: -1 dim: 3 dim: 4 } +} +operand { + name: "reduction_indices" + type: INT32 + shape { dim: 3 } + filler { + tag: "explicit" + arg: "0" arg: "1" arg: "2" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 1 dim: 1 } +} +operation { + type: "ReduceProd" + reduce_prod_options { + keep_dims: true + } + input: "ifm" + input: "reduction_indices" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/ReduceProd_dynamic_002/test.reverse b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_002/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/ReduceProd_dynamic_003/test.recipe b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_003/test.recipe new file mode 100644 index 0000000..4e4633f --- /dev/null +++ b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_003/test.recipe @@ -0,0 +1,31 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 2 dim: 1 dim: 4 } + shape_signature { dim: 2 dim: -1 dim: 4 } +} +operand { + name: "reduction_indices" + type: INT32 + shape { dim: 1 } + filler { + tag: "explicit" + arg: "1" + } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 2 dim: 1 dim: 4 } +} +operation { + type: "ReduceProd" + reduce_prod_options { + keep_dims: true + } + input: "ifm" + input: "reduction_indices" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/ReduceProd_dynamic_003/test.reverse b/res/TensorFlowLiteRecipes/ReduceProd_dynamic_003/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Sum_dynamic_000/test.recipe b/res/TensorFlowLiteRecipes/Sum_dynamic_000/test.recipe new file mode 100644 index 0000000..99b089e --- /dev/null +++ b/res/TensorFlowLiteRecipes/Sum_dynamic_000/test.recipe @@ -0,0 +1,29 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 8 dim: 4 } + shape_signature { dim: -1 dim: 8 dim: 8 dim: 4 } +} +operand { + name: "reduction_indices" + type: INT32 + shape { dim: 1 } + filler { tag: "explicit" arg: "-1" } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 8 dim: 8 dim: 1 } + shape_signature { dim: -1 dim: 8 dim: 8 dim: 1 } +} +operation { + type: "Sum" + sum_options { + keep_dims: true + } + input: "ifm" + input: "reduction_indices" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/Sum_dynamic_000/test.reverse b/res/TensorFlowLiteRecipes/Sum_dynamic_000/test.reverse new file mode 100644 index 0000000..e69de29 diff --git a/res/TensorFlowLiteRecipes/Sum_dynamic_001/test.recipe b/res/TensorFlowLiteRecipes/Sum_dynamic_001/test.recipe new file mode 100644 index 0000000..46fac49 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Sum_dynamic_001/test.recipe @@ -0,0 +1,29 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 4 } + shape_signature { dim: -1 dim: 3 dim: 4 } +} +operand { + name: "reduction_indices" + type: INT32 + shape { dim: 1 } + filler { tag: "explicit" arg: "1" } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 4 } + shape_signature { dim: -1 dim: 4 } +} +operation { + type: "Sum" + sum_options { + keep_dims: false + } + input: "ifm" + input: "reduction_indices" + output: "ofm" +} +input: "ifm" +output: "ofm" diff --git a/runtime/contrib/.clang-format b/runtime/contrib/.clang-format new file mode 120000 index 0000000..f761fe4 --- /dev/null +++ b/runtime/contrib/.clang-format @@ -0,0 +1 @@ +../../.clang-format.8 \ 
No newline at end of file diff --git a/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h b/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h index 69dfcc7..3d71f89 100644 --- a/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h +++ b/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h @@ -47,12 +47,11 @@ extern "C" { } \ } while (0) #else // __TIZEN__ -#define LEVEL_TO_STR(level) \ - (((level) == ERROR) \ - ? "ERROR" \ - : ((level) == WARNING) \ - ? "WARNING" \ - : ((level) == INFO) ? "INFO" : ((level) == DEBUG) ? "DEBUG" : "DEFAULT") +#define LEVEL_TO_STR(level) \ + (((level) == ERROR) ? "ERROR" \ + : ((level) == WARNING) \ + ? "WARNING" \ + : ((level) == INFO) ? "INFO" : ((level) == DEBUG) ? "DEBUG" : "DEFAULT") #define TFLITE_NATIVE_LOG(log_level, format, args...) \ do \ { \ diff --git a/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h b/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h index b099ba9..2fb98cc 100644 --- a/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h +++ b/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h @@ -26,7 +26,8 @@ extern "C" { #endif /*__cplusplus*/ -typedef enum { +typedef enum +{ /** 32-bit signed integer. */ INT32 = 1, diff --git a/runtime/contrib/android/api/Prebuilt.mk b/runtime/contrib/android/api/Prebuilt.mk index 7d9f565..c00c7d3 100644 --- a/runtime/contrib/android/api/Prebuilt.mk +++ b/runtime/contrib/android/api/Prebuilt.mk @@ -21,14 +21,6 @@ LOCAL_SRC_FILES := \ $(ONERT_PREBUILT_LIB_DIR)/libtflite_loader.so include $(PREBUILT_SHARED_LIBRARY) -# libtensorflowlite_jni -include $(CLEAR_VARS) -LOCAL_MODULE := tensorflowlite_jni -PREBUILT_LIB += tensorflowlite_jni -LOCAL_SRC_FILES := \ - $(ONERT_PREBUILT_LIB_DIR)/libtensorflowlite_jni.so -include $(PREBUILT_SHARED_LIBRARY) - # libnnfw include $(CLEAR_VARS) LOCAL_MODULE := nnfw-dev diff --git a/runtime/contrib/android/api/build.gradle b/runtime/contrib/android/api/build.gradle index d383b2d..6bb7a56 100644 --- a/runtime/contrib/android/api/build.gradle +++ b/runtime/contrib/android/api/build.gradle @@ -8,7 +8,7 @@ android { minSdkVersion 26 targetSdkVersion 29 versionCode 1 - versionName "1.11.1" + versionName "1.12.0" externalNativeBuild { ndkBuild { diff --git a/runtime/contrib/android/api/src/main/native/onert-native-api.cpp b/runtime/contrib/android/api/src/main/native/onert-native-api.cpp index 209264d..72e73be 100644 --- a/runtime/contrib/android/api/src/main/native/onert-native-api.cpp +++ b/runtime/contrib/android/api/src/main/native/onert-native-api.cpp @@ -52,7 +52,7 @@ JNIEXPORT void JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeCloseSe } JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeLoadModelFromFile( - JNIEnv *env, jobject, jlong handle, jstring jnnpkg_path) + JNIEnv *env, jobject, jlong handle, jstring jnnpkg_path) { if (jni_helper::verifyHandle(handle) == JNI_FALSE) return JNI_FALSE; @@ -103,7 +103,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeRun } JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetInput( - JNIEnv *env, jobject, jlong handle, jint jindex, jint jtype, jobject jbuf, jint jbufsize) + JNIEnv *env, jobject, jlong handle, jint jindex, jint jtype, jobject jbuf, jint jbufsize) { if (jni_helper::verifyHandle(handle) == JNI_FALSE) return JNI_FALSE; @@ -129,7 +129,7 @@ JNIEXPORT jboolean JNICALL 
Java_com_samsung_onert_NativeSessionWrapper_nativeSet } JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetOutput( - JNIEnv *env, jobject, jlong handle, jint jindex, jint jtype, jobject jbuf, jint jbufsize) + JNIEnv *env, jobject, jlong handle, jint jindex, jint jtype, jobject jbuf, jint jbufsize) { if (jni_helper::verifyHandle(handle) == JNI_FALSE) return JNI_FALSE; @@ -156,7 +156,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet } JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetInputLayout( - JNIEnv *, jobject, jlong handle, jint jindex, jint jlayout) + JNIEnv *, jobject, jlong handle, jint jindex, jint jlayout) { if (jni_helper::verifyHandle(handle) == JNI_FALSE) return JNI_FALSE; @@ -178,7 +178,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet } JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetOutputLayout( - JNIEnv *, jobject, jlong handle, jint jindex, jint jlayout) + JNIEnv *, jobject, jlong handle, jint jindex, jint jlayout) { if (jni_helper::verifyHandle(handle) == JNI_FALSE) return JNI_FALSE; @@ -234,7 +234,7 @@ JNIEXPORT jint JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetOutp } JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetAvailableBackends( - JNIEnv *env, jobject, jlong handle, jstring jbackends) + JNIEnv *env, jobject, jlong handle, jstring jbackends) { if (jni_helper::verifyHandle(handle) == JNI_FALSE) return JNI_FALSE; @@ -255,7 +255,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet } JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetInputTensorInfo( - JNIEnv *env, jobject, jlong handle, jint jindex, jobject jinfo) + JNIEnv *env, jobject, jlong handle, jint jindex, jobject jinfo) { if (jni_helper::verifyHandle(handle) == JNI_FALSE) return JNI_FALSE; @@ -277,7 +277,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGet } JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetOutputTensorInfo( - JNIEnv *env, jobject, jlong handle, jint jindex, jobject jinfo) + JNIEnv *env, jobject, jlong handle, jint jindex, jobject jinfo) { if (jni_helper::verifyHandle(handle) == JNI_FALSE) return JNI_FALSE; diff --git a/runtime/contrib/android/api/src/main/native/onert-native-api.h b/runtime/contrib/android/api/src/main/native/onert-native-api.h index 13768d4..7997530 100644 --- a/runtime/contrib/android/api/src/main/native/onert-native-api.h +++ b/runtime/contrib/android/api/src/main/native/onert-native-api.h @@ -46,7 +46,7 @@ JNIEXPORT void JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeCloseSe * Signature: (JLjava/lang/String;)Z */ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeLoadModelFromFile( - JNIEnv *, jobject, jlong, jstring); + JNIEnv *, jobject, jlong, jstring); /* * Class: com_samsung_onert_NativeSessionWrapper @@ -71,7 +71,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeRun * Signature: (JIILjava/nio/ByteBuffer;I)Z */ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetInput( - JNIEnv *, jobject, jlong, jint, jint, jobject, jint); + JNIEnv *, jobject, jlong, jint, jint, jobject, jint); /* * Class: com_samsung_onert_NativeSessionWrapper @@ -79,7 +79,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet * Signature: 
(JIILjava/nio/ByteBuffer;I)Z */ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetOutput( - JNIEnv *, jobject, jlong, jint, jint, jobject, jint); + JNIEnv *, jobject, jlong, jint, jint, jobject, jint); /* * Class: com_samsung_onert_NativeSessionWrapper @@ -87,7 +87,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet * Signature: (JII)Z */ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetInputLayout( - JNIEnv *, jobject, jlong, jint, jint); + JNIEnv *, jobject, jlong, jint, jint); /* * Class: com_samsung_onert_NativeSessionWrapper @@ -95,7 +95,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSet * Signature: (JII)Z */ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetOutputLayout( - JNIEnv *, jobject, jlong, jint, jint); + JNIEnv *, jobject, jlong, jint, jint); /* * Class: com_samsung_onert_NativeSessionWrapper @@ -121,7 +121,7 @@ JNIEXPORT jint JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetOutp * Signature: (JILcom/samsung/onert/NativeSessionWrapper/InternalTensorInfo;)Z */ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetInputTensorInfo( - JNIEnv *, jobject, jlong, jint, jobject); + JNIEnv *, jobject, jlong, jint, jobject); /* * Class: com_samsung_onert_NativeSessionWrapper @@ -129,7 +129,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGet * Signature: (JILcom/samsung/onert/NativeSessionWrapper/InternalTensorInfo;)Z */ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetOutputTensorInfo( - JNIEnv *, jobject, jlong, jint, jobject); + JNIEnv *, jobject, jlong, jint, jobject); /* * Class: com_samsung_onert_NativeSessionWrapper @@ -137,7 +137,7 @@ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGet * Signature: (JLjava/lang/String;)Z */ JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetAvailableBackends( - JNIEnv *, jobject, jlong, jstring); + JNIEnv *, jobject, jlong, jstring); #ifdef __cplusplus } diff --git a/runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp b/runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp index 4b0e439..8df179a 100644 --- a/runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp +++ b/runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp @@ -173,7 +173,7 @@ inline void runBenchmark(JNIEnv *env, jobject thisObj, Activity &act) } JNIEXPORT void JNICALL Java_com_ndk_tflbench_MainActivity_runInterpreterBenchmark( - JNIEnv *env, jobject thisObj, jobject model_buffer) + JNIEnv *env, jobject thisObj, jobject model_buffer) { setTitle(env, thisObj, "Running Interpreter Benchmark"); diff --git a/runtime/contrib/android_tflite/builtin_ops_jni.cc b/runtime/contrib/android_tflite/builtin_ops_jni.cc index 5770701..597f11a 100644 --- a/runtime/contrib/android_tflite/builtin_ops_jni.cc +++ b/runtime/contrib/android_tflite/builtin_ops_jni.cc @@ -24,7 +24,7 @@ namespace tflite std::unique_ptr CreateOpResolver() { return std::unique_ptr<::nnfw::tflite::BuiltinOpResolver>( - new ::nnfw::tflite::BuiltinOpResolver()); + new ::nnfw::tflite::BuiltinOpResolver()); } } // namespace tflite diff --git a/runtime/contrib/heap_trace/src/cl_create_buffer_stub.cc b/runtime/contrib/heap_trace/src/cl_create_buffer_stub.cc index d9d2700..2affbe0 100644 --- a/runtime/contrib/heap_trace/src/cl_create_buffer_stub.cc +++ b/runtime/contrib/heap_trace/src/cl_create_buffer_stub.cc 
@@ -31,8 +31,8 @@ cl_mem clCreateBuffer(cl_context context, cl_mem_flags flags, size_t size, void static auto isOriginalFunctionCallSuccessful = [](cl_mem result) -> bool { return result; }; static auto originalFunction = - findFunctionByName( - "clCreateBuffer"); + findFunctionByName( + "clCreateBuffer"); cl_mem result = originalFunction(context, flags, size, host_ptr, errcode_ret); if (isOriginalFunctionCallSuccessful(result) && !Trace::Guard{}.isActive()) { diff --git a/runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.h b/runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.h index 89797ad..3186c7f 100644 --- a/runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.h +++ b/runtime/contrib/heap_trace/src/memory_pool_for_symbol_searcher_internals.h @@ -60,7 +60,7 @@ private: { uint8_t *ptr_to_the_free_space_after_allocation = _ptr_to_free_space_start + size; size_t size_of_reserved_space_after_allocation = - ptr_to_the_free_space_after_allocation - _buffer; + ptr_to_the_free_space_after_allocation - _buffer; if (size_of_reserved_space_after_allocation >= MAX_SIZE) { return false; diff --git a/runtime/contrib/heap_trace/src/trace.cc b/runtime/contrib/heap_trace/src/trace.cc index 020aeb9..39a0c46 100644 --- a/runtime/contrib/heap_trace/src/trace.cc +++ b/runtime/contrib/heap_trace/src/trace.cc @@ -72,7 +72,7 @@ void Trace::logAllocationEvent(cl_mem memory_ptr, size_t size_of_allocated_space if (found_memory_space_description == _memory_in_use_on_gpu.end()) { _memory_in_use_on_gpu.insert( - std::make_pair(memory_ptr, MemoryTraits(1, size_of_allocated_space_in_bytes))); + std::make_pair(memory_ptr, MemoryTraits(1, size_of_allocated_space_in_bytes))); _total_allocated_bytes_on_gpu += size_of_allocated_space_in_bytes; if (_peak_heap_usage_on_gpu < _total_allocated_bytes_on_gpu - _total_deallocated_bytes_on_gpu) { diff --git a/runtime/contrib/heap_trace/src/trace.h b/runtime/contrib/heap_trace/src/trace.h index 647c51d..33e67e5 100644 --- a/runtime/contrib/heap_trace/src/trace.h +++ b/runtime/contrib/heap_trace/src/trace.h @@ -31,7 +31,7 @@ class Trace size_t size; MemoryTraits(size_t init_counter_value, size_t size_of_allocated_memory) - : ref_counter(init_counter_value), size(size_of_allocated_memory) + : ref_counter(init_counter_value), size(size_of_allocated_memory) { } }; diff --git a/runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc b/runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc index 49b8fd9..a5700b2 100644 --- a/runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc +++ b/runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc @@ -94,9 +94,9 @@ TEST_F(ClReleaseMemObjectStub, must_log_deallocation_event_only_if_reference_cou clReleaseMemObject(mem); GlobalTrace.reset(); ASSERT_STREQ( - getContentOfFile("./cl_release_mem_object_interception_test.log").c_str(), - "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn " - "GPU - Peak mem usage: 1024 B, Total allocated: 1024 B, Total deallocated: 1024 B\n"); + getContentOfFile("./cl_release_mem_object_interception_test.log").c_str(), + "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn " + "GPU - Peak mem usage: 1024 B, Total allocated: 1024 B, Total deallocated: 1024 B\n"); } TEST_F(ClReleaseMemObjectStub, must_not_log_deallocation_event_if_original_function_failed) diff --git 
a/runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc b/runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc index ea3eb82..182f52c 100644 --- a/runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc +++ b/runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc @@ -87,8 +87,8 @@ TEST_F(MallocStub, should_allocate_memory_from_pool_for_symbol_searcher_internal } TEST_F( - MallocStub, - should_not_influence_on_trace_results_even_if_orignal_function_return_any_not_null_ptr_when_incoming_size_is_zero) + MallocStub, + should_not_influence_on_trace_results_even_if_orignal_function_return_any_not_null_ptr_when_incoming_size_is_zero) { void *p = malloc(0); free(p); diff --git a/runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc b/runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc index 59660fa..e81c5dc 100644 --- a/runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc +++ b/runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc @@ -86,16 +86,16 @@ TEST_F(ReallocStub, should_work_as_malloc_when_incoming_ptr_is_equal_to_nullptr) ASSERT_TRUE(p); ASSERT_STREQ( - getContentOfFile("./realloc_interception_test.log").c_str(), - "On CPU - Peak heap usage: 1024 B, Total allocated: 1024 B, Total deallocated: 0 B\nOn " - "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n"); + getContentOfFile("./realloc_interception_test.log").c_str(), + "On CPU - Peak heap usage: 1024 B, Total allocated: 1024 B, Total deallocated: 0 B\nOn " + "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n"); free(p); } TEST_F( - ReallocStub, - should_not_influence_on_trace_results_even_if_orignal_function_return_any_not_null_ptr_when_incoming_size_is_zero_and_ptr_is_null) + ReallocStub, + should_not_influence_on_trace_results_even_if_orignal_function_return_any_not_null_ptr_when_incoming_size_is_zero_and_ptr_is_null) { void *p = realloc(nullptr, 0); free(p); diff --git a/runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc b/runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc index 59fdeed..9ed9331 100644 --- a/runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc +++ b/runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc @@ -70,7 +70,7 @@ TEST_F(SymbolSearcher, fs::path pathToTestSample2 = exePath() / "libtest_sample2.so"; void *test_sample2_handle = dlopen(pathToTestSample2.c_str(), RTLD_NOW); void *func_addr_in_test_sample2 = - dlsym(test_sample2_handle, "funcWhichCallFuncDefinedInTestSample3"); + dlsym(test_sample2_handle, "funcWhichCallFuncDefinedInTestSample3"); ASSERT_TRUE(test_sample2_handle); ASSERT_TRUE((void *)funcDefinedInTestSample3_ButWrappedInTestSample1 != diff --git a/runtime/contrib/heap_trace/tests/src/trace_test.cc b/runtime/contrib/heap_trace/tests/src/trace_test.cc index 1cf4c53..4f359bb 100644 --- a/runtime/contrib/heap_trace/tests/src/trace_test.cc +++ b/runtime/contrib/heap_trace/tests/src/trace_test.cc @@ -114,15 +114,15 @@ TEST_F(Trace, should_work_correctly_in_multithreaded_environment) GlobalTrace.reset(); string thisShouldBeInLogFile = - "Total allocated: " + - to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + - " B, Total deallocated: " + - to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + " B\n"; + "Total allocated: " + + to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + + " B, Total deallocated: " + + to_string(numberOfThreads / 
2 * numberOfEmulations * numberOfBytesPerOneEmulation) + " B\n"; string andThisToo = - "Total allocated: " + - to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + - " B, Total deallocated: " + - to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + " B\n"; + "Total allocated: " + + to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + + " B, Total deallocated: " + + to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + " B\n"; ASSERT_TRUE(getContentOfFile("./trace_test.log").find(thisShouldBeInLogFile) != string::npos); ASSERT_TRUE(getContentOfFile("./trace_test.log").find(andThisToo) != string::npos); } diff --git a/runtime/contrib/labs/jniacl/src/jniacl_main.cc b/runtime/contrib/labs/jniacl/src/jniacl_main.cc index 01b9289..1a34aa7 100644 --- a/runtime/contrib/labs/jniacl/src/jniacl_main.cc +++ b/runtime/contrib/labs/jniacl/src/jniacl_main.cc @@ -36,12 +36,13 @@ Java_com_samsung_testaclexec_ActivityMain_RunACLJNI(JNIEnv *env, jobject) TargetHint target_hint = TargetHint::OPENCL; bool autoinc = true; - graph << target_hint << Tensor(TensorInfo(TensorShape(3U, 3U, 1U, 1U), 1, DataType::F32), - std::unique_ptr(new InputAccessor(autoinc))) + graph << target_hint + << Tensor(TensorInfo(TensorShape(3U, 3U, 1U, 1U), 1, DataType::F32), + std::unique_ptr(new InputAccessor(autoinc))) << arm_compute::graph::ConvolutionLayer( - 3U, 3U, 1U, std::unique_ptr(new WeightAccessor(autoinc)), - std::unique_ptr(new BiasAccessor()), - arm_compute::PadStrideInfo(1, 1, 0, 0)) + 3U, 3U, 1U, std::unique_ptr(new WeightAccessor(autoinc)), + std::unique_ptr(new BiasAccessor()), + arm_compute::PadStrideInfo(1, 1, 0, 0)) << Tensor(std::unique_ptr(new OutputAccessor())); graph.run(); diff --git a/runtime/contrib/labs/opencl_test/src/opencl_test.cc b/runtime/contrib/labs/opencl_test/src/opencl_test.cc index 1faa914..6838183 100644 --- a/runtime/contrib/labs/opencl_test/src/opencl_test.cc +++ b/runtime/contrib/labs/opencl_test/src/opencl_test.cc @@ -199,7 +199,7 @@ void checkContextMem() try { auto kernel_functor = cl::KernelFunctor( - gpu.program_, "memory_test"); // name should be same as cl function name + gpu.program_, "memory_test"); // name should be same as cl function name // create a queue per device and queue a kernel job @@ -256,7 +256,7 @@ void printHelp() std::cout << "opencl information: \n\n"; std::cout << "\t -h : help\n"; std::cout - << "\t -g : print if memory map is shared among devices in GPU (in default platform)\n\n"; + << "\t -g : print if memory map is shared among devices in GPU (in default platform)\n\n"; std::cout << "\t -s : test for synchronized work by two devices in a GPU\n\n"; } @@ -270,7 +270,7 @@ void printHelp() int kernel_idx[MAX_DEVICE_NUM]; unsigned char kernel_completed = 0x00; // bit 0 = 1 means kernel by device[0] was completed. 
unsigned char - kernel_completed_flag; // if comparing kernel_completed with this var, all kernels are completed + kernel_completed_flag; // if comparing kernel_completed with this var, all kernels are completed int device_num; std::mutex kernel_complete_handler_mutex; @@ -319,7 +319,7 @@ void testSync() try { auto kernel_functor = cl::KernelFunctor( - gpu.program_, "test"); // name should be same as cl function name + gpu.program_, "test"); // name should be same as cl function name // variable init cl::Event ev[MAX_DEVICE_NUM]; diff --git a/runtime/contrib/labs/tflite_examples/src/conv.cpp b/runtime/contrib/labs/tflite_examples/src/conv.cpp index e8542c3..0b5f946 100644 --- a/runtime/contrib/labs/tflite_examples/src/conv.cpp +++ b/runtime/contrib/labs/tflite_examples/src/conv.cpp @@ -217,7 +217,7 @@ int main(int argc, char **argv) // Configure Filter const uint32_t kernel_size = KER_N * KER_C * KER_H * KER_W; float kernel_data[kernel_size] = { - 0.0f, + 0.0f, }; // Fill kernel data in NHWC order @@ -243,13 +243,13 @@ int main(int argc, char **argv) } interp.SetTensorParametersReadOnly( - 2, kTfLiteFloat32 /* type */, "filter" /* name */, {KER_N, KER_H, KER_W, KER_C} /* dims */, - quantization, reinterpret_cast(kernel_data), sizeof(kernel_data)); + 2, kTfLiteFloat32 /* type */, "filter" /* name */, {KER_N, KER_H, KER_W, KER_C} /* dims */, + quantization, reinterpret_cast(kernel_data), sizeof(kernel_data)); // Configure Bias const uint32_t bias_size = bias.size(); float bias_data[bias_size] = { - 0.0f, + 0.0f, }; // Fill bias data diff --git a/runtime/contrib/style_transfer_app/src/bitmap_helper.cc b/runtime/contrib/style_transfer_app/src/bitmap_helper.cc index 6211ea4..0f687b2 100644 --- a/runtime/contrib/style_transfer_app/src/bitmap_helper.cc +++ b/runtime/contrib/style_transfer_app/src/bitmap_helper.cc @@ -49,10 +49,10 @@ unsigned char *BitmapHelper::createBitmapFileHeader(int height, int width, int p int fileSize = fileHeaderSize + infoHeaderSize + (bytesPerPixel * width + paddingSize) * height; static unsigned char fileHeader[] = { - 0, 0, /// signature - 0, 0, 0, 0, /// image file size in bytes - 0, 0, 0, 0, /// reserved - 0, 0, 0, 0, /// start of pixel array + 0, 0, /// signature + 0, 0, 0, 0, /// image file size in bytes + 0, 0, 0, 0, /// reserved + 0, 0, 0, 0, /// start of pixel array }; fileHeader[0] = (unsigned char)('B'); @@ -69,17 +69,17 @@ unsigned char *BitmapHelper::createBitmapFileHeader(int height, int width, int p unsigned char *BitmapHelper::createBitmapInfoHeader(int height, int width) { static unsigned char infoHeader[] = { - 0, 0, 0, 0, /// header size - 0, 0, 0, 0, /// image width - 0, 0, 0, 0, /// image height - 0, 0, /// number of color planes - 0, 0, /// bits per pixel - 0, 0, 0, 0, /// compression - 0, 0, 0, 0, /// image size - 0, 0, 0, 0, /// horizontal resolution - 0, 0, 0, 0, /// vertical resolution - 0, 0, 0, 0, /// colors in color table - 0, 0, 0, 0, /// important color count + 0, 0, 0, 0, /// header size + 0, 0, 0, 0, /// image width + 0, 0, 0, 0, /// image height + 0, 0, /// number of color planes + 0, 0, /// bits per pixel + 0, 0, 0, 0, /// compression + 0, 0, 0, 0, /// image size + 0, 0, 0, 0, /// horizontal resolution + 0, 0, 0, 0, /// vertical resolution + 0, 0, 0, 0, /// colors in color table + 0, 0, 0, 0, /// important color count }; // Minus height means top to bottom write @@ -191,7 +191,7 @@ int BitmapHelper::read_bmp(const std::string &input_bmp_name, std::vector // Decode image, allocating tensor once the image size is known const uint8_t 
*bmp_pixels = &img_bytes[header_size]; std::vector bmp = - decode_bmp(bmp_pixels, row_size, width, abs(height), channels, top_down); + decode_bmp(bmp_pixels, row_size, width, abs(height), channels, top_down); for (uint32_t j = 0; j < bmp.size(); j++) { input.push_back(static_cast(bmp[j])); diff --git a/runtime/contrib/style_transfer_app/src/jpeg_helper.cc b/runtime/contrib/style_transfer_app/src/jpeg_helper.cc index ed5ae25..1554524 100644 --- a/runtime/contrib/style_transfer_app/src/jpeg_helper.cc +++ b/runtime/contrib/style_transfer_app/src/jpeg_helper.cc @@ -26,7 +26,7 @@ namespace StyleTransferApp { JpegHelper::JpegHelper(int bytes_per_pixel, J_COLOR_SPACE color_space) - : _bytes_per_pixel(bytes_per_pixel), _color_space(color_space) + : _bytes_per_pixel(bytes_per_pixel), _color_space(color_space) { // DO NOTHING } diff --git a/runtime/contrib/style_transfer_app/src/style_transfer_app.cc b/runtime/contrib/style_transfer_app/src/style_transfer_app.cc index eed0c42..ab8735d 100644 --- a/runtime/contrib/style_transfer_app/src/style_transfer_app.cc +++ b/runtime/contrib/style_transfer_app/src/style_transfer_app.cc @@ -68,10 +68,10 @@ uint64_t num_elems(const nnfw_tensorinfo *ti) NNFW_STATUS resolve_op_backend(nnfw_session *session) { static std::unordered_map operation_map = { - {"TRANSPOSE_CONV", "OP_BACKEND_TransposeConv"}, {"CONV_2D", "OP_BACKEND_Conv2D"}, - {"DEPTHWISE_CONV_2D", "OP_BACKEND_DepthwiseConv2D"}, {"MEAN", "OP_BACKEND_Mean"}, - {"AVERAGE_POOL_2D", "OP_BACKEND_AvgPool2D"}, {"MAX_POOL_2D", "OP_BACKEND_MaxPool2D"}, - {"INSTANCE_NORM", "OP_BACKEND_InstanceNorm"}, {"ADD", "OP_BACKEND_Add"}}; + {"TRANSPOSE_CONV", "OP_BACKEND_TransposeConv"}, {"CONV_2D", "OP_BACKEND_Conv2D"}, + {"DEPTHWISE_CONV_2D", "OP_BACKEND_DepthwiseConv2D"}, {"MEAN", "OP_BACKEND_Mean"}, + {"AVERAGE_POOL_2D", "OP_BACKEND_AvgPool2D"}, {"MAX_POOL_2D", "OP_BACKEND_MaxPool2D"}, + {"INSTANCE_NORM", "OP_BACKEND_InstanceNorm"}, {"ADD", "OP_BACKEND_Add"}}; for (auto i : operation_map) { diff --git a/runtime/contrib/tflite_classify/src/ImageClassifier.cc b/runtime/contrib/tflite_classify/src/ImageClassifier.cc index fae4f06..1d92d6c 100644 --- a/runtime/contrib/tflite_classify/src/ImageClassifier.cc +++ b/runtime/contrib/tflite_classify/src/ImageClassifier.cc @@ -24,9 +24,9 @@ ImageClassifier::ImageClassifier(const std::string &model_file, const std::strin const int input_size, const int image_mean, const int image_std, const std::string &input_name, const std::string &output_name, const bool use_nnapi) - : _inference(new InferenceInterface(model_file, use_nnapi)), _input_size(input_size), - _image_mean(image_mean), _image_std(image_std), _input_name(input_name), - _output_name(output_name) + : _inference(new InferenceInterface(model_file, use_nnapi)), _input_size(input_size), + _image_mean(image_mean), _image_std(image_std), _input_name(input_name), + _output_name(output_name) { // Load label std::ifstream label_stream(label_file.c_str()); diff --git a/runtime/contrib/tflite_classify/src/InferenceInterface.cc b/runtime/contrib/tflite_classify/src/InferenceInterface.cc index 1609434..562ff2a 100644 --- a/runtime/contrib/tflite_classify/src/InferenceInterface.cc +++ b/runtime/contrib/tflite_classify/src/InferenceInterface.cc @@ -20,7 +20,7 @@ using namespace tflite; using namespace tflite::ops::builtin; InferenceInterface::InferenceInterface(const std::string &model_file, const bool use_nnapi) - : _interpreter(nullptr), _model(nullptr), _sess(nullptr) + : _interpreter(nullptr), _model(nullptr), _sess(nullptr) { // Load 
model StderrReporter error_reporter; diff --git a/runtime/contrib/tflite_classify/src/tflite_classify.cc b/runtime/contrib/tflite_classify/src/tflite_classify.cc index 51758e2..7bed778 100644 --- a/runtime/contrib/tflite_classify/src/tflite_classify.cc +++ b/runtime/contrib/tflite_classify/src/tflite_classify.cc @@ -60,9 +60,8 @@ int main(const int argc, char **argv) } // Create ImageClassifier - std::unique_ptr classifier( - new ImageClassifier(MODEL_FILE, LABEL_FILE, INPUT_SIZE, IMAGE_MEAN, IMAGE_STD, INPUT_NAME, - OUTPUT_NAME, use_nnapi)); + std::unique_ptr classifier(new ImageClassifier( + MODEL_FILE, LABEL_FILE, INPUT_SIZE, IMAGE_MEAN, IMAGE_STD, INPUT_NAME, OUTPUT_NAME, use_nnapi)); // Cam setting cv::VideoCapture cap(0); diff --git a/runtime/libs/.clang-format b/runtime/libs/.clang-format new file mode 120000 index 0000000..f761fe4 --- /dev/null +++ b/runtime/libs/.clang-format @@ -0,0 +1 @@ +../../.clang-format.8 \ No newline at end of file diff --git a/runtime/libs/benchmark/src/CsvWriter.cpp b/runtime/libs/benchmark/src/CsvWriter.cpp index 5f47c65..6233129 100644 --- a/runtime/libs/benchmark/src/CsvWriter.cpp +++ b/runtime/libs/benchmark/src/CsvWriter.cpp @@ -35,7 +35,7 @@ CsvWriter::CsvWriter(const std::string &csv_filename) : CsvWriter(csv_filename, } CsvWriter::CsvWriter(const std::string &csv_filename, const std::vector &header) - : _ofs(csv_filename), _header_size(header.size()), _col_idx(0), _row_idx(0) + : _ofs(csv_filename), _header_size(header.size()), _col_idx(0), _row_idx(0) { assert(csv_filename.empty() == false); assert(header.size() != 0); diff --git a/runtime/libs/benchmark/src/MemoryPoller.cpp b/runtime/libs/benchmark/src/MemoryPoller.cpp index 050b5b1..2f3c855 100644 --- a/runtime/libs/benchmark/src/MemoryPoller.cpp +++ b/runtime/libs/benchmark/src/MemoryPoller.cpp @@ -27,7 +27,7 @@ namespace benchmark { MemoryPoller::MemoryPoller(std::chrono::milliseconds duration, bool gpu_poll) - : _duration(duration), _run(false), _term(false), _gpu_poll(gpu_poll) + : _duration(duration), _run(false), _term(false), _gpu_poll(gpu_poll) { if (prepareMemoryPolling() == false) throw std::runtime_error("failed to prepare memory pooling"); diff --git a/runtime/libs/benchmark/src/Result.cpp b/runtime/libs/benchmark/src/Result.cpp index e6cafb9..0356687 100644 --- a/runtime/libs/benchmark/src/Result.cpp +++ b/runtime/libs/benchmark/src/Result.cpp @@ -77,9 +77,9 @@ uint32_t averageMemoryKb(const benchmark::Phase &phase, int type) return average(phase.memory[type]); } -uint32_t peakMemory(const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE] - [benchmark::MemoryType::END_OF_MEM_TYPE], - int type) +uint32_t peakMemory( + const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE][benchmark::MemoryType::END_OF_MEM_TYPE], + int type) { using namespace benchmark; // tricky. 
handle WARMUP as EXECUTE @@ -88,7 +88,7 @@ uint32_t peakMemory(const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE] } void printResultTime( - const double time[benchmark::PhaseEnum::END_OF_PHASE][benchmark::FigureType::END_OF_FIG_TYPE]) + const double time[benchmark::PhaseEnum::END_OF_PHASE][benchmark::FigureType::END_OF_FIG_TYPE]) { using namespace benchmark; @@ -119,8 +119,8 @@ void printResultTime( std::cout << "===================================" << std::endl; } -void printResultMemory(const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE] - [benchmark::MemoryType::END_OF_MEM_TYPE]) +void printResultMemory( + const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE][benchmark::MemoryType::END_OF_MEM_TYPE]) { using namespace benchmark; diff --git a/runtime/libs/misc/include/misc/feature/Index.h b/runtime/libs/misc/include/misc/feature/Index.h index a361d8d..09d65a5 100644 --- a/runtime/libs/misc/include/misc/feature/Index.h +++ b/runtime/libs/misc/include/misc/feature/Index.h @@ -62,7 +62,7 @@ public: * @param[in] col The width index */ Index(int32_t batch, int32_t ch, int32_t row, int32_t col) - : _batch{batch}, _ch{ch}, _row{row}, _col{col} + : _batch{batch}, _ch{ch}, _row{row}, _col{col} { // DO NOTHING } diff --git a/runtime/libs/misc/include/misc/feature/Shape.h b/runtime/libs/misc/include/misc/feature/Shape.h index 09881f5..2c31b45 100644 --- a/runtime/libs/misc/include/misc/feature/Shape.h +++ b/runtime/libs/misc/include/misc/feature/Shape.h @@ -64,7 +64,7 @@ struct Shape * @param[in] width The width value */ Shape(int32_t batch, int32_t depth, int32_t height, int32_t width) - : N{batch}, C{depth}, H{height}, W{width} + : N{batch}, C{depth}, H{height}, W{width} { // DO NOTHING } diff --git a/runtime/libs/misc/include/misc/kernel/Shape.h b/runtime/libs/misc/include/misc/kernel/Shape.h index 27d6a8b..176db0a 100644 --- a/runtime/libs/misc/include/misc/kernel/Shape.h +++ b/runtime/libs/misc/include/misc/kernel/Shape.h @@ -55,7 +55,7 @@ struct Shape * @param[in] width The width index */ Shape(int32_t count, int32_t depth, int32_t height, int32_t width) - : N{count}, C{depth}, H{height}, W{width} + : N{count}, C{depth}, H{height}, W{width} { // DO NOTHING } diff --git a/runtime/libs/misc/include/misc/tensor/Object.h b/runtime/libs/misc/include/misc/tensor/Object.h index cba4f1b..15ad6da 100644 --- a/runtime/libs/misc/include/misc/tensor/Object.h +++ b/runtime/libs/misc/include/misc/tensor/Object.h @@ -74,9 +74,8 @@ public: _values.resize(_shape.dim(0) * _stride.at(0)); // Set 'value' - iterate(_shape) << [this, &fn](const Index &index) { - _values.at(_stride.offset(index)) = fn(_shape, index); - }; + iterate(_shape) << + [this, &fn](const Index &index) { _values.at(_stride.offset(index)) = fn(_shape, index); }; } } diff --git a/runtime/libs/misc/include/misc/tensor/Zipper.h b/runtime/libs/misc/include/misc/tensor/Zipper.h index 8f0ec4a..b1ca3d0 100644 --- a/runtime/libs/misc/include/misc/tensor/Zipper.h +++ b/runtime/libs/misc/include/misc/tensor/Zipper.h @@ -48,7 +48,7 @@ public: * @param[in] rhs @c Reader object of a tensor */ Zipper(const Shape &shape, const Reader &lhs, const Reader &rhs) - : _shape{shape}, _lhs{lhs}, _rhs{rhs} + : _shape{shape}, _lhs{lhs}, _rhs{rhs} { // DO NOTHING } @@ -63,7 +63,7 @@ public: template void zip(Callable cb) const { iterate(_shape) << - [this, &cb](const Index &index) { cb(index, _lhs.at(index), _rhs.at(index)); }; + [this, &cb](const Index &index) { cb(index, _lhs.at(index), _rhs.at(index)); }; } private: diff --git 
a/runtime/libs/misc/src/tensor/Comparator.cpp b/runtime/libs/misc/src/tensor/Comparator.cpp index 80a18c1..5fcf38c 100644 --- a/runtime/libs/misc/src/tensor/Comparator.cpp +++ b/runtime/libs/misc/src/tensor/Comparator.cpp @@ -33,18 +33,18 @@ std::vector> Comparator::compare(const Shape &shape, const Reader> res; zip(shape, expected, obtained) << - [&](const Index &index, float expected_value, float obtained_value) { - if (!_compare_fn(expected_value, obtained_value)) - { - res.emplace_back(index, expected_value, obtained_value); - } - - // Update max_diff_index, if necessary - if (observer != nullptr) - { - observer->notify(index, expected_value, obtained_value); - } - }; + [&](const Index &index, float expected_value, float obtained_value) { + if (!_compare_fn(expected_value, obtained_value)) + { + res.emplace_back(index, expected_value, obtained_value); + } + + // Update max_diff_index, if necessary + if (observer != nullptr) + { + observer->notify(index, expected_value, obtained_value); + } + }; return res; } diff --git a/runtime/libs/nnapi/include/NeuralNetworksShim.h b/runtime/libs/nnapi/include/NeuralNetworksShim.h index 9cf52aa..2e8ccdb 100644 --- a/runtime/libs/nnapi/include/NeuralNetworksShim.h +++ b/runtime/libs/nnapi/include/NeuralNetworksShim.h @@ -225,8 +225,8 @@ inline int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel *model, int * @return ANEURALNETWORKS_NO_ERROR if successful. */ inline int ANeuralNetworksModel_setOperandSymmPerChannelQuantParams( - ANeuralNetworksModel *model, int32_t index, - const ANeuralNetworksSymmPerChannelQuantParams *channelQuant) + ANeuralNetworksModel *model, int32_t index, + const ANeuralNetworksSymmPerChannelQuantParams *channelQuant) { LOAD_FUNCTION(ANeuralNetworksModel_setOperandSymmPerChannelQuantParams); EXECUTE_FUNCTION_RETURN(model, index, channelQuant); @@ -1218,7 +1218,7 @@ inline int ANeuralNetworksModel_setOperandExtensionData(ANeuralNetworksModel *mo LOAD_FUNCTION(ANeuralNetworksModel_setOperandExtensionData); EXECUTE_FUNCTION_RETURN(model, index, data, length); } - +#if __ANDROID_API__ >= 30 /** * Create a {@link ANeuralNetworksMemoryDesc} with no properties. 
* @@ -1548,7 +1548,7 @@ inline int ANeuralNetworksMemory_copy(const ANeuralNetworksMemory *src, LOAD_FUNCTION(ANeuralNetworksMemory_copy); EXECUTE_FUNCTION_RETURN(src, dst); } - +#endif // __ANDROID_API__ >= 30 /**/ #endif // __NEURAL_NETWORKS_SHIM_H__ diff --git a/runtime/libs/nnapi/include/NeuralNetworksTypes.h b/runtime/libs/nnapi/include/NeuralNetworksTypes.h index 2e05687..35c7a58 100644 --- a/runtime/libs/nnapi/include/NeuralNetworksTypes.h +++ b/runtime/libs/nnapi/include/NeuralNetworksTypes.h @@ -56,12 +56,12 @@ typedef int (*ANeuralNetworksModel_setOperandValue_fn)(ANeuralNetworksModel *mod const void *buffer, size_t length); typedef int (*ANeuralNetworksModel_setOperandSymmPerChannelQuantParams_fn)( - ANeuralNetworksModel *model, int32_t index, - const ANeuralNetworksSymmPerChannelQuantParams *channelQuant); + ANeuralNetworksModel *model, int32_t index, + const ANeuralNetworksSymmPerChannelQuantParams *channelQuant); typedef int (*ANeuralNetworksModel_setOperandValueFromMemory_fn)( - ANeuralNetworksModel *model, int32_t index, const ANeuralNetworksMemory *memory, size_t offset, - size_t length); + ANeuralNetworksModel *model, int32_t index, const ANeuralNetworksMemory *memory, size_t offset, + size_t length); typedef int (*ANeuralNetworksModel_addOperation_fn)(ANeuralNetworksModel *model, ANeuralNetworksOperationType type, @@ -88,8 +88,8 @@ typedef int (*ANeuralNetworksExecution_setInput_fn)(ANeuralNetworksExecution *ex const void *buffer, size_t length); typedef int (*ANeuralNetworksExecution_setInputFromMemory_fn)( - ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type, - const ANeuralNetworksMemory *memory, size_t offset, size_t length); + ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type, + const ANeuralNetworksMemory *memory, size_t offset, size_t length); typedef int (*ANeuralNetworksExecution_setOutput_fn)(ANeuralNetworksExecution *execution, int32_t index, @@ -97,8 +97,8 @@ typedef int (*ANeuralNetworksExecution_setOutput_fn)(ANeuralNetworksExecution *e void *buffer, size_t length); typedef int (*ANeuralNetworksExecution_setOutputFromMemory_fn)( - ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type, - const ANeuralNetworksMemory *memory, size_t offset, size_t length); + ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type, + const ANeuralNetworksMemory *memory, size_t offset, size_t length); typedef int (*ANeuralNetworksExecution_startCompute_fn)(ANeuralNetworksExecution *execution, ANeuralNetworksEvent **event); @@ -125,35 +125,39 @@ typedef int (*ANeuralNetworksDevice_getFeatureLevel_fn)(const ANeuralNetworksDev int64_t *featureLevel); typedef int (*ANeuralNetworksModel_getSupportedOperationsForDevices_fn)( - const ANeuralNetworksModel *model, const ANeuralNetworksDevice *const *devices, - uint32_t numDevices, bool *supportedOps); + const ANeuralNetworksModel *model, const ANeuralNetworksDevice *const *devices, + uint32_t numDevices, bool *supportedOps); typedef int (*ANeuralNetworksCompilation_createForDevices_fn)( - ANeuralNetworksModel *model, const ANeuralNetworksDevice *const *devices, uint32_t numDevices, - ANeuralNetworksCompilation **compilation); + ANeuralNetworksModel *model, const ANeuralNetworksDevice *const *devices, uint32_t numDevices, + ANeuralNetworksCompilation **compilation); typedef int (*ANeuralNetworksCompilation_setCaching_fn)(ANeuralNetworksCompilation *compilation, const char *cacheDir, const 
uint8_t *token); +#if __ANDROID_API__ >= 30 typedef int (*ANeuralNetworksCompilation_setTimeout_fn)(ANeuralNetworksCompilation *compilation, uint64_t duration); typedef int (*ANeuralNetworksCompilation_setPriority_fn)(ANeuralNetworksCompilation *compilation, int priority); +#endif // __ANDROID_API__ >= 30 typedef int (*ANeuralNetworksExecution_compute_fn)(ANeuralNetworksExecution *execution); +#if __ANDROID_API__ >= 30 typedef int (*ANeuralNetworksExecution_setTimeout_fn)(ANeuralNetworksExecution *execution, uint64_t duration); typedef int (*ANeuralNetworksExecution_setLoopTimeout_fn)(ANeuralNetworksExecution *execution, uint64_t duration); +#endif // __ANDROID_API__ >= 30 typedef int (*ANeuralNetworksExecution_getOutputOperandRank_fn)(ANeuralNetworksExecution *execution, int32_t index, uint32_t *rank); typedef int (*ANeuralNetworksExecution_getOutputOperandDimensions_fn)( - ANeuralNetworksExecution *execution, int32_t index, uint32_t *dimensions); + ANeuralNetworksExecution *execution, int32_t index, uint32_t *dimensions); typedef int (*ANeuralNetworksBurst_create_fn)(ANeuralNetworksCompilation *compilation, ANeuralNetworksBurst **burst); @@ -182,24 +186,25 @@ typedef int (*ANeuralNetworksModel_getExtensionOperandType_fn)(ANeuralNetworksMo int32_t *type); typedef int (*ANeuralNetworksModel_getExtensionOperationType_fn)( - ANeuralNetworksModel *model, const char *extensionName, uint16_t operationCodeWithinExtension, - ANeuralNetworksOperationType *type); + ANeuralNetworksModel *model, const char *extensionName, uint16_t operationCodeWithinExtension, + ANeuralNetworksOperationType *type); typedef int (*ANeuralNetworksModel_setOperandExtensionData_fn)(ANeuralNetworksModel *model, int32_t index, const void *data, size_t length); +#if __ANDROID_API__ >= 30 typedef int (*ANeuralNetworksMemoryDesc_create_fn)(ANeuralNetworksMemoryDesc **desc); typedef void (*ANeuralNetworksMemoryDesc_free_fn)(ANeuralNetworksMemoryDesc *desc); typedef int (*ANeuralNetworksMemoryDesc_addInputRole_fn)( - ANeuralNetworksMemoryDesc *desc, const ANeuralNetworksCompilation *compilation, int32_t index, - float frequency); + ANeuralNetworksMemoryDesc *desc, const ANeuralNetworksCompilation *compilation, int32_t index, + float frequency); typedef int (*ANeuralNetworksMemoryDesc_addOutputRole_fn)( - ANeuralNetworksMemoryDesc *desc, const ANeuralNetworksCompilation *compilation, uint32_t index, - float frequency); + ANeuralNetworksMemoryDesc *desc, const ANeuralNetworksCompilation *compilation, uint32_t index, + float frequency); typedef int (*ANeuralNetworksMemoryDesc_setDimensions_fn)(ANeuralNetworksMemoryDesc *desc, uint32_t rank, @@ -212,5 +217,5 @@ typedef int (*ANeuralNetworksMemory_createFromDesc_fn)(const ANeuralNetworksMemo typedef int (*ANeuralNetworksMemory_copy_fn)(const ANeuralNetworksMemory *src, const ANeuralNetworksMemory *dst); - +#endif // __ANDROID_API__ >= 30 #endif // __NEURAL_NETWORKS_TYPES_H__ diff --git a/runtime/libs/rua/dyn/src/DynamicBinder.cpp b/runtime/libs/rua/dyn/src/DynamicBinder.cpp index fa3f0bb..f49892d 100644 --- a/runtime/libs/rua/dyn/src/DynamicBinder.cpp +++ b/runtime/libs/rua/dyn/src/DynamicBinder.cpp @@ -97,8 +97,8 @@ typedef int (*ANeuralNetworksModel_setOperandValue_fn)(ANeuralNetworksModel *mod const void *buffer, size_t length); typedef int (*ANeuralNetworksModel_setOperandValueFromMemory_fn)( - ANeuralNetworksModel *model, int32_t index, const ANeuralNetworksMemory *memory, size_t offset, - size_t length); + ANeuralNetworksModel *model, int32_t index, const ANeuralNetworksMemory 
*memory, size_t offset, + size_t length); typedef int (*ANeuralNetworksModel_addOperation_fn)(ANeuralNetworksModel *model, ANeuralNetworksOperationType type, @@ -242,8 +242,8 @@ typedef int (*ANeuralNetworksExecution_setInput_fn)(ANeuralNetworksExecution *ex const void *buffer, size_t length); typedef int (*ANeuralNetworksExecution_setInputFromMemory_fn)( - ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type, - const ANeuralNetworksMemory *memory, size_t offset, size_t length); + ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type, + const ANeuralNetworksMemory *memory, size_t offset, size_t length); typedef int (*ANeuralNetworksExecution_setOutput_fn)(ANeuralNetworksExecution *execution, int32_t index, @@ -251,8 +251,8 @@ typedef int (*ANeuralNetworksExecution_setOutput_fn)(ANeuralNetworksExecution *e void *buffer, size_t length); typedef int (*ANeuralNetworksExecution_setOutputFromMemory_fn)( - ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type, - const ANeuralNetworksMemory *memory, size_t offset, size_t length); + ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type, + const ANeuralNetworksMemory *memory, size_t offset, size_t length); typedef int (*ANeuralNetworksExecution_startCompute_fn)(ANeuralNetworksExecution *execution, ANeuralNetworksEvent **event); diff --git a/runtime/libs/tflite/include/tflite/Diff.h b/runtime/libs/tflite/include/tflite/Diff.h index fdc1a31..1c35b34 100644 --- a/runtime/libs/tflite/include/tflite/Diff.h +++ b/runtime/libs/tflite/include/tflite/Diff.h @@ -47,7 +47,7 @@ public: * @param[in] comparator Comparator object for tensor comparation */ TfLiteInterpMatchApp(const nnfw::misc::tensor::Comparator &comparator) - : _verbose{false}, _comparator(comparator) + : _verbose{false}, _comparator(comparator) { // DO NOTHING } diff --git a/runtime/libs/tflite/include/tflite/RandomTestRunner.h b/runtime/libs/tflite/include/tflite/RandomTestRunner.h index c0b304c..abbf3b2 100644 --- a/runtime/libs/tflite/include/tflite/RandomTestRunner.h +++ b/runtime/libs/tflite/include/tflite/RandomTestRunner.h @@ -55,7 +55,7 @@ public: * @param[in] quantization TfLiteQuantizationParams type to represent quantization value */ RandomTestRunner(uint32_t seed, const RandomTestParam ¶m) - : _randgen{seed, 0.0f, 2.0f}, _param{param} + : _randgen{seed, 0.0f, 2.0f}, _param{param} { // DO NOTHING } diff --git a/runtime/libs/tflite/include/tflite/TensorLogger.h b/runtime/libs/tflite/include/tflite/TensorLogger.h index a824c34..0837dfc 100644 --- a/runtime/libs/tflite/include/tflite/TensorLogger.h +++ b/runtime/libs/tflite/include/tflite/TensorLogger.h @@ -107,9 +107,8 @@ private: const TfLiteTensor *tensor = interp.tensor(id); _outfile << "# tensor name: " << tensor->name << std::endl; - _outfile << "# tflite::interpreter.tensor(" << id << ") -> " - "tensor_value_gen[" - << log_index << "]" << std::endl; + _outfile << "# tflite::interpreter.tensor(" << id << ") -> tensor_value_gen[" << log_index + << "]" << std::endl; if (tensor->type == kTfLiteInt32) { diff --git a/runtime/libs/tflite/src/Diff.cpp b/runtime/libs/tflite/src/Diff.cpp index 39f9943..2d2b66e 100644 --- a/runtime/libs/tflite/src/Diff.cpp +++ b/runtime/libs/tflite/src/Diff.cpp @@ -29,9 +29,9 @@ class DiffSummary : public nnfw::misc::tensor::Comparator::Observer { public: DiffSummary() - : max_abs_diff_index(0), max_abs_diff_expected{0.0f}, max_abs_diff_obtained{0.0f}, - 
max_abs_diff_value{0.0f}, max_rel_diff_index(0), max_rel_diff_expected{0.0f}, - max_rel_diff_obtained{0.0f}, max_rel_diff_value{0.0f} + : max_abs_diff_index(0), max_abs_diff_expected{0.0f}, max_abs_diff_obtained{0.0f}, + max_abs_diff_value{0.0f}, max_rel_diff_index(0), max_rel_diff_expected{0.0f}, + max_rel_diff_obtained{0.0f}, max_rel_diff_value{0.0f} { // DO NOTHING } @@ -86,12 +86,12 @@ bool TfLiteInterpMatchApp::compareSingleTensorView(const nnfw::tflite::TensorVie using nnfw::misc::tensor::zip; zip(expected.shape(), expected, obtained) - << [&](const Index &index, T expected_value, T obtained_value) { - if (expected_value != obtained_value) - { - diffs.emplace_back(index, expected_value, obtained_value); - } - }; + << [&](const Index &index, T expected_value, T obtained_value) { + if (expected_value != obtained_value) + { + diffs.emplace_back(index, expected_value, obtained_value); + } + }; // TODO Unify summary generation code if (diffs.size() == 0) @@ -121,8 +121,8 @@ bool TfLiteInterpMatchApp::compareSingleTensorView(const nnfw::tflite::TensorVie template <> bool TfLiteInterpMatchApp::compareSingleTensorView( - const nnfw::tflite::TensorView &expected, - const nnfw::tflite::TensorView &obtained, int id) const + const nnfw::tflite::TensorView &expected, const nnfw::tflite::TensorView &obtained, + int id) const { DiffSummary summary; diff --git a/runtime/libs/tflite/src/RandomTestRunner.cpp b/runtime/libs/tflite/src/RandomTestRunner.cpp index f7fccbf..3fa9a97 100644 --- a/runtime/libs/tflite/src/RandomTestRunner.cpp +++ b/runtime/libs/tflite/src/RandomTestRunner.cpp @@ -68,12 +68,12 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder) int32_t value = 0; nnfw::misc::tensor::iterate(tfl_interp_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - // TODO Generate random values - tfl_interp_view.at(ind) = value; - nnapi_view.at(ind) = value; - ++value; - }; + << [&](const nnfw::misc::tensor::Index &ind) { + // TODO Generate random values + tfl_interp_view.at(ind) = value; + nnapi_view.at(ind) = value; + ++value; + }; }; // Generate singed 32-bit integer (s32) input @@ -89,11 +89,11 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder) int32_t value = 0; nnfw::misc::tensor::iterate(tfl_interp_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - // TODO Generate random values - tfl_interp_view.at(ind) = value; - nnapi_view.at(ind) = value; - }; + << [&](const nnfw::misc::tensor::Index &ind) { + // TODO Generate random values + tfl_interp_view.at(ind) = value; + nnapi_view.at(ind) = value; + }; }; initializers[kTfLiteUInt8] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) { @@ -106,19 +106,19 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder) assert(tfl_interp_view.shape() == nnapi_view.shape()); auto fp = static_cast( - &nnfw::misc::RandomGenerator::generate); + const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>( + &nnfw::misc::RandomGenerator::generate); const nnfw::misc::tensor::Object data(tfl_interp_view.shape(), std::bind(fp, _randgen, _1, _2)); assert(tfl_interp_view.shape() == data.shape()); nnfw::misc::tensor::iterate(tfl_interp_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - const auto value = data.at(ind); + << [&](const nnfw::misc::tensor::Index &ind) { + const auto value = data.at(ind); - tfl_interp_view.at(ind) = value; - nnapi_view.at(ind) = value; - }; + tfl_interp_view.at(ind) = value; + nnapi_view.at(ind) = value; + }; }; 
reseters[kTfLiteUInt8] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) { @@ -131,8 +131,8 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder) assert(tfl_interp_view.shape() == nnapi_view.shape()); auto fp = static_cast( - &nnfw::misc::RandomGenerator::generate); + const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>( + &nnfw::misc::RandomGenerator::generate); const nnfw::misc::tensor::Object data(tfl_interp_view.shape(), std::bind(fp, _randgen, _1, _2)); assert(tfl_interp_view.shape() == data.shape()); @@ -140,10 +140,10 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder) uint8_t value = 0; nnfw::misc::tensor::iterate(tfl_interp_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - tfl_interp_view.at(ind) = value; - nnapi_view.at(ind) = value; - }; + << [&](const nnfw::misc::tensor::Index &ind) { + tfl_interp_view.at(ind) = value; + nnapi_view.at(ind) = value; + }; }; initializers[kTfLiteFloat32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) { @@ -156,20 +156,20 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder) assert(tfl_interp_view.shape() == nnapi_view.shape()); auto fp = static_cast( - &nnfw::misc::RandomGenerator::generate); + const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>( + &nnfw::misc::RandomGenerator::generate); const nnfw::misc::tensor::Object data(tfl_interp_view.shape(), std::bind(fp, _randgen, _1, _2)); assert(tfl_interp_view.shape() == data.shape()); nnfw::misc::tensor::iterate(tfl_interp_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - const auto value = data.at(ind); + << [&](const nnfw::misc::tensor::Index &ind) { + const auto value = data.at(ind); - tfl_interp_view.at(ind) = value; - nnapi_view.at(ind) = value; - }; + tfl_interp_view.at(ind) = value; + nnapi_view.at(ind) = value; + }; }; reseters[kTfLiteFloat32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) { @@ -182,8 +182,8 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder) assert(tfl_interp_view.shape() == nnapi_view.shape()); auto fp = static_cast( - &nnfw::misc::RandomGenerator::generate); + const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>( + &nnfw::misc::RandomGenerator::generate); const nnfw::misc::tensor::Object data(tfl_interp_view.shape(), std::bind(fp, _randgen, _1, _2)); @@ -192,10 +192,10 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder) float value = 0; nnfw::misc::tensor::iterate(tfl_interp_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - tfl_interp_view.at(ind) = value; - nnapi_view.at(ind) = value; - }; + << [&](const nnfw::misc::tensor::Index &ind) { + tfl_interp_view.at(ind) = value; + nnapi_view.at(ind) = value; + }; }; initializers[kTfLiteBool] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) { @@ -208,20 +208,20 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder) assert(tfl_interp_view.shape() == nnapi_view.shape()); auto fp = static_cast( - &nnfw::misc::RandomGenerator::generate); + const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>( + &nnfw::misc::RandomGenerator::generate); const nnfw::misc::tensor::Object data(tfl_interp_view.shape(), std::bind(fp, _randgen, _1, _2)); assert(tfl_interp_view.shape() == data.shape()); nnfw::misc::tensor::iterate(tfl_interp_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - const auto value = data.at(ind); + << [&](const 
nnfw::misc::tensor::Index &ind) { + const auto value = data.at(ind); - tfl_interp_view.at(ind) = value; - nnapi_view.at(ind) = value; - }; + tfl_interp_view.at(ind) = value; + nnapi_view.at(ind) = value; + }; }; reseters[kTfLiteBool] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) { @@ -234,8 +234,8 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder) assert(tfl_interp_view.shape() == nnapi_view.shape()); auto fp = static_cast( - &nnfw::misc::RandomGenerator::generate); + const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>( + &nnfw::misc::RandomGenerator::generate); const nnfw::misc::tensor::Object data(tfl_interp_view.shape(), std::bind(fp, _randgen, _1, _2)); @@ -244,10 +244,10 @@ void RandomTestRunner::compile(const nnfw::tflite::Builder &builder) bool value = false; nnfw::misc::tensor::iterate(tfl_interp_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - tfl_interp_view.at(ind) = value; - nnapi_view.at(ind) = value; - }; + << [&](const nnfw::misc::tensor::Index &ind) { + tfl_interp_view.at(ind) = value; + nnapi_view.at(ind) = value; + }; }; // Fill IFM with random numbers diff --git a/runtime/onert/api/.clang-format b/runtime/onert/api/.clang-format new file mode 120000 index 0000000..83185fe --- /dev/null +++ b/runtime/onert/api/.clang-format @@ -0,0 +1 @@ +../../../.clang-format.8 \ No newline at end of file diff --git a/runtime/onert/api/include/nnfw.h b/runtime/onert/api/include/nnfw.h index 76380b4..6eb7e6b 100644 --- a/runtime/onert/api/include/nnfw.h +++ b/runtime/onert/api/include/nnfw.h @@ -64,7 +64,8 @@ typedef struct nnfw_session nnfw_session; * * The type of tensor represented in {@link nnfw_tensorinfo} */ -typedef enum { +typedef enum +{ /** A tensor of 32 bit floating point */ NNFW_TYPE_TENSOR_FLOAT32 = 0, /** A tensor of 32 bit signed integer */ @@ -96,7 +97,8 @@ typedef enum { /** * @brief Result values returned from a call to an API function */ -typedef enum { +typedef enum +{ /** Successful */ NNFW_STATUS_NO_ERROR = 0, /** @@ -117,7 +119,8 @@ typedef enum { /** * @brief Data format of a tensor */ -typedef enum { +typedef enum +{ /** Don't care layout */ NNFW_LAYOUT_NONE = 0, /** @@ -135,7 +138,8 @@ typedef enum { /** * @brief Information ID for retrieving information on nnfw (e.g. version) */ -typedef enum { +typedef enum +{ /** nnfw runtime version * Its value is uint32 in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch. */ diff --git a/runtime/onert/api/include/nnfw_internal.h b/runtime/onert/api/include/nnfw_internal.h index eb4b6d6..a88e324 100644 --- a/runtime/onert/api/include/nnfw_internal.h +++ b/runtime/onert/api/include/nnfw_internal.h @@ -35,4 +35,13 @@ NNFW_STATUS nnfw_get_config(nnfw_session *session, const char *key, char *value, */ NNFW_STATUS nnfw_load_circle_from_buffer(nnfw_session *session, uint8_t *buffer, size_t size); +/** + * @brief Load a tflite/circle model from file. + * + * @param[in] session session + * @param[in] file_path Path to model file. 
Model type (tflite/circle) is decided by the file extension + * @return NNFW_STATUS + */ +NNFW_STATUS nnfw_load_model_from_modelfile(nnfw_session *session, const char *file_path); + #endif // __NNFW_INTERNAL_H__ diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h index 31c3890..28703c0 100644 --- a/runtime/onert/api/include/nnfw_version.h +++ b/runtime/onert/api/include/nnfw_version.h @@ -21,6 +21,6 @@ * NNFW_VERSION is a uint32 value representing nnfw runtime version * in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch */ -#define NNFW_VERSION 0x01000b01 +#define NNFW_VERSION 0x01000c00 #endif // __NNFW_VERSION_H__ diff --git a/runtime/onert/api/src/CustomKernel.cc b/runtime/onert/api/src/CustomKernel.cc index 3f3a5d8..56525fe 100644 --- a/runtime/onert/api/src/CustomKernel.cc +++ b/runtime/onert/api/src/CustomKernel.cc @@ -65,7 +65,7 @@ public: }; Kernel::Kernel(const nnfw_custom_eval evalFunction) - : _in_params(), _userdata(nullptr), _userdata_size(0), _evalFunction(evalFunction) + : _in_params(), _userdata(nullptr), _userdata_size(0), _evalFunction(evalFunction) { } diff --git a/runtime/onert/api/src/nnfw_api.cc b/runtime/onert/api/src/nnfw_api.cc index 835b207..4eba4ec 100644 --- a/runtime/onert/api/src/nnfw_api.cc +++ b/runtime/onert/api/src/nnfw_api.cc @@ -90,7 +90,7 @@ NNFW_STATUS nnfw_close_session(nnfw_session *session) NNFW_STATUS nnfw_load_model_from_file(nnfw_session *session, const char *pacakge_file_path) { NNFW_RETURN_ERROR_IF_NULL(session); - return session->load_model_from_file(pacakge_file_path); + return session->load_model_from_nnpackage(pacakge_file_path); } /* @@ -350,6 +350,12 @@ NNFW_STATUS nnfw_load_circle_from_buffer(nnfw_session *session, uint8_t *buffer, return session->load_circle_from_buffer(buffer, size); } +NNFW_STATUS nnfw_load_model_from_modelfile(nnfw_session *session, const char *file_path) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->load_model_from_modelfile(file_path); +} + NNFW_STATUS nnfw_input_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index) { NNFW_RETURN_ERROR_IF_NULL(session); diff --git a/runtime/onert/api/src/nnfw_api_internal.cc b/runtime/onert/api/src/nnfw_api_internal.cc index a4c69eb..c3fdb13 100644 --- a/runtime/onert/api/src/nnfw_api_internal.cc +++ b/runtime/onert/api/src/nnfw_api_internal.cc @@ -19,17 +19,19 @@ #include "compiler/Compiler.h" #include "util/ConfigSource.h" #include "util/Exceptions.h" +#include "util/logging.h" #include "exec/Execution.h" #include "circle_loader.h" #include "tflite_loader.h" #include "json/json.h" #include "ir/OpCode.h" +#include "util/TracingCtx.h" + #include #include #include #include #include -#include #include /* @@ -40,8 +42,11 @@ #define MAX_PATH_LENGTH 1024 #define MAX_TENSOR_NAME_LENGTH 64 +namespace +{ + // Is null-terminating in length ? 
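// Editorial aside, not part of this patch: a minimal usage sketch for the new
// nnfw_load_model_from_modelfile() entry point declared in nnfw_internal.h above,
// assuming the usual create/prepare/run session flow; the function name and model
// path below are placeholders and error handling is omitted.
static void example_load_single_model_file()
{
  nnfw_session *session = nullptr;
  nnfw_create_session(&session);
  // The file extension (.tflite or .circle) selects the loader.
  nnfw_load_model_from_modelfile(session, "model.circle");
  nnfw_prepare(session);
  nnfw_run(session);
  nnfw_close_session(session);
}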
-static bool null_terminating(const char *str, uint32_t length) +bool null_terminating(const char *str, uint32_t length) { for (uint32_t i = 0; i < length; i++) { @@ -53,7 +58,7 @@ static bool null_terminating(const char *str, uint32_t length) return false; } -static onert::ir::Layout convertLayout(NNFW_LAYOUT layout) +onert::ir::Layout convertLayout(NNFW_LAYOUT layout) { if (layout == NNFW_LAYOUT_CHANNELS_LAST) { @@ -92,9 +97,70 @@ NNFW_STATUS getTensorIndexImpl(const onert::ir::Graph &graph, const char *tensor } } +std::string trim(const std::string &value) +{ + std::string whitespace = " \t"; + auto begin = value.find_first_not_of(whitespace); + if (begin == std::string::npos) + return ""; // no content + + auto end = value.find_last_not_of(whitespace); + auto range = end - begin + 1; + return value.substr(begin, range); +} + +using CfgKeyValues = std::unordered_map; + +bool loadConfigure(const std::string cfgfile, CfgKeyValues &keyValues) +{ + std::ifstream ifs(cfgfile); + if (ifs.is_open()) + { + std::string line; + while (std::getline(ifs, line)) + { + auto cmtpos = line.find('#'); + if (cmtpos != std::string::npos) + { + line = line.substr(0, cmtpos); + } + std::istringstream isline(line); + std::string key; + if (std::getline(isline, key, '=')) + { + std::string value; + if (std::getline(isline, value)) + { + key = trim(key); + keyValues[key] = trim(value); + } + } + } + ifs.close(); + return true; + } + return false; +} + +void setConfigKeyValues(const CfgKeyValues &keyValues) +{ + auto configsrc = std::make_unique(); + + for (auto it = keyValues.begin(); it != keyValues.end(); ++it) + { + VERBOSE(NNPKG_CONFIGS) << "(" << it->first << ") = (" << it->second << ")" << std::endl; + configsrc->set(it->first, it->second); + } + + onert::util::config_source_ext(std::move(configsrc)); +} + +} // namespace + nnfw_session::nnfw_session() - : _subgraphs{nullptr}, _execution{nullptr}, - _kernel_registry{std::make_shared()} + : _subgraphs{nullptr}, _execution{nullptr}, + _kernel_registry{std::make_shared()}, _tracing_ctx{ + nullptr} { // DO NOTHING } @@ -122,13 +188,65 @@ NNFW_STATUS nnfw_session::load_circle_from_buffer(uint8_t *buffer, size_t size) return NNFW_STATUS_ERROR; } - _compiler = std::make_unique(_subgraphs); + _tracing_ctx = std::make_unique(_subgraphs.get()); + + _compiler = std::make_unique(_subgraphs, _tracing_ctx.get()); + + _state = State::MODEL_LOADED; + return NNFW_STATUS_NO_ERROR; +} + +NNFW_STATUS nnfw_session::load_model_from_modelfile(const char *model_file_path) +{ + if (!isStateInitialized()) + return NNFW_STATUS_INVALID_STATE; + + if (!model_file_path) + { + std::cerr << "Model file path is null." << std::endl; + return NNFW_STATUS_UNEXPECTED_NULL; + } + + std::string filename{model_file_path}; + if (filename.size() < 8) // .tflite or .circle + { + std::cerr << "Invalid model file path." 
<< std::endl; + return NNFW_STATUS_ERROR; + } + + std::string model_type = filename.substr(filename.size() - 7, 7); + + try + { + if (model_type == ".tflite") + { + _subgraphs = onert::tflite_loader::loadModel(filename.c_str()); + } + else if (model_type == ".circle") + { + _subgraphs = onert::circle_loader::loadModel(filename.c_str()); + } + else + { + std::cerr << "Unsupported model type" << std::endl; + return NNFW_STATUS_ERROR; + } + } + catch (const std::exception &e) + { + std::cerr << "Error during model loading : " << e.what() << std::endl; + return NNFW_STATUS_ERROR; + } + + _tracing_ctx = std::make_unique(_subgraphs.get()); + + _compiler = std::make_unique(_subgraphs, _tracing_ctx.get()); _state = State::MODEL_LOADED; return NNFW_STATUS_NO_ERROR; } -NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir) +NNFW_STATUS nnfw_session::load_model_from_nnpackage(const char *package_dir) { if (!isStateInitialized()) return NNFW_STATUS_INVALID_STATE; @@ -166,6 +284,18 @@ NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir) mfs >> root; const Json::Value &models = root["models"]; const Json::Value &model_types = root["model-types"]; + const Json::Value &configs = root["configs"]; + + if (!configs.empty() && !configs[0].empty()) + { + auto filepath = package_dir + std::string("/metadata/") + configs[0].asCString(); + + CfgKeyValues keyValues; + if (loadConfigure(filepath, keyValues)) + { + setConfigKeyValues(keyValues); + } + } auto model_file_path = package_dir + std::string("/") + models[0].asString(); // first model auto model_type = model_types[0].asString(); // first model's type @@ -190,7 +320,9 @@ NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir) return NNFW_STATUS_ERROR; } - _compiler = std::make_unique(_subgraphs); + _tracing_ctx = std::make_unique(_subgraphs.get()); + + _compiler = std::make_unique(_subgraphs, _tracing_ctx.get()); _state = State::MODEL_LOADED; return NNFW_STATUS_NO_ERROR; @@ -225,7 +357,7 @@ NNFW_STATUS nnfw_session::prepare() { _subgraphs.reset(); std::shared_ptr executors = _compiler->compile(); - _execution = std::make_shared(executors); + _execution = std::make_unique(executors); } catch (const std::exception &e) { @@ -308,8 +440,8 @@ NNFW_STATUS nnfw_session::set_input(uint32_t index, NNFW_TYPE /*type*/, const vo if (!buffer && length != 0) { std::cerr - << "Error during nnfw_session::set_input : given buffer is NULL but the length is not 0" - << std::endl; + << "Error during nnfw_session::set_input : given buffer is NULL but the length is not 0" + << std::endl; return NNFW_STATUS_ERROR; } @@ -337,8 +469,8 @@ NNFW_STATUS nnfw_session::set_output(uint32_t index, NNFW_TYPE /*type*/, void *b if (!buffer && length != 0) { std::cerr - << "Error during nnfw_session::set_output : given buffer is NULL but the length is not 0" - << std::endl; + << "Error during nnfw_session::set_output : given buffer is NULL but the length is not 0" + << std::endl; return NNFW_STATUS_ERROR; } diff --git a/runtime/onert/api/src/nnfw_api_internal.h b/runtime/onert/api/src/nnfw_api_internal.h index 604ba38..a50ac72 100644 --- a/runtime/onert/api/src/nnfw_api_internal.h +++ b/runtime/onert/api/src/nnfw_api_internal.h @@ -21,6 +21,7 @@ #include "nnfw_experimental.h" #include +#include #include #include @@ -100,7 +101,7 @@ public: nnfw_session(); ~nnfw_session(); - NNFW_STATUS load_model_from_file(const char *package_file_path); + NNFW_STATUS load_model_from_nnpackage(const char *package_file_path); NNFW_STATUS prepare(); NNFW_STATUS 
run(); @@ -132,6 +133,7 @@ public: NNFW_STATUS set_config(const char *key, const char *value); NNFW_STATUS get_config(const char *key, char *value, size_t value_size); NNFW_STATUS load_circle_from_buffer(uint8_t *buffer, size_t size); + NNFW_STATUS load_model_from_modelfile(const char *file_path); // // Experimental API @@ -154,8 +156,10 @@ private: State _state{State::INITIALIZED}; std::shared_ptr _subgraphs; std::unique_ptr _compiler; - std::shared_ptr _execution; + std::unique_ptr _execution; std::shared_ptr _kernel_registry; + + std::unique_ptr _tracing_ctx; }; #endif // __API_NNFW_API_INTERNAL_H__ diff --git a/runtime/onert/backend/CMakeLists.txt b/runtime/onert/backend/CMakeLists.txt index 42d622a..dc038c9 100644 --- a/runtime/onert/backend/CMakeLists.txt +++ b/runtime/onert/backend/CMakeLists.txt @@ -4,3 +4,5 @@ add_subdirectory(cpu) add_subdirectory(acl_cl) add_subdirectory(acl_neon) add_subdirectory(acl_common) +add_subdirectory(ruy) +add_subdirectory(xnnpack) diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h index 5c50413..4f48314 100644 --- a/runtime/onert/backend/acl_cl/Backend.h +++ b/runtime/onert/backend/acl_cl/Backend.h @@ -20,6 +20,7 @@ #include #include +#include "BackendContext.h" #include "Config.h" #include "ConstantInitializer.h" #include "KernelGenerator.h" @@ -41,21 +42,20 @@ public: std::shared_ptr config() const override { return _config; } - std::unique_ptr newContext(const ir::Graph &graph, - const std::shared_ptr &, - bool is_linear_executor) const override + std::unique_ptr + newContext(const ir::Graph &graph, const std::shared_ptr &, + bool is_linear_executor) const override { const auto &operands = graph.operands(); const auto &operations = graph.operations(); - auto context = std::make_unique(this, &graph); + auto context = std::make_unique(this, &graph); auto tm = createTensorManager(is_linear_executor); auto tr = std::make_shared>(tm); - auto tb = std::make_shared(operands, tm, tr); + auto tb = std::make_shared(operands, tm); context->tensor_registry = tr; context->tensor_builder = tb; context->constant_initializer = std::make_shared(operands, tr); context->kernel_gen = std::make_shared(operands, operations, tb, tr); - context->tensor_register = nullptr; context->optimizer = std::make_shared(context.get()); return context; } diff --git a/runtime/onert/backend/acl_cl/BackendContext.cc b/runtime/onert/backend/acl_cl/BackendContext.cc new file mode 100644 index 0000000..a6f228a --- /dev/null +++ b/runtime/onert/backend/acl_cl/BackendContext.cc @@ -0,0 +1,302 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "Optimizer.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" + +namespace onert +{ +namespace backend +{ +namespace acl_cl +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +void BackendContext::planTensors(const std::vector &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info) +{ + ir::OperandIndexMap uses_map; + ir::OperandIndexMap def_map; + ir::OperandIndexSequence constants; + + // Prepare scanning + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + const auto &li = lower_info.operand.at(ind); + if (li->def_factors().getOnlyElement().backend() != backend()) + continue; + + // Ignore unused tensor + if (li->def_factors().size() == 0 && li->use_factors().size() == 0) + { + VERBOSE(planTensors) << "Operand #" << ind.value() << " will not be used. no more process." + << std::endl; + return; + } + + uses_map[ind] = obj.getUses().size(); + def_map[ind] = obj.getDef().valid() ? 1 : 0; + + if (obj.isConstant()) + constants.append(ind); + + auto factor = li->def_factors().getOnlyElement(); + if (!tensor_builder->isRegistered(ind)) + { + // These tensors do not exist in any op_seq (No use and def) + const auto info = obj.info(); + const auto backend_layout = factor.layout(); + // TODO Change tensor info to have permuted shape + tensor_builder->registerTensorInfo(ind, info, backend_layout); + } + } + + // Start scanning to do notify{First|Last}Use for each tensor + + // If a tensor is a constant, increase the use of the tensor and allocate it first. + // Increasing use count here makes the tensor never be deallocated, i.e it they will be + // deallocated last. + VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl; + for (const auto &ind : constants) + { + uses_map[ind]++; + tensor_builder->notifyFirstUse(ind); + } + + // At each operation, + // 1. Scan DEF of outputs. If the DEF, allocate it + // 2. Scan DEF of inputs. If variable tensor, allocate it + // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0 + for (const auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + for (const auto &op_idx : op_seq.operations()) + { + auto &op = graph()->operations().at(op_idx); + auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + + // Define outputs + for (const auto &ind : op_outputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + assert(def_map.find(ind) != def_map.end()); + if (def_map[ind]) + { + def_map[ind] = 0; + tensor_builder->notifyFirstUse(ind); + } + } + + // Scan variable tensors + // This tensor has features like constant. 
But OperandInfo and LowerInfo treat them as + // non-constant because of less memory usage by memory planning in here + for (const auto &ind : op_inputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + const auto &operand = graph()->operands().at(ind); + if (operand.info().isVariable()) + { + // The variable tensor with buffer is not supported yet + assert(operand.data() == nullptr); + assert(operand.getUses().size() == 1 && !operand.getDef().valid()); + assert(lower_info.operand.at(ind)->def_factors().size() == 1 && + lower_info.operand.at(ind)->use_factors().size() == 1); + assert(uses_map[ind] == 1 && def_map[ind] == 0); + tensor_builder->notifyFirstUse(ind); + } + } + + for (const auto &ind : op_inputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + assert(uses_map.find(ind) != uses_map.end()); + assert(uses_map[ind] > 0); + uses_map[ind]--; + if (uses_map[ind] == 0) + { + // plan for deallocation of static tensornode + tensor_builder->notifyLastUse(ind); + } + } + } + } + + // Dispose and validate + for (const auto &ind : constants) + { + --uses_map[ind]; + if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice + { + tensor_builder->notifyLastUse(ind); + } + } + + assert( + std::all_of(uses_map.begin(), uses_map.end(), + [](std::pair it) { return it.second == 0; })); + + assert( + std::all_of(def_map.begin(), def_map.end(), + [](std::pair it) { return it.second == 0; })); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + optimizer->optimize(); + + for (const auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (const auto op_ind : op_seq) + { + bool op_assigned = [&]() { + for (auto &op_info : operation_list()) + if (op_info.index == op_ind) + return true; + return false; + }(); + if (!op_assigned) + continue; + + const auto &op = graph()->operations().at(op_ind); + for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED) + { + if (!tensor_builder->isRegistered(index) && !model_io.contains(index) && + find(operand_list().begin(), operand_list().end(), index) != operand_list().end()) + { + const auto &operand_lower_info = + lower_info.operand.at(index)->def_factors().getOnlyElement(); + + // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl) + // op.getOutputs() of permute (CPU) returns tensor A + // but tensor A belongs to the backend of acl_cl. + // So, we have to make this tensor NOT registered for CPU. 
+ if (operand_lower_info.backend() != backend()) + continue; + + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = op_seq.getLayout(); + const auto backend_layout = operand_lower_info.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + } + } + } + + // TODO Get compiler options from compiler, and use it rather than getting it from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + planTensors(order, op_seqs, lower_info); + } + else + { + // For the executors that does not have fixed linear execution order: + // To make tensors never be deallocated, this is a workaround to use static memory planner + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + tensor_builder->allocate(); + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { + ifunc.prepare(); + tensor_builder->postFunctionPrepare(); + }); + } + + return ret; +} + +} // namespace acl_cl +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/acl_cl/BackendContext.h b/runtime/onert/backend/acl_cl/BackendContext.h new file mode 100644 index 0000000..662d767 --- /dev/null +++ b/runtime/onert/backend/acl_cl/BackendContext.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__ + +#include +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" + +namespace onert +{ +namespace backend +{ +namespace acl_cl +{ + +class Optimizer; + +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, const ir::Graph *graph, + std::shared_ptr tensor_registry = nullptr, + std::shared_ptr tensor_builder = nullptr, + std::shared_ptr constant_initializer = nullptr, + std::shared_ptr kernel_gen = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen} + { + } + + ITensorRegistry *genTensors(const std::vector &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + FunctionMap genKernels(const std::vector &order, + const ir::OpSequences &op_seqs) override; + +private: + void initConsts(); + void planTensors(const std::vector &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + std::shared_ptr tensor_builder; + std::shared_ptr constant_initializer; + std::shared_ptr kernel_gen; + std::shared_ptr optimizer; +}; + +} // namespace acl_cl +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.cc b/runtime/onert/backend/acl_cl/ConstantInitializer.cc index b45b910..413a7cc 100644 --- a/runtime/onert/backend/acl_cl/ConstantInitializer.cc +++ b/runtime/onert/backend/acl_cl/ConstantInitializer.cc @@ -112,7 +112,7 @@ void ConstantInitializer::visit(const ir::operation::Reverse &node) const auto &axis_obj = _operands.at(axis_index); const auto ifm_rank = input_obj.shape().rank(); - const auto frontend_layout = this->_current_op_seq_layout; + const auto frontend_layout = this->_current_layout; auto output_tensor = this->_tensor_reg->getITensor(output_index); const auto backend_layout = output_tensor->layout(); diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.h b/runtime/onert/backend/acl_cl/ConstantInitializer.h index 9f3acb4..fc0eca8 100644 --- a/runtime/onert/backend/acl_cl/ConstantInitializer.h +++ b/runtime/onert/backend/acl_cl/ConstantInitializer.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__ -#define __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__ +#ifndef __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__ #include "AclConstantInitializer.h" @@ -45,4 +45,4 @@ public: } // namespace backend } // namespace onert -#endif // __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__ +#endif // __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.cc b/runtime/onert/backend/acl_cl/KernelGenerator.cc index e7690af..3a5ea5a 100644 --- a/runtime/onert/backend/acl_cl/KernelGenerator.cc +++ b/runtime/onert/backend/acl_cl/KernelGenerator.cc @@ -49,7 +49,7 @@ KernelGenerator::KernelGenerator( const std::shared_ptr &tensor_builder, const std::shared_ptr> &tensor_reg) : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder), - _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN) + _tensor_reg(tensor_reg), _current_layout(ir::Layout::UNKNOWN) { // DO NOTHING } @@ -62,7 +62,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) _return_fn_seq = std::make_unique(); _return_fn_seq->enableDynamicShapeInferer(false); - _current_op_seq_layout = op_seq.getLayout(); + _current_layout = op_seq.getLayout(); for (const auto &operation_idx : op_seq.operations()) { const auto &node = _operations_ctx.at(operation_idx); @@ -78,6 +78,25 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; + const auto NNApiInputs = 2; + if (node.getInputs().size() != NNApiInputs) + { + const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)}; + if (!_ctx.at(crops_index).isConstant()) + { + throw std::runtime_error("Non-constant crops NYI for acl_cl backend BatchToSpaceND"); + } + + auto crops = _ctx.at(crops_index).asVector(); + for (auto crop : crops) + { + if (crop != 0) + { + throw std::runtime_error("Non-zero crops NYI for acl_cl backend BatchToSpaceND"); + } + } + } + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index); auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index); @@ -152,8 +171,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. 
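// Editorial illustration, not part of this patch: in this format a 3x3 kernel with
// 16 input channels and 32 output channels has shape [32, 3, 3, 16], so the
// dim(1)/dim(2) reads below pick out the spatial kernel height and width.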
const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -189,8 +208,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)}; const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)}; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [1, kernel_height, kernel_width, depth_out]. const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -255,7 +274,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node) else { const auto rank = _ctx.at(ofm_index).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); const auto fixed_axis = acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(); @@ -277,7 +296,7 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node) auto fn = acl_common::kernelGenFullyConnected( - node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout); + node, _ctx, _tensor_builder, _tensor_reg, _current_layout); _return_fn = std::make_unique( std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle())); } @@ -296,7 +315,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) // Convert to ACL axes taking into account negative values and possible duplicates. const auto &axes = _ctx.at(axes_index); const auto input_rank = _ctx.at(input_index).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = input_tensor->layout(); std::unique_ptr fn; @@ -329,7 +348,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) // NOTE This operation must not be changed the layout from frontend to backend // So, PermutationOperationPass makes layouts of frontend and backend the same. 
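Most visitors in this file pair _current_layout (the frontend layout) with the tensor's backend layout and run axes through acl_common::ToARMComputeAxis before configuring an ACL layer. The sketch below is only a rough model of that conversion, under two stated assumptions: negative axes are normalized against the rank first, and ACL indexes dimensions innermost-first, so a runtime axis is mirrored to rank - 1 - axis. The additional NHWC/NCHW permutation the real helper applies when the two layouts differ is deliberately left out.

#include <cassert>
#include <cstdint>

// Hypothetical, simplified stand-in for acl_common::ToARMComputeAxis.
// It ignores the NHWC <-> NCHW permutation the real helper performs when the
// frontend and backend layouts differ.
uint32_t toAclAxisSimplified(int32_t rank, int32_t axis)
{
  if (axis < 0)
    axis += rank; // normalize negative axes, as the visitors above do
  assert(axis >= 0 && axis < rank);
  // ACL tensors are indexed with dimension 0 as the innermost dimension,
  // so the runtime-side axis is mirrored.
  return static_cast<uint32_t>(rank - 1 - axis);
}

int main()
{
  // For a rank-4 tensor, the last runtime axis becomes ACL dimension 0.
  assert(toAclAxisSimplified(4, 3) == 0);
  assert(toAclAxisSimplified(4, -1) == 0);
  return 0;
}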
- const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) || frontend_layout == backend_layout); @@ -388,7 +407,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node) auto outputData_tensor = _tensor_reg->getAclTensor(output_index); auto inputData_tensor = _tensor_reg->getAclTensor(input_index); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData @@ -455,7 +474,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) auto outputData_tensor = _tensor_reg->getAclTensor(output_index); auto inputData_tensor = _tensor_reg->getAclTensor(input_index); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData @@ -557,7 +576,7 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx); auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = ifm_tensor->layout(); const auto &perms = _ctx.at(perm_idx); @@ -836,7 +855,7 @@ void KernelGenerator::visit(const ir::operation::OneHot &node) auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx); const size_t output_rank = _ctx.at(output_idx).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); int32_t axis = node.param().axis == -1 ? 
output_rank - 1 : node.param().axis; axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value(); @@ -887,7 +906,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) for (const auto &input_index : input_indexes) inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(output_index)->layout(); if (axis < 0) @@ -923,8 +942,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) void KernelGenerator::visit(const ir::operation::Pool2D &node) { auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>( - node, _ctx, _tensor_reg, _current_op_seq_layout, - acl_common::convertPoolType(node.param().op_type)); + node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type)); const auto ofm_index{node.getOutputs().at(0)}; auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); @@ -1169,9 +1187,9 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node) const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)}; const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)}; - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_layout); const auto stride = node.param().stride; @@ -1270,7 +1288,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node) UNUSED_RELEASE(backend_layout); assert(backend_layout == ifm_tensor->layout()); assert(backend_layout == indices_tensor->layout()); - assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout); + assert(ifm_rank < 4 || _current_layout == backend_layout); // input is n-D, indices k-D, output is (n + k - 1)-D size_t n = ifm_rank; @@ -1306,11 +1324,11 @@ void KernelGenerator::visit(const ir::operation::Gather &node) _return_fn = asAclFunction(std::move(fn)); } -void KernelGenerator::visit(const ir::operation::ArgMax &node) +void KernelGenerator::visit(const ir::operation::ArgMinMax &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ArgMax::Input::AXIS)}; + const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)}; + const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)}; auto ifm_shape = _ctx.at(ifm_index).shape(); auto ofm_shape = _ctx.at(ofm_index).shape(); @@ -1320,7 +1338,7 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index); const auto ifm_rank = _ctx.at(ifm_index).shape().rank(); - auto frontend_layout = _current_op_seq_layout; + auto frontend_layout = _current_layout; auto backend_layout = ifm_tensor->layout(); int axis_value = _ctx.at(axis_index).asScalar(); @@ -1331,10 +1349,10 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) auto 
acl_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value(); - + auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX + : ::arm_compute::ReductionOperation::ARG_IDX_MIN; auto fn = acl_common::generateLayer( - ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), - ::arm_compute::ReductionOperation::ARG_IDX_MAX); + ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), reduce_type); _return_fn = asAclFunction(std::move(fn)); } @@ -1400,7 +1418,7 @@ void KernelGenerator::visit(const ir::operation::Split &node) for (const auto &ofm_ind : output_indexes) output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = ifm_tensor->layout(); auto axis = _ctx.at(axis_index).asScalar(); if (axis < 0) @@ -1439,7 +1457,7 @@ void KernelGenerator::visit(const ir::operation::SplitV &node) { int32_t split_dim = split_dim_op.asScalar(); uint32_t split_dim_revised = (split_dim < 0) ? (split_dim + ifm_rank) : split_dim; - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = ifm_tensor->layout(); if (ifm_tensor->num_dimensions() != ifm_tensor->info()->num_dimensions()) @@ -1483,7 +1501,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) for (const auto &output_index : output_indexes) outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout(); if (axis < 0) axis += input_rank; @@ -1526,7 +1544,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node) auto input = _tensor_reg->getAclTensor(input_index)->handle(); auto output = _tensor_reg->getAclTensor(output_index)->handle(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout(); ::arm_compute::PaddingList padding_list; diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.h b/runtime/onert/backend/acl_cl/KernelGenerator.h index e8a9226..22a7c18 100644 --- a/runtime/onert/backend/acl_cl/KernelGenerator.h +++ b/runtime/onert/backend/acl_cl/KernelGenerator.h @@ -17,7 +17,7 @@ #ifndef __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__ #define __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__ -#include +#include #include "ir/Operands.h" #include "TensorBuilder.h" @@ -31,7 +31,7 @@ namespace backend namespace acl_cl { -class KernelGenerator : public IKernelGenerator +class KernelGenerator : public cpu_common::KernelGeneratorBase { public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, @@ -39,60 +39,61 @@ public: const std::shared_ptr> &_tensor_reg); void visit(const ir::OpSequence &) override; + + void visit(const ir::operation::ArgMinMax &) override; void visit(const ir::operation::BatchToSpaceND &) override; void visit(const ir::operation::BinaryArithmetic &) override; + void visit(const ir::operation::Comparison &) override; + void visit(const ir::operation::Concat &) override; void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::ConvertFp16ToFp32 &) override; + void visit(const ir::operation::ConvertFp32ToFp16 &) override; + void visit(const 
ir::operation::DepthToSpace &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::Concat &) override; - void visit(const ir::operation::FullyConnected &) override; - void visit(const ir::operation::Reduce &) override; - void visit(const ir::operation::Reshape &) override; - void visit(const ir::operation::Squeeze &) override; - void visit(const ir::operation::Softmax &) override; - void visit(const ir::operation::Slice &) override; - void visit(const ir::operation::StridedSlice &) override; - void visit(const ir::operation::Transpose &) override; void visit(const ir::operation::ElementwiseActivation &) override; void visit(const ir::operation::ElementwiseBinary &) override; void visit(const ir::operation::ElementwiseUnary &) override; + void visit(const ir::operation::EmbeddingLookup &) override; void visit(const ir::operation::ExpandDims &) override; + void visit(const ir::operation::FullyConnected &) override; + void visit(const ir::operation::Gather &) override; + void visit(const ir::operation::HashtableLookup &) override; void visit(const ir::operation::InstanceNorm &) override; - void visit(const ir::operation::Comparison &) override; + void visit(const ir::operation::L2Normalization &) override; + void visit(const ir::operation::LocalResponseNormalization &) override; void visit(const ir::operation::LSTM &) override; void visit(const ir::operation::OneHot &) override; void visit(const ir::operation::Pack &) override; - void visit(const ir::operation::Pool2D &) override; + void visit(const ir::operation::Pad &) override; void visit(const ir::operation::Permute &) override; + void visit(const ir::operation::Pool2D &) override; + void visit(const ir::operation::PReLU &) override; + void visit(const ir::operation::Reduce &) override; + void visit(const ir::operation::Reshape &) override; void visit(const ir::operation::ResizeBilinear &) override; void visit(const ir::operation::ResizeNearestNeighbor &) override; + void visit(const ir::operation::Reverse &) override; void visit(const ir::operation::RNN &) override; + void visit(const ir::operation::Slice &) override; + void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::SpaceToBatchND &) override; void visit(const ir::operation::SpaceToDepth &) override; - void visit(const ir::operation::EmbeddingLookup &) override; - void visit(const ir::operation::L2Normalization &) override; - void visit(const ir::operation::HashtableLookup &) override; - void visit(const ir::operation::PReLU &) override; - void visit(const ir::operation::TransposeConv &) override; - void visit(const ir::operation::SquaredDifference &) override; - void visit(const ir::operation::TopKV2 &) override; - void visit(const ir::operation::Gather &) override; - void visit(const ir::operation::ArgMax &) override; - void visit(const ir::operation::LocalResponseNormalization &) override; - void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::Split &) override; void visit(const ir::operation::SplitV &) override; + void visit(const ir::operation::SquaredDifference &) override; + void visit(const ir::operation::Squeeze &) override; + void visit(const ir::operation::StridedSlice &) override; + void visit(const ir::operation::TopKV2 &) override; + void visit(const ir::operation::Transpose &) override; + void visit(const ir::operation::TransposeConv &) override; void visit(const ir::operation::Unpack &) override; - void visit(const ir::operation::Pad &) override; 
- void visit(const ir::operation::ConvertFp32ToFp16 &) override; - void visit(const ir::operation::ConvertFp16ToFp32 &) override; - void visit(const ir::operation::Reverse &) override; private: const ir::Operands &_ctx; const ir::Operations &_operations_ctx; std::shared_ptr _tensor_builder; std::shared_ptr> _tensor_reg; - ir::Layout _current_op_seq_layout; + ir::Layout _current_layout; }; } // namespace acl_cl diff --git a/runtime/onert/backend/acl_cl/Optimizer.h b/runtime/onert/backend/acl_cl/Optimizer.h index 18d38ec..ad51548 100644 --- a/runtime/onert/backend/acl_cl/Optimizer.h +++ b/runtime/onert/backend/acl_cl/Optimizer.h @@ -17,8 +17,7 @@ #ifndef __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__ #define __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__ -#include -#include +#include "BackendContext.h" #include "TensorBuilder.h" namespace onert @@ -28,12 +27,12 @@ namespace backend namespace acl_cl { -class Optimizer : public IOptimizer +class Optimizer { public: Optimizer(BackendContext *context); - void optimize() override; + void optimize(); private: BackendContext *_context; diff --git a/runtime/onert/backend/acl_cl/acl_cl.cc b/runtime/onert/backend/acl_cl/acl_cl.cc index 88378b1..82cbde0 100644 --- a/runtime/onert/backend/acl_cl/acl_cl.cc +++ b/runtime/onert/backend/acl_cl/acl_cl.cc @@ -14,20 +14,11 @@ * limitations under the License. */ -#include - #include "Backend.h" extern "C" { -onert::backend::Backend *onert_backend_create() -{ - VERBOSE(onert_backend_create) << "'acl_cl' loaded\n"; - return new onert::backend::acl_cl::Backend; -} -void onert_backend_destroy(onert::backend::Backend *backend) -{ - VERBOSE(onert_backend_create) << "'acl_cl' unloaded\n"; - delete backend; -} +onert::backend::Backend *onert_backend_create() { return new onert::backend::acl_cl::Backend; } + +void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; } } diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.cc b/runtime/onert/backend/acl_common/AclConstantInitializer.cc index 21f41a3..921d107 100644 --- a/runtime/onert/backend/acl_common/AclConstantInitializer.cc +++ b/runtime/onert/backend/acl_common/AclConstantInitializer.cc @@ -25,7 +25,7 @@ namespace acl_common AclConstantInitializer::AclConstantInitializer(const ir::Operands &operands, const std::shared_ptr &tensor_reg) - : IConstantInitializer{operands}, _tensor_reg{tensor_reg} + : cpu_common::ConstantInitializerBase{operands}, _tensor_reg{tensor_reg} { // DO NOTHING } diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.h b/runtime/onert/backend/acl_common/AclConstantInitializer.h index 52f4c54..894e2e7 100644 --- a/runtime/onert/backend/acl_common/AclConstantInitializer.h +++ b/runtime/onert/backend/acl_common/AclConstantInitializer.h @@ -17,7 +17,7 @@ #ifndef __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__ #define __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__ -#include +#include #include #include "AclTensorRegistry.h" @@ -28,7 +28,7 @@ namespace backend namespace acl_common { -class AclConstantInitializer : public IConstantInitializer +class AclConstantInitializer : public cpu_common::ConstantInitializerBase { public: AclConstantInitializer(const ir::Operands &operands, diff --git a/runtime/onert/backend/acl_common/AclTensorBuilder.h b/runtime/onert/backend/acl_common/AclTensorBuilder.h index bb7abc9..12e9ab8 100644 --- a/runtime/onert/backend/acl_common/AclTensorBuilder.h +++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h @@ -21,7 +21,6 @@ #include #include -#include #include 
"ir/OperandIndexMap.h" #include #include "AclTensorManager.h" @@ -43,14 +42,12 @@ enum class UsesType LAST }; -template -class AclTensorBuilder : public ITensorBuilder +template class AclTensorBuilder { public: using T_AclTensorManager = AclTensorManager; - AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr, - const std::shared_ptr> &tensor_reg); + AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr); /** * @brief Register tensor information to allocate on ACL-CL backend @@ -59,16 +56,16 @@ public: * @param[in] layout Tensor data layout */ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, - ir::Layout backend_layout) override; + ir::Layout backend_layout); - void notifyFirstUse(const ir::OperandIndex &) override; - void notifyLastUse(const ir::OperandIndex &) override; + void notifyFirstUse(const ir::OperandIndex &); + void notifyLastUse(const ir::OperandIndex &); - bool isRegistered(const ir::OperandIndex &) const override; + bool isRegistered(const ir::OperandIndex &) const; - void prepare(void) override; - void allocate() override; - void postFunctionPrepare() override; + void prepare(void); + void allocate(); + void postFunctionPrepare(); T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); } @@ -105,7 +102,6 @@ private: ir::OperandIndexMap _uses_count_map; std::unique_ptr _tensor_mgr; - std::shared_ptr> _tensor_reg; // for linear executor std::vector> _lifetime_seq; @@ -133,10 +129,9 @@ namespace acl_common { template -AclTensorBuilder::AclTensorBuilder( - const ir::Operands &operands, T_AclTensorManager *tensor_mgr, - const std::shared_ptr> &tensor_reg) - : _operands{operands}, _tensor_mgr{tensor_mgr}, _tensor_reg{tensor_reg} +AclTensorBuilder::AclTensorBuilder(const ir::Operands &operands, + T_AclTensorManager *tensor_mgr) + : _operands{operands}, _tensor_mgr{tensor_mgr} { assert(_tensor_mgr); } diff --git a/runtime/onert/backend/acl_common/Convert.cc b/runtime/onert/backend/acl_common/Convert.cc index 67d9d71..7d3a690 100644 --- a/runtime/onert/backend/acl_common/Convert.cc +++ b/runtime/onert/backend/acl_common/Convert.cc @@ -109,13 +109,19 @@ namespace acl_common case ir::DataType::UINT8: return ::arm_compute::DataType::U8; case ir::DataType::QUANT_INT8_SYMM: - return ::arm_compute::DataType::S8; + return ::arm_compute::DataType::QSYMM8; + case ir::DataType::QUANT_INT8_ASYMM: + return ::arm_compute::DataType::QASYMM8_SIGNED; case ir::DataType::FLOAT16: return ::arm_compute::DataType::F16; case ir::DataType::INT64: return ::arm_compute::DataType::S64; + case ir::DataType::QUANT_INT16_ASYMM: + return ::arm_compute::DataType::QASYMM16; + case ir::DataType::QUANT_INT8_SYMM_PER_CHANNEL: + return ::arm_compute::DataType::QSYMM8_PER_CHANNEL; default: - throw std::runtime_error("Not supported, yet"); + throw std::runtime_error("Not supported internal data type, yet"); break; } } @@ -175,7 +181,7 @@ namespace acl_common return ::arm_compute::ActivationLayerInfo{ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f}; default: - throw std::runtime_error{"Not supported, yet"}; + throw std::runtime_error{"Not supported internal activation, yet"}; break; } } @@ -219,7 +225,7 @@ asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, return ::arm_compute::ActivationLayerInfo{ ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha}; default: - throw std::runtime_error{"Not supported, yet"}; + throw std::runtime_error{"Not 
supported internal elementwise activation, yet"}; break; } } @@ -295,6 +301,8 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type) return ir::DataType::UINT32; case ::arm_compute::DataType::QASYMM8: return ir::DataType::QUANT_UINT8_ASYMM; + case ::arm_compute::DataType::QASYMM8_SIGNED: + return ir::DataType::QUANT_INT8_ASYMM; case ::arm_compute::DataType::U8: return ir::DataType::UINT8; case ::arm_compute::DataType::QSYMM8: @@ -304,7 +312,7 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type) case ::arm_compute::DataType::S64: return ir::DataType::INT64; default: - throw std::runtime_error{"Not supported, yet"}; + throw std::runtime_error{"Not supported acl data type, yet"}; break; } } diff --git a/runtime/onert/backend/acl_neon/Backend.h b/runtime/onert/backend/acl_neon/Backend.h index 35d6e4e..b11c197 100644 --- a/runtime/onert/backend/acl_neon/Backend.h +++ b/runtime/onert/backend/acl_neon/Backend.h @@ -21,6 +21,7 @@ #include #include +#include "BackendContext.h" #include "Config.h" #include "ConstantInitializer.h" #include "KernelGenerator.h" @@ -41,21 +42,20 @@ public: std::shared_ptr config() const override { return _config; } - std::unique_ptr newContext(const ir::Graph &graph, - const std::shared_ptr &, - bool is_linear_executor) const override + std::unique_ptr + newContext(const ir::Graph &graph, const std::shared_ptr &, + bool is_linear_executor) const override { const auto &operands = graph.operands(); const auto &operations = graph.operations(); - auto context = std::make_unique(this, &graph); + auto context = std::make_unique(this, &graph); auto tm = createTensorManager(is_linear_executor); auto tr = std::make_shared>(tm); - auto tb = std::make_shared(operands, tm, tr); + auto tb = std::make_shared(operands, tm); context->tensor_registry = tr; context->tensor_builder = tb; context->constant_initializer = std::make_shared(operands, tr); context->kernel_gen = std::make_shared(operands, operations, tb, tr); - context->tensor_register = nullptr; context->optimizer = std::make_shared(context.get()); return context; } diff --git a/runtime/onert/backend/acl_neon/BackendContext.cc b/runtime/onert/backend/acl_neon/BackendContext.cc new file mode 100644 index 0000000..8b53171 --- /dev/null +++ b/runtime/onert/backend/acl_neon/BackendContext.cc @@ -0,0 +1,302 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "Optimizer.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" + +namespace onert +{ +namespace backend +{ +namespace acl_neon +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +void BackendContext::planTensors(const std::vector &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info) +{ + ir::OperandIndexMap uses_map; + ir::OperandIndexMap def_map; + ir::OperandIndexSequence constants; + + // Prepare scanning + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + const auto &li = lower_info.operand.at(ind); + if (li->def_factors().getOnlyElement().backend() != backend()) + continue; + + // Ignore unused tensor + if (li->def_factors().size() == 0 && li->use_factors().size() == 0) + { + VERBOSE(planTensors) << "Operand #" << ind.value() << " will not be used. no more process." + << std::endl; + return; + } + + uses_map[ind] = obj.getUses().size(); + def_map[ind] = obj.getDef().valid() ? 1 : 0; + + if (obj.isConstant()) + constants.append(ind); + + auto factor = li->def_factors().getOnlyElement(); + if (!tensor_builder->isRegistered(ind)) + { + // These tensors do not exist in any op_seq (No use and def) + const auto info = obj.info(); + const auto backend_layout = factor.layout(); + // TODO Change tensor info to have permuted shape + tensor_builder->registerTensorInfo(ind, info, backend_layout); + } + } + + // Start scanning to do notify{First|Last}Use for each tensor + + // If a tensor is a constant, increase the use of the tensor and allocate it first. + // Increasing use count here makes the tensor never be deallocated, i.e it they will be + // deallocated last. + VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl; + for (const auto &ind : constants) + { + uses_map[ind]++; + tensor_builder->notifyFirstUse(ind); + } + + // At each operation, + // 1. Scan DEF of outputs. If the DEF, allocate it + // 2. Scan DEF of inputs. If variable tensor, allocate it + // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0 + for (const auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + for (const auto &op_idx : op_seq.operations()) + { + auto &op = graph()->operations().at(op_idx); + auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + + // Define outputs + for (const auto &ind : op_outputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + assert(def_map.find(ind) != def_map.end()); + if (def_map[ind]) + { + def_map[ind] = 0; + tensor_builder->notifyFirstUse(ind); + } + } + + // Scan variable tensors + // This tensor has features like constant. 
But OperandInfo and LowerInfo treat them as + // non-constant because of less memory usage by memory planning in here + for (const auto &ind : op_inputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + const auto &operand = graph()->operands().at(ind); + if (operand.info().isVariable()) + { + // The variable tensor with buffer is not supported yet + assert(operand.data() == nullptr); + assert(operand.getUses().size() == 1 && !operand.getDef().valid()); + assert(lower_info.operand.at(ind)->def_factors().size() == 1 && + lower_info.operand.at(ind)->use_factors().size() == 1); + assert(uses_map[ind] == 1 && def_map[ind] == 0); + tensor_builder->notifyFirstUse(ind); + } + } + + for (const auto &ind : op_inputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + assert(uses_map.find(ind) != uses_map.end()); + assert(uses_map[ind] > 0); + uses_map[ind]--; + if (uses_map[ind] == 0) + { + // plan for deallocation of static tensornode + tensor_builder->notifyLastUse(ind); + } + } + } + } + + // Dispose and validate + for (const auto &ind : constants) + { + --uses_map[ind]; + if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice + { + tensor_builder->notifyLastUse(ind); + } + } + + assert( + std::all_of(uses_map.begin(), uses_map.end(), + [](std::pair it) { return it.second == 0; })); + + assert( + std::all_of(def_map.begin(), def_map.end(), + [](std::pair it) { return it.second == 0; })); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + optimizer->optimize(); + + for (const auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (const auto op_ind : op_seq) + { + bool op_assigned = [&]() { + for (auto &op_info : operation_list()) + if (op_info.index == op_ind) + return true; + return false; + }(); + if (!op_assigned) + continue; + + const auto &op = graph()->operations().at(op_ind); + for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED) + { + if (!tensor_builder->isRegistered(index) && !model_io.contains(index) && + find(operand_list().begin(), operand_list().end(), index) != operand_list().end()) + { + const auto &operand_lower_info = + lower_info.operand.at(index)->def_factors().getOnlyElement(); + + // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl) + // op.getOutputs() of permute (CPU) returns tensor A + // but tensor A belongs to the backend of acl_cl. + // So, we have to make this tensor NOT registered for CPU. 
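planTensors above is essentially reference counting over a fixed linear order: each operand starts with its use count (constants get one extra use so they are never released), notifyFirstUse fires when the defining operation is reached, and notifyLastUse fires once the count drops to zero. The toy program below walks the same scheme with a fake tensor builder; Op, FakeTensorBuilder, and the hard-coded graph are invented for illustration and do not correspond to runtime classes.

#include <iostream>
#include <map>
#include <string>
#include <vector>

// Hypothetical stand-in for the tensor builder's notify{First|Last}Use calls.
struct FakeTensorBuilder
{
  void notifyFirstUse(const std::string &t) { std::cout << "claim   " << t << "\n"; }
  void notifyLastUse(const std::string &t) { std::cout << "release " << t << "\n"; }
};

struct Op
{
  std::vector<std::string> inputs;
  std::vector<std::string> outputs;
};

int main()
{
  // A -> op0 -> B -> op1 -> C, with "w" a constant weight used by op0.
  std::vector<Op> order{{{"A", "w"}, {"B"}}, {{"B"}, {"C"}}};

  std::map<std::string, int> uses{{"A", 1}, {"w", 1}, {"B", 1}, {"C", 0}};
  std::map<std::string, int> defs{{"A", 0}, {"w", 0}, {"B", 1}, {"C", 1}};

  FakeTensorBuilder tb;

  // Graph inputs and constants are claimed up front; bumping the use count of
  // "w" keeps it from ever reaching zero, mirroring the constant handling above.
  for (const auto &c : {std::string("A"), std::string("w")})
  {
    ++uses[c];
    tb.notifyFirstUse(c);
  }

  for (const auto &op : order)
  {
    for (const auto &o : op.outputs)
      if (defs[o] > 0)
      {
        defs[o] = 0;
        tb.notifyFirstUse(o); // allocate when the def is reached
      }
    for (const auto &i : op.inputs)
      if (--uses[i] == 0)
        tb.notifyLastUse(i); // deallocate after the last use
  }
  return 0;
}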
+ if (operand_lower_info.backend() != backend()) + continue; + + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = op_seq.getLayout(); + const auto backend_layout = operand_lower_info.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + } + } + } + + // TODO Get compiler options from compiler, and use it rather than getting it from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + planTensors(order, op_seqs, lower_info); + } + else + { + // For the executors that does not have fixed linear execution order: + // To make tensors never be deallocated, this is a workaround to use static memory planner + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + tensor_builder->allocate(); + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { + ifunc.prepare(); + tensor_builder->postFunctionPrepare(); + }); + } + + return ret; +} + +} // namespace neon +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/acl_neon/BackendContext.h b/runtime/onert/backend/acl_neon/BackendContext.h new file mode 100644 index 0000000..dd764c0 --- /dev/null +++ b/runtime/onert/backend/acl_neon/BackendContext.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__ + +#include +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" + +namespace onert +{ +namespace backend +{ +namespace acl_neon +{ + +class Optimizer; + +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, const ir::Graph *graph, + std::shared_ptr tensor_registry = nullptr, + std::shared_ptr tensor_builder = nullptr, + std::shared_ptr constant_initializer = nullptr, + std::shared_ptr kernel_gen = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen} + { + } + + ITensorRegistry *genTensors(const std::vector &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + FunctionMap genKernels(const std::vector &order, + const ir::OpSequences &op_seqs) override; + +private: + void initConsts(); + void planTensors(const std::vector &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr tensor_builder; + std::shared_ptr constant_initializer; + std::shared_ptr kernel_gen; + std::shared_ptr optimizer; +}; + +} // namespace acl_neon +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.h b/runtime/onert/backend/acl_neon/ConstantInitializer.h index c7d71cd..9723ba0 100644 --- a/runtime/onert/backend/acl_neon/ConstantInitializer.h +++ b/runtime/onert/backend/acl_neon/ConstantInitializer.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ -#define __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ +#ifndef __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__ #include "AclConstantInitializer.h" @@ -41,4 +41,4 @@ public: } // namespace backend } // namespace onert -#endif // __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ +#endif // __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc index ffaee3b..e712dfa 100644 --- a/runtime/onert/backend/acl_neon/KernelGenerator.cc +++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc @@ -48,7 +48,7 @@ KernelGenerator::KernelGenerator( const std::shared_ptr &tensor_builder, const std::shared_ptr> &tensor_reg) : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder), - _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN) + _tensor_reg(tensor_reg), _current_layout(ir::Layout::UNKNOWN) { // DO NOTHING } @@ -61,7 +61,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) _return_fn_seq = std::make_unique(); _return_fn_seq->enableDynamicShapeInferer(false); - _current_op_seq_layout = op_seq.getLayout(); + _current_layout = op_seq.getLayout(); for (const auto &operation_idx : op_seq.operations()) { const auto &node = _operations_ctx.at(operation_idx); @@ -70,17 +70,17 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) } } -void KernelGenerator::visit(const ir::operation::ArgMax &node) +void KernelGenerator::visit(const ir::operation::ArgMinMax &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ArgMax::Input::AXIS)}; + const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)}; + const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)}; const auto ifm_rank = _ctx.at(ifm_index).shape().rank(); auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index); - auto frontend_layout = _current_op_seq_layout; + auto frontend_layout = _current_layout; auto backend_layout = ifm_tensor->layout(); int axis_value = _ctx.at(axis_index).asScalar(); @@ -91,10 +91,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) assert(axis_value >= 0 && axis_value < ifm_rank); const auto fixed_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value(); + auto reduce_type = node.param().is_arg_max ? 
::arm_compute::ReductionOperation::ARG_IDX_MAX + : ::arm_compute::ReductionOperation::ARG_IDX_MIN; auto fn = acl_common::generateLayer( - ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), - arm_compute::ReductionOperation::ARG_IDX_MAX); + ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), reduce_type); _return_fn = asAclFunction(std::move(fn)); } @@ -106,6 +107,25 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; + const auto NNApiInputs = 2; + if (node.getInputs().size() != NNApiInputs) + { + const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)}; + if (!_ctx.at(crops_index).isConstant()) + { + throw std::runtime_error("Non-constant crops NYI for acl_neon backend BatchToSpaceND"); + } + + auto crops = _ctx.at(crops_index).asVector(); + for (auto crop : crops) + { + if (crop != 0) + { + throw std::runtime_error("Non-zero crops NYI for acl_neon backend BatchToSpaceND"); + } + } + } + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index); auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index); @@ -178,8 +198,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -232,8 +252,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)}; const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)}; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [1, kernel_height, kernel_width, depth_out]. 
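Both ACL backends now accept the three-input, TFLite-style BatchToSpaceND signature (the NNAPI form has only two inputs), but only when the extra crops operand is a compile-time constant that is entirely zero, because the underlying ACL layer is configured from the block size alone. A standalone sketch of that guard, with a plain bool and vector standing in for the operand object:

#include <cstdint>
#include <stdexcept>
#include <vector>

// Hypothetical helper mirroring the guard added to visit(BatchToSpaceND):
// the crops data must be known at compile time and must all be zero,
// otherwise the operation cannot be lowered to the ACL layer.
void checkCropsSupported(bool is_constant, const std::vector<int32_t> &crops)
{
  if (!is_constant)
    throw std::runtime_error("Non-constant crops NYI for acl backend BatchToSpaceND");
  for (auto crop : crops)
    if (crop != 0)
      throw std::runtime_error("Non-zero crops NYI for acl backend BatchToSpaceND");
}

int main()
{
  checkCropsSupported(true, {0, 0, 0, 0}); // accepted
  try
  {
    checkCropsSupported(true, {0, 1, 0, 0}); // rejected: would change the output extent
  }
  catch (const std::runtime_error &)
  {
    // expected
  }
  return 0;
}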
const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -297,7 +317,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node) else { const auto rank = _ctx.at(ofm_index).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); const auto fixed_axis = acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(); @@ -495,7 +515,7 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node) auto fn = acl_common::kernelGenFullyConnected( - node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout); + node, _ctx, _tensor_builder, _tensor_reg, _current_layout); _return_fn = std::make_unique( std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle())); } @@ -552,7 +572,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node) // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case. assert(backend_layout == ifm_tensor->layout()); assert(backend_layout == indices_tensor->layout()); - assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout); + assert(ifm_rank < 4 || _current_layout == backend_layout); // input is n-D, indices k-D, output is (n + k - 1)-D size_t n = ifm_rank; @@ -686,7 +706,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) for (const auto &input_index : input_indexes) inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(output_index)->layout(); if (axis < 0) @@ -738,7 +758,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node) { const int32_t *from = reinterpret_cast(pad_base) + (n * 2); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout(); const auto axis = acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value(); @@ -762,8 +782,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node) void KernelGenerator::visit(const ir::operation::Pool2D &node) { auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( - node, _ctx, _tensor_reg, _current_op_seq_layout, - acl_common::convertPoolType(node.param().op_type)); + node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type)); const auto ofm_index{node.getOutputs().at(0)}; auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); @@ -836,7 +855,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) // Convert to ACL axes taking into account negative values and possible duplicates. const auto &axes = _ctx.at(axes_index); const auto input_rank = _ctx.at(input_index).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = input_tensor->layout(); const auto reduce_axes = acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout); @@ -873,7 +892,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) // NOTE This operation must not be changed the layout from frontend to backend // So, PermutationOperationPass makes layouts of frontend and backend the same. 
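The Pack hunk above, like the Split, Unpack, and OneHot visitors later in this file, first normalizes a possibly negative frontend axis before converting it for ACL. The helper below captures just that normalization step; it is an illustrative stand-in rather than a function that exists in the runtime.

#include <cassert>
#include <cstdint>

// Hypothetical helper: map an axis in [-rank, rank) to [0, rank), the same
// "if (axis < 0) axis += rank;" pattern used by the visitors in this file.
int32_t normalizeAxis(int32_t axis, int32_t rank)
{
  if (axis < 0)
    axis += rank;
  assert(axis >= 0 && axis < rank);
  return axis;
}

int main()
{
  assert(normalizeAxis(-1, 4) == 3); // last axis of a rank-4 tensor
  assert(normalizeAxis(2, 4) == 2);
  return 0;
}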
- const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) || frontend_layout == backend_layout); @@ -1047,7 +1066,7 @@ void KernelGenerator::visit(const ir::operation::Split &node) for (const auto &ofm_ind : output_indexes) output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = ifm_tensor->layout(); auto axis = _ctx.at(axis_index).asScalar(); if (axis < 0) @@ -1085,7 +1104,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node) auto outputData_tensor = _tensor_reg->getAclTensor(output_index); auto inputData_tensor = _tensor_reg->getAclTensor(input_index); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData @@ -1150,7 +1169,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) auto outputData_tensor = _tensor_reg->getAclTensor(output_index); auto inputData_tensor = _tensor_reg->getAclTensor(input_index); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData @@ -1244,9 +1263,9 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node) const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)}; const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)}; - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_layout); const auto stride = node.param().stride; @@ -1285,7 +1304,7 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx); const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = ifm_tensor->layout(); const auto rank = _ctx.at(ifm_idx).shape().rank(); @@ -1340,7 +1359,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) for (const auto &output_index : output_indexes) outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout(); if (axis < 0) axis += input_rank; @@ -1413,7 +1432,7 @@ void KernelGenerator::visit(const ir::operation::OneHot &node) auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx); const size_t output_rank = _ctx.at(out_idx).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto 
backend_layout = output_tensor->layout(); int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis; axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value(); diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.h b/runtime/onert/backend/acl_neon/KernelGenerator.h index 4d269cd..2a4b307 100644 --- a/runtime/onert/backend/acl_neon/KernelGenerator.h +++ b/runtime/onert/backend/acl_neon/KernelGenerator.h @@ -17,7 +17,7 @@ #ifndef __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__ #define __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__ -#include +#include #include "ir/Operands.h" #include "TensorBuilder.h" @@ -31,7 +31,7 @@ namespace backend namespace acl_neon { -class KernelGenerator : public IKernelGenerator +class KernelGenerator : public cpu_common::KernelGeneratorBase { public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, @@ -39,17 +39,20 @@ public: const std::shared_ptr> &_tensor_reg); void visit(const ir::OpSequence &) override; - void visit(const ir::operation::ArgMax &) override; + + void visit(const ir::operation::ArgMinMax &) override; void visit(const ir::operation::BatchToSpaceND &) override; void visit(const ir::operation::BinaryArithmetic &) override; + void visit(const ir::operation::Comparison &) override; + void visit(const ir::operation::Concat &) override; void visit(const ir::operation::Conv2D &) override; void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::Concat &) override; void visit(const ir::operation::ElementwiseActivation &) override; void visit(const ir::operation::ElementwiseBinary &) override; void visit(const ir::operation::ElementwiseUnary &) override; void visit(const ir::operation::EmbeddingLookup &) override; + void visit(const ir::operation::ExpandDims &) override; void visit(const ir::operation::FullyConnected &) override; void visit(const ir::operation::Gather &) override; void visit(const ir::operation::HashtableLookup &) override; @@ -57,36 +60,34 @@ public: void visit(const ir::operation::L2Normalization &) override; void visit(const ir::operation::LocalResponseNormalization &) override; void visit(const ir::operation::LSTM &) override; + void visit(const ir::operation::OneHot &) override; void visit(const ir::operation::Pack &) override; void visit(const ir::operation::Pad &) override; - void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::Permute &) override; + void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::PReLU &) override; void visit(const ir::operation::Reduce &) override; void visit(const ir::operation::Reshape &) override; void visit(const ir::operation::ResizeBilinear &) override; void visit(const ir::operation::RNN &) override; - void visit(const ir::operation::Squeeze &) override; + void visit(const ir::operation::Slice &) override; void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::SpaceToBatchND &) override; void visit(const ir::operation::SpaceToDepth &) override; void visit(const ir::operation::Split &) override; void visit(const ir::operation::SquaredDifference &) override; - void visit(const ir::operation::Slice &) override; + void visit(const ir::operation::Squeeze &) override; void visit(const ir::operation::StridedSlice &) override; - void visit(const ir::operation::TransposeConv &) override; void visit(const ir::operation::Transpose 
&) override; + void visit(const ir::operation::TransposeConv &) override; void visit(const ir::operation::Unpack &) override; - void visit(const ir::operation::ExpandDims &) override; - void visit(const ir::operation::Comparison &) override; - void visit(const ir::operation::OneHot &) override; private: const ir::Operands &_ctx; const ir::Operations &_operations_ctx; std::shared_ptr _tensor_builder; std::shared_ptr> _tensor_reg; - ir::Layout _current_op_seq_layout; + ir::Layout _current_layout; }; } // namespace acl_neon diff --git a/runtime/onert/backend/acl_neon/Optimizer.h b/runtime/onert/backend/acl_neon/Optimizer.h index 5fe0d51..b8fb343 100644 --- a/runtime/onert/backend/acl_neon/Optimizer.h +++ b/runtime/onert/backend/acl_neon/Optimizer.h @@ -17,8 +17,7 @@ #ifndef __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__ #define __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__ -#include -#include +#include "BackendContext.h" #include "TensorBuilder.h" namespace onert @@ -28,12 +27,12 @@ namespace backend namespace acl_neon { -class Optimizer : public IOptimizer +class Optimizer { public: Optimizer(BackendContext *context); - void optimize() override; + void optimize(); private: BackendContext *_context; diff --git a/runtime/onert/backend/acl_neon/acl_neon.cc b/runtime/onert/backend/acl_neon/acl_neon.cc index f490d13..6535fb2 100644 --- a/runtime/onert/backend/acl_neon/acl_neon.cc +++ b/runtime/onert/backend/acl_neon/acl_neon.cc @@ -14,20 +14,11 @@ * limitations under the License. */ -#include - #include "Backend.h" extern "C" { -onert::backend::Backend *onert_backend_create() -{ - VERBOSE(onert_backend_create) << "'acl_neon' loaded\n"; - return new onert::backend::acl_neon::Backend; -} -void onert_backend_destroy(onert::backend::Backend *backend) -{ - VERBOSE(onert_backend_create) << "'acl_neon' unloaded\n"; - delete backend; -} +onert::backend::Backend *onert_backend_create() { return new onert::backend::acl_neon::Backend; } + +void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; } } diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h index fc8574b..0b416a7 100644 --- a/runtime/onert/backend/cpu/Backend.h +++ b/runtime/onert/backend/cpu/Backend.h @@ -54,8 +54,6 @@ public: context->constant_initializer = std::make_shared(operands, tr); context->kernel_gen = std::make_shared(operands, operations, tb, tr, kb, context->external_context()); - context->tensor_register = nullptr; - context->optimizer = nullptr; return context; } diff --git a/runtime/onert/backend/cpu/BackendContext.cc b/runtime/onert/backend/cpu/BackendContext.cc new file mode 100644 index 0000000..6b958c1 --- /dev/null +++ b/runtime/onert/backend/cpu/BackendContext.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" +#include "backend/cpu_common/BackendContextHelpers.h" + +namespace onert +{ +namespace backend +{ +namespace cpu +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (auto index : operand_list()) + { + if (model_io.contains(index)) + continue; + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = [&]() { + if (obj.getUses().size() == 0) + return ir::Layout::UNKNOWN; + auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses? + for (auto &operation_info : operation_list()) + { + if (operation_info.index == use_op_ind) + return operation_info.layout; + } + return ir::Layout::UNKNOWN; + }(); + const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement(); + if (permute_factor.backend() != backend()) + continue; + const auto backend_layout = permute_factor.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + + // TODO Get compiler options from compiler, and use it rather than getting it from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + cpu_common::planTensors(*this, order, op_seqs, lower_info); + } + else + { + // For the executors that does not have fixed linear execution order: + // To make tensors never be deallocated, this is a workaround to use static memory planner + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); }); + } + + return ret; +} + +} // namespace cpu +} // namespace backend +} // namespace onert diff --git 
a/runtime/onert/backend/cpu/BackendContext.h b/runtime/onert/backend/cpu/BackendContext.h index e90b210..0a4106d 100644 --- a/runtime/onert/backend/cpu/BackendContext.h +++ b/runtime/onert/backend/cpu/BackendContext.h @@ -18,6 +18,9 @@ #define __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__ #include +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" #include "ExternalContext.h" namespace onert @@ -32,21 +35,35 @@ class BackendContext : public onert::backend::BackendContext public: BackendContext(const Backend *backend, const ir::Graph *graph, std::shared_ptr tensor_registry = nullptr, - std::shared_ptr tensor_builder = nullptr, - std::shared_ptr constant_initializer = nullptr, - std::shared_ptr kernel_gen = nullptr, - std::shared_ptr tensor_register = nullptr, - std::shared_ptr optimizer = nullptr) - : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder, - constant_initializer, kernel_gen, tensor_register, - optimizer), - _external_context(new ExternalContext) + std::shared_ptr tensor_builder = nullptr, + std::shared_ptr constant_initializer = nullptr, + std::shared_ptr kernel_gen = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen}, _external_context(new ExternalContext) { } + ITensorRegistry *genTensors(const std::vector &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + FunctionMap genKernels(const std::vector &order, + const ir::OpSequences &op_seqs) override; + std::shared_ptr external_context() { return _external_context; } private: + void initConsts(); + void planTensors(const std::vector &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr tensor_builder; + std::shared_ptr constant_initializer; + std::shared_ptr kernel_gen; + +private: // NOTE ruy context has a thread pool, and when multiple ruy contexts are created, // the thread pool is also created in duplicate // TODO Create one ruy context for session diff --git a/runtime/onert/backend/cpu/ConstantInitializer.h b/runtime/onert/backend/cpu/ConstantInitializer.h index c016c83..d7858c0 100644 --- a/runtime/onert/backend/cpu/ConstantInitializer.h +++ b/runtime/onert/backend/cpu/ConstantInitializer.h @@ -14,13 +14,10 @@ * limitations under the License. 
*/ -#ifndef __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__ -#define __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__ +#ifndef __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__ -#include "backend/cpu_common/TensorRegistry.h" - -#include -#include +#include namespace onert { @@ -29,35 +26,10 @@ namespace backend namespace cpu { -class ConstantInitializer : public IConstantInitializer -{ -public: - ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr &tensor_reg); - -public: - void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override; - - // TODO: For now the only cpu backend supports constant tensor to use data from external - // If the other backend supports (to do this, - // ExternalTensor should be abstract such as IExternal, maybe), - // this can be an interface of IConstantInitializer - void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &); - -public: - void visit(const ir::operation::Conv2D &) override; - void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::FullyConnected &) override; - -private: - std::shared_ptr tensor_registry() const override { return _tensor_reg; } - -private: - std::shared_ptr _tensor_reg; -}; +using ConstantInitializer = cpu_common::ConstantInitializer; } // namespace cpu } // namespace backend } // namespace onert -#endif // __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__ +#endif // __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/cpu/ExternalContext.h b/runtime/onert/backend/cpu/ExternalContext.h index 32e249f..f5d11f4 100644 --- a/runtime/onert/backend/cpu/ExternalContext.h +++ b/runtime/onert/backend/cpu/ExternalContext.h @@ -17,7 +17,6 @@ #ifndef __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__ #define __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__ -#include #include #include @@ -33,7 +32,7 @@ namespace backend namespace cpu { -class ExternalContext : public IExternalContext +class ExternalContext { public: ExternalContext() : _ruy_context(new ruy::Context) diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc index 451815b..25756ec 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.cc +++ b/runtime/onert/backend/cpu/KernelGenerator.cc @@ -23,6 +23,7 @@ #include "ops/CompareLayer.h" #include "ops/ConcatLayer.h" #include "ops/ConvolutionLayer.h" +#include "ops/DepthToSpaceLayer.h" #include "ops/DepthwiseConvolutionLayer.h" #include "ops/EinsumLayer.h" #include "ops/ElementwiseActivationLayer.h" @@ -108,12 +109,16 @@ convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type { switch (type_ir) { + case ir::operation::ElementwiseActivation::Type::ELU: + return ops::ElementwiseActivationType::kElu; case ir::operation::ElementwiseActivation::Type::LOGISTIC: return ops::ElementwiseActivationType::kLogistic; case ir::operation::ElementwiseActivation::Type::RELU: return ops::ElementwiseActivationType::kReLU; case ir::operation::ElementwiseActivation::Type::TANH: return ops::ElementwiseActivationType::kTanh; + case ir::operation::ElementwiseActivation::Type::LEAKY_RELU: + return ops::ElementwiseActivationType::kLeakyReLU; default: throw std::runtime_error("cpu KernelGenerator : Not supported operation yet"); } @@ -124,6 +129,8 @@ convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinary { switch (type_ir) { + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND: + 
return ops::ElementwiseBinaryType::kLogicalAnd; case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR: return ops::ElementwiseBinaryType::kLogicalOr; case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX: @@ -167,6 +174,10 @@ ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::Elementwise return ops::ElementwiseUnaryType::kRSqrt; case ir::operation::ElementwiseUnary::Type::SIN: return ops::ElementwiseUnaryType::kSin; + case ir::operation::ElementwiseUnary::Type::SQRT: + return ops::ElementwiseUnaryType::kSqrt; + case ir::operation::ElementwiseUnary::Type::SQUARE: + return ops::ElementwiseUnaryType::kSquare; case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE: return ops::ElementwiseUnaryType::kZerosLike; default: @@ -217,7 +228,7 @@ KernelGenerator::KernelGenerator( const std::shared_ptr &external_context) : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder), - _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context) + _current_layout(ir::Layout::UNKNOWN), _external_context(external_context) { // DO NOTHING } @@ -260,7 +271,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) _return_fn_seq->dynamic_tensor_ctx(dyn_ctx); } - _current_op_seq_layout = op_seq.getLayout(); + _current_layout = op_seq.getLayout(); for (const auto &operation_idx : op_seq.operations()) { const auto &node = _operations_ctx.at(operation_idx); @@ -314,8 +325,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) _return_fn = std::move(fn); return; } - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -342,8 +353,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)}; const auto stride = node.param().stride; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [1, kernel_height, kernel_width, depth_out]. 
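// Editor's note (illustrative sketch, not part of the upstream patch): as the comment above
// says, the depthwise kernel is laid out as [1, kernel_height, kernel_width, depth_out], and
// the channel multiplier from node.param() ties the depths together, roughly
// depth_out == depth_in * multiplier (with depth_in taken from ifm_shape). A hypothetical
// helper stating that invariant could look like:
//
//   inline bool depthwiseDepthsConsistent(int32_t depth_in, uint32_t multiplier, int32_t depth_out)
//   {
//     // depth_out of the kernel must equal input depth times the channel multiplier
//     return depth_out == depth_in * static_cast<int32_t>(multiplier);
//   }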
const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -364,7 +375,7 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width, - dilation_height, activation, ofm_tensor); + dilation_height, activation, ofm_tensor, _external_context); _return_fn = std::move(fn); } @@ -374,7 +385,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node) const auto ofm_index{node.getOutputs().at(0)}; const auto rank = _ctx.at(ofm_index).shape().rank(); - const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); + const auto axis = ops::getAxis(rank, node.param().axis, _current_layout); auto output_tensor = _tensor_reg->getPortableTensor(ofm_index); @@ -418,16 +429,15 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) void KernelGenerator::visit(const ir::operation::Fill &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)}; + // SHAPE input is used for shape inference const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)}; auto output_tensor = _tensor_reg->getPortableTensor(output_index); - auto input_tensor = _tensor_reg->getPortableTensor(input_index); auto value_tensor = _tensor_reg->getPortableTensor(value_index); auto fn = std::make_unique(); - fn->configure(input_tensor, value_tensor, output_tensor); + fn->configure(value_tensor, output_tensor); _return_fn = std::move(fn); } @@ -576,7 +586,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node) assert(backend_layout == indices_tensor->layout()); const auto &input_shape = _ctx.at(input_index).shape(); UNUSED_RELEASE(input_shape); - assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout); + assert(input_shape.rank() < 4 || _current_layout == backend_layout); const auto axis_raw = node.param().axis; const auto axis_value = (axis_raw < 0 ? 
(input_shape.rank() + axis_raw) : axis_raw); @@ -640,7 +650,7 @@ void KernelGenerator::visit(const ir::operation::Custom &node) for (auto &idx : opSeq) { const auto &operand = _ctx.at(idx); - // TODO make sure using `_current_op_seq_layout` is correct for custom operations + // TODO make sure using `_current_layout` is correct for custom operations types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()}); auto in_tensor = _tensor_reg->getPortableTensor(idx); tensors.emplace_back(in_tensor); @@ -713,15 +723,14 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node) { const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; + // AXIS input is used for output shape inference auto output_tensor = _tensor_reg->getPortableTensor(output_index); auto input_tensor = _tensor_reg->getPortableTensor(input_index); - auto axis_tensor = _tensor_reg->getPortableTensor(axis_index); auto fn = std::make_unique(); - fn->configure(input_tensor, axis_tensor, output_tensor); + fn->configure(input_tensor, output_tensor); _return_fn = std::move(fn); } @@ -731,7 +740,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) const auto ofm_index{node.getOutputs().at(0)}; const auto rank = _ctx.at(ofm_index).shape().rank(); - const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); + const auto axis = ops::getAxis(rank, node.param().axis, _current_layout); assert(-rank <= axis && axis < rank); @@ -753,7 +762,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) const auto input_index{node.getInputs().at(0)}; const auto rank = _ctx.at(input_index).shape().rank(); - const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); + const auto axis = ops::getAxis(rank, node.param().axis, _current_layout); assert(rank == 0 || (-rank <= axis && axis < rank)); @@ -1004,11 +1013,11 @@ void KernelGenerator::visit(const ir::operation::Reverse &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::ArgMax &node) +void KernelGenerator::visit(const ir::operation::ArgMinMax &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ArgMax::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ArgMax::AXIS)}; + const auto input_index{node.getInputs().at(ir::operation::ArgMinMax::INPUT)}; + const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::AXIS)}; auto output_tensor = _tensor_reg->getPortableTensor(output_index); auto input_tensor = _tensor_reg->getPortableTensor(input_index); @@ -1016,7 +1025,7 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) auto fn = std::make_unique(); - fn->configure(input_tensor, output_tensor, axis_tensor, /* is_arg_max */ true); + fn->configure(input_tensor, output_tensor, axis_tensor, node.param().is_arg_max); _return_fn = std::move(fn); } @@ -1029,8 +1038,8 @@ void KernelGenerator::visit(const ir::operation::Pool2D &node) const auto kh = node.param().kh; const auto kw = node.param().kw; const auto stride = node.param().stride; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = 
_ctx.at(ofm_index).shape().asFeature(_current_layout); const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); const auto activation = node.param().activation; @@ -1255,6 +1264,21 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) _return_fn = std::move(fn); } +void KernelGenerator::visit(const ir::operation::DepthToSpace &node) +{ + const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; + auto block_size = node.param().block_size; + + auto input_tensor = _tensor_reg->getPortableTensor(input_index); + auto output_tensor = _tensor_reg->getPortableTensor(output_index); + + auto fn = std::make_unique(); + + fn->configure(input_tensor, block_size, output_tensor); + _return_fn = std::move(fn); +} + void KernelGenerator::visit(const ir::operation::SpaceToDepth &node) { const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h index 5df7760..3a4cfbf 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.h +++ b/runtime/onert/backend/cpu/KernelGenerator.h @@ -23,7 +23,7 @@ #include "Tensor.h" #include -#include +#include #include #include @@ -34,7 +34,7 @@ namespace backend namespace cpu { -class KernelGenerator : public IKernelGenerator +class KernelGenerator : public cpu_common::KernelGeneratorBase { public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, @@ -43,59 +43,59 @@ public: const std::shared_ptr &kernel_builder, const std::shared_ptr &external_context); - using IKernelGenerator::visit; + void visit(const ir::OpSequence &) override; void visit(const ir::operation::AddN &) override; - void visit(const ir::OpSequence &) override; + void visit(const ir::operation::ArgMinMax &) override; + void visit(const ir::operation::BatchMatMul &) override; + void visit(const ir::operation::BatchToSpaceND &) override; + void visit(const ir::operation::BinaryArithmetic &) override; + void visit(const ir::operation::BroadcastTo &) override; + void visit(const ir::operation::Comparison &) override; + void visit(const ir::operation::Concat &) override; void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::Custom &node) override; + void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::Concat &) override; - void visit(const ir::operation::Fill &) override; - void visit(const ir::operation::FullyConnected &) override; - void visit(const ir::operation::Reshape &) override; - void visit(const ir::operation::Squeeze &) override; - void visit(const ir::operation::Softmax &) override; - void visit(const ir::operation::Comparison &) override; - void visit(const ir::operation::BinaryArithmetic &) override; void visit(const ir::operation::Einsum &) override; - void visit(const ir::operation::Gather &) override; - void visit(const ir::operation::Custom &node) override; void visit(const ir::operation::ElementwiseActivation &) override; void visit(const ir::operation::ElementwiseBinary &) override; void visit(const ir::operation::ElementwiseUnary &) override; void visit(const ir::operation::ExpandDims &) override; + void visit(const ir::operation::Fill &) override; + void visit(const ir::operation::FullyConnected &) override; + void visit(const 
ir::operation::FusedBatchNorm &) override; + void visit(const ir::operation::Gather &) override; + void visit(const ir::operation::L2Normalization &) override; + void visit(const ir::operation::LogSoftmax &) override; void visit(const ir::operation::LSTM &) override; - void visit(const ir::operation::Pad &) override; - void visit(const ir::operation::Pack &) override; - void visit(const ir::operation::Unpack &) override; + void visit(const ir::operation::MatrixBandPart &) override; void visit(const ir::operation::OneHot &) override; - void visit(const ir::operation::Transpose &) override; - void visit(const ir::operation::Reduce &) override; - void visit(const ir::operation::Select &) override; - void visit(const ir::operation::Slice &) override; - void visit(const ir::operation::StridedSlice &) override; - void visit(const ir::operation::Split &) override; - void visit(const ir::operation::Shape &) override; - void visit(const ir::operation::ResizeBilinear &node) override; - void visit(const ir::operation::Reverse &) override; - void visit(const ir::operation::ArgMax &) override; + void visit(const ir::operation::Pack &) override; + void visit(const ir::operation::Pad &) override; void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::Pow &) override; - void visit(const ir::operation::SquaredDifference &) override; - void visit(const ir::operation::Tile &) override; - void visit(const ir::operation::L2Normalization &) override; void visit(const ir::operation::Range &) override; void visit(const ir::operation::Rank &) override; - void visit(const ir::operation::MatrixBandPart &) override; - void visit(const ir::operation::BatchMatMul &) override; - void visit(const ir::operation::BatchToSpaceND &) override; - void visit(const ir::operation::BroadcastTo &) override; - void visit(const ir::operation::FusedBatchNorm &) override; - void visit(const ir::operation::LogSoftmax &) override; + void visit(const ir::operation::Reduce &) override; + void visit(const ir::operation::Reshape &) override; + void visit(const ir::operation::ResizeBilinear &node) override; + void visit(const ir::operation::Reverse &) override; + void visit(const ir::operation::Select &) override; + void visit(const ir::operation::Shape &) override; + void visit(const ir::operation::Slice &) override; + void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::SpaceToBatchND &) override; void visit(const ir::operation::SpaceToDepth &) override; - void visit(const ir::operation::StatelessRandomUniform &) override; + void visit(const ir::operation::Split &) override; void visit(const ir::operation::SplitV &) override; + void visit(const ir::operation::SquaredDifference &) override; + void visit(const ir::operation::Squeeze &) override; + void visit(const ir::operation::StatelessRandomUniform &) override; + void visit(const ir::operation::StridedSlice &) override; + void visit(const ir::operation::Tile &) override; + void visit(const ir::operation::Transpose &) override; + void visit(const ir::operation::Unpack &) override; private: const ir::Operands &_ctx; @@ -103,7 +103,7 @@ private: std::shared_ptr _tensor_builder; std::shared_ptr _tensor_reg; std::shared_ptr _kernel_builder; - ir::Layout _current_op_seq_layout; + ir::Layout _current_layout; const std::shared_ptr _external_context; }; diff --git a/runtime/onert/backend/cpu/StaticTensorManager.cc b/runtime/onert/backend/cpu/StaticTensorManager.cc deleted file mode 100644 index 3edac89..0000000 --- 
a/runtime/onert/backend/cpu/StaticTensorManager.cc +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "StaticTensorManager.h" -#include "Tensor.h" - -#include - -namespace onert -{ -namespace backend -{ -namespace cpu -{ - -StaticTensorManager::StaticTensorManager(const std::shared_ptr ®, - cpu_common::DynamicTensorManager *dynamic_tensor_manager) - : _nonconst_mgr{new cpu_common::MemoryManager()}, _tensors{reg}, - _dynamic_tensor_manager{dynamic_tensor_manager} -{ - // DO NOTHING -} - -void StaticTensorManager::allocateNonconsts(void) -{ - _nonconst_mgr->allocate(); - - for (auto &pair : _tensors->native_tensors()) - { - const auto &ind = pair.first; - auto tensor = pair.second.get(); - if (!_as_constants[ind] && !tensor->is_dynamic()) - { - auto *buffer = _nonconst_mgr->getBuffer(ind); - tensor->setBuffer(buffer); - - VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value() - << "): " << static_cast(buffer) << std::endl; - } - } -} - -void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); } - -void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, - const ir::OperandInfo &tensor_info, ir::Layout backend_layout, - bool as_const) -{ - assert(!_tensors->getITensor(ind)); - if (as_const) - { - auto tensor = std::make_unique(tensor_info, backend_layout); - _tensors->setNativeTensor(ind, std::move(tensor)); - } - else - { - auto tensor = std::make_unique(tensor_info, backend_layout, - _dynamic_tensor_manager->dynamic_mem_mgr().get()); - _tensors->setNativeTensor(ind, std::move(tensor)); - } - _as_constants[ind] = as_const; -} - -void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size) -{ - assert(_tensors->getITensor(ind)); - - // This method is called only when a tensor has proper shape - assert(!_tensors->getITensor(ind)->is_dynamic()); - - if (!_as_constants[ind]) - _nonconst_mgr->claimPlan(ind, size); -} - -void StaticTensorManager::releasePlan(const ir::OperandIndex &ind) -{ - assert(_tensors->getITensor(ind)); - - // This method is called only when a tensor has proper shape - assert(!_tensors->getITensor(ind)->is_dynamic()); - - if (!_as_constants[ind]) - _nonconst_mgr->releasePlan(ind); -} - -void StaticTensorManager::iterate(const std::function &fn) -{ - for (const auto &it : _tensors->native_tensors()) - fn(it.first); -} - -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/StaticTensorManager.h b/runtime/onert/backend/cpu/StaticTensorManager.h index 2af61e4..d07f0c8 100644 --- a/runtime/onert/backend/cpu/StaticTensorManager.h +++ b/runtime/onert/backend/cpu/StaticTensorManager.h @@ -17,13 +17,7 @@ #ifndef __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__ #define __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__ -#include "backend/IStaticTensorManager.h" -#include "backend/cpu_common/DynamicTensorManager.h" -#include 
"backend/cpu_common/MemoryManager.h" -#include "backend/cpu_common/TensorRegistry.h" -#include "backend/ITensorManager.h" -#include "ir/OperandIndexMap.h" -#include "ir/OperandInfo.h" +#include "backend/cpu_common/StaticTensorManager.h" namespace onert { @@ -32,30 +26,7 @@ namespace backend namespace cpu { -class StaticTensorManager : public backend::IStaticTensorManager -{ -public: - StaticTensorManager(const std::shared_ptr ®, - cpu_common::DynamicTensorManager *dynamic_tensor_manager); - virtual ~StaticTensorManager() = default; - - void allocateNonconsts(void); - void deallocateNonconsts(void); - - void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, - ir::Layout backend_layout, bool as_const); - - void claimPlan(const ir::OperandIndex &ind, uint32_t size); - void releasePlan(const ir::OperandIndex &ind); - - void iterate(const std::function &fn); - -private: - std::unique_ptr _nonconst_mgr; - const std::shared_ptr _tensors; - ir::OperandIndexMap _as_constants; - cpu_common::DynamicTensorManager *_dynamic_tensor_manager; -}; +using StaticTensorManager = cpu_common::StaticTensorManager; } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/cpu/Tensor.h b/runtime/onert/backend/cpu/Tensor.h index 2ad2ad0..d663c3f 100644 --- a/runtime/onert/backend/cpu/Tensor.h +++ b/runtime/onert/backend/cpu/Tensor.h @@ -28,92 +28,7 @@ namespace cpu { using Tensor = cpu_common::Tensor; - -/** - * @brief Class that uses data from external memory that is not managed by a backend - * instead of allocating and copying the data. ExternalTensor's data pointer points to - * an address of memory such as where memory is already allocated, or mmapped area. - * This is meaning that ExternalTensor can take all of types' ir::Data. - * To support this, assume below things no padding, always NHWC layout, - * constant tensor and not dynamic. - */ -class ExternalTensor : public Tensor -{ -public: - ExternalTensor() = delete; - virtual ~ExternalTensor(); - -public: - ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout) - : Tensor(info, layout, nullptr) - { - assert(_layout == ir::Layout::NHWC); - assert(_info.isConstant()); - assert(_info.isDynamic() == false); - } - -public: - /** - * @brief set Data to be shared from external so that this ExternalTensor will not be - * allocated on CPU backend - * @param[in] data data of Operand to be set - */ - void setData(const std::shared_ptr data) - { - assert(data != nullptr); - _data = data; - // Note. Some op such as cker::Conv could take buffer as nullptr. 
- // That's why _buffer also would be used - _buffer = const_cast(_data->base()); - } - -public: - uint8_t *buffer() const override { return _buffer; } - - bool is_constant() const override { return true; } - bool is_dynamic() const override { return false; } - void set_dynamic() override - { - throw std::runtime_error("This tensor does not support changing dynamic"); - } - - void setShape(const ir::Shape &) override - { - throw std::runtime_error("This tensor does not support changing shape"); - } - - void increase_ref() override { ++_num_references; } - - void decrease_ref() override - { - assert(_data != nullptr); - assert(_num_references > 0); - --_num_references; - if (_num_references == 0) - { - _data.reset(); - _buffer = nullptr; - } - } - - /** - * @brief Reset reference count to zero and release data - */ - void reset_ref() override - { - assert(_data != nullptr); - assert(_num_references > 0); - _num_references = 0; - - _data.reset(); - _buffer = nullptr; - } - - int32_t num_references() override { return _num_references; } - -private: - std::shared_ptr _data; -}; +using ExternalTensor = cpu_common::ExternalTensor; } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/cpu/TensorBuilder.h b/runtime/onert/backend/cpu/TensorBuilder.h index 448abc2..9d8a5de 100644 --- a/runtime/onert/backend/cpu/TensorBuilder.h +++ b/runtime/onert/backend/cpu/TensorBuilder.h @@ -20,7 +20,6 @@ #include #include -#include #include #include "StaticTensorManager.h" @@ -35,7 +34,7 @@ namespace backend namespace cpu { -class TensorBuilder : public ITensorBuilder +class TensorBuilder { public: TensorBuilder(const std::shared_ptr &tensor_reg); @@ -47,18 +46,18 @@ public: * @param[in] layout Operand data layout */ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, - ir::Layout backend_layout) override; + ir::Layout backend_layout); - void notifyFirstUse(const ir::OperandIndex &) override; - void notifyLastUse(const ir::OperandIndex &) override; + void notifyFirstUse(const ir::OperandIndex &); + void notifyLastUse(const ir::OperandIndex &); - bool isRegistered(const ir::OperandIndex &) const override; + bool isRegistered(const ir::OperandIndex &) const; - void prepare(void) override; - void allocate() override; - void postFunctionPrepare() override { /* DO NOTHING */} + void prepare(void); + void allocate(); + void postFunctionPrepare() { /* DO NOTHING */} - IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); } + IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); } private: const std::shared_ptr _tensor_reg; diff --git a/runtime/onert/backend/cpu/cpu.cc b/runtime/onert/backend/cpu/cpu.cc index 5385bb2..55538e2 100644 --- a/runtime/onert/backend/cpu/cpu.cc +++ b/runtime/onert/backend/cpu/cpu.cc @@ -16,18 +16,9 @@ #include "Backend.h" -#include - extern "C" { -onert::backend::Backend *onert_backend_create() -{ - VERBOSE(onert_backend_create) << "'cpu' loaded\n"; - return new onert::backend::cpu::Backend; -} -void onert_backend_destroy(onert::backend::Backend *backend) -{ - VERBOSE(onert_backend_create) << "'cpu' unloaded\n"; - delete backend; -} +onert::backend::Backend *onert_backend_create() { return new onert::backend::cpu::Backend; } + +void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; } } diff --git a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc index 2fd284c..d5ffdef 100644 --- 
a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc +++ b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc @@ -79,6 +79,9 @@ void ArgMinMaxLayer::run() case ir::DataType::UINT8: TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t); break; + case ir::DataType::QUANT_INT8_ASYMM: + TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t); + break; case ir::DataType::INT32: TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int32_t); break; @@ -97,6 +100,9 @@ void ArgMinMaxLayer::run() case ir::DataType::UINT8: TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t); break; + case ir::DataType::QUANT_INT8_ASYMM: + TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t); + break; case ir::DataType::INT32: TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int64_t); break; diff --git a/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc b/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc index 7ef0237..ba96559 100644 --- a/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc +++ b/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc @@ -67,7 +67,7 @@ void BatchMatMulLayer::configure(const IPortableTensor *lhs, const IPortableTens void BatchMatMulLayer::run() { - if (_lhs->data_type() == OperandType::FLOAT32) + if ((_lhs->data_type() == OperandType::FLOAT32) && (_rhs->data_type() == OperandType::FLOAT32)) { batchMatMulFloat32(); } diff --git a/runtime/onert/backend/cpu/ops/ConcatLayer.cc b/runtime/onert/backend/cpu/ops/ConcatLayer.cc index d26ed73..edfdfc1 100644 --- a/runtime/onert/backend/cpu/ops/ConcatLayer.cc +++ b/runtime/onert/backend/cpu/ops/ConcatLayer.cc @@ -117,24 +117,26 @@ void ConcatLayer::configure(const std::vector &inputs, void ConcatLayer::run() { - if (_output->data_type() == OperandType::FLOAT32) + switch (_output->data_type()) { - concatenationGeneral(); + case OperandType::FLOAT32: + concatenationGeneral(); + break; + case OperandType::QUANT_UINT8_ASYMM: + concatenationQuant8(); + break; + case OperandType::QUANT_INT8_ASYMM: + concatenationGeneral(); + break; + case OperandType::INT32: + concatenationGeneral(); + break; + case OperandType::INT64: + concatenationGeneral(); + break; + default: + throw std::runtime_error("Concat: unsupported data type"); } - else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - concatenationQuant8(); - } - else if (_output->data_type() == OperandType::INT32) - { - concatenationGeneral(); - } - else if (_output->data_type() == OperandType::INT64) - { - concatenationGeneral(); - } - else - throw std::runtime_error("Concat: unsupported data type"); } } // namespace ops diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc index 799e9e2..c964e38 100644 --- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc +++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc @@ -203,8 +203,6 @@ void ConvolutionLayer::prepare() _prepare = true; } -#undef ANDROID_NN_CONV_PARAMETERS - } // namespace ops } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc new file mode 100644 index 0000000..d265d0a --- /dev/null +++ b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "DepthToSpaceLayer.h" + +#include "OperationUtils.h" + +#include + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ +DepthToSpaceLayer::DepthToSpaceLayer() : _input(nullptr), _block_size(0), _output(nullptr) +{ + // DO NOTHING +} + +template void DepthToSpaceLayer::depthToSpace() +{ + nnfw::cker::DepthToSpace(getTensorShape(_input), reinterpret_cast(_input->buffer()), + getTensorShape(_output), reinterpret_cast(_output->buffer()), + _block_size); +} + +void DepthToSpaceLayer::configure(const IPortableTensor *input, const int32_t block_size, + IPortableTensor *output) +{ + _input = input; + _block_size = block_size; + _output = output; +} + +void DepthToSpaceLayer::run() +{ + switch (_input->data_type()) + { + case OperandType::FLOAT32: + depthToSpace(); + break; + case OperandType::INT32: + depthToSpace(); + break; + case OperandType::INT64: + depthToSpace(); + break; + case OperandType::QUANT_UINT8_ASYMM: + depthToSpace(); + break; + case OperandType::QUANT_INT8_ASYMM: + depthToSpace(); + break; + default: + throw std::runtime_error{"DepthToSpace: unsupported data type"}; + } +} + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h new file mode 100644 index 0000000..32e0171 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__ + +#include + +#include + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ +class DepthToSpaceLayer : public ::onert::exec::IFunction +{ +public: + DepthToSpaceLayer(); + + void configure(const IPortableTensor *input, const int32_t block_size, IPortableTensor *output); + + void run() override; + +private: + template void depthToSpace(); + + const IPortableTensor *_input; + int32_t _block_size; + IPortableTensor *_output; +}; + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc index f1dc110..85553d1 100644 --- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc +++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc @@ -43,11 +43,12 @@ void DepthwiseConvolutionLayer::convFloat32() op_params.float_activation_min = output_activation_min; op_params.float_activation_max = output_activation_max; - nnfw::cker::DepthwiseConv( + nnfw::cker::DepthwiseConv( op_params, getTensorShape(_input), reinterpret_cast(_input->buffer()), getTensorShape(_kernel), reinterpret_cast(_kernel->buffer()), getTensorShape(_bias), reinterpret_cast(_bias->buffer()), - getTensorShape(_output), reinterpret_cast(_output->buffer())); + getTensorShape(_output), reinterpret_cast(_output->buffer()), + _external_context->ruy_context()); } void DepthwiseConvolutionLayer::convQuant8() @@ -79,11 +80,12 @@ void DepthwiseConvolutionLayer::convQuant8() op_params.quantized_activation_min = output_activation_min; op_params.quantized_activation_max = output_activation_max; - nnfw::cker::DepthwiseConv( + nnfw::cker::DepthwiseConv( op_params, getTensorShape(_input), reinterpret_cast(_input->buffer()), getTensorShape(_kernel), reinterpret_cast(_kernel->buffer()), getTensorShape(_bias), reinterpret_cast(_bias->buffer()), - getTensorShape(_output), reinterpret_cast(_output->buffer())); + getTensorShape(_output), reinterpret_cast(_output->buffer()), + _external_context->ruy_context()); } void DepthwiseConvolutionLayer::configure( @@ -91,7 +93,8 @@ void DepthwiseConvolutionLayer::configure( const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight, const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight, - const ir::Activation activation, IPortableTensor *output) + const ir::Activation activation, IPortableTensor *output, + const std::shared_ptr &external_context) { _input = input; _kernel = kernel; @@ -107,6 +110,7 @@ void DepthwiseConvolutionLayer::configure( _dilationHeight = dilationHeight; _activation = activation; _output = output; + _external_context = external_context; } void DepthwiseConvolutionLayer::run() diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h index fb032ec..fe1fcc1 100644 --- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h +++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h @@ -19,6 +19,7 @@ #include #include "OperationUtils.h" +#include "../ExternalContext.h" #include @@ -47,7 +48,7 @@ public: const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH, const uint32_t 
multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight, const ir::Activation activation, - IPortableTensor *output); + IPortableTensor *output, const std::shared_ptr &external_context); void run() override; @@ -71,6 +72,8 @@ private: uint32_t _dilationHeight{1}; ir::Activation _activation{ir::Activation::NONE}; + + std::shared_ptr _external_context; }; } // namespace ops diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc index c1d6317..3e1da5e 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc +++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc @@ -18,6 +18,8 @@ #include "OperationUtils.h" +#include +#include #include #include #include @@ -91,6 +93,19 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab switch (op_type) { + case ElementwiseActivationType::kElu: + if (input->data_type() == OperandType::FLOAT32) + { + _kernel = [](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::ELU(getTensorShape(input), reinterpret_cast(input->buffer()), + getTensorShape(output), reinterpret_cast(output->buffer())); + }; + } + else + { + throw std::runtime_error{"ElementwiseActivationLayer(Elu): unsupported data type"}; + } + break; case ElementwiseActivationType::kLogistic: if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) { @@ -160,6 +175,21 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"}; } break; + case ElementwiseActivationType::kLeakyReLU: + if (_input->data_type() == OperandType::FLOAT32) + { + _kernel = [alpha](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::LeakyReLU(nnfw::cker::LeakyReluParams{alpha}, getTensorShape(input), + reinterpret_cast(input->buffer()), + getTensorShape(output), + reinterpret_cast(output->buffer())); + }; + } + else + { + throw std::runtime_error{"ElementwiseActivationLayer(LeakyReLU): unsupported data type"}; + } + break; default: throw std::runtime_error("ElementwiseActivationLayer: unsupported op type"); } diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h index 3ef5800..948ab3b 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h +++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h @@ -32,9 +32,11 @@ namespace ops enum class ElementwiseActivationType { + kElu, kLogistic, kReLU, - kTanh + kTanh, + kLeakyReLU }; class ElementwiseActivationLayer : public ::onert::exec::IFunction diff --git a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc index ea3c1e7..1e17a08 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc +++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc @@ -18,6 +18,7 @@ #include "OperationUtils.h" +#include #include #include @@ -33,6 +34,25 @@ namespace ops namespace { template +void logicalAndGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, + IPortableTensor *output) +{ + if (!HaveSameShapes(lhs, rhs)) + { + nnfw::cker::LogicalAndBroadcast( + getTensorShape(lhs), reinterpret_cast(lhs->buffer()), getTensorShape(rhs), + reinterpret_cast(rhs->buffer()), getTensorShape(output), + reinterpret_cast(output->buffer())); + } + else + { + nnfw::cker::LogicalAndElementwise( + getTensorShape(lhs), 
reinterpret_cast(lhs->buffer()), + reinterpret_cast(rhs->buffer()), reinterpret_cast(output->buffer())); + } +} + +template void logicalOrGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output) { @@ -88,6 +108,16 @@ void ElementwiseBinaryLayer::configure(const IPortableTensor *lhs, const IPortab switch (op_type) { + case ElementwiseBinaryType::kLogicalAnd: + if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8)) + { + _kernel = logicalAndGeneric; + } + else + { + throw std::runtime_error{"LogicalOr: Unsupported data type"}; + } + break; case ElementwiseBinaryType::kLogicalOr: if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8)) { diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc index 066455e..15d7f30 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc +++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc @@ -195,6 +195,18 @@ void sinFloat32(const IPortableTensor *input, IPortableTensor *output) getTensorShape(output), reinterpret_cast(output->buffer())); } +void sqrtFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Sqrt(getTensorShape(input), reinterpret_cast(input->buffer()), + getTensorShape(output), reinterpret_cast(output->buffer())); +} + +void squareFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Square(getTensorShape(input), reinterpret_cast(input->buffer()), + getTensorShape(output), reinterpret_cast(output->buffer())); +} + template void zerosLikeFloat32(const IPortableTensor *input, IPortableTensor *output) { if (!HaveSameShapes(input, output)) @@ -363,6 +375,26 @@ void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTen throw std::runtime_error{"Sin: Unsupported data type"}; } break; + case ElementwiseUnaryType::kSqrt: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = sqrtFloat32; + } + else + { + throw std::runtime_error{"Sqrt: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kSquare: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = squareFloat32; + } + else + { + throw std::runtime_error{"Square: Unsupported data type"}; + } + break; case ElementwiseUnaryType::kZerosLike: if (input->data_type() == OperandType::FLOAT32) { diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h index c1765b5..54a6fc0 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h +++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h @@ -46,6 +46,8 @@ enum class ElementwiseUnaryType kRound, kRSqrt, kSin, + kSqrt, + kSquare, kZerosLike }; diff --git a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc index b545e67..5ea0ea8 100644 --- a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc +++ b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc @@ -25,22 +25,19 @@ namespace cpu namespace ops { -ExpandDimsLayer::ExpandDimsLayer() : _input(nullptr), _axis(nullptr), _output(nullptr) +ExpandDimsLayer::ExpandDimsLayer() : _input(nullptr), _output(nullptr) { // DO NOTHING } -void ExpandDimsLayer::configure(const IPortableTensor *input, const IPortableTensor *axis, - IPortableTensor *output) +void ExpandDimsLayer::configure(const IPortableTensor *input, IPortableTensor *output) { _input = input; - _axis = axis; _output = output; } void 
ExpandDimsLayer::run() { - // TODO use _axis to calculate shape of output when _axis is not constant size_t count = _input->total_size(); memcpy(_output->buffer(), _input->buffer(), count); } diff --git a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h index b5d4938..1b7ead0 100644 --- a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h +++ b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h @@ -36,14 +36,12 @@ public: ExpandDimsLayer(); public: - void configure(const IPortableTensor *input, const IPortableTensor *axis, - IPortableTensor *output); + void configure(const IPortableTensor *input, IPortableTensor *output); void run() override; private: const IPortableTensor *_input; - const IPortableTensor *_axis; IPortableTensor *_output; }; diff --git a/runtime/onert/backend/cpu/ops/FillLayer.cc b/runtime/onert/backend/cpu/ops/FillLayer.cc index df3f8b7..5b7c179 100644 --- a/runtime/onert/backend/cpu/ops/FillLayer.cc +++ b/runtime/onert/backend/cpu/ops/FillLayer.cc @@ -29,15 +29,13 @@ namespace cpu namespace ops { -FillLayer::FillLayer() : _input(nullptr), _value(nullptr), _output(nullptr) +FillLayer::FillLayer() : _value(nullptr), _output(nullptr) { // DO NOTHING } -void FillLayer::configure(const IPortableTensor *input, const IPortableTensor *value, - IPortableTensor *output) +void FillLayer::configure(const IPortableTensor *value, IPortableTensor *output) { - _input = input; _value = value; _output = output; } @@ -47,28 +45,24 @@ void FillLayer::run() switch (_output->data_type()) { case OperandType::FLOAT32: - nnfw::cker::Fill(getTensorShape(_input), reinterpret_cast(_input->buffer()), - reinterpret_cast(_value->buffer()), + nnfw::cker::Fill(reinterpret_cast(_value->buffer()), getTensorShape(_output), reinterpret_cast(_output->buffer())); break; case OperandType::INT32: - nnfw::cker::Fill(getTensorShape(_input), reinterpret_cast(_input->buffer()), - reinterpret_cast(_value->buffer()), + nnfw::cker::Fill(reinterpret_cast(_value->buffer()), getTensorShape(_output), reinterpret_cast(_output->buffer())); break; case OperandType::INT64: - nnfw::cker::Fill(getTensorShape(_input), reinterpret_cast(_input->buffer()), - reinterpret_cast(_value->buffer()), + nnfw::cker::Fill(reinterpret_cast(_value->buffer()), getTensorShape(_output), reinterpret_cast(_output->buffer())); break; case OperandType::UINT32: - nnfw::cker::Fill( - getTensorShape(_input), reinterpret_cast(_input->buffer()), - reinterpret_cast(_value->buffer()), getTensorShape(_output), - reinterpret_cast(_output->buffer())); + nnfw::cker::Fill(reinterpret_cast(_value->buffer()), + getTensorShape(_output), + reinterpret_cast(_output->buffer())); break; default: throw std::runtime_error{"Fill: unsupported data type"}; diff --git a/runtime/onert/backend/cpu/ops/FillLayer.h b/runtime/onert/backend/cpu/ops/FillLayer.h index 1f17d6b..ce84365 100644 --- a/runtime/onert/backend/cpu/ops/FillLayer.h +++ b/runtime/onert/backend/cpu/ops/FillLayer.h @@ -35,13 +35,11 @@ class FillLayer : public ::onert::exec::IFunction public: FillLayer(); - void configure(const IPortableTensor *input, const IPortableTensor *value, - IPortableTensor *output); + void configure(const IPortableTensor *value, IPortableTensor *output); void run() override; private: - const IPortableTensor *_input; const IPortableTensor *_value; IPortableTensor *_output; }; diff --git a/runtime/onert/backend/cpu/ops/MeanLayer.cc b/runtime/onert/backend/cpu/ops/MeanLayer.cc index 4921ac7..f130692 100644 --- 
a/runtime/onert/backend/cpu/ops/MeanLayer.cc +++ b/runtime/onert/backend/cpu/ops/MeanLayer.cc @@ -36,9 +36,24 @@ MeanLayer::MeanLayer() : _input(nullptr), _axes(nullptr), _output(nullptr), _kee void MeanLayer::MeanFloat32() { - nnfw::cker::Mean(getTensorShape(_input), reinterpret_cast(_input->buffer()), - getTensorShape(_output), reinterpret_cast(_output->buffer()), - getReducerAxes(_axes)); + const auto inputShape = getTensorShape(_input); + const auto axisVec = getReducerAxes(_axes); + bool axis_is_1_and_2 = + _keep_dims && inputShape.DimensionsCount() == 4 && axisVec.size() == 2 && + ((axisVec[0] == 1 && axisVec[1] == 2) || (axisVec[0] == 2 && axisVec[1] == 1)); + + if (axis_is_1_and_2) + { + nnfw::cker::MeanAxis1And2(inputShape, reinterpret_cast(_input->buffer()), + getTensorShape(_output), + reinterpret_cast(_output->buffer())); + } + else + { + nnfw::cker::Mean(inputShape, reinterpret_cast(_input->buffer()), + getTensorShape(_output), reinterpret_cast(_output->buffer()), + axisVec); + } } void MeanLayer::MeanQuant8() @@ -57,6 +72,10 @@ void MeanLayer::configure(const IPortableTensor *input, const IPortableTensor *a _axes = axes; _output = output; _keep_dims = keep_dims; + + if (_input->data_type() != OperandType::FLOAT32 && + _input->data_type() != OperandType::QUANT_UINT8_ASYMM) + throw std::runtime_error{"Mean: unsupported data type"}; } void MeanLayer::run() diff --git a/runtime/onert/backend/ruy/Backend.h b/runtime/onert/backend/ruy/Backend.h new file mode 100644 index 0000000..bc8a024 --- /dev/null +++ b/runtime/onert/backend/ruy/Backend.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_RUY_BACKEND_H__ +#define __ONERT_BACKEND_RUY_BACKEND_H__ + +#include "BackendContext.h" +#include "Config.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" + +#include + +#include + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class Backend : public ::onert::backend::Backend +{ +public: + Backend() : _config{std::make_shared()} {} + + std::shared_ptr config() const override { return _config; } + + std::unique_ptr + newContext(const ir::Graph &graph, const std::shared_ptr &kb, + bool) const override + { + const auto &operands = graph.operands(); + const auto &operations = graph.operations(); + auto context = std::make_unique(this, &graph); + auto tr = std::make_shared(); + auto tb = std::make_shared(tr); + context->tensor_registry = tr; + context->tensor_builder = tb; + context->constant_initializer = std::make_shared(operands, tr); + context->kernel_gen = std::make_shared(operands, operations, tb, tr, kb, + context->external_context()); + return context; + } + +private: + std::shared_ptr _config; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_BACKEND_H__ diff --git a/runtime/onert/backend/ruy/BackendContext.cc b/runtime/onert/backend/ruy/BackendContext.cc new file mode 100644 index 0000000..ef686f4 --- /dev/null +++ b/runtime/onert/backend/ruy/BackendContext.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" +#include "backend/cpu_common/BackendContextHelpers.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (auto index : operand_list()) + { + if (model_io.contains(index)) + continue; + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = [&]() { + if (obj.getUses().size() == 0) + return ir::Layout::UNKNOWN; + auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses? 
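// Editor's note (descriptive comment, not part of the upstream patch): the loop below
// resolves this operand's frontend layout from the layout recorded for the operation that
// first uses it; when operation_list() has no matching entry, the lambda falls back to
// ir::Layout::UNKNOWN before the shape is permuted to the backend layout.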
+ for (auto &operation_info : operation_list()) + { + if (operation_info.index == use_op_ind) + return operation_info.layout; + } + return ir::Layout::UNKNOWN; + }(); + const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement(); + if (permute_factor.backend() != backend()) + continue; + const auto backend_layout = permute_factor.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + + // TODO Get compiler options from compiler, and use it rather than getting it from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + cpu_common::planTensors(*this, order, op_seqs, lower_info); + } + else + { + // For the executors that does not have fixed linear execution order: + // To make tensors never be deallocated, this is a workaround to use static memory planner + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); }); + } + + return ret; +} + +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/BackendContext.h b/runtime/onert/backend/ruy/BackendContext.h new file mode 100644 index 0000000..b965c9a --- /dev/null +++ b/runtime/onert/backend/ruy/BackendContext.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__ + +#include +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" +#include "ExternalContext.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, const ir::Graph *graph, + std::shared_ptr tensor_registry = nullptr, + std::shared_ptr tensor_builder = nullptr, + std::shared_ptr constant_initializer = nullptr, + std::shared_ptr kernel_gen = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen}, _external_context(new ExternalContext) + { + } + + ITensorRegistry *genTensors(const std::vector &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + + FunctionMap genKernels(const std::vector &order, + const ir::OpSequences &op_seqs) override; + + std::shared_ptr external_context() { return _external_context; } + +private: + void initConsts(); + void planTensors(const std::vector &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr tensor_builder; + std::shared_ptr constant_initializer; + std::shared_ptr kernel_gen; + +private: + // NOTE ruy context has a thread pool, and when multiple ruy contexts are created, + // the thread pool is also created in duplicate + // TODO Create one ruy context for session + std::shared_ptr _external_context; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/ruy/CMakeLists.txt b/runtime/onert/backend/ruy/CMakeLists.txt new file mode 100644 index 0000000..206acbf --- /dev/null +++ b/runtime/onert/backend/ruy/CMakeLists.txt @@ -0,0 +1,22 @@ +set(LIB_ONERT_BACKEND_RUY onert_backend_ruy) + +nnfw_find_package(Ruy REQUIRED) + +file(GLOB_RECURSE SOURCES "*.cc") + +add_library(${LIB_ONERT_BACKEND_RUY} SHARED ${SOURCES}) + +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_lib_ruy) +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE onert_core) +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_common) +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_coverage) +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE ruy) + +set_target_properties(${LIB_ONERT_BACKEND_RUY} PROPERTIES OUTPUT_NAME backend_ruy) + +if(CMAKE_BUILD_TYPE_LC STREQUAL "release") + add_custom_command(TARGET ${LIB_ONERT_BACKEND_RUY} POST_BUILD + COMMAND ${CMAKE_STRIP} "--strip-unneeded" $) +endif() + +install(TARGETS ${LIB_ONERT_BACKEND_RUY} DESTINATION lib) diff --git a/runtime/onert/backend/cpu/Tensor.cc b/runtime/onert/backend/ruy/Config.cc similarity index 79% rename from runtime/onert/backend/cpu/Tensor.cc rename to runtime/onert/backend/ruy/Config.cc index dac8f89..179caa9 100644 --- a/runtime/onert/backend/cpu/Tensor.cc +++ b/runtime/onert/backend/ruy/Config.cc @@ -14,18 +14,18 @@ * limitations under the License. 
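The ruy BackendContext above owns a single ExternalContext and hands the same shared_ptr to every kernel it generates, which is why its NOTE points out that the ruy thread pool is duplicated only when multiple backend contexts exist. A simplified sketch of that ownership pattern, with stand-in types rather than the real onert classes:

#include <memory>

// Stand-in for the backend's ExternalContext (in the runtime it wraps a ruy::Context
// and therefore a thread pool).
struct ExternalContext
{
  // thread pool, scratch state, ... created once per backend context
};

// Stand-in for a generated kernel that needs the shared context at run time.
struct Kernel
{
  std::shared_ptr<ExternalContext> ctx;
};

struct BackendContextSketch
{
  std::shared_ptr<ExternalContext> external_context = std::make_shared<ExternalContext>();

  Kernel makeKernel() const
  {
    // Every kernel references the same context, so the expensive thread pool
    // behind it exists exactly once for this backend context.
    return Kernel{external_context};
  }
};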
*/ -#include "Tensor.h" +#include "Config.h" namespace onert { namespace backend { -namespace cpu +namespace ruy { -// `dynamic_cast` not working across library boundaries on NDK -// With this as a key function, `dynamic_cast` works across dl -ExternalTensor::~ExternalTensor() {} +bool Config::initialize() { return true; } + +ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; } } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/ruy/Config.h b/runtime/onert/backend/ruy/Config.h new file mode 100644 index 0000000..9160dd5 --- /dev/null +++ b/runtime/onert/backend/ruy/Config.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_CONFIG_H__ +#define __ONERT_BACKEND_RUY_CONFIG_H__ + +#include +#include +#include + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class Config : public IConfig +{ +public: + std::string id() override { return "ruy"; } + bool initialize() override; + ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override; + bool supportPermutation() override { return true; } + bool supportDynamicTensor() override { return true; } + bool supportFP16() override { return false; } + + std::unique_ptr timer() override { return std::make_unique(); } +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_CONFIG_H__ diff --git a/runtime/onert/backend/ruy/ConstantInitializer.h b/runtime/onert/backend/ruy/ConstantInitializer.h new file mode 100644 index 0000000..24b4d92 --- /dev/null +++ b/runtime/onert/backend/ruy/ConstantInitializer.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__ + +#include + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +using ConstantInitializer = cpu_common::ConstantInitializer; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/ruy/ExternalContext.h b/runtime/onert/backend/ruy/ExternalContext.h new file mode 100644 index 0000000..f51facc --- /dev/null +++ b/runtime/onert/backend/ruy/ExternalContext.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__ +#define __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__ + +#include +#include + +namespace +{ +const int kDefaultNumThreadpoolThreads = 4; +} + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class ExternalContext +{ +public: + ExternalContext() : _ruy_context(new ::ruy::Context) + { + setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS)); + } + + void setMaxNumThreads(int max_num_threads) + { + const int target_num_threads = + max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads; + _ruy_context->set_max_num_threads(target_num_threads); + } + + ::ruy::Context *ruy_context() const { return _ruy_context.get(); } + +private: + const std::unique_ptr<::ruy::Context> _ruy_context; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__ diff --git a/runtime/onert/backend/ruy/KernelGenerator.cc b/runtime/onert/backend/ruy/KernelGenerator.cc new file mode 100644 index 0000000..cd28250 --- /dev/null +++ b/runtime/onert/backend/ruy/KernelGenerator.cc @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
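The ruy ExternalContext above reads RUY_THREADS from the environment-backed config and falls back to a default of 4 threads when the configured value is negative. A self-contained sketch of that resolution rule, with the config lookup replaced by a plain parameter:

// Resolve the ruy thread-pool size: any value > -1 is taken as-is,
// otherwise fall back to the compiled-in default (4 in the code above).
int resolveMaxNumThreads(int configured, int default_threads = 4)
{
  return configured > -1 ? configured : default_threads;
}

// resolveMaxNumThreads(-1) -> 4   (unset, use the default)
// resolveMaxNumThreads(0)  -> 0
// resolveMaxNumThreads(8)  -> 8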
+ */ + +#include "KernelGenerator.h" + +#include "ops/ConvolutionLayer.h" +#include "ops/FullyConnectedLayer.h" + +#include +#include +#include +#include +#include +#include + +#include + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +KernelGenerator::KernelGenerator( + const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr &tensor_builder, + const std::shared_ptr &tensor_reg, + const std::shared_ptr &kernel_builder, + const std::shared_ptr &external_context) + : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder), + _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder), + _current_layout(ir::Layout::UNKNOWN), _external_context(external_context) +{ + // DO NOTHING +} + +void KernelGenerator::visit(const ir::OpSequence &op_seq) +{ + assert(!_return_fn_seq); + assert(_tensor_builder->dynamicTensorManager()); + assert(_tensor_reg); + + auto dyn_shape_inferer = std::make_shared(_ctx, _tensor_reg); + + _return_fn_seq = std::make_unique(); + + // Prepare to handle dynamic tensors later + auto dyn_ctx = std::make_shared(); + { + dyn_ctx->op_seq = &op_seq; + dyn_ctx->operations = &_operations_ctx; + dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer); + dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager(); + + _return_fn_seq->dynamic_tensor_ctx(dyn_ctx); + } + + _current_layout = op_seq.getLayout(); + for (const auto &operation_idx : op_seq.operations()) + { + const auto &node = _operations_ctx.at(operation_idx); + node.accept(*this); + _return_fn_seq->append(releaseFunction()); + + for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs()) + { + auto portable_tensor = _tensor_reg->getPortableTensor(ind); + if (portable_tensor) + { + assert(portable_tensor->layout() == ir::Layout::NHWC); + } + + auto tensor = _tensor_reg->getNativeTensor(ind); + if (tensor) + { + tensor->increase_ref(); + } + } + } +} + +void KernelGenerator::visit(const ir::operation::Conv2D &node) +{ + using ir::operation::Conv2D; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)}; + const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; + + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index); + auto ker_tensor = _tensor_reg->getPortableTensor(ker_index); + auto bias_tensor = _tensor_reg->getPortableTensor(bias_index); + + const auto stride = node.param().stride; + const auto activation = node.param().activation; + const auto param_padding = node.param().padding; + const auto dilation = node.param().dilation; + auto fn = std::make_unique(); + + if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic()) + { + fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left, + param_padding.param.right, param_padding.param.top, param_padding.param.bottom, + stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor, + activation, ofm_tensor, _external_context); + + _return_fn = std::move(fn); + return; + } + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. 
+ const auto &ker_shape = _ctx.at(ker_index).shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + + const auto padding = + ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height, + dilation.width_factor, dilation.height_factor); + + fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left, + padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical, + dilation.width_factor, dilation.height_factor, activation, ofm_tensor, + _external_context); + + _return_fn = std::move(fn); +} + +void KernelGenerator::visit(const ir::operation::FullyConnected &node) +{ + using ir::operation::FullyConnected; + + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)}; + const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)}; + const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)}; + const auto activation = node.param().activation; + const auto weights_format = node.param().weights_format; + + auto output_tensor = _tensor_reg->getPortableTensor(output_index); + auto input_tensor = _tensor_reg->getPortableTensor(input_index); + auto weight_tensor = _tensor_reg->getPortableTensor(weight_index); + auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index); + + auto fn = std::make_unique(); + + fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor, + _external_context); + + _return_fn = std::move(fn); +} + +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/KernelGenerator.h b/runtime/onert/backend/ruy/KernelGenerator.h new file mode 100644 index 0000000..0f6bd59 --- /dev/null +++ b/runtime/onert/backend/ruy/KernelGenerator.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
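In the static-shape branch of the Conv2D visit above, ir::calculatePadding turns a SAME/VALID padding spec plus input/output shapes, stride and dilation into explicit left/right/top/bottom values before the kernel is configured. A rough one-dimensional version of the usual TensorFlow-style SAME computation is sketched below for illustration only; onert's own helper remains the authority on edge cases:

#include <algorithm>
#include <cstdint>

struct Padding1D
{
  int32_t before;
  int32_t after;
};

// SAME padding along one spatial dimension, taking dilation into account.
Padding1D samePadding(int32_t in, int32_t stride, int32_t kernel, int32_t dilation)
{
  const int32_t effective_kernel = (kernel - 1) * dilation + 1;
  const int32_t out = (in + stride - 1) / stride; // ceil(in / stride)
  const int32_t needed = std::max<int32_t>(0, (out - 1) * stride + effective_kernel - in);
  return {needed / 2, needed - needed / 2}; // the extra pixel, if any, goes after
}

// Example: in=224, stride=2, kernel=3, dilation=1 -> effective kernel 3,
// out=112, needed=1, padding {0, 1}.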
+ */ + +#ifndef __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__ +#define __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__ + +#include "ExternalContext.h" +#include "TensorBuilder.h" +#include "backend/cpu_common/TensorRegistry.h" +#include "Tensor.h" + +#include +#include +#include +#include + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class KernelGenerator : public cpu_common::KernelGeneratorBase +{ +public: + KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr &tensor_builder, + const std::shared_ptr &tensor_reg, + const std::shared_ptr &kernel_builder, + const std::shared_ptr &external_context); + + void visit(const ir::OpSequence &) override; + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::FullyConnected &) override; + +private: + const ir::Operands &_ctx; + const ir::Operations &_operations_ctx; + std::shared_ptr _tensor_builder; + std::shared_ptr _tensor_reg; + std::shared_ptr _kernel_builder; + ir::Layout _current_layout; + const std::shared_ptr _external_context; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__ diff --git a/runtime/onert/backend/ruy/StaticTensorManager.h b/runtime/onert/backend/ruy/StaticTensorManager.h new file mode 100644 index 0000000..af2d252 --- /dev/null +++ b/runtime/onert/backend/ruy/StaticTensorManager.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__ +#define __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__ + +#include "backend/cpu_common/StaticTensorManager.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +using StaticTensorManager = cpu_common::StaticTensorManager; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__ diff --git a/runtime/onert/backend/ruy/Tensor.h b/runtime/onert/backend/ruy/Tensor.h new file mode 100644 index 0000000..60d0fbf --- /dev/null +++ b/runtime/onert/backend/ruy/Tensor.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_RUY_TENSOR_H__ +#define __ONERT_BACKEND_RUY_TENSOR_H__ + +#include +#include + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +using Tensor = cpu_common::Tensor; +using ExternalTensor = cpu_common::ExternalTensor; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_TENSOR_H__ diff --git a/runtime/onert/backend/ruy/TensorBuilder.cc b/runtime/onert/backend/ruy/TensorBuilder.cc new file mode 100644 index 0000000..c77defc --- /dev/null +++ b/runtime/onert/backend/ruy/TensorBuilder.cc @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "TensorBuilder.h" + +#include + +#include + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +TensorBuilder::TensorBuilder(const std::shared_ptr &tensor_reg) + : _tensor_reg{tensor_reg}, + _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)}, + _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())} +{ + /* empty */ +} + +void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout layout) +{ + _tensor_info_map.emplace(ind, info); + + // CPU backend supports only one layout as NHWC + assert(layout == ir::Layout::NHWC); + if (info.isDynamic()) + { + _dynamic_tensor_mgr->buildTensor(ind, info, layout); + } + else + { + _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant()); + } +} + +void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) +{ + assert(_tensor_info_map.find(ind) != _tensor_info_map.end()); + const auto tensor_info = _tensor_info_map.at(ind); + + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) + { + const auto size = tensor_info.total_size(); + _static_tensor_mgr->claimPlan(ind, size); + } +} + +void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) +{ + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) + { + _static_tensor_mgr->releasePlan(ind); + } +} + +bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const +{ + return _tensor_info_map.find(ind) != _tensor_info_map.end(); +} + +void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); } + +void TensorBuilder::allocate() +{ + // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate + // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation. +} + +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/TensorBuilder.h b/runtime/onert/backend/ruy/TensorBuilder.h new file mode 100644 index 0000000..91c07bd --- /dev/null +++ b/runtime/onert/backend/ruy/TensorBuilder.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
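The TensorBuilder above splits tensors into static ones, planned through claimPlan/releasePlan on first and last use, and dynamic ones, allocated at run time. The claim/release protocol is what lets the static memory planner size its arena and reuse buffers. A compact stand-alone model of that bookkeeping follows; real planners also assign offsets, while this toy only tracks the high-water mark:

#include <cstddef>
#include <unordered_map>

// Tracks live static tensors and the peak of their combined size,
// mimicking notifyFirstUse -> claimPlan and notifyLastUse -> releasePlan.
class ToyStaticPlanner
{
public:
  void claim(int index, std::size_t size)
  {
    _live[index] = size;
    _current += size;
    if (_current > _peak)
      _peak = _current;
  }

  void release(int index)
  {
    auto it = _live.find(index);
    if (it == _live.end())
      return;
    _current -= it->second;
    _live.erase(it);
  }

  std::size_t peak() const { return _peak; } // memory an arena allocator would need

private:
  std::unordered_map<int, std::size_t> _live;
  std::size_t _current = 0;
  std::size_t _peak = 0;
};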
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__ +#define __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__ + +#include +#include + +#include + +#include "StaticTensorManager.h" +#include "Tensor.h" + +#include + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class TensorBuilder +{ +public: + TensorBuilder(const std::shared_ptr &tensor_reg); + + /** + * @brief Register tensor information to allocate on CPU backend + * @param[in] ind Operand index + * @param[in] info Operand information + * @param[in] layout Operand data layout + */ + void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout backend_layout); + + void notifyFirstUse(const ir::OperandIndex &); + void notifyLastUse(const ir::OperandIndex &); + + bool isRegistered(const ir::OperandIndex &) const; + + void prepare(void); + void allocate(); + void postFunctionPrepare() { /* DO NOTHING */} + + IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); } + +private: + const std::shared_ptr _tensor_reg; + std::unique_ptr _dynamic_tensor_mgr; + std::unique_ptr _static_tensor_mgr; + ir::OperandIndexMap _tensor_info_map; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__ diff --git a/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc b/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc new file mode 100644 index 0000000..d249b2c --- /dev/null +++ b/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ConvolutionLayer.h" + +#include "../Tensor.h" +#include "ir/Padding.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ +ConvolutionLayer::ConvolutionLayer() + : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr), + _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0), + _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1), + _dilationHeightFactor(1), _activation(ir::Activation::NONE), + _conv_kernel(new nnfw::ruy::Conv()), _prepare(false) +{ + // DO NOTHING +} + +ConvolutionLayer::~ConvolutionLayer() = default; + +void ConvolutionLayer::convFloat32() +{ + float output_activation_min = 0, output_activation_max = 0; + CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); + + nnfw::ruy::ConvParams op_params; + op_params.padding_type = getPaddingType(_paddingType); + op_params.padding_values.width = _paddingLeft; + op_params.padding_values.height = _paddingTop; + op_params.stride_width = _strideWidth; + op_params.stride_height = _strideHeight; + op_params.dilation_width_factor = _dilationWidthFactor; + op_params.dilation_height_factor = _dilationHeightFactor; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + nnfw::ruy::Conv &kernel = *_conv_kernel; + kernel(op_params, getTensorShape(_input), reinterpret_cast(_input->buffer()), + getTensorShape(_kernel), reinterpret_cast(_kernel->buffer()), + getTensorShape(_bias), reinterpret_cast(_bias->buffer()), + getTensorShape(_output), reinterpret_cast(_output->buffer()), + _external_context->ruy_context()); +} + +void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, const ir::PaddingType paddingType, + const uint32_t paddingLeft, const uint32_t paddingRight, + const uint32_t paddingTop, const uint32_t paddingBottom, + const uint32_t strideWidth, const uint32_t strideHeight, + const uint32_t dilationWidthFactor, + const uint32_t dilationHeightFactor, + const ir::Activation activation, IPortableTensor *output, + const std::shared_ptr &external_context) +{ + _input = input; + _kernel = kernel; + _bias = bias; + _paddingType = paddingType; + _paddingLeft = paddingLeft; + _paddingRight = paddingRight; + _paddingTop = paddingTop; + _paddingBottom = paddingBottom; + _strideWidth = strideWidth; + _strideHeight = strideHeight; + _dilationWidthFactor = dilationWidthFactor; + _dilationHeightFactor = dilationHeightFactor; + _activation = activation; + _output = output; + _external_context = external_context; +} + +void ConvolutionLayer::run() +{ + prepare(); + + if (_input->is_dynamic() || _kernel->is_dynamic()) + { + const auto ifm_shape = _input->getShape().asFeature(_input->layout()); + const auto ofm_shape = _output->getShape().asFeature(_input->layout()); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. 
+ const auto ker_shape = _kernel->getShape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + + ir::Stride stride; + stride.vertical = _strideWidth; + stride.horizontal = _strideWidth; + + ir::Padding param_padding; + param_padding.type = _paddingType; + param_padding.param.left = _paddingLeft; + param_padding.param.right = _paddingRight; + param_padding.param.top = _paddingTop; + param_padding.param.bottom = _paddingBottom; + + const auto padding = + ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height, + _dilationWidthFactor, _dilationHeightFactor); + + _paddingLeft = padding.left; + _paddingRight = padding.right; + _paddingTop = padding.top; + _paddingBottom = padding.bottom; + } + if (_input->data_type() == OperandType::FLOAT32) + { + convFloat32(); + } + else + { + throw std::runtime_error{"Conv: unsupported data type"}; + } +} + +void ConvolutionLayer::prepare() +{ + if (_prepare) + return; + + nnfw::ruy::Conv &kernel = *_conv_kernel; + if (_input->data_type() == OperandType::FLOAT32 && _kernel->is_constant()) + { + kernel.prepare(getTensorShape(_input), getTensorShape(_kernel), getTensorShape(_output), + _strideWidth, _strideHeight, _dilationWidthFactor, _dilationHeightFactor); + } + _prepare = true; +} + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/ops/ConvolutionLayer.h b/runtime/onert/backend/ruy/ops/ConvolutionLayer.h new file mode 100644 index 0000000..a55387b --- /dev/null +++ b/runtime/onert/backend/ruy/ops/ConvolutionLayer.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
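ConvolutionLayer::prepare() above is safe to call on every run() because it is guarded by the _prepare flag: the costly ruy packing of a constant kernel happens only once, while dynamic shapes simply recompute their padding on each run. The guard itself is a very small pattern, sketched here on its own:

// Prepare-once guard as used by the convolution kernel: run() may call
// prepare() unconditionally, but the expensive work happens a single time.
class PreparedOnce
{
public:
  void prepare()
  {
    if (_prepared)
      return;
    // ... pack constant weights, precompute kernel metadata, etc. ...
    _prepared = true;
  }

  void run()
  {
    prepare(); // cheap after the first call
    // ... execute the kernel ...
  }

private:
  bool _prepared = false;
};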
+ */ + +#ifndef __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__ +#define __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__ + +#include +#include "../ExternalContext.h" +#include "OperationUtils.h" + +#include +#include +#include +#include + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +class ConvolutionLayer : public ::onert::exec::IFunction +{ +public: + ConvolutionLayer(); + ~ConvolutionLayer(); + +public: + void convFloat32(); + + void configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, ir::PaddingType _paddingType, + const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, + const uint32_t paddingBottom, const uint32_t strideWidth, + const uint32_t strideHeight, const uint32_t dilationWidthFactor, + const uint32_t dilationHeightFactor, const ir::Activation activation, + IPortableTensor *output, const std::shared_ptr &external_context); + + void run() override; + + void prepare() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_kernel; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::PaddingType _paddingType; + uint32_t _paddingLeft; + uint32_t _paddingTop; + uint32_t _paddingRight; + uint32_t _paddingBottom; + + uint32_t _strideWidth; + uint32_t _strideHeight; + uint32_t _dilationWidthFactor; + uint32_t _dilationHeightFactor; + + ir::Activation _activation; + + std::unique_ptr _conv_kernel; + + bool _prepare; + + std::shared_ptr _external_context; +}; + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__ diff --git a/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc new file mode 100644 index 0000000..af693e3 --- /dev/null +++ b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "FullyConnectedLayer.h" + +#include "../Tensor.h" +#include +#include + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +FullyConnectedLayer::FullyConnectedLayer() + : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr), + _activation(ir::Activation::NONE), _external_context(nullptr) +{ + // DO NOTHING +} + +FullyConnectedLayer::~FullyConnectedLayer() = default; + +void FullyConnectedLayer::fullyConnectedFloat32() +{ + float output_activation_min = 0, output_activation_max = 0; + CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); + nnfw::ruy::FullyConnectedParams op_params; + + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + op_params.activation = convertActivationType(_activation); + op_params.lhs_cacheable = _weights->is_constant(); + op_params.rhs_cacheable = _input->is_constant(); + + nnfw::ruy::FullyConnected( + op_params, getTensorShape(_input), reinterpret_cast(_input->buffer()), + getTensorShape(_weights), reinterpret_cast(_weights->buffer()), + getTensorShape(_bias), reinterpret_cast(_bias ? _bias->buffer() : nullptr), + getTensorShape(_output), reinterpret_cast(_output->buffer()), + _external_context->ruy_context()); +} + +void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights, + const IPortableTensor *bias, ir::Activation activation, + ir::FullyConnectedWeightsFormat weights_format, + IPortableTensor *output, + const std::shared_ptr &external_context) +{ + UNUSED_RELEASE(weights_format); + _input = input; + _weights = weights; + _bias = bias; + _activation = activation; + _output = output; + _external_context = external_context; +} + +void FullyConnectedLayer::run() +{ + if (_input->data_type() == OperandType::FLOAT32) + { + fullyConnectedFloat32(); + } + else + { + throw std::runtime_error{"FullyConnected: unsupported data type"}; + } +} + +void FullyConnectedLayer::prepare() +{ + if (_bias && _bias->is_constant()) + { + const int bias_size = getTensorShape(_bias).FlatSize(); + if (nnfw::ruy::IsZeroVector(reinterpret_cast(_bias->buffer()), bias_size)) + { + _bias = nullptr; + } + } +} + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h new file mode 100644 index 0000000..33d560f --- /dev/null +++ b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
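FullyConnectedLayer::prepare() above drops a constant bias that is entirely zero so the ruy GEMM can skip the bias-add. The check is just a scan over the flattened buffer; a stand-alone equivalent of that test, written here only to make the optimization concrete:

#include <cstddef>

// True when every element of a float buffer is exactly 0.0f, mirroring the
// IsZeroVector() test used to elide an all-zero constant bias.
bool isZeroVector(const float *data, std::size_t size)
{
  if (data == nullptr)
    return true;
  for (std::size_t i = 0; i < size; ++i)
  {
    if (data[i] != 0.0f)
      return false;
  }
  return true;
}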
+ */ + +#ifndef __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__ +#define __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__ + +#include +#include "../ExternalContext.h" +#include "OperationUtils.h" + +#include + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +class FullyConnectedLayer : public ::onert::exec::IFunction +{ +public: + FullyConnectedLayer(); + ~FullyConnectedLayer(); + +public: + void fullyConnectedFloat32(); + + void configure(const IPortableTensor *input, const IPortableTensor *weights, + const IPortableTensor *bias, ir::Activation activation, + ir::FullyConnectedWeightsFormat weights_format, IPortableTensor *output, + const std::shared_ptr &external_context); + + void run() override; + + void prepare() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_weights; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::Activation _activation; + + std::shared_ptr _external_context; +}; + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__ diff --git a/runtime/onert/backend/ruy/ops/OperationUtils.cc b/runtime/onert/backend/ruy/ops/OperationUtils.cc new file mode 100644 index 0000000..929107b --- /dev/null +++ b/runtime/onert/backend/ruy/ops/OperationUtils.cc @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "OperationUtils.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type) +{ + switch (ir_padding_type) + { + case ir::PaddingType::EXPLICIT: + return nnfw::ruy::PaddingType::kNone; + case ir::PaddingType::SAME: + return nnfw::ruy::PaddingType::kSame; + case ir::PaddingType::VALID: + return nnfw::ruy::PaddingType::kValid; + default: + throw std::runtime_error("Wrong padding type."); + break; + } +} + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/ops/OperationUtils.h b/runtime/onert/backend/ruy/ops/OperationUtils.h new file mode 100644 index 0000000..5dfdc7e --- /dev/null +++ b/runtime/onert/backend/ruy/ops/OperationUtils.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__ +#define __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__ + +#include + +#include +#include +#include +#include +#include +#include + +#include + +using OperandType = onert::ir::DataType; + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +inline nnfw::ruy::Shape getTensorShape(const IPortableTensor *tensor) +{ + if (tensor == nullptr) + return nnfw::ruy::Shape(); + + const ir::Shape &shape = tensor->get_info().shape(); + + assert(tensor->layout() == ir::Layout::NHWC); + + auto rank = shape.rank(); + nnfw::ruy::Shape ret(rank); + auto data = ret.DimsData(); + for (int i = 0; i < rank; ++i) + { + data[i] = shape.dim(i); + } + return ret; +} + +inline nnfw::ruy::FusedActivationFunctionType convertActivationType(const ir::Activation activation) +{ + switch (activation) + { + case ir::Activation::NONE: + return nnfw::ruy::FusedActivationFunctionType::kNone; + case ir::Activation::RELU: + return nnfw::ruy::FusedActivationFunctionType::kRelu; + case ir::Activation::RELU1: + return nnfw::ruy::FusedActivationFunctionType::kRelu1; + case ir::Activation::RELU6: + return nnfw::ruy::FusedActivationFunctionType::kRelu6; + case ir::Activation::TANH: + return nnfw::ruy::FusedActivationFunctionType::kTanh; + case ir::Activation::SIGMOID: + return nnfw::ruy::FusedActivationFunctionType::kSigmoid; + default: + throw std::runtime_error{"RUY backend: Cannot convert activation type"}; + } +} + +template +void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max) +{ + if (activation == ir::Activation::RELU) + { + *activation_min = 0; + *activation_max = std::numeric_limits::max(); + } + else if (activation == ir::Activation::RELU6) + { + *activation_min = 0; + *activation_max = 6; + } + else if (activation == ir::Activation::RELU1) + { + *activation_min = -1; + *activation_max = 1; + } + else if (activation == ir::Activation::SIGMOID) + { + *activation_min = 0; + *activation_max = 1; + } + else if (activation == ir::Activation::NONE) + { + *activation_min = std::numeric_limits::lowest(); + *activation_max = std::numeric_limits::max(); + } + else + { + std::cout << "Unsupported fused activation function." << std::endl; + } +} + +nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type); + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__ diff --git a/runtime/onert/backend/ruy/ruy.cc b/runtime/onert/backend/ruy/ruy.cc new file mode 100644 index 0000000..4f33590 --- /dev/null +++ b/runtime/onert/backend/ruy/ruy.cc @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
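OperationUtils.h above converts a fused activation into a [min, max] clamp range (RELU gives [0, +max], RELU6 gives [0, 6], NONE gives the full float range, and so on), and kernels then clamp every output element into that range. A small sketch of how such a range is applied once CalculateActivationRange has produced the bounds:

#include <algorithm>
#include <vector>

// Clamp each output element into the fused-activation range computed earlier.
void applyActivationRange(std::vector<float> &output, float activation_min, float activation_max)
{
  for (float &v : output)
    v = std::min(activation_max, std::max(activation_min, v));
}

// Example: with the RELU6 range [0, 6], {-1.0f, 3.5f, 9.0f} becomes {0.0f, 3.5f, 6.0f}.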
+ */ + +#include "Backend.h" + +extern "C" { + +onert::backend::Backend *onert_backend_create() { return new onert::backend::ruy::Backend; } + +void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; } +} diff --git a/runtime/onert/backend/xnnpack/Backend.h b/runtime/onert/backend/xnnpack/Backend.h new file mode 100644 index 0000000..b7aef16 --- /dev/null +++ b/runtime/onert/backend/xnnpack/Backend.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_BACKEND_H__ +#define __ONERT_BACKEND_XNNPACK_BACKEND_H__ + +#include "BackendContext.h" +#include "Config.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" + +#include + +#include + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class Backend : public ::onert::backend::Backend +{ +public: + Backend() : _config{std::make_shared()} {} + + std::shared_ptr config() const override { return _config; } + + std::unique_ptr + newContext(const ir::Graph &graph, const std::shared_ptr &kb, + bool) const override + { + const auto &operands = graph.operands(); + const auto &operations = graph.operations(); + auto context = std::make_unique(this, &graph); + auto tr = std::make_shared(); + auto tb = std::make_shared(tr); + context->tensor_registry = tr; + context->tensor_builder = tb; + context->constant_initializer = std::make_shared(operands, tr); + context->kernel_gen = std::make_shared(operands, operations, tb, tr, kb, + context->external_context()); + return context; + } + +private: + std::shared_ptr _config; +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_BACKEND_H__ diff --git a/runtime/onert/backend/xnnpack/BackendContext.cc b/runtime/onert/backend/xnnpack/BackendContext.cc new file mode 100644 index 0000000..503d088 --- /dev/null +++ b/runtime/onert/backend/xnnpack/BackendContext.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
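ruy.cc above exposes the two C entry points, onert_backend_create and onert_backend_destroy, that the runtime resolves when it loads a backend as a shared library. Below is a hedged sketch of how such a plugin could be opened with dlopen/dlsym; the file name libbackend_ruy.so follows from the OUTPUT_NAME set in the CMake file, but the exact search path and loader used by the runtime are not shown in this diff:

#include <dlfcn.h>
#include <stdexcept>
#include <string>

// Minimal loader for a backend plugin exposing C entry points.
void *loadBackend(const std::string &so_path)
{
  void *handle = dlopen(so_path.c_str(), RTLD_LAZY | RTLD_LOCAL);
  if (handle == nullptr)
    throw std::runtime_error(std::string{"dlopen failed: "} + dlerror());

  // The created object is opaque here; the runtime casts it to its Backend interface.
  using CreateFn = void *(*)();
  auto create = reinterpret_cast<CreateFn>(dlsym(handle, "onert_backend_create"));
  if (create == nullptr)
    throw std::runtime_error{"symbol onert_backend_create not found"};

  return create(); // pair with onert_backend_destroy() when unloading
}

// Usage (illustrative path only): loadBackend("libbackend_ruy.so");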
+ */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" +#include "backend/cpu_common/BackendContextHelpers.h" + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (auto index : operand_list()) + { + if (model_io.contains(index)) + continue; + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = [&]() { + if (obj.getUses().size() == 0) + return ir::Layout::UNKNOWN; + auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses? + for (auto &operation_info : operation_list()) + { + if (operation_info.index == use_op_ind) + return operation_info.layout; + } + return ir::Layout::UNKNOWN; + }(); + const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement(); + if (permute_factor.backend() != backend()) + continue; + const auto backend_layout = permute_factor.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + + // TODO Get compiler options from compiler, and use it rather than getting it from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + cpu_common::planTensors(*this, order, op_seqs, lower_info); + } + else + { + // For the executors that does not have fixed linear execution order: + // To make tensors never be deallocated, this is a workaround to use static memory planner + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); }); + } + + return ret; +} + +} // namespace xnnpack +} // namespace backend +} // namespace onert diff --git 
a/runtime/onert/backend/xnnpack/BackendContext.h b/runtime/onert/backend/xnnpack/BackendContext.h new file mode 100644 index 0000000..f81175b --- /dev/null +++ b/runtime/onert/backend/xnnpack/BackendContext.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__ + +#include +#include +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" +#include "ExternalContext.h" + +namespace +{ +const int kDefaultNumThreadpoolThreads = 1; +} + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, const ir::Graph *graph, + std::shared_ptr tensor_registry = nullptr, + std::shared_ptr tensor_builder = nullptr, + std::shared_ptr constant_initializer = nullptr, + std::shared_ptr kernel_gen = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen}, _external_context(nullptr) + { + int num_threads = util::getConfigInt(util::config::XNNPACK_THREADS); + if (num_threads < 1) + num_threads = kDefaultNumThreadpoolThreads; // default num of threads + _external_context.reset(new ExternalContext(static_cast(num_threads))); + } + + ITensorRegistry *genTensors(const std::vector &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + + FunctionMap genKernels(const std::vector &order, + const ir::OpSequences &op_seqs) override; + + std::shared_ptr external_context() { return _external_context; } + +private: + void initConsts(); + void planTensors(const std::vector &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr tensor_builder; + std::shared_ptr constant_initializer; + std::shared_ptr kernel_gen; + +private: + std::shared_ptr _external_context; +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/xnnpack/CMakeLists.txt b/runtime/onert/backend/xnnpack/CMakeLists.txt new file mode 100644 index 0000000..e3de31e --- /dev/null +++ b/runtime/onert/backend/xnnpack/CMakeLists.txt @@ -0,0 +1,26 @@ +set(LIB_ONERT_BACKEND_XNNPACK onert_backend_xnnpack) + +# Unsupported architecture +nnfw_find_package(Xnnpack QUIET) +if(NOT Xnnpack_FOUND) + return() +endif(NOT Xnnpack_FOUND) + +file(GLOB_RECURSE SOURCES "*.cc") + +add_library(${LIB_ONERT_BACKEND_XNNPACK} SHARED ${SOURCES}) + +target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE onert_core) +target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE nnfw_common) +target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE nnfw_coverage) 
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE pthreadpool) +target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE XNNPACK) + +set_target_properties(${LIB_ONERT_BACKEND_XNNPACK} PROPERTIES OUTPUT_NAME backend_xnnpack) + +if(CMAKE_BUILD_TYPE_LC STREQUAL "release") + add_custom_command(TARGET ${LIB_ONERT_BACKEND_XNNPACK} POST_BUILD + COMMAND ${CMAKE_STRIP} "--strip-unneeded" $) +endif() + +install(TARGETS ${LIB_ONERT_BACKEND_XNNPACK} DESTINATION lib) diff --git a/runtime/onert/core/include/backend/IOptimizer.h b/runtime/onert/backend/xnnpack/Config.cc similarity index 62% rename from runtime/onert/core/include/backend/IOptimizer.h rename to runtime/onert/backend/xnnpack/Config.cc index 4844d21..4d42a3f 100644 --- a/runtime/onert/core/include/backend/IOptimizer.h +++ b/runtime/onert/backend/xnnpack/Config.cc @@ -14,38 +14,31 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_I_OPTIMIZER_H__ -#define __ONERT_BACKEND_I_OPTIMIZER_H__ +#include "Config.h" -namespace onert -{ -namespace ir -{ -class LoweredGraph; -} -} // namespace onert +#include namespace onert { namespace backend { +namespace xnnpack +{ -/** - * @brief Class for backend optimizations. This is an optional class so not all backends must have - * it. - * - */ -struct IOptimizer +Config::~Config() { xnn_deinitialize(); } + +bool Config::initialize() { - virtual ~IOptimizer() = default; - /** - * @brief Run optimization - * - */ - virtual void optimize() = 0; -}; + xnn_status status = xnn_initialize(nullptr /* allocator */); + if (status != xnn_status_success) + { + throw std::runtime_error{"failed to initialize XNNPACK"}; + } + return true; +} + +ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; } +} // namespace cpu } // namespace backend } // namespace onert - -#endif // __ONERT_BACKEND_I_OPTIMIZER_H__ diff --git a/runtime/onert/backend/xnnpack/Config.h b/runtime/onert/backend/xnnpack/Config.h new file mode 100644 index 0000000..2cf7406 --- /dev/null +++ b/runtime/onert/backend/xnnpack/Config.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
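xnnpack::Config above calls xnn_initialize(nullptr) once in initialize() and xnn_deinitialize() in its destructor, so the library stays initialized for as long as the config object lives. The same lifetime can be expressed as a small RAII guard; this is a sketch of the idea, not how the runtime itself wraps it:

#include <stdexcept>
#include <xnnpack.h>

// Ties XNNPACK's global init/deinit to an object lifetime, mirroring what
// xnnpack::Config does across initialize() and its destructor.
class XnnpackGuard
{
public:
  XnnpackGuard()
  {
    if (xnn_initialize(/*allocator=*/nullptr) != xnn_status_success)
      throw std::runtime_error{"failed to initialize XNNPACK"};
  }
  ~XnnpackGuard() { xnn_deinitialize(); }

  XnnpackGuard(const XnnpackGuard &) = delete;
  XnnpackGuard &operator=(const XnnpackGuard &) = delete;
};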
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_CONFIG_H__ +#define __ONERT_BACKEND_XNNPACK_CONFIG_H__ + +#include +#include +#include + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class Config : public IConfig +{ +public: + virtual ~Config(); + +public: + std::string id() override { return "xnnpack"; } + bool initialize() override; + ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override; + bool supportPermutation() override { return true; } + bool supportDynamicTensor() override { return true; } + bool supportFP16() override { return false; } + + std::unique_ptr timer() override { return std::make_unique(); } +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_CONFIG_H__ diff --git a/runtime/onert/backend/xnnpack/ConstantInitializer.h b/runtime/onert/backend/xnnpack/ConstantInitializer.h new file mode 100644 index 0000000..45cdd8c --- /dev/null +++ b/runtime/onert/backend/xnnpack/ConstantInitializer.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__ + +#include + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +using ConstantInitializer = cpu_common::ConstantInitializer; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/core/include/backend/IExternalContext.h b/runtime/onert/backend/xnnpack/ExternalContext.cc similarity index 74% rename from runtime/onert/core/include/backend/IExternalContext.h rename to runtime/onert/backend/xnnpack/ExternalContext.cc index 88ffb50..3a9fe1b 100644 --- a/runtime/onert/core/include/backend/IExternalContext.h +++ b/runtime/onert/backend/xnnpack/ExternalContext.cc @@ -14,21 +14,23 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_IEXTERNAL_CONTEXT_H__ -#define __ONERT_BACKEND_IEXTERNAL_CONTEXT_H__ +#include "ExternalContext.h" + +#include namespace onert { namespace backend { +namespace xnnpack +{ -struct IExternalContext +ExternalContext::ExternalContext(size_t num_threads) + : _threadpool(pthreadpool_create(num_threads), pthreadpool_destroy) { - virtual ~IExternalContext() = default; - virtual void setMaxNumThreads(int) = 0; -}; + assert(_threadpool); +} +} // namespace xnnpack } // namespace backend } // namespace onert - -#endif // __ONERT_BACKEND_IEXTERNAL_CONTEXT__ diff --git a/runtime/onert/backend/xnnpack/ExternalContext.h b/runtime/onert/backend/xnnpack/ExternalContext.h new file mode 100644 index 0000000..682fd2e --- /dev/null +++ b/runtime/onert/backend/xnnpack/ExternalContext.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__ +#define __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__ + +#include +#include + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class ExternalContext +{ +public: + ExternalContext(size_t num_threads); + +public: + pthreadpool *getThreadPool() { return _threadpool.get(); } + +private: + std::unique_ptr _threadpool; +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__ diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.cc b/runtime/onert/backend/xnnpack/KernelGenerator.cc new file mode 100644 index 0000000..b7d3f60 --- /dev/null +++ b/runtime/onert/backend/xnnpack/KernelGenerator.cc @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "KernelGenerator.h" + +#include "ops/ConvolutionLayer.h" +#include "ops/DepthwiseConvolutionLayer.h" +#include "ops/FullyConnectedLayer.h" + +#include +#include +#include +#include +#include +#include + +#include + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +KernelGenerator::KernelGenerator( + const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr &tensor_builder, + const std::shared_ptr &tensor_reg, + const std::shared_ptr &kernel_builder, + const std::shared_ptr &external_context) + : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder), + _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder), + _current_layout(ir::Layout::UNKNOWN), _external_context(external_context) +{ + // DO NOTHING +} + +void KernelGenerator::visit(const ir::OpSequence &op_seq) +{ + assert(!_return_fn_seq); + assert(_tensor_builder->dynamicTensorManager()); + assert(_tensor_reg); + + auto dyn_shape_inferer = std::make_shared(_ctx, _tensor_reg); + + _return_fn_seq = std::make_unique(); + + // Prepare to handle dynamic tensors later + auto dyn_ctx = std::make_shared(); + { + dyn_ctx->op_seq = &op_seq; + dyn_ctx->operations = &_operations_ctx; + dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer); + dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager(); + + _return_fn_seq->dynamic_tensor_ctx(dyn_ctx); + } + + _current_layout = op_seq.getLayout(); + for (const auto &operation_idx : op_seq.operations()) + { + const auto &node = _operations_ctx.at(operation_idx); + node.accept(*this); + _return_fn_seq->append(releaseFunction()); + + for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs()) + { + auto portable_tensor = _tensor_reg->getPortableTensor(ind); + if (portable_tensor) + { + assert(portable_tensor->layout() == ir::Layout::NHWC); + } + + auto tensor = _tensor_reg->getNativeTensor(ind); + if (tensor) + { + tensor->increase_ref(); + } + } + } +} + +void KernelGenerator::visit(const ir::operation::Conv2D &node) +{ + using ir::operation::Conv2D; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)}; + const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; + + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index); + auto ker_tensor = _tensor_reg->getPortableTensor(ker_index); + auto bias_tensor = _tensor_reg->getPortableTensor(bias_index); + + const auto stride = node.param().stride; + const auto activation = node.param().activation; + const auto param_padding = node.param().padding; + const auto dilation = node.param().dilation; + auto fn = std::make_unique(_external_context); + + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. 
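+ // Kernel height/width come from dims 1 and 2 of that OHWI layout; the explicit padding below is derived from the input/output feature shapes, stride and dilation.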
+ const auto &ker_shape = _ctx.at(ker_index).shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + + const auto padding = + ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height, + dilation.width_factor, dilation.height_factor); + + fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left, + padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical, + dilation.width_factor, dilation.height_factor, activation, ofm_tensor); + + _return_fn = std::move(fn); +} + +void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) +{ + using ir::operation::DepthwiseConv2D; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)}; + const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)}; + + const auto stride = node.param().stride; + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + // Kernel format is [1, kernel_height, kernel_width, depth_out]. + const auto &ker_shape = _ctx.at(ker_index).shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + const auto dilation_width = node.param().dilation.width_factor; + const auto dilation_height = node.param().dilation.height_factor; + const auto param_padding = node.param().padding; + const auto padding = ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, + ker_height, dilation_width, dilation_height); + const auto multiplier = node.param().multiplier; + const auto activation = node.param().activation; + + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index); + auto ker_tensor = _tensor_reg->getPortableTensor(ker_index); + auto bias_tensor = _tensor_reg->getPortableTensor(bias_index); + + auto fn = std::make_unique(_external_context); + + fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left, + padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical, + multiplier, dilation_width, dilation_height, activation, ofm_tensor); + + _return_fn = std::move(fn); +} + +void KernelGenerator::visit(const ir::operation::FullyConnected &node) +{ + using ir::operation::FullyConnected; + + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)}; + const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)}; + const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)}; + const auto activation = node.param().activation; + + auto output_tensor = _tensor_reg->getPortableTensor(output_index); + auto input_tensor = _tensor_reg->getPortableTensor(input_index); + auto weight_tensor = _tensor_reg->getPortableTensor(weight_index); + auto bias_tensor = bias_index.undefined() ? 
nullptr : _tensor_reg->getPortableTensor(bias_index); + + auto fn = std::make_unique(_external_context); + + fn->configure(input_tensor, weight_tensor, bias_tensor, activation, output_tensor); + + _return_fn = std::move(fn); +} + +} // namespace xnnpack +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.h b/runtime/onert/backend/xnnpack/KernelGenerator.h new file mode 100644 index 0000000..2658242 --- /dev/null +++ b/runtime/onert/backend/xnnpack/KernelGenerator.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__ +#define __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__ + +#include "ExternalContext.h" +#include "TensorBuilder.h" +#include "backend/cpu_common/TensorRegistry.h" +#include "Tensor.h" + +#include +#include +#include +#include + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class KernelGenerator : public cpu_common::KernelGeneratorBase +{ +public: + KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr &tensor_builder, + const std::shared_ptr &tensor_reg, + const std::shared_ptr &kernel_builder, + const std::shared_ptr &external_context); + + void visit(const ir::OpSequence &) override; + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::DepthwiseConv2D &) override; + void visit(const ir::operation::FullyConnected &) override; + +private: + const ir::Operands &_ctx; + const ir::Operations &_operations_ctx; + std::shared_ptr _tensor_builder; + std::shared_ptr _tensor_reg; + std::shared_ptr _kernel_builder; + ir::Layout _current_layout; + const std::shared_ptr _external_context; +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__ diff --git a/runtime/onert/backend/xnnpack/StaticTensorManager.h b/runtime/onert/backend/xnnpack/StaticTensorManager.h new file mode 100644 index 0000000..f7344e8 --- /dev/null +++ b/runtime/onert/backend/xnnpack/StaticTensorManager.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__ +#define __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__ + +#include "backend/cpu_common/StaticTensorManager.h" + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +using StaticTensorManager = cpu_common::StaticTensorManager; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__ diff --git a/runtime/onert/backend/xnnpack/Tensor.h b/runtime/onert/backend/xnnpack/Tensor.h new file mode 100644 index 0000000..b39cbd2 --- /dev/null +++ b/runtime/onert/backend/xnnpack/Tensor.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_H__ +#define __ONERT_BACKEND_XNNPACK_TENSOR_H__ + +#include +#include + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +using Tensor = cpu_common::Tensor; +using ExternalTensor = cpu_common::ExternalTensor; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_TENSOR_H__ diff --git a/runtime/onert/backend/xnnpack/TensorBuilder.cc b/runtime/onert/backend/xnnpack/TensorBuilder.cc new file mode 100644 index 0000000..b570144 --- /dev/null +++ b/runtime/onert/backend/xnnpack/TensorBuilder.cc @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "TensorBuilder.h" + +#include + +#include + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +TensorBuilder::TensorBuilder(const std::shared_ptr &tensor_reg) + : _tensor_reg{tensor_reg}, + _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)}, + _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())} +{ + /* empty */ +} + +void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout layout) +{ + _tensor_info_map.emplace(ind, info); + + // XNNPACK backend supports only one layout as NHWC + assert(layout == ir::Layout::NHWC); + if (info.isDynamic()) + { + _dynamic_tensor_mgr->buildTensor(ind, info, layout); + } + else + { + _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant()); + } +} + +void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) +{ + assert(_tensor_info_map.find(ind) != _tensor_info_map.end()); + const auto tensor_info = _tensor_info_map.at(ind); + + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) + { + const auto size = tensor_info.total_size(); + _static_tensor_mgr->claimPlan(ind, size); + } +} + +void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) +{ + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) + { + _static_tensor_mgr->releasePlan(ind); + } +} + +bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const +{ + return _tensor_info_map.find(ind) != _tensor_info_map.end(); +} + +void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); } + +void TensorBuilder::allocate() +{ + // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate + // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation. +} + +} // namespace xnnpack +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/xnnpack/TensorBuilder.h b/runtime/onert/backend/xnnpack/TensorBuilder.h new file mode 100644 index 0000000..dddfedb --- /dev/null +++ b/runtime/onert/backend/xnnpack/TensorBuilder.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__ +#define __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__ + +#include +#include + +#include + +#include "StaticTensorManager.h" +#include "Tensor.h" + +#include + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class TensorBuilder +{ +public: + TensorBuilder(const std::shared_ptr &tensor_reg); + + /** + * @brief Register tensor information to allocate on XNNPACK backend + * @param[in] ind Operand index + * @param[in] info Operand information + * @param[in] layout Operand data layout + */ + void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout backend_layout); + + void notifyFirstUse(const ir::OperandIndex &); + void notifyLastUse(const ir::OperandIndex &); + + bool isRegistered(const ir::OperandIndex &) const; + + void prepare(void); + void allocate(); + void postFunctionPrepare() { /* DO NOTHING */} + + IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); } + +private: + const std::shared_ptr _tensor_reg; + std::unique_ptr _dynamic_tensor_mgr; + std::unique_ptr _static_tensor_mgr; + ir::OperandIndexMap _tensor_info_map; +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc new file mode 100644 index 0000000..0612995 --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ConvolutionLayer.h" + +#include "ir/Padding.h" + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ +ConvolutionLayer::ConvolutionLayer(const std::shared_ptr external_context) + : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr), + _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0), + _padding_right(0), _padding_bottom(0), _stride_width(0), _stride_height(0), + _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE) +{ + // DO NOTHING +} + +void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, ir::PaddingType padding_type, + const uint32_t padding_left, const uint32_t padding_right, + const uint32_t padding_top, const uint32_t padding_bottom, + const uint32_t stride_width, const uint32_t stride_height, + const uint32_t dilation_width_factor, + const uint32_t dilation_height_factor, + const ir::Activation activation, IPortableTensor *output) +{ + _input = input; + _kernel = kernel; + _bias = bias; + _padding_type = padding_type; + _padding_left = padding_left; + _padding_right = padding_right; + _padding_top = padding_top; + _padding_bottom = padding_bottom; + _stride_width = stride_width; + _stride_height = stride_height; + _dilation_width_factor = dilation_width_factor; + _dilation_height_factor = dilation_height_factor; + _activation = activation; + _output = output; + + // TODO Support not nhwc layer + assert(_input->layout() == ir::Layout::NHWC); + + assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU || + _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6); +} + +void ConvolutionLayer::run() +{ + assert(_external_context && _external_context->getThreadPool()); + if (!_setup) + { + _setup = setup(); + assert(_setup); + } + + if (_input->data_type() == OperandType::FLOAT32) + { + enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool()); + if (status != xnn_status_success) + { + throw std::runtime_error{"failed to run FP32 Convolution operator"}; + } + } + else + { + throw std::runtime_error{"XNNPACK Conv: unsupported data type"}; + } +} + +bool ConvolutionLayer::create() +{ + float output_activation_min = 0.f, output_activation_max = 0.f; + CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); + + // NHWC + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. 
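+ // With a single group, the group_input_channels/group_output_channels passed to xnn_create_convolution2d_nhwc_f32 below are simply the full input and output depths.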
+ const auto &kernel_shape = _kernel->getShape(); + uint32_t kernel_height = kernel_shape.dim(1); + uint32_t kernel_width = kernel_shape.dim(2); + uint32_t output_channels = kernel_shape.dim(0); + uint32_t input_channels = kernel_shape.dim(3); + assert(static_cast(_input->getShape().dim(3)) == input_channels); + assert(static_cast(_output->getShape().dim(3)) == output_channels); + + enum xnn_status status = xnn_create_convolution2d_nhwc_f32( + _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width, + _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor, + 1 /* groups */, input_channels /* group_input_channels */, + output_channels /* group_output_channels */, input_channels /* input_channel_stride */, + output_channels /* output_channel_stride */, + reinterpret_cast(_kernel->buffer()), + reinterpret_cast(_bias->buffer()), output_activation_min, + output_activation_max, 0, &_kernel_op); + if (status != xnn_status_success) + { + throw std::runtime_error{"failed to create FP32 Convolution operator"}; + } + assert(_kernel_op != nullptr); + return true; +} + +bool ConvolutionLayer::setup() +{ + if (_input->buffer() == nullptr || _output->buffer() == nullptr) + { + // it could be models's input or output + return false; + } + + uint32_t input_width = _input->getShape().dim(2); + uint32_t input_height = _input->getShape().dim(1); + uint32_t batch_size = _input->getShape().dim(0); + enum xnn_status status = xnn_setup_convolution2d_nhwc_f32( + _kernel_op, batch_size, input_height, input_width, + reinterpret_cast(_input->buffer()), + reinterpret_cast(_output->buffer()), _external_context->getThreadPool()); + if (status != xnn_status_success) + { + throw std::runtime_error{"failed to create FP32 Convolution operator"}; + } + return true; +} + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h new file mode 100644 index 0000000..6cbaa9f --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__ +#define __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__ + +#include "Layer.h" + +#include + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +class ConvolutionLayer : public Layer +{ +public: + ConvolutionLayer(const std::shared_ptr external_context); + +public: + void configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, ir::PaddingType padding_type, + const uint32_t padding_left, const uint32_t padding_right, + const uint32_t padding_top, const uint32_t padding_bottom, + const uint32_t stride_width, const uint32_t stride_height, + const uint32_t dilation_width_factor, const uint32_t dilation_height_factor, + const ir::Activation activation, IPortableTensor *output); + + void run() override; + + bool create() override; + bool setup() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_kernel; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::PaddingType _padding_type; + uint32_t _padding_left; + uint32_t _padding_top; + uint32_t _padding_right; + uint32_t _padding_bottom; + + uint32_t _stride_width; + uint32_t _stride_height; + uint32_t _dilation_width_factor; + uint32_t _dilation_height_factor; + + ir::Activation _activation; +}; + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc new file mode 100644 index 0000000..947f041 --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "DepthwiseConvolutionLayer.h" + +#include "ir/Padding.h" + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +DepthwiseConvolutionLayer::DepthwiseConvolutionLayer( + const std::shared_ptr external_context) + : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr), + _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0), + _padding_right(0), _padding_bottom(0), _stride_width(0), _stride_height(0), _multiplier(1), + _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE) +{ + // DO NOTHING +} + +void DepthwiseConvolutionLayer::configure( + const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias, + ir::PaddingType padding_type, const uint32_t padding_left, const uint32_t padding_right, + const uint32_t padding_top, const uint32_t padding_bottom, const uint32_t stride_width, + const uint32_t stride_height, const uint32_t multiplier, const uint32_t dilation_width_factor, + const uint32_t dilation_height_factor, const ir::Activation activation, IPortableTensor *output) +{ + _input = input; + _kernel = kernel; + _bias = bias; + _padding_type = padding_type; + _padding_left = padding_left; + _padding_right = padding_right; + _padding_top = padding_top; + _padding_bottom = padding_bottom; + _stride_width = stride_width; + _stride_height = stride_height; + _multiplier = multiplier; + _dilation_width_factor = dilation_width_factor; + _dilation_height_factor = dilation_height_factor; + _activation = activation; + _output = output; + + // TODO Support not nhwc layer + assert(_input->layout() == ir::Layout::NHWC); + + assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU || + _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6); +} + +void DepthwiseConvolutionLayer::run() +{ + assert(_external_context && _external_context->getThreadPool()); + if (!_setup) + { + _setup = setup(); + assert(_setup); + } + + if (_input->data_type() == OperandType::FLOAT32) + { + enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool()); + if (status != xnn_status_success) + { + throw std::runtime_error{"failed to run FP32 DepthwiseConvolution operator"}; + } + } + else + { + throw std::runtime_error{"XNNPACK DepthwiseConv: unsupported data type"}; + } +} + +bool DepthwiseConvolutionLayer::create() +{ + float output_activation_min = 0.f, output_activation_max = 0.f; + CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); + + // NHWC + // Kernel format is [1, kernel_height, kernel_width, depth_out]. 
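+ // XNNPACK expresses depthwise convolution as a grouped convolution: groups == input channels, one input channel per group, _multiplier outputs per group, with XNN_FLAG_DEPTHWISE_CONVOLUTION set.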
+ const auto &kernel_shape = _kernel->getShape(); + uint32_t kernel_height = kernel_shape.dim(1); + uint32_t kernel_width = kernel_shape.dim(2); + uint32_t output_channels = kernel_shape.dim(3); + uint32_t input_channels = _input->getShape().dim(3); + assert(static_cast(_output->getShape().dim(3)) == output_channels); + assert(output_channels == input_channels * _multiplier); + + enum xnn_status status = xnn_create_convolution2d_nhwc_f32( + _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width, + _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor, + input_channels /* groups */, 1 /* group_input_channels */, + _multiplier /* group_output_channels */, input_channels /* input_channel_stride */, + output_channels /* output_channel_stride */, + reinterpret_cast(_kernel->buffer()), + reinterpret_cast(_bias->buffer()), output_activation_min, + output_activation_max, XNN_FLAG_DEPTHWISE_CONVOLUTION, &_kernel_op); + if (status != xnn_status_success) + { + throw std::runtime_error{"failed to create FP32 DepthwiseConvolution operator"}; + } + assert(_kernel_op != nullptr); + return true; +} + +bool DepthwiseConvolutionLayer::setup() +{ + if (_input->buffer() == nullptr || _output->buffer() == nullptr) + { + // it could be models's input or output + return false; + } + + uint32_t input_width = _input->getShape().dim(2); + uint32_t input_height = _input->getShape().dim(1); + uint32_t batch_size = _input->getShape().dim(0); + enum xnn_status status = xnn_setup_convolution2d_nhwc_f32( + _kernel_op, batch_size, input_height, input_width, + reinterpret_cast(_input->buffer()), + reinterpret_cast(_output->buffer()), _external_context->getThreadPool()); + if (status != xnn_status_success) + { + throw std::runtime_error{"failed to create FP32 DepthwiseConvolution operator"}; + } + return true; +} + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h new file mode 100644 index 0000000..10f840a --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__ +#define __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__ + +#include "Layer.h" + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +class DepthwiseConvolutionLayer : public Layer +{ +public: + DepthwiseConvolutionLayer(const std::shared_ptr external_context); + +public: + void configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, ir::PaddingType padding_type, + const uint32_t padding_left, const uint32_t padding_right, + const uint32_t padding_top, const uint32_t padding_bottom, + const uint32_t stride_width, const uint32_t stride_height, + const uint32_t multiplier, const uint32_t dilation_width_factor, + const uint32_t dilation_height_factor, const ir::Activation activation, + IPortableTensor *output); + + void run() override; + + bool create() override; + bool setup() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_kernel; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::PaddingType _padding_type; + uint32_t _padding_left; + uint32_t _padding_top; + uint32_t _padding_right; + uint32_t _padding_bottom; + + uint32_t _stride_width; + uint32_t _stride_height; + uint32_t _multiplier; + uint32_t _dilation_width_factor; + uint32_t _dilation_height_factor; + + ir::Activation _activation; +}; + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc new file mode 100644 index 0000000..d595fda --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "FullyConnectedLayer.h" + +#include "ir/Padding.h" + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +FullyConnectedLayer::FullyConnectedLayer(const std::shared_ptr external_context) + : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr), + _activation(ir::Activation::NONE) +{ + // DO NOTHING +} + +void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights, + const IPortableTensor *bias, ir::Activation activation, + IPortableTensor *output) +{ + _input = input; + _kernel = weights; + _bias = bias; + _activation = activation; + _output = output; + + // TODO Support not nhwc layer + assert(_input->layout() == ir::Layout::NHWC); + + assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU || + _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6); +} + +void FullyConnectedLayer::run() +{ + assert(_external_context && _external_context->getThreadPool()); + if (!_setup) + { + _setup = setup(); + assert(_setup); + } + + if (_input->data_type() == OperandType::FLOAT32) + { + enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool()); + if (status != xnn_status_success) + { + throw std::runtime_error{"failed to run FP32 FullyConnected operator"}; + } + } + else + { + throw std::runtime_error{"XNNPACK FC: unsupported data type"}; + } +} + +bool FullyConnectedLayer::create() +{ + float output_activation_min = 0.f, output_activation_max = 0.f; + CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); + + const auto &kernel_shape = _kernel->getShape(); + assert(kernel_shape.rank() == 2); + uint32_t output_channels = kernel_shape.dim(0); + uint32_t input_channels = kernel_shape.dim(1); + + const auto &input_shape = _input->getShape(); + const auto &output_shape = _output->getShape(); + uint32_t flag = 0; + if (input_shape.rank() != output_shape.rank()) + { + flag |= XNN_FLAG_TENSORFLOW_RESHAPE_2D; + assert(input_shape.num_elements() % input_channels == 0); + } + else + { + assert(static_cast(input_shape.dim(input_shape.rank() - 1)) == input_channels); + } + + assert(_kernel && _kernel->buffer()); + const float *kernel_buffer = reinterpret_cast(_kernel->buffer()); + const float *bias_buffer = (_bias) ? 
reinterpret_cast(_bias->buffer()) : nullptr; + + enum xnn_status status = xnn_create_fully_connected_nc_f32( + input_channels, output_channels, input_channels /* input stride */, + output_channels /* output stride */, kernel_buffer, bias_buffer, output_activation_min, + output_activation_max, flag, &_kernel_op); + if (status != xnn_status_success) + { + throw std::runtime_error{"failed to create FP32 FullyConnected operator"}; + } + assert(_kernel_op != nullptr); + return true; +} + +bool FullyConnectedLayer::setup() +{ + if (_input->buffer() == nullptr || _output->buffer() == nullptr) + { + // it could be models's input or output + return false; + } + + uint32_t batch_size = _input->getShape().num_elements() / _kernel->getShape().dim(1); + enum xnn_status status = xnn_setup_fully_connected_nc_f32( + _kernel_op, batch_size, reinterpret_cast(_input->buffer()), + reinterpret_cast(_output->buffer()), _external_context->getThreadPool()); + if (status != xnn_status_success) + { + throw std::runtime_error{"failed to create FP32 FullyConnected operator"}; + } + return true; +} + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h new file mode 100644 index 0000000..883607e --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__ +#define __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__ + +#include "Layer.h" + +#include + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +class FullyConnectedLayer : public Layer +{ +public: + FullyConnectedLayer(const std::shared_ptr external_context); + +public: + void configure(const IPortableTensor *input, const IPortableTensor *_kernel, + const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output); + + void run() override; + + bool create() override; + bool setup() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_kernel; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::Activation _activation; +}; + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/Layer.h b/runtime/onert/backend/xnnpack/ops/Layer.h new file mode 100644 index 0000000..68b610f --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/Layer.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__ +#define __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__ + +#include +#include +#include "OperationUtils.h" +#include "../ExternalContext.h" +#include "../Tensor.h" + +#include +#include + +#include + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +class Layer : public ::onert::exec::IFunction +{ +public: + Layer(const std::shared_ptr external_context) + : _kernel_op{nullptr}, _create{false}, _setup{false}, _external_context{external_context} + { + // DO NOTHING + } + + ~Layer() + { + if (_kernel_op) + xnn_delete_operator(_kernel_op); + } + +public: + void prepare() override + { + if (_create) + return; + + _create = create(); + assert(_create); + + _setup = setup(); + } + virtual bool create() = 0; + virtual bool setup() = 0; + +protected: + xnn_operator_t _kernel_op; + bool _create; + bool _setup; + const std::shared_ptr _external_context; +}; + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/OperationUtils.h b/runtime/onert/backend/xnnpack/ops/OperationUtils.h new file mode 100644 index 0000000..5102e32 --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/OperationUtils.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__ +#define __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__ + +// duplicated from cpu/ops/OperationUtils.h +#include +#include +#include + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +using OperandType = ir::DataType; + +template +void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max) +{ + if (activation == ir::Activation::RELU) + { + *activation_min = 0; + *activation_max = std::numeric_limits::max(); + } + else if (activation == ir::Activation::RELU6) + { + *activation_min = 0; + *activation_max = 6; + } + else if (activation == ir::Activation::RELU1) + { + *activation_min = -1; + *activation_max = 1; + } + else if (activation == ir::Activation::SIGMOID) + { + *activation_min = 0; + *activation_max = 1; + } + else if (activation == ir::Activation::NONE) + { + *activation_min = std::numeric_limits::lowest(); + *activation_max = std::numeric_limits::max(); + } + else + { + throw std::runtime_error{"Unsupported fused activation function"}; + } +} + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__ diff --git a/runtime/onert/backend/xnnpack/xnnpack.cc b/runtime/onert/backend/xnnpack/xnnpack.cc new file mode 100644 index 0000000..38a6c55 --- /dev/null +++ b/runtime/onert/backend/xnnpack/xnnpack.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Backend.h" + +#include + +extern "C" { +onert::backend::Backend *onert_backend_create() +{ + VERBOSE(onert_backend_create) << "'xnnpack' loaded\n"; + return new onert::backend::xnnpack::Backend; +} + +void onert_backend_destroy(onert::backend::Backend *backend) +{ + VERBOSE(onert_backend_create) << "'xnnpack' unloaded\n"; + delete backend; +} +} diff --git a/runtime/onert/core/include/backend/BackendContext.h b/runtime/onert/core/include/backend/BackendContext.h index 1eba295..4d21215 100644 --- a/runtime/onert/core/include/backend/BackendContext.h +++ b/runtime/onert/core/include/backend/BackendContext.h @@ -19,6 +19,8 @@ #include #include "ir/Graph.h" +#include "ir/LowerInfoMap.h" +#include "exec/FunctionSequence.h" namespace onert { @@ -26,12 +28,10 @@ namespace backend { class Backend; -class IConstantInitializer; -class IKernelGenerator; -class ITensorRegister; struct ITensorRegistry; -struct ITensorBuilder; -struct IOptimizer; + +using FunctionMap = + std::vector>>; class BackendContext { @@ -46,15 +46,8 @@ public: public: BackendContext(const Backend *backend, const ir::Graph *graph, - std::shared_ptr tensor_registry = nullptr, - std::shared_ptr tensor_builder = nullptr, - std::shared_ptr constant_initializer = nullptr, - std::shared_ptr kernel_gen = nullptr, - std::shared_ptr tensor_register = nullptr, - std::shared_ptr optimizer = nullptr) - : _backend{backend}, _graph{graph}, tensor_registry{tensor_registry}, - tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, - kernel_gen{kernel_gen}, tensor_register{tensor_register}, optimizer{optimizer} + std::shared_ptr tensor_registry = nullptr) + : _backend{backend}, _graph{graph}, tensor_registry{tensor_registry} { } @@ -66,8 +59,19 @@ public: const Backend *backend() const { return _backend; } const ir::Graph *graph() const { return _graph; } - const std::vector &operation_list() { return _operation_list; } - const std::vector &operand_list() { return _operand_list; } + const std::vector &operation_list() const { return _operation_list; } + const std::vector &operand_list() const { return _operand_list; } + + virtual ITensorRegistry *genTensors(const std::vector &, + const ir::OpSequences &, const ir::LowerInfoMap &) + { + return nullptr; + } + virtual FunctionMap genKernels(const std::vector &, + const ir::OpSequences &) + { + return {}; + } private: const Backend *_backend{nullptr}; @@ -77,11 +81,6 @@ private: public: std::shared_ptr tensor_registry; - std::shared_ptr tensor_builder; - std::shared_ptr constant_initializer; - std::shared_ptr kernel_gen; - std::shared_ptr tensor_register; - std::shared_ptr optimizer; }; using BackendContexts = std::unordered_map>; diff --git a/runtime/onert/core/include/backend/ITensorBuilder.h b/runtime/onert/core/include/backend/ITensorBuilder.h deleted file mode 100644 index 97721cf..0000000 --- a/runtime/onert/core/include/backend/ITensorBuilder.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_ITENSOR_BUILDER_H__ -#define __ONERT_BACKEND_ITENSOR_BUILDER_H__ - -#include - -#include "ir/Index.h" -#include "ir/OperandInfo.h" -#include "ir/Operation.h" -#include "ir/Layout.h" -#include "ITensor.h" -#include "ITensorManager.h" -#include "ITensorRegistry.h" -#include "IDynamicTensorManager.h" - -namespace onert -{ -namespace backend -{ - -struct ITensorBuilder -{ - using IterateFunction = std::function; - - virtual ~ITensorBuilder(void) = default; - - /** - * @brief Register tensor information to allocate on backend - * - * @param ind Index - * @param info Info - * @param backend_layout Backend layout - * @param as_const Whether this tensor is constant - */ - virtual void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, - ir::Layout backend_layout) = 0; - - /** - * @brief Check if the tensor has been registered with @c registerTensorInfo - * - * @return true If the tensor has been registered - * @return false Otherwise - */ - virtual bool isRegistered(const ir::OperandIndex &) const = 0; - -public: // methods for static tensor allocation - /** - * @brief Let the tensor builder know first use(start of lifetime) of a tensor - * Must be called before calling @c prepare - * Must be run up to once for each tensor before calling @c notifyLastUse - * NOTE: Useful only for static models - */ - virtual void notifyFirstUse(const ir::OperandIndex &) = 0; - /** - * @brief Let the tensor builder know last use(end of lifetime) of a tensor - * Must be run up to once for each tensor after calling @c notifyFirstUse - * NOTE: Useful only for static models - */ - virtual void notifyLastUse(const ir::OperandIndex &) = 0; - /** - * @brief Prepare the tensors - * Before calling this, all the tensors must be registered - */ - virtual void prepare(void) = 0; - /** - * @brief Allocate the tensors - * Before calling this, @c prepare must be called - */ - virtual void allocate() = 0; - /** - * @brief Some actions after functions' @c IFunction::prepare method. - * This is called right after each function's @c IFunction::prepare function has been - * called. - */ - virtual void postFunctionPrepare() = 0; - -public: // methods for dynamic tensor allocation - /** - * @brief Get dynamicTensorManager. If a backend does not support dynamic tensor, exception - * will be thrown. - * - * @return pointer of IDynamicTensorManager object - * - * @note Since it is a pointer, its life time is from the cration of TensorBuilder - * to the end of execution - */ - virtual IDynamicTensorManager *dynamicTensorManager(void) { return nullptr; } -}; - -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_ITENSOR_BUILDER_H__ diff --git a/runtime/onert/core/include/backend/ITensorRegister.h b/runtime/onert/core/include/backend/ITensorRegister.h deleted file mode 100644 index b8e521c..0000000 --- a/runtime/onert/core/include/backend/ITensorRegister.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_ITENSOR_REGISTER_H__ -#define __ONERT_BACKEND_ITENSOR_REGISTER_H__ - -#include "ir/LowerInfoMap.h" -#include "ITensorBuilder.h" -#include "ir/Layout.h" -#include "ir/OperandIndexSequence.h" -#include "ir/OperandInfo.h" -#include "ir/Operands.h" -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace backend -{ - -class ITensorRegister : public ir::OperationVisitor -{ -public: - virtual ~ITensorRegister() = default; - -public: - void registerTensors(const ir::OpSequence &op_seq, const ir::LowerInfoMap *lower_info_map) - { - _current_op_seq_layout = op_seq.getLayout(); - _lower_info_map = lower_info_map; - assert(_lower_info_map != nullptr); - assert(tensor_builder().get() != nullptr); - op_seq.accept(*this); - } - -protected: - virtual const ir::Operands &operands() const = 0; - virtual std::shared_ptr tensor_builder() const = 0; - -protected: -#define OP(InternalName) \ - void visit(const ir::operation::InternalName &node) override \ - { \ - for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs()) \ - { \ - defaultRegisterTensorInfo(ind); \ - } \ - } -#include "ir/Operations.lst" -#undef OP - -protected: - void defaultRegisterTensorInfo(const ir::OperandIndex &index) const - { - if (tensor_builder()->isRegistered(index)) - { - return; - } - - const auto &obj = operands().at(index); - const auto frontend_layout = frontendLayout(); - const auto backend_layout = backendLayout(index); - ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), - obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; - tensor_builder()->registerTensorInfo(index, backend_info, backend_layout); - } - -protected: - ir::Layout frontendLayout() const { return _current_op_seq_layout; } - ir::Layout backendLayout(const ir::OperandIndex &index) const - { - assert(_lower_info_map != nullptr); - const auto lower_info = _lower_info_map->operand.at(index).get(); - return lower_info->def_factors().getOnlyElement().layout(); - } - -private: - ir::Layout _current_op_seq_layout; - const ir::LowerInfoMap *_lower_info_map{nullptr}; -}; - -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_ITENSOR_REGISTER_H__ diff --git a/runtime/onert/core/include/backend/cpu_common/BackendContextHelpers.h b/runtime/onert/core/include/backend/cpu_common/BackendContextHelpers.h new file mode 100644 index 0000000..19e7b7c --- /dev/null +++ b/runtime/onert/core/include/backend/cpu_common/BackendContextHelpers.h @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__ +#define __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__ + +#include + +#include "ir/Index.h" +#include "ir/OpSequences.h" +#include "ir/LowerInfoMap.h" +#include "util/logging.h" + +namespace onert +{ +namespace backend +{ +namespace cpu_common +{ + +// TODO Remove the template param BackendContext once unification of cpu backend context is done +template +void planTensors(const T_BackendContext &ctx, const std::vector &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info) +{ + auto graph = ctx.graph(); + auto tensor_builder = ctx.tensor_builder; + + ir::OperandIndexMap uses_map; + ir::OperandIndexMap def_map; + ir::OperandIndexSequence constants; + + auto model_io = + (graph->getInputs() + graph->getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED; + + // Prepare scanning + for (auto ind : ctx.operand_list()) + { + if (model_io.contains(ind)) + continue; + const auto &obj = graph->operands().at(ind); + const auto &li = lower_info.operand.at(ind); + if (li->def_factors().getOnlyElement().backend() != ctx.backend()) + continue; + + // Ignore unused tensor + if (li->def_factors().size() == 0 && li->use_factors().size() == 0) + { + VERBOSE_F() << "Operand #" << ind.value() << " will not be used. no more process." + << std::endl; + return; + } + + uses_map[ind] = obj.getUses().size(); + def_map[ind] = obj.getDef().valid() ? 1 : 0; + + if (obj.isConstant()) + constants.append(ind); + + auto factor = li->def_factors().getOnlyElement(); + if (!tensor_builder->isRegistered(ind)) + { + // These tensors do not exist in any op_seq (No use and def) + const auto info = obj.info(); + const auto backend_layout = factor.layout(); + // TODO Change tensor info to have permuted shape + tensor_builder->registerTensorInfo(ind, info, backend_layout); + } + } + + // Start scanning to do notify{First|Last}Use for each tensor + + // If a tensor is a constant, increase the use of the tensor and allocate it first. + // Increasing use count here makes the tensor never be deallocated, i.e it they will be + // deallocated last. + for (const auto &ind : constants) + { + uses_map[ind]++; + tensor_builder->notifyFirstUse(ind); + } + + // At each operation, + // 1. Scan DEF of outputs. If the DEF, allocate it + // 2. Scan DEF of inputs. If variable tensor, allocate it + // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0 + for (const auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + for (const auto &op_idx : op_seq.operations()) + { + auto op_inputs = graph->operations().at(op_idx).getInputs() | ir::Remove::DUPLICATED | + ir::Remove::UNDEFINED; + auto op_outputs = graph->operations().at(op_idx).getOutputs() | ir::Remove::DUPLICATED | + ir::Remove::UNDEFINED; + + // Define outputs + for (const auto &ind : op_outputs) + { + if (model_io.contains(ind)) + continue; + if (!tensor_builder->isRegistered(ind)) + continue; + assert(def_map.find(ind) != def_map.end()); + if (def_map[ind]) + { + def_map[ind] = 0; + tensor_builder->notifyFirstUse(ind); + } + } + + // Scan variable tensors + // This tensor has features like constant. 
But OperandInfo and LowerInfo treat them as + // non-constant because of less memory usage by memory planning in here + for (const auto &ind : op_inputs) + { + if (model_io.contains(ind)) + continue; + if (!tensor_builder->isRegistered(ind)) + continue; + const auto &operand = graph->operands().at(ind); + if (operand.info().isVariable()) + { + // The variable tensor with buffer is not supported yet + assert(operand.data() == nullptr); + assert(operand.getUses().size() == 1 && !operand.getDef().valid()); + assert(lower_info.operand.at(ind)->def_factors().size() == 1 && + lower_info.operand.at(ind)->use_factors().size() == 1); + assert(uses_map[ind] == 1 && def_map[ind] == 0); + tensor_builder->notifyFirstUse(ind); + } + } + + for (const auto &ind : op_inputs) + { + if (model_io.contains(ind)) + continue; + if (!tensor_builder->isRegistered(ind)) + continue; + assert(uses_map.find(ind) != uses_map.end()); + assert(uses_map[ind] > 0); + uses_map[ind]--; + if (uses_map[ind] == 0) + { + // plan for deallocation of static tensornode + tensor_builder->notifyLastUse(ind); + + // plan for deallocation of dynamic tensor + auto dyn_tensor_manager = tensor_builder->dynamicTensorManager(); + auto *tensor = ctx.tensor_registry->getITensor(ind); + assert(tensor); + dyn_tensor_manager->planDealloc(op_idx, tensor); + } + } + } + } + + // Dispose and validate + for (const auto &ind : constants) + { + --uses_map[ind]; + if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice + { + tensor_builder->notifyLastUse(ind); + } + } + + assert( + std::all_of(uses_map.begin(), uses_map.end(), + [](std::pair it) { return it.second == 0; })); + + assert( + std::all_of(def_map.begin(), def_map.end(), + [](std::pair it) { return it.second == 0; })); +} + +} // namespace cpu_common +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__ diff --git a/runtime/onert/core/include/backend/cpu_common/ConstantInitializer.h b/runtime/onert/core/include/backend/cpu_common/ConstantInitializer.h new file mode 100644 index 0000000..6793555 --- /dev/null +++ b/runtime/onert/core/include/backend/cpu_common/ConstantInitializer.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
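The planTensors() helper above drives the first/last-use notifications from simple reference counts: remaining uses of each operand are counted down as operations are walked in execution order, constants get one extra use so they are released only at the very end, and a tensor's lifetime closes when its count reaches zero. A stripped-down sketch of that counting scheme, with hypothetical names and plain ints instead of onert types:

// Hedged sketch of use-count driven lifetime planning, loosely mirroring
// cpu_common::planTensors(). The "firstUse"/"lastUse" prints stand in for
// tensor_builder->notifyFirstUse()/notifyLastUse().
#include <iostream>
#include <map>
#include <vector>

struct Op { std::vector<int> inputs, outputs; };

int main()
{
  // Two ops: t0 -> op0 -> t1, then {t1, t3 (constant)} -> op1 -> t2.
  std::vector<Op> order = {{{0}, {1}}, {{1, 3}, {2}}};
  std::map<int, int> uses = {{0, 1}, {1, 1}, {2, 0}, {3, 1}};
  std::vector<int> constants = {3};

  std::cout << "firstUse t0\n"; // graph input: assumed live from the start (simplified)

  // Constants get one extra use so they are only released at the very end.
  for (int c : constants) { ++uses[c]; std::cout << "firstUse t" << c << "\n"; }

  for (const auto &op : order)
  {
    for (int out : op.outputs) std::cout << "firstUse t" << out << "\n"; // DEF of outputs
    for (int in : op.inputs)
      if (--uses[in] == 0) std::cout << "lastUse t" << in << "\n"; // last USE drained
  }

  // Dispose the artificial reference that kept constants alive.
  for (int c : constants)
    if (--uses[c] == 0) std::cout << "lastUse t" << c << "\n";
}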
+ */ + +#ifndef __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__ + +#include "TensorRegistry.h" + +#include "ConstantInitializerBase.h" +#include + +namespace onert +{ +namespace backend +{ +namespace cpu_common +{ + +class ConstantInitializer : public ConstantInitializerBase +{ +public: + ConstantInitializer(const ir::Operands &operands, + const std::shared_ptr &tensor_reg); + +public: + void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override; + + // TODO: For now the only cpu backend supports constant tensor to use data from external + // If the other backend supports (to do this, + // ExternalTensor should be abstract such as IExternal, maybe), + // this can be an interface of cpu_common::ConstantInitializerBase + void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &); + +private: + std::shared_ptr tensor_registry() const override { return _tensor_reg; } + +private: + std::shared_ptr _tensor_reg; +}; + +} // namespace cpu_common +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/core/include/backend/IConstantInitializer.h b/runtime/onert/core/include/backend/cpu_common/ConstantInitializerBase.h similarity index 90% rename from runtime/onert/core/include/backend/IConstantInitializer.h rename to runtime/onert/core/include/backend/cpu_common/ConstantInitializerBase.h index 149acec..d4c65de 100644 --- a/runtime/onert/core/include/backend/IConstantInitializer.h +++ b/runtime/onert/core/include/backend/cpu_common/ConstantInitializerBase.h @@ -14,20 +14,21 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__ -#define __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__ +#ifndef __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__ +#define __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__ #include #include -#include "ITensorBuilder.h" #include "ir/Coordinates.h" #include "ir/Layout.h" #include "ir/Operand.h" #include "ir/Operands.h" #include "ir/OperationVisitor.h" #include "ir/OpSequence.h" +#include "backend/ITensorRegistry.h" #include "util/logging.h" +#include "backend/ITensorRegistry.h" namespace { @@ -153,11 +154,13 @@ namespace onert { namespace backend { +namespace cpu_common +{ -class IConstantInitializer : public ir::OperationVisitor +class ConstantInitializerBase : public ir::OperationVisitor { public: - virtual ~IConstantInitializer() = default; + virtual ~ConstantInitializerBase() = default; public: void run() @@ -178,15 +181,15 @@ public: } public: - IConstantInitializer(const ir::Operands &operands) - : _operands{operands}, _current_op_seq_layout{ir::Layout::UNKNOWN} + ConstantInitializerBase(const ir::Operands &operands) + : _operands{operands}, _current_layout{ir::Layout::UNKNOWN} { } public: using Initializer = std::function; - void setLayout(ir::Layout layout) { _current_op_seq_layout = layout; } + void setLayout(ir::Layout layout) { _current_layout = layout; } protected: virtual std::shared_ptr tensor_registry() const = 0; @@ -221,10 +224,11 @@ public: protected: const ir::Operands &_operands; std::unordered_map _init_map; - ir::Layout _current_op_seq_layout; // TODO Rename this to _current_layout + ir::Layout _current_layout; }; +} // namespace cpu_common } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__ +#endif // __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__ diff 
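The ConstantInitializerBase rename above keeps the underlying pattern intact: an initializer closure is recorded per constant operand, and run() later applies each closure to the tensor found in the registry. A self-contained sketch of that map-of-initializers idea, with invented ToyOperand/ToyTensor types rather than ir::Operand and ITensor:

// Hedged sketch of the _init_map pattern behind ConstantInitializerBase.
#include <functional>
#include <iostream>
#include <map>
#include <vector>

struct ToyOperand { std::vector<float> data; };
struct ToyTensor { std::vector<float> buffer; };

class ToyConstantInitializer
{
public:
  // Record how to fill a given operand's tensor; nothing runs yet.
  void registerDefaultInitializer(int ind, const ToyOperand &obj)
  {
    _init_map[ind] = [&obj](ToyTensor &t) { t.buffer = obj.data; }; // plain copy here
  }

  // Apply every recorded initializer to the tensor found in the registry.
  void run(std::map<int, ToyTensor> &registry)
  {
    for (auto &it : _init_map)
      it.second(registry.at(it.first));
    _init_map.clear(); // drop the closures once applied (simplified)
  }

private:
  std::map<int, std::function<void(ToyTensor &)>> _init_map;
};

int main()
{
  ToyOperand weights{{1.f, 2.f, 3.f}};
  std::map<int, ToyTensor> registry;
  registry[7] = ToyTensor{};
  ToyConstantInitializer init;
  init.registerDefaultInitializer(7, weights);
  init.run(registry);
  std::cout << registry[7].buffer.size() << " values initialized\n";
}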
--git a/runtime/onert/core/include/backend/IKernelGenerator.h b/runtime/onert/core/include/backend/cpu_common/KernelGeneratorBase.h similarity index 83% rename from runtime/onert/core/include/backend/IKernelGenerator.h rename to runtime/onert/core/include/backend/cpu_common/KernelGeneratorBase.h index afc34ec..49a5897 100644 --- a/runtime/onert/core/include/backend/IKernelGenerator.h +++ b/runtime/onert/core/include/backend/cpu_common/KernelGeneratorBase.h @@ -14,28 +14,30 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_IKERNEL_GENERATOR_H__ -#define __ONERT_BACKEND_IKERNEL_GENERATOR_H__ +#ifndef __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__ +#define __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__ #include #include #include -#include "ITensorBuilder.h" #include "ir/OperationVisitor.h" #include "ir/OpSequence.h" #include #include "exec/FunctionSequence.h" +#include "backend/ITensorRegistry.h" namespace onert { namespace backend { +namespace cpu_common +{ -class IKernelGenerator : public ir::OperationVisitor +class KernelGeneratorBase : public ir::OperationVisitor { public: - virtual ~IKernelGenerator() = default; + virtual ~KernelGeneratorBase() = default; std::unique_ptr releaseFunction() { @@ -70,7 +72,8 @@ protected: std::unique_ptr _return_fn_seq; // TODO Extract this out }; +} // namespace cpu_common } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_IKERNEL_GENERATOR_H__ +#endif // __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__ diff --git a/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h b/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h index fa50b55..850bcf2 100644 --- a/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h +++ b/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h @@ -17,9 +17,11 @@ #ifndef __ONERT_BACKEND_CPU_COMMON_STATICTENSOR_MANAGER_H__ #define __ONERT_BACKEND_CPU_COMMON_STATICTENSOR_MANAGER_H__ -#include "MemoryManager.h" - #include "backend/IStaticTensorManager.h" +#include "backend/cpu_common/DynamicTensorManager.h" +#include "backend/cpu_common/MemoryManager.h" +#include "backend/cpu_common/TensorRegistry.h" +#include "backend/ITensorManager.h" #include "ir/OperandIndexMap.h" #include "ir/OperandInfo.h" #include "TensorRegistry.h" @@ -37,12 +39,10 @@ class StaticTensorManager : public backend::IStaticTensorManager { public: StaticTensorManager(const std::shared_ptr ®, - DynamicMemoryManager *dynamic_mem_mgr); + DynamicTensorManager *dynamic_tensor_manager); virtual ~StaticTensorManager() = default; - void allocateConsts(void); void allocateNonconsts(void); - void deallocateConsts(void); void deallocateNonconsts(void); void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, @@ -54,11 +54,10 @@ public: void iterate(const std::function &fn); private: - std::unique_ptr _const_mgr; std::unique_ptr _nonconst_mgr; const std::shared_ptr _tensors; ir::OperandIndexMap _as_constants; - DynamicMemoryManager *_dynamic_mem_mgr; + DynamicTensorManager *_dynamic_tensor_manager; }; } // namespace cpu_common diff --git a/runtime/onert/core/include/backend/cpu_common/Tensor.h b/runtime/onert/core/include/backend/cpu_common/Tensor.h index 5fa20e1..5fbf4e7 100644 --- a/runtime/onert/core/include/backend/cpu_common/Tensor.h +++ b/runtime/onert/core/include/backend/cpu_common/Tensor.h @@ -21,6 +21,7 @@ #include #include +#include namespace onert { @@ -177,6 +178,91 @@ private: std::shared_ptr _allocator; }; +/** + * @brief Class that 
uses data from external memory that is not managed by a backend + * instead of allocating and copying the data. ExternalTensor's data pointer points to + * an address of memory such as where memory is already allocated, or mmapped area. + * This is meaning that ExternalTensor can take all of types' ir::Data. + * To support this, assume below things no padding, always NHWC layout, + * constant tensor and not dynamic. + */ +class ExternalTensor : public Tensor +{ +public: + ExternalTensor() = delete; + virtual ~ExternalTensor(); + +public: + ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout) + : Tensor(info, layout, nullptr) + { + assert(_layout == ir::Layout::NHWC); + assert(_info.isConstant()); + assert(_info.isDynamic() == false); + } + +public: + /** + * @brief set Data to be shared from external so that this ExternalTensor will not be + * allocated on CPU backend + * @param[in] data data of Operand to be set + */ + void setData(const std::shared_ptr data) + { + assert(data != nullptr); + _data = data; + // Note. Some op such as cker::Conv could take buffer as nullptr. + // That's why _buffer also would be used + _buffer = const_cast(_data->base()); + } + +public: + uint8_t *buffer() const override { return _buffer; } + + bool is_constant() const override { return true; } + bool is_dynamic() const override { return false; } + void set_dynamic() override + { + throw std::runtime_error("This tensor does not support changing dynamic"); + } + + void setShape(const ir::Shape &) override + { + throw std::runtime_error("This tensor does not support changing shape"); + } + + void increase_ref() override { ++_num_references; } + + void decrease_ref() override + { + assert(_data != nullptr); + assert(_num_references > 0); + --_num_references; + if (_num_references == 0) + { + _data.reset(); + _buffer = nullptr; + } + } + + /** + * @brief Reset reference count to zero and release data + */ + void reset_ref() override + { + assert(_data != nullptr); + assert(_num_references > 0); + _num_references = 0; + + _data.reset(); + _buffer = nullptr; + } + + int32_t num_references() override { return _num_references; } + +private: + std::shared_ptr _data; +}; } // namespace cpu_common } // namespace backend } // namespace onert diff --git a/runtime/onert/core/include/compiler/BackendManager.h b/runtime/onert/core/include/compiler/BackendManager.h index af13d13..7850e21 100644 --- a/runtime/onert/core/include/compiler/BackendManager.h +++ b/runtime/onert/core/include/compiler/BackendManager.h @@ -34,7 +34,7 @@ class BackendManager public: using backend_create_t = backend::Backend *(*)(); using backend_destroy_t = void (*)(backend::Backend *); - using dlhandle_destroy_t = void (*)(void *); + using dlhandle_destroy_t = std::function; static BackendManager &get(); diff --git a/runtime/onert/core/include/compiler/Compiler.h b/runtime/onert/core/include/compiler/Compiler.h index 3098be7..68b862d 100644 --- a/runtime/onert/core/include/compiler/Compiler.h +++ b/runtime/onert/core/include/compiler/Compiler.h @@ -24,6 +24,7 @@ #include "ir/Graph.h" #include "exec/IExecutor.h" +#include "util/TracingCtx.h" namespace onert { @@ -48,7 +49,6 @@ struct CompilerOptions { // GENERAL OPTIONS std::vector backend_list; - bool is_primary_subgraph; // TODO Remove this out of this struct as it is not user-given option // OPTIONS ONLY FOR DEBUGGING/PROFILING std::string trace_filepath; //< File path to save trace records @@ -60,6 +60,8 @@ struct CompilerOptions bool he_profiling_mode; //< Whether HEScheduler 
profiling mode ON/OFF bool disable_compile; //< Run with Interpreter if true, try compilation otherwise bool fp16_enable; //< Whether fp16 mode ON/OFF + + util::TracingCtx *tracing_ctx; //< Profiling information }; CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs); @@ -73,8 +75,9 @@ public: /** * @brief Construct a new Compiler object * @param[in] subgs All subgraphs of a model + * @param[in] tracing_ctx Profiling information */ - Compiler(const std::shared_ptr &subgs); + Compiler(const std::shared_ptr &subgs, util::TracingCtx *tracing_ctx); public: /** diff --git a/runtime/onert/core/include/compiler/LoweredGraph.h b/runtime/onert/core/include/compiler/LoweredGraph.h index aadba68..f115ab9 100644 --- a/runtime/onert/core/include/compiler/LoweredGraph.h +++ b/runtime/onert/core/include/compiler/LoweredGraph.h @@ -67,8 +67,7 @@ private: const compiler::BackendResolver &backend_resolver); void manipulateLowerInfo( - ir::OperandIndexMap> &operands_lower_info, - bool is_primary); + ir::OperandIndexMap> &operands_lower_info); void dumpLowerInfo(); bool mergeable(const ir::OpSequenceIndex &op_seq_index, const ir::OperationIndex &node_index, ir::Layout layout, const compiler::BackendResolver &backend_resolver); diff --git a/runtime/onert/core/include/compiler/StaticShapeInferer.h b/runtime/onert/core/include/compiler/StaticShapeInferer.h index 05f2679..33a2f62 100644 --- a/runtime/onert/core/include/compiler/StaticShapeInferer.h +++ b/runtime/onert/core/include/compiler/StaticShapeInferer.h @@ -68,7 +68,7 @@ private: private: // TODO Define visitors for operations. List them in alphabetic order. - void visit(const ir::operation::ArgMax &op) override; + void visit(const ir::operation::ArgMinMax &op) override; void visit(const ir::operation::BatchMatMul &op) override; void visit(const ir::operation::BCQFullyConnected &op) override; void visit(const ir::operation::BCQGather &op) override; diff --git a/runtime/onert/core/include/exec/DynamicShapeInferer.h b/runtime/onert/core/include/exec/DynamicShapeInferer.h index d2eb831..1f3a13b 100644 --- a/runtime/onert/core/include/exec/DynamicShapeInferer.h +++ b/runtime/onert/core/include/exec/DynamicShapeInferer.h @@ -49,7 +49,7 @@ public: public: // TODO Define visitors for operations. List them in alphabetic order. 
// Remove TODO when any op starting from the alphabet is added - void visit(const ir::operation::ArgMax &op) override; + void visit(const ir::operation::ArgMinMax &op) override; void visit(const ir::operation::BatchMatMul &op) override; void visit(const ir::operation::BCQFullyConnected &op) override; void visit(const ir::operation::BCQGather &op) override; diff --git a/runtime/onert/core/include/exec/IExecutor.h b/runtime/onert/core/include/exec/IExecutor.h index 1d2831d..345bec8 100644 --- a/runtime/onert/core/include/exec/IExecutor.h +++ b/runtime/onert/core/include/exec/IExecutor.h @@ -18,17 +18,32 @@ * @file IExecutor.h * @brief This file defines interface of Executor */ -#ifndef __ONERT_EXEC_I_EXECUTOR_H_ -#define __ONERT_EXEC_I_EXECUTOR_H_ +#ifndef __ONERT_EXEC_I_EXECUTOR_H__ +#define __ONERT_EXEC_I_EXECUTOR_H__ #include "ir/Graph.h" #include "IFunction.h" #include "IODescription.h" +#include "ir/Index.h" #include "ir/OperationIndexMap.h" -#include "backend/IDynamicTensorManager.h" + +#include +#include +#include namespace onert { +namespace backend +{ +class IPortableTensor; +namespace controlflow +{ +class IOTensor; +} +} +} +namespace onert +{ namespace exec { class IExecutionObserver; @@ -60,11 +75,29 @@ struct IExecutor virtual void setIndexedRanks(std::shared_ptr>) = 0; /** - * @brief Start execution + * @brief Execute with user-given input/output description (for primary subgraph) * @param[in] desc Input and output description * @note This method should be thread-safe */ virtual void execute(const IODescription &desc) = 0; + + /** + * @brief Execute with given input/output tensors + * + * For non-primary subgraphs, input and output tensors must be given. + * + * @param[in] inputs tensors that are passed as inputs + * @param[in] outputs tensors that are passed as outputs + */ + virtual void execute(const std::vector &inputs, + const std::vector &outputs) = 0; + + /** + * @brief Get output tensor objects + * + * @return Vector of @c IOTensor + */ + virtual const std::vector &getOutputTensors() const = 0; }; using ExecutorMap = std::unordered_map>; @@ -72,4 +105,4 @@ using ExecutorMap = std::unordered_map &&source); +void config_source_ext(std::unique_ptr &&source); bool toBool(const std::string &val); int toInt(const std::string &val); diff --git a/runtime/onert/core/include/util/ShapeInference.h b/runtime/onert/core/include/util/ShapeInference.h index 701b835..b11da90 100644 --- a/runtime/onert/core/include/util/ShapeInference.h +++ b/runtime/onert/core/include/util/ShapeInference.h @@ -42,7 +42,7 @@ using Shapes = std::vector; // Define shape calculation for operations. List them in alphabetic order. 
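Among the shape-calculation helpers listed just below, inferArgMaxShape is renamed to inferArgMinMaxShape and inferFillShape becomes a template over the shape buffer's element type. As a rough illustration of what such helpers compute (the exact onert behaviour, e.g. axis normalization and error handling, may differ), an ArgMin/ArgMax output shape is the input shape with the reduced axis removed, and Fill's output shape is read straight from its shape buffer:

// Hedged sketch only: std::vector<int> stands in for ir::Shape.
#include <cstdint>
#include <vector>

std::vector<int> sketchArgMinMaxShape(const std::vector<int> &input, int axis)
{
  if (axis < 0) axis += static_cast<int>(input.size()); // allow negative axis
  std::vector<int> out;
  for (int i = 0; i < static_cast<int>(input.size()); ++i)
    if (i != axis) out.push_back(input[i]); // the reduced axis disappears
  return out;
}

// Templated on the shape buffer's element type (e.g. int32_t or int64_t),
// which is the point of the "template <typename T>" change below.
template <typename T>
std::vector<int> sketchFillShape(const T *shape_buf, int rank)
{
  std::vector<int> out(rank);
  for (int i = 0; i < rank; ++i) out[i] = static_cast<int>(shape_buf[i]);
  return out;
}

// Example: ArgMax over axis 1 of a [2, 5, 3] tensor yields [2, 3];
// a Fill whose shape input holds {4, 4} yields [4, 4].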
-ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank); +ir::Shape inferArgMinMaxShape(const ir::Shape &input_shape, int axis, int rank); ir::Shape inferBatchMatMulShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape, const ir::operation::BatchMatMul::Param ¶m); @@ -70,7 +70,7 @@ ir::Shape inferEltwiseShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_sha ir::Shape inferExpandDimsShape(const ir::Shape &in_shape, int32_t axis); -ir::Shape inferFillShape(const ir::Shape &in_shape, const int32_t *in_buf); +template ir::Shape inferFillShape(const ir::Shape &fill_shape, const T *shape_buf); ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape); diff --git a/runtime/onert/core/include/util/TracingCtx.h b/runtime/onert/core/include/util/TracingCtx.h new file mode 100644 index 0000000..a82704c --- /dev/null +++ b/runtime/onert/core/include/util/TracingCtx.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_UTIL_TRACING_CTX_H__ +#define __ONERT_UTIL_TRACING_CTX_H__ + +#include "ir/Graph.h" +#include "ir/Index.h" +#include "ir/Subgraphs.h" + +#include +#include + +namespace onert +{ +namespace util +{ + +/** + * @brief Class to maintain information about profiling per session + */ +class TracingCtx +{ +public: + /** + * @brief Create and store unique session id managed by this class + * Note that this constructor can be called by multiple sessions running in parallely. + * Use this constructor only when there is only one subgraph in a model. + */ + TracingCtx(const ir::Graph *primary_subgraph) + { + decideSessionID(); + _subgraph_indices.emplace(primary_subgraph, 0); + } + + /** + * @brief Create and store unique session id managed by this class + * Note that this constructor can be called by multiple sessions running in parallely. + */ + TracingCtx(const onert::ir::Subgraphs *subgraphs) + { + assert(subgraphs); + + decideSessionID(); + + auto count = subgraphs->count(); + for (size_t i = 0; i < count; i++) + _subgraph_indices.emplace(subgraphs->at(onert::ir::SubgraphIndex(i)).get(), i); + } + + uint32_t getSessionId() const { return _session_id; } + + /** + * @brief Set subgraph index of a graph + */ + void setSubgraphIndex(const ir::Graph *g, uint32_t index) { _subgraph_indices.emplace(g, index); } + + /** + * @brief Get subgraph index of a graph. 
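TracingCtx hands every session a unique id by bumping a function-local static counter under a mutex (decideSessionID() below), so sessions created concurrently still get distinct ids. The same guarded-counter idiom in isolation, with illustrative names rather than the onert ones:

// Hedged sketch of the session-id scheme used by util::TracingCtx.
#include <cstdint>
#include <iostream>
#include <mutex>

static std::mutex session_id_mutex;

uint32_t nextSessionId()
{
  std::lock_guard<std::mutex> lock{session_id_mutex};
  static uint32_t next = 0; // shared across all sessions
  return next++;
}

int main()
{
  std::cout << nextSessionId() << " " << nextSessionId() << "\n"; // 0 1
}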
+ */ + ir::SubgraphIndex getSubgraphIndex(const ir::Graph *g) const { return _subgraph_indices.at(g); } + +private: + void decideSessionID() + { + std::unique_lock lock{_session_id_mutex}; + + static uint32_t next_session_id = 0; + _session_id = next_session_id++; + } + +private: + std::unordered_map _subgraph_indices; + uint32_t _session_id; + static std::mutex _session_id_mutex; +}; + +} // namespace util +} // namespace onert + +#endif // __ONERT_UTIL_TRACING_CTX_H__ diff --git a/runtime/onert/core/include/util/logging.h b/runtime/onert/core/include/util/logging.h index 76cfb8d..65c3750 100644 --- a/runtime/onert/core/include/util/logging.h +++ b/runtime/onert/core/include/util/logging.h @@ -64,4 +64,11 @@ static Context &ctx = Context::get(); if (::onert::util::logging::ctx.enabled()) \ std::cout << "[" << __func__ << "] " +#define WHEN_LOG_ENABLED(METHOD) \ + if (::onert::util::logging::ctx.enabled()) \ + do \ + { \ + METHOD; \ + } while (0) + #endif // __ONERT_UTIL_LOGGING_H__ diff --git a/runtime/onert/core/src/backend/BackendContext.cc b/runtime/onert/core/src/backend/BackendContext.cc index bafa36d..404c3b1 100644 --- a/runtime/onert/core/src/backend/BackendContext.cc +++ b/runtime/onert/core/src/backend/BackendContext.cc @@ -17,7 +17,6 @@ #include "backend/BackendContext.h" #include "ir/Operation.h" -#include "backend/IConstantInitializer.h" namespace onert { @@ -31,25 +30,5 @@ void BackendContext::initialize(const std::vector &operation_list _operand_list = operand_list; } -void BackendContext::initConsts() -{ - for (auto &op : _operation_list) - { - constant_initializer->setLayout(op.layout); - _graph->operations().at(op.index).accept(*constant_initializer); - } - - for (auto ind : _operand_list) - { - const auto &obj = _graph->operands().at(ind); - if (obj.isConstant() && !constant_initializer->exist(ind)) - { - constant_initializer->registerDefaultInitializer(ind, obj); - } - } - - constant_initializer->run(); -} - } // namespace backend } // namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/Backend.h b/runtime/onert/core/src/backend/controlflow/Backend.h index cc8346e..3323cf5 100644 --- a/runtime/onert/core/src/backend/controlflow/Backend.h +++ b/runtime/onert/core/src/backend/controlflow/Backend.h @@ -72,8 +72,6 @@ public: context->constant_initializer = std::make_shared(operands, tr); context->kernel_gen = std::make_shared(graph, tb->dynamicTensorManager(), tr, context->external_context()); - context->tensor_register = nullptr; - context->optimizer = nullptr; return context; } diff --git a/runtime/onert/core/src/backend/controlflow/BackendContext.cc b/runtime/onert/core/src/backend/controlflow/BackendContext.cc new file mode 100644 index 0000000..366377e --- /dev/null +++ b/runtime/onert/core/src/backend/controlflow/BackendContext.cc @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "BackendContext.h" + +#include "KernelGenerator.h" +#include "backend/cpu_common/BackendContextHelpers.h" + +namespace onert +{ +namespace backend +{ +namespace controlflow +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (auto index : operand_list()) + { + if (model_io.contains(index)) + continue; + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = [&]() { + if (obj.getUses().size() == 0) + return ir::Layout::UNKNOWN; + auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses? + for (auto &operation_info : operation_list()) + { + if (operation_info.index == use_op_ind) + return operation_info.layout; + } + return ir::Layout::UNKNOWN; + }(); + const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement(); + if (permute_factor.backend() != backend()) + continue; + const auto backend_layout = permute_factor.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + + // TODO Get compiler options from compiler, and use it rather than getting it from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + cpu_common::planTensors(*this, order, op_seqs, lower_info); + } + else + { + // For the executors that does not have fixed linear execution order: + // To make tensors never be deallocated, this is a workaround to use static memory planner + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); }); + } + + return ret; +} + +} // namespace controlflow +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/BackendContext.h b/runtime/onert/core/src/backend/controlflow/BackendContext.h index 3647338..a768d5d 
100644 --- a/runtime/onert/core/src/backend/controlflow/BackendContext.h +++ b/runtime/onert/core/src/backend/controlflow/BackendContext.h @@ -18,6 +18,9 @@ #define __ONERT_BACKEND_CONTROLFLOW_BACKEND_CONTEXT_H__ #include +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" #include "ExternalContext.h" namespace onert @@ -32,21 +35,36 @@ class BackendContext : public onert::backend::BackendContext public: BackendContext(const Backend *backend, const ir::Graph *graph, std::shared_ptr tensor_registry = nullptr, - std::shared_ptr tensor_builder = nullptr, - std::shared_ptr constant_initializer = nullptr, - std::shared_ptr kernel_gen = nullptr, - std::shared_ptr tensor_register = nullptr, - std::shared_ptr optimizer = nullptr) - : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder, - constant_initializer, kernel_gen, tensor_register, - optimizer), - _external_context(std::make_shared()) + std::shared_ptr tensor_builder = nullptr, + std::shared_ptr constant_initializer = nullptr, + std::shared_ptr kernel_gen = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen}, _external_context(std::make_shared()) { } + ITensorRegistry *genTensors(const std::vector &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + + FunctionMap genKernels(const std::vector &order, + const ir::OpSequences &op_seqs) override; + std::shared_ptr external_context() { return _external_context; } private: + void initConsts(); + void planTensors(const std::vector &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr tensor_builder; + std::shared_ptr constant_initializer; + std::shared_ptr kernel_gen; + +private: // NOTE ruy context has a thread pool, and when multiple ruy contexts are created, // the thread pool is also created in duplicate // TODO Create one ruy context for session diff --git a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h b/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h index e21a8f3..ac97ef9 100644 --- a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h +++ b/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h @@ -17,10 +17,7 @@ #ifndef __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__ #define __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__ -#include "TensorRegistry.h" - -#include -#include +#include namespace onert { @@ -29,21 +26,7 @@ namespace backend namespace controlflow { -class ConstantInitializer : public IConstantInitializer -{ -public: - ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr &tensor_reg) - : IConstantInitializer{operands}, _tensor_reg{tensor_reg} - { - } - -private: - std::shared_ptr tensor_registry() const override { return _tensor_reg; } - -private: - std::shared_ptr _tensor_reg; -}; +using ConstantInitializer = cpu_common::ConstantInitializer; } // namespace controlflow } // namespace backend diff --git a/runtime/onert/core/src/backend/controlflow/ExternalContext.h b/runtime/onert/core/src/backend/controlflow/ExternalContext.h index 3db6829..cfb9831 100644 --- a/runtime/onert/core/src/backend/controlflow/ExternalContext.h +++ b/runtime/onert/core/src/backend/controlflow/ExternalContext.h @@ -17,7 +17,6 @@ #ifndef __ONERT_BACKEND_CONTROLFLOW_EXTERNAL_CONTEXT_H__ #define 
__ONERT_BACKEND_CONTROLFLOW_EXTERNAL_CONTEXT_H__ -#include #include #include @@ -38,7 +37,7 @@ namespace controlflow { // TODO Unify this with cpu::ExternalContext -class ExternalContext : public IExternalContext +class ExternalContext { public: ExternalContext() : _ruy_context(std::make_unique()) diff --git a/runtime/onert/core/src/backend/controlflow/IOTensor.cc b/runtime/onert/core/src/backend/controlflow/IOTensor.cc new file mode 100644 index 0000000..47405ac --- /dev/null +++ b/runtime/onert/core/src/backend/controlflow/IOTensor.cc @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "IOTensor.h" + +#include + +namespace onert +{ +namespace backend +{ +namespace controlflow +{ + +IOTensor::IOTensor(const ir::OperandInfo &info, ir::Layout layout) + : IPortableTensor{info}, _orig_info{info}, _orig_layout{layout} +{ + setUserTensor(nullptr, 0); +} + +void IOTensor::setTensor(IPortableTensor *tensor) +{ + assert(tensor); + assert(tensor != this); + // TODO Handle when layout was changed + assert(tensor->layout() == _orig_layout); // Changing layout is not considered yet + _user_tensor.reset(); + _tensor = tensor; +} + +void IOTensor::setUserTensor(uint8_t *buffer, size_t size) +{ + _user_tensor = std::make_unique(_orig_info, _orig_layout, buffer, size); + _tensor = _user_tensor.get(); +} + +} // namespace controlflow +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/IOTensor.h b/runtime/onert/core/src/backend/controlflow/IOTensor.h new file mode 100644 index 0000000..a7ed84b --- /dev/null +++ b/runtime/onert/core/src/backend/controlflow/IOTensor.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__ +#define __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__ + +#include "backend/IPortableTensor.h" +#include "UserTensor.h" + +namespace onert +{ +namespace backend +{ +namespace controlflow +{ + +/** + * @brief Tensor object that indirects to the tensor it is pointing to. + * + * A model I/O tensor could be two types. + * + * 1. @c UserTensor, if it is the primary graph + * 2. Any other derivative of @c IPortableTensor from another backend, otherwise + * + * To support these, this object indirects everything to the actual tensor pointer. 
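IOTensor, declared below, forwards every accessor to whichever tensor it currently points at: either a UserTensor it owns (primary-graph I/O backed by a user buffer) or a tensor owned by another backend (nested-subgraph I/O). The forwarding idea in miniature, with invented BufferTensor/IndirectTensor types that are not onert classes:

// Hedged sketch of the indirection pattern behind controlflow::IOTensor.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

// A plain tensor with storage of its own.
struct BufferTensor
{
  explicit BufferTensor(size_t size) : data(size) {}
  uint8_t *buffer() { return data.data(); }
  size_t total_size() const { return data.size(); }
  std::vector<uint8_t> data;
};

class IndirectTensor
{
public:
  // Point at a tensor owned elsewhere (the "another backend" case).
  void setTensor(BufferTensor *t)
  {
    assert(t != nullptr);
    _owned.reset();
    _target = t;
  }

  // Or own a freshly wrapped buffer, the UserTensor-like case.
  void setUserBuffer(size_t size)
  {
    _owned = std::make_unique<BufferTensor>(size);
    _target = _owned.get();
  }

  // Every accessor just forwards to whatever we currently point at.
  uint8_t *buffer() { return _target->buffer(); }
  size_t total_size() const { return _target->total_size(); }

private:
  BufferTensor *_target = nullptr;
  std::unique_ptr<BufferTensor> _owned;
};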
+ * Exceptionally if it is UserTensor, this class creates and manages it. + */ +class IOTensor : public IPortableTensor +{ +public: + IOTensor(const ir::OperandInfo &info, ir::Layout layout); + +public: + void setTensor(IPortableTensor *tensor); + void setUserTensor(uint8_t *buffer, size_t size); + ir::OperandInfo orig_info() const { return _orig_info; } + ir::Layout orig_layout() const { return _orig_layout; } + +public: + uint8_t *buffer() const override { return _tensor->buffer(); } + size_t total_size() const override { return _tensor->total_size(); } + size_t dimension(size_t index) const override { return _tensor->dimension(index); } + size_t num_dimensions() const override { return _tensor->num_dimensions(); } + size_t calcOffset(const ir::Coordinates &coords) const override + { + return _tensor->calcOffset(coords); + } + ir::Layout layout() const override { return _tensor->layout(); } + ir::DataType data_type() const override { return _tensor->data_type(); } + float data_scale() const override { return _tensor->data_scale(); } + int32_t data_offset() const override { return _tensor->data_offset(); } + bool is_dynamic() const override { return _is_dynamic || (_tensor && _tensor->is_dynamic()); } + void set_dynamic() override { _is_dynamic = true; } + ir::Shape getShape() const override { return _tensor->getShape(); } + void setShape(const ir::Shape &shape) override + { + // Workaround for IPortableTensor holds _info as its member + _info.shape(shape); + _tensor->setShape(shape); + } + bool is_constant() const override { return _tensor->is_constant(); } + bool applyShape(const ir::Shape &shape) override + { + // Workaround for IPortableTensor holds _info as its member + _info.shape(shape); + return _tensor->applyShape(shape); + } + +private: + const ir::OperandInfo _orig_info; + const ir::Layout _orig_layout; + bool _is_dynamic{false}; + IPortableTensor *_tensor{nullptr}; //< The actual tensor that is indirected + std::unique_ptr _user_tensor; //< If it is a user tensor, it is managed by this object +}; + +} // namespace controlflow +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__ diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc index 8e39ee5..2606f04 100644 --- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc +++ b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc @@ -31,7 +31,7 @@ namespace backend namespace controlflow { -KernelGenerator::KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager, +KernelGenerator::KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager, const std::shared_ptr &tensor_reg, const std::shared_ptr &external_context) : _graph{graph}, _dyn_tensor_manager{dyn_tensor_manager}, _tensor_reg{tensor_reg}, @@ -77,18 +77,17 @@ void KernelGenerator::visit(const ir::operation::If &node) const auto then_subg_index = node.param().then_subg_index; const auto else_subg_index = node.param().else_subg_index; - std::vector input_tensors; + std::vector input_tensors; for (const auto input_index : node.getInputs()) { - auto input_tensor = getTensor(input_index); - + auto input_tensor = getPortableTensor(input_index); input_tensors.emplace_back(input_tensor); } - std::vector output_tensors; + std::vector output_tensors; for (const auto output_index : node.getOutputs()) { - auto output_tensor = getTensor(output_index); + auto output_tensor = 
getPortableTensor(output_index); output_tensors.emplace_back(output_tensor); } @@ -97,8 +96,8 @@ void KernelGenerator::visit(const ir::operation::If &node) const auto cond_tensor = input_tensors.front(); input_tensors.erase(input_tensors.begin()); auto fn = std::make_unique<::onert::backend::controlflow::kernel::IfLayer>( - cond_tensor, input_tensors, output_tensors, node.getOutputs(), _graph, then_subg_index, - else_subg_index, _executor_map, _external_context); + cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executor_map, + _external_context); _return_fn = std::move(fn); } @@ -124,33 +123,40 @@ void KernelGenerator::visit(const ir::operation::While &node) // This op does not support input as a constant, because controlflow backend does not have // TensorBuilder - std::vector input_tensors; + std::vector input_tensors; for (const auto input_index : node.getInputs()) { - auto input_tensor = getTensor(input_index); - + auto input_tensor = getPortableTensor(input_index); input_tensors.emplace_back(input_tensor); } - std::vector output_tensors; + std::vector output_tensors; for (const auto output_index : node.getOutputs()) { - auto output_tensor = getTensor(output_index); + auto output_tensor = getPortableTensor(output_index); output_tensors.emplace_back(output_tensor); } // WhileLayer just set ExecutorMap instead of cond and body executor to avoid complexity of // creating executor recusively auto fn = std::make_unique<::onert::backend::controlflow::kernel::WhileLayer>( - input_tensors, output_tensors, node.getOutputs(), _graph, cond_subg_index, body_subg_index, - _executor_map, _external_context); + input_tensors, output_tensors, cond_subg_index, body_subg_index, _executor_map, + _dyn_tensor_manager->dynamic_mem_mgr().get(), _external_context); _return_fn = std::move(fn); } backend::ITensor *KernelGenerator::getTensor(const ir::OperandIndex &index) { - backend::ITensor *ret = _tensor_registries.getITensor(index); + // get Tensor from all tensor registries (for Permute op) + auto ret = _tensor_registries.getITensor(index); + assert(ret != nullptr); + return ret; +} + +backend::IPortableTensor *KernelGenerator::getPortableTensor(const ir::OperandIndex &index) +{ + auto ret = _tensor_reg->getPortableTensor(index); assert(ret != nullptr); return ret; } diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h b/runtime/onert/core/src/backend/controlflow/KernelGenerator.h index c2c1243..7b395d1 100644 --- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h +++ b/runtime/onert/core/src/backend/controlflow/KernelGenerator.h @@ -17,13 +17,12 @@ #ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__ #define __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__ -#include -#include #include #include "ExternalContext.h" #include #include "TensorBuilder.h" #include "compiler/TensorRegistries.h" +#include "backend/cpu_common/KernelGeneratorBase.h" #include "TensorRegistry.h" namespace onert @@ -33,10 +32,10 @@ namespace backend namespace controlflow { -class KernelGenerator : public IKernelGenerator +class KernelGenerator : public cpu_common::KernelGeneratorBase { public: - KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager, + KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager, const std::shared_ptr &tensor_reg, const std::shared_ptr &external_context); @@ -50,8 +49,6 @@ public: _executor_map = executor_map.get(); } - using IKernelGenerator::visit; - void visit(const ir::OpSequence &) 
override; void visit(const ir::operation::If &) override; void visit(const ir::operation::Permute &) override; @@ -59,10 +56,11 @@ public: private: backend::ITensor *getTensor(const ir::OperandIndex &index); + backend::IPortableTensor *getPortableTensor(const ir::OperandIndex &index); private: const ir::Graph &_graph; - IDynamicTensorManager *_dyn_tensor_manager; + DynamicTensorManager *_dyn_tensor_manager; std::shared_ptr _tensor_reg; compiler::TensorRegistries _tensor_registries; exec::ExecutorMap *_executor_map; diff --git a/runtime/onert/core/src/backend/controlflow/Tensor.h b/runtime/onert/core/src/backend/controlflow/Tensor.h index ba5bafd..87951a9 100644 --- a/runtime/onert/core/src/backend/controlflow/Tensor.h +++ b/runtime/onert/core/src/backend/controlflow/Tensor.h @@ -27,6 +27,7 @@ namespace controlflow { using Tensor = cpu_common::Tensor; +using ExternalTensor = cpu_common::ExternalTensor; } // namespace controlflow } // namespace backend diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc b/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc index e4b0388..a767f0e 100644 --- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc +++ b/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc @@ -30,8 +30,8 @@ namespace controlflow TensorBuilder::TensorBuilder(const std::shared_ptr &tensor_reg) : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg->base_reg())}, - _static_tensor_mgr{new cpu_common::StaticTensorManager( - _tensor_reg->base_reg(), _dynamic_tensor_mgr->dynamic_mem_mgr().get())} + _static_tensor_mgr{ + new cpu_common::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())} { /* empty */ } @@ -90,11 +90,7 @@ bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const return _tensor_info_map.find(ind) != _tensor_info_map.end(); } -void TensorBuilder::prepare(void) -{ - _static_tensor_mgr->allocateConsts(); - _static_tensor_mgr->allocateNonconsts(); -} +void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); } void TensorBuilder::allocate() { @@ -102,7 +98,7 @@ void TensorBuilder::allocate() // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation. 
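The TensorBuilder change in this hunk drops the separate constant allocation step: with constants served straight from externally owned model data (the ExternalTensor path earlier in this patch), prepare() only needs allocateNonconsts(). A rough sketch of that split, with invented names and no onert types:

// Hedged sketch of why only non-constant tensors need backing storage here.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

struct SketchTensor
{
  const uint8_t *buffer = nullptr; // constants: alias externally owned bytes
  std::vector<uint8_t> storage;    // non-constants: backing storage, filled at prepare()
  bool constant = false;
  size_t size = 0;
};

// Only non-constant tensors get memory of their own.
void prepareNonconsts(std::vector<SketchTensor> &tensors)
{
  for (auto &t : tensors)
  {
    if (t.constant)
      continue; // already points into the model data, nothing to allocate
    t.storage.resize(t.size);
    t.buffer = t.storage.data();
  }
}

int main()
{
  static const uint8_t model_blob[16] = {};
  std::vector<SketchTensor> tensors(2);
  tensors[0].buffer = model_blob; // "ExternalTensor"-style constant
  tensors[0].constant = true;
  tensors[0].size = sizeof(model_blob);
  tensors[1].size = 64;           // ordinary non-constant tensor
  prepareNonconsts(tensors);
  std::cout << (tensors[0].buffer == model_blob) << " " << (tensors[1].buffer != nullptr) << "\n";
}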
} -IDynamicTensorManager *TensorBuilder::dynamicTensorManager(void) +DynamicTensorManager *TensorBuilder::dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); } diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h b/runtime/onert/core/src/backend/controlflow/TensorBuilder.h index 6959947..d2e3076 100644 --- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h +++ b/runtime/onert/core/src/backend/controlflow/TensorBuilder.h @@ -21,7 +21,6 @@ #include #include -#include #include #include @@ -35,7 +34,7 @@ namespace backend namespace controlflow { -class TensorBuilder : public ITensorBuilder +class TensorBuilder { public: TensorBuilder(const std::shared_ptr &tensor_reg); @@ -47,18 +46,18 @@ public: * @param[in] layout Operand data layout */ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, - ir::Layout backend_layout) override; + ir::Layout backend_layout); - void notifyFirstUse(const ir::OperandIndex &) override; - void notifyLastUse(const ir::OperandIndex &) override; + void notifyFirstUse(const ir::OperandIndex &); + void notifyLastUse(const ir::OperandIndex &); - bool isRegistered(const ir::OperandIndex &) const override; + bool isRegistered(const ir::OperandIndex &) const; - void prepare(void) override; - void allocate() override; - void postFunctionPrepare() override { /* DO NOTHING */} + void prepare(void); + void allocate(); + void postFunctionPrepare() { /* DO NOTHING */} - IDynamicTensorManager *dynamicTensorManager(void) override; + DynamicTensorManager *dynamicTensorManager(void); /** * @brief Get tensor with a specific OperandIndex. diff --git a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h index 94f71bb..901f0ae 100644 --- a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h +++ b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h @@ -20,7 +20,7 @@ #include "backend/cpu_common/TensorRegistry.h" #include "backend/ITensorRegistry.h" #include "Tensor.h" -#include "UserTensor.h" +#include "IOTensor.h" #include namespace onert @@ -36,9 +36,10 @@ namespace controlflow * This class contains three types of tensors. Two native tensors(tensors that are managed by this * backend) and the other is migrant tensor. 
* - * - NativeUserTensor - @c UserTensor managed by this backend, buffer is user-given - * - NativeOwnTensor - @c cpu_common::Tensor managed by this backend ( in @c _base_reg ) - * - MigrantTensor - @c IPortableTensor managed by other backends ( in @c _base_reg ) + * - NativeIOTensor - @c IOTensor managed by this backend ( in @c _base_reg ) + * - NOTE The tensor it actually points to can be from another backend + * - NativeOwnTensor - @c cpu_common::Tensor managed by this backend ( in @c _base_reg ) + * - MigrantTensor - @c IPortableTensor managed by other backends * * @note @c _base_reg is used in implementation to reuse @c cpu_common::StaticTensorManager * @@ -53,7 +54,7 @@ public: auto base_tensor = _base_reg->getITensor(ind); if (base_tensor) return base_tensor; - return getNativeUserTensor(ind); + return getNativeIOTensor(ind); } ITensor *getNativeITensor(const ir::OperandIndex &ind) override @@ -61,7 +62,7 @@ public: auto base_tensor = _base_reg->getNativeITensor(ind); if (base_tensor) return base_tensor; - return getNativeUserTensor(ind); + return getNativeIOTensor(ind); } IPortableTensor *getPortableTensor(const ir::OperandIndex &ind) @@ -69,7 +70,7 @@ public: auto base_tensor = _base_reg->getPortableTensor(ind); if (base_tensor) return base_tensor; - return getNativeUserTensor(ind); + return getNativeIOTensor(ind); } IPortableTensor *getNativeTensor(const ir::OperandIndex &ind) @@ -77,7 +78,7 @@ public: auto base_tensor = _base_reg->getNativeTensor(ind); if (base_tensor) return base_tensor; - return getNativeUserTensor(ind); + return getNativeIOTensor(ind); } Tensor *getNativeOwnTensor(const ir::OperandIndex &ind) @@ -85,10 +86,10 @@ public: return _base_reg->getNativeTensor(ind); } - UserTensor *getNativeUserTensor(const ir::OperandIndex &ind) + IOTensor *getNativeIOTensor(const ir::OperandIndex &ind) { - auto tensor = _native_user_tensors.find(ind); - if (tensor != _native_user_tensors.end()) + auto tensor = _native_io_tensors.find(ind); + if (tensor != _native_io_tensors.end()) return tensor->second.get(); return nullptr; } @@ -108,22 +109,22 @@ public: _base_reg->setNativeTensor(ind, std::move(tensor)); } - void setNativeUserTensor(ir::OperandIndex ind, std::unique_ptr &&tensor) + void setNativeIOTensor(ir::OperandIndex ind, std::unique_ptr &&tensor) { assert(tensor); assert(!getITensor(ind)); // For the ind, tensor is not registered yet - _native_user_tensors[ind] = std::move(tensor); + _native_io_tensors[ind] = std::move(tensor); } - const ir::OperandIndexMap> &native_user_tensors() + const ir::OperandIndexMap> &native_io_tensors() { - return _native_user_tensors; + return _native_io_tensors; } std::shared_ptr base_reg() { return _base_reg; } private: std::shared_ptr _base_reg; - ir::OperandIndexMap> _native_user_tensors; + ir::OperandIndexMap> _native_io_tensors; }; } // namespace controlflow diff --git a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc index de91b85..1d786c4 100644 --- a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc +++ b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc @@ -18,7 +18,6 @@ #include #include "exec/ExecutorBase.h" -#include #include "PermuteLayer.h" namespace onert @@ -30,16 +29,15 @@ namespace controlflow namespace kernel { -IfLayer::IfLayer(backend::ITensor *cond_tensor, const std::vector input_tensors, - const std::vector output_tensors, - const ir::OperandIndexSequence &output_indices, const ir::Graph &graph, 
+IfLayer::IfLayer(backend::IPortableTensor *cond_tensor, + const std::vector input_tensors, + const std::vector output_tensors, const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index, exec::ExecutorMap *executor_map, const std::shared_ptr &external_context) : _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors}, - _output_indices{output_indices}, _graph{graph}, _then_subg_index{then_subg_index}, - _else_subg_index{else_subg_index}, _executor_map{executor_map}, - _external_context{external_context} + _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index}, + _executor_map{executor_map}, _external_context{external_context} { // At this point, executor_map may not have executors of then subg and else subg } @@ -48,79 +46,34 @@ void IfLayer::run() { // Check condition // // If true - // // // Copy _input_tensors -> then subg's inputs - // // // Run then subg - // // // Copy outputs of then subg -> _output_tensors + // // // Set _input_tensors -> then-subg's inputs + // // // Set outputs of then-subg -> _output_tensors + // // // Run then-subg // // Else - // // // Copy _input_tensors -> else subg's inputs if false - // // // Run else subg - // // // Copy outputs of else subg -> _output_tensors - auto getResultCond = [](backend::ITensor *tensor) -> bool { + // // // Set _input_tensors -> else-subg's inputs + // // // Set outputs of else-subg -> _output_tensors + // // // Run else-subg + + auto getResultCond = [](backend::IPortableTensor *tensor) -> bool { bool ret = false; tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast(tensor.buffer()); }); return ret; }; - exec::ExecutorBase *subg_exec = nullptr; + exec::IExecutor *subg_exec = nullptr; bool cond_result = getResultCond(_cond_tensor); if (cond_result) { VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl; - subg_exec = nnfw::misc::polymorphic_downcast( - _executor_map->at(_then_subg_index).get()); + subg_exec = _executor_map->at(_then_subg_index).get(); } else { VERBOSE(If) << "Call to $" << _else_subg_index << " (else)" << std::endl; - subg_exec = nnfw::misc::polymorphic_downcast( - _executor_map->at(_else_subg_index).get()); - } - - const auto &subg_graph = subg_exec->graph(); - - std::vector src_tensors; - std::vector dst_tensors; - // Add tensors used in subgraph or contained in outputs of subgraph - assert(subg_graph.getInputs().size() == _input_tensors.size()); - assert(subg_graph.getInputs().size() == subg_exec->getInputTensors().size()); - for (uint32_t i = 0; i < subg_graph.getInputs().size(); ++i) - { - const auto &subg_input_index = subg_graph.getInputs().at(i); - const auto &subg_input = subg_graph.operands().at(subg_input_index); - if (subg_input.getUses().size() > 0 || subg_graph.getOutputs().contains(subg_input_index)) - { - src_tensors.emplace_back(_input_tensors.at(i)); - dst_tensors.emplace_back(subg_exec->getInputTensors().at(i)); - } + subg_exec = _executor_map->at(_else_subg_index).get(); } - const auto permute_op_input_to_subg_input = - std::make_shared(src_tensors, dst_tensors, _external_context); - - // Add tensors used as output of operation or contained in outputs of operation - src_tensors.clear(); - dst_tensors.clear(); - assert(_output_indices.size() == subg_exec->getOutputTensors().size()); - assert(_output_indices.size() == _output_tensors.size()); - for (uint32_t i = 0; i < _output_indices.size(); ++i) - { - const auto &output_index = _output_indices.at(i); - const auto &output = 
_graph.operands().at(output_index); - if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index)) - { - src_tensors.emplace_back(subg_exec->getOutputTensors().at(i)); - dst_tensors.emplace_back(_output_tensors.at(i)); - } - } - const auto permute_subg_output_to_op_output = - std::make_shared(src_tensors, dst_tensors, _external_context); - - // Remove copying of unused tensor - permute_op_input_to_subg_input->prepare(); - permute_subg_output_to_op_output->prepare(); - // Copy & run - subg_exec->execute(_input_tensors, permute_op_input_to_subg_input); - permute_subg_output_to_op_output->run(); + subg_exec->execute(_input_tensors, _output_tensors); VERBOSE(If) << "Return from $" << (cond_result ? _then_subg_index : _else_subg_index) << std::endl; } diff --git a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h index 9e944bc..967552f 100644 --- a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h +++ b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h @@ -17,7 +17,7 @@ #ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__ #define __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__ -#include +#include #include #include "../ExternalContext.h" @@ -33,9 +33,9 @@ namespace kernel class IfLayer : public ::onert::exec::IFunction { public: - IfLayer(backend::ITensor *cond_tensor, const std::vector input_tensors, - const std::vector output_tensors, - const ir::OperandIndexSequence &output_indices, const ir::Graph &graph, + IfLayer(backend::IPortableTensor *cond_tensor, + const std::vector input_tensors, + const std::vector output_tensors, const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index, exec::ExecutorMap *executor_map, const std::shared_ptr &external_context); @@ -44,11 +44,9 @@ public: void run() override; private: - backend::ITensor *_cond_tensor; - const std::vector _input_tensors; - const std::vector _output_tensors; - const ir::OperandIndexSequence &_output_indices; - const ir::Graph &_graph; + backend::IPortableTensor *_cond_tensor; + const std::vector _input_tensors; + const std::vector _output_tensors; const ir::SubgraphIndex _then_subg_index; const ir::SubgraphIndex _else_subg_index; exec::ExecutorMap *_executor_map; diff --git a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h index 5d0f191..6fb69b6 100644 --- a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h +++ b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h @@ -17,7 +17,6 @@ #ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__ #define __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__ -#include "backend/ITensorBuilder.h" #include "exec/IPermuteFunction.h" #include "exec/IExecutor.h" #include "../ExternalContext.h" diff --git a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc index a0d4786..a4b5aa5 100644 --- a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc +++ b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc @@ -16,6 +16,7 @@ #include "WhileLayer.h" +#include #include #include "exec/ExecutorBase.h" #include @@ -30,16 +31,15 @@ namespace controlflow namespace kernel { -WhileLayer::WhileLayer(const std::vector input_tensors, - const std::vector output_tensors, - const ir::OperandIndexSequence &output_indices, const ir::Graph &graph, +WhileLayer::WhileLayer(const 
std::vector input_tensors, + const std::vector output_tensors, const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index, exec::ExecutorMap *executor_map, + cpu_common::DynamicMemoryManager *dyn_memory_manager, const std::shared_ptr &external_context) : _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index}, - _output_indices{output_indices}, _graph{graph}, _input_tensors{input_tensors}, - _output_tensors{output_tensors}, _executor_map{executor_map}, - _external_context{external_context} + _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executor_map{executor_map}, + _dyn_memory_manager{dyn_memory_manager}, _external_context{external_context} { // At this point, executor_map may not have executors of cond subg and body subg } @@ -56,164 +56,90 @@ void WhileLayer::run() // // Run cond subg // If there is no loop copy "_input_tensors" -> "_dst_tensors", else copy "cond subg inputs" -> // "_dst_tensors" - auto cond_exec = nnfw::misc::polymorphic_downcast( - _executor_map->at(_cond_subg_index).get()); - auto body_exec = nnfw::misc::polymorphic_downcast( - _executor_map->at(_body_subg_index).get()); - - const auto &cond_graph = cond_exec->graph(); - const auto &body_graph = body_exec->graph(); - - std::vector input_tensors; - std::vector cond_input_tensors; - std::vector body_input_tensors; - std::vector body_output_tensors; - std::vector output_tensors; - - // Add only used tensors in cond subgraph - assert(cond_graph.getInputs().size() == _input_tensors.size()); - assert(cond_graph.getInputs().size() == cond_exec->getInputTensors().size()); - for (uint32_t i = 0; i < cond_graph.getInputs().size(); ++i) - { - const auto &cond_input = cond_graph.operands().at(cond_graph.getInputs().at(i)); - if (cond_input.getUses().size() > 0) - { - input_tensors.emplace_back(_input_tensors.at(i)); - cond_input_tensors.emplace_back(cond_exec->getInputTensors().at(i)); - } - } - const auto permute_op_input_to_cond_input = - std::make_shared(input_tensors, cond_input_tensors, _external_context); - - // Add only used tensors among outputs of while operation - assert(_output_indices.size() == _input_tensors.size()); - assert(_output_indices.size() == _output_tensors.size()); - input_tensors.clear(); - output_tensors.clear(); - for (size_t i = 0; i < _output_indices.size(); ++i) - { - const auto &output_index = _output_indices.at(i); - const auto &output = _graph.operands().at(output_index); - if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index)) - { - input_tensors.emplace_back(_input_tensors.at(i)); - output_tensors.emplace_back(_output_tensors.at(i)); - } - } - const auto permute_op_input_to_op_output = - std::make_shared(input_tensors, output_tensors, _external_context); - - // Add all tensors with unused tensors in body subgraph because unused input tensors will be - // copied output tensors in body subgraph - assert(_input_tensors.size() == body_exec->getInputTensors().size()); - input_tensors = _input_tensors; - body_input_tensors = body_exec->getInputTensors(); - const auto permute_op_input_to_body_input = - std::make_shared(input_tensors, body_input_tensors, _external_context); - - // Add only used tensors in cond subgraph - assert(cond_graph.getInputs().size() == body_exec->getOutputTensors().size()); - assert(cond_graph.getInputs().size() == cond_exec->getInputTensors().size()); - body_output_tensors.clear(); - cond_input_tensors.clear(); - for (uint32_t i = 0; i < cond_graph.getInputs().size(); ++i) - { - const auto 
&cond_input = cond_graph.operands().at(cond_graph.getInputs().at(i)); - if (cond_input.getUses().size() > 0) - { - body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i)); - cond_input_tensors.emplace_back(cond_exec->getInputTensors().at(i)); - } - } - const auto permute_body_output_to_cond_input = - std::make_shared(body_output_tensors, cond_input_tensors, _external_context); - - // Add only used tensors in body subgraph - assert(body_graph.getInputs().size() == body_exec->getOutputTensors().size()); - assert(body_graph.getInputs().size() == body_exec->getInputTensors().size()); - body_output_tensors.clear(); - body_input_tensors.clear(); - for (uint32_t i = 0; i < body_graph.getInputs().size(); ++i) - { - const auto &body_input_index = body_graph.getInputs().at(i); - const auto &body_input = body_graph.operands().at(body_input_index); - if (body_input.getUses().size() > 0 && - !body_exec->graph().getOutputs().contains(body_input_index)) - { - body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i)); - body_input_tensors.emplace_back(body_exec->getInputTensors().at(i)); - } - } - const auto permute_body_output_to_body_input = - std::make_shared(body_output_tensors, body_input_tensors, _external_context); - - // Add only used tensors among outputs of while operation - assert(_output_indices.size() == body_exec->getOutputTensors().size()); - assert(_output_indices.size() == _output_tensors.size()); - body_output_tensors.clear(); - output_tensors.clear(); - for (size_t i = 0; i < _output_indices.size(); ++i) - { - const auto &output_index = _output_indices.at(i); - const auto &output = _graph.operands().at(output_index); - if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index)) - { - body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i)); - output_tensors.emplace_back(_output_tensors.at(i)); - } - } - const auto permute_body_output_to_op_output = - std::make_shared(body_output_tensors, output_tensors, _external_context); + auto cond_exec = _executor_map->at(_cond_subg_index).get(); + auto body_exec = _executor_map->at(_body_subg_index).get(); - // Remove copying of unused tensor - permute_op_input_to_cond_input->prepare(); - permute_op_input_to_op_output->prepare(); - permute_op_input_to_body_input->prepare(); - permute_body_output_to_cond_input->prepare(); - permute_body_output_to_body_input->prepare(); - permute_body_output_to_op_output->prepare(); + // Need a temp tensor to hold the cond subgraph output + assert(cond_exec->getOutputTensors().size() == 1); + auto cond_output_tensor = [&]() { + auto cond_output = cond_exec->getOutputTensors().at(0); + auto tensor = std::make_unique(cond_output->orig_info(), cond_output->orig_layout(), + _dyn_memory_manager); + tensor->set_dynamic(); + tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size())); + return tensor; + }(); VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl; - cond_exec->execute(_input_tensors, permute_op_input_to_cond_input); + cond_exec->execute(_input_tensors, {cond_output_tensor.get()}); VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl; - assert(cond_exec->getOutputTensors().size() == 1); - auto &cond_output_tensor = cond_exec->getOutputTensors().at(0); auto getResultCond = [](backend::ITensor *tensor) -> bool { bool ret = false; tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast(tensor.buffer()); }); return ret; }; + std::vector op_inputs(_input_tensors.begin(), 
_input_tensors.end()); + std::vector op_outputs(_output_tensors.begin(), _output_tensors.end()); + // Copying body inputs to outputs when the loop body is never executed + if (!getResultCond(cond_output_tensor.get())) + { + PermuteLayer copy_body_inputs_to_op_outputs{op_inputs, op_outputs, _external_context}; + copy_body_inputs_to_op_outputs.run(); + return; + } + + // Need some temp tensors to hold the body subgraph output + std::vector> temp_outputs_o; + std::vector temp_outputs; + for (auto io_tensor : body_exec->getOutputTensors()) + { + auto tensor = std::make_unique(io_tensor->orig_info(), io_tensor->orig_layout(), + _dyn_memory_manager); + tensor->set_dynamic(); + tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size())); + temp_outputs.push_back(tensor.get()); + temp_outputs_o.push_back(std::move(tensor)); + } + + std::vector body_outputs(temp_outputs.begin(), temp_outputs.end()); + PermuteLayer copy_body_outputs_to_op_outputs{body_outputs, op_outputs, _external_context}; + const auto body_execute_with_op_inputs = [&]() { VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl; - body_exec->execute(_input_tensors, permute_op_input_to_body_input); + body_exec->execute(_input_tensors, temp_outputs); VERBOSE(While) << "Return from $" << _body_subg_index << std::endl; }; const auto body_execute_with_body_outputs = [&]() { VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl; - body_exec->execute(body_exec->getOutputTensors(), permute_body_output_to_body_input); + body_exec->execute(_output_tensors, temp_outputs); VERBOSE(While) << "Return from $" << _body_subg_index << std::endl; }; std::function body_execute = body_execute_with_op_inputs; const auto cond_execute = [&]() { VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl; - cond_exec->execute(body_exec->getOutputTensors(), permute_body_output_to_cond_input); + cond_exec->execute(_output_tensors, {cond_output_tensor.get()}); VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl; }; - auto permute_to_outputs_fn = permute_op_input_to_op_output; // Loop while Cond subgraph's output is true - while (getResultCond(cond_output_tensor)) + while (getResultCond(cond_output_tensor.get())) { body_execute(); + copy_body_outputs_to_op_outputs.run(); cond_execute(); body_execute = body_execute_with_body_outputs; - permute_to_outputs_fn = permute_body_output_to_op_output; } - permute_to_outputs_fn->run(); + + // Clean-up the temp tensors + _dyn_memory_manager->deallocate(cond_output_tensor.get()); + for (auto tensor : temp_outputs) + { + _dyn_memory_manager->deallocate(tensor); + } } } // namespace kernel diff --git a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h index 8f82bd9..d3924c8 100644 --- a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h +++ b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h @@ -17,13 +17,15 @@ #ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__ #define __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__ -#include +#include #include #include #include #include #include "../ExternalContext.h" +#include "backend/cpu_common/MemoryManager.h" + namespace onert { namespace backend @@ -36,11 +38,10 @@ namespace kernel class WhileLayer : public ::onert::exec::IFunction { public: - WhileLayer(const std::vector input_tensors, - const std::vector output_tensors, - const ir::OperandIndexSequence &output_indices, 
const ir::Graph &graph, + WhileLayer(const std::vector input_tensors, + const std::vector output_tensors, const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index, - exec::ExecutorMap *executor_map, + exec::ExecutorMap *executor_map, cpu_common::DynamicMemoryManager *dyn_memory_manager, const std::shared_ptr &external_context); public: @@ -49,11 +50,10 @@ public: private: const ir::SubgraphIndex _cond_subg_index; const ir::SubgraphIndex _body_subg_index; - const ir::OperandIndexSequence &_output_indices; - const ir::Graph &_graph; - const std::vector _input_tensors; - const std::vector _output_tensors; + const std::vector _input_tensors; + const std::vector _output_tensors; exec::ExecutorMap *_executor_map; + cpu_common::DynamicMemoryManager *_dyn_memory_manager; // For generating temp tensors const std::shared_ptr _external_context; }; diff --git a/runtime/onert/core/src/backend/cpu_common/BackendContextHelpers.cc b/runtime/onert/core/src/backend/cpu_common/BackendContextHelpers.cc new file mode 100644 index 0000000..732b03c --- /dev/null +++ b/runtime/onert/core/src/backend/cpu_common/BackendContextHelpers.cc @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/cpu_common/BackendContextHelpers.h" diff --git a/runtime/onert/backend/cpu/ConstantInitializer.cc b/runtime/onert/core/src/backend/cpu_common/ConstantInitializer.cc similarity index 51% rename from runtime/onert/backend/cpu/ConstantInitializer.cc rename to runtime/onert/core/src/backend/cpu_common/ConstantInitializer.cc index 6f6eb77..610ba5f 100644 --- a/runtime/onert/backend/cpu/ConstantInitializer.cc +++ b/runtime/onert/core/src/backend/cpu_common/ConstantInitializer.cc @@ -14,19 +14,19 @@ * limitations under the License. 
*/ -#include "ConstantInitializer.h" -#include "Tensor.h" +#include "backend/cpu_common/ConstantInitializer.h" +#include "backend/cpu_common/Tensor.h" namespace onert { namespace backend { -namespace cpu +namespace cpu_common { ConstantInitializer::ConstantInitializer(const ir::Operands &operands, const std::shared_ptr &tensor_reg) - : IConstantInitializer{operands}, _tensor_reg{tensor_reg} + : ConstantInitializerBase{operands}, _tensor_reg{tensor_reg} { // DO NOTHING } @@ -53,42 +53,6 @@ void ConstantInitializer::registerExternalInitializer(const ir::OperandIndex &in }; } -void ConstantInitializer::visit(const ir::operation::Conv2D &node) -{ - const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL); - const auto &kernel_obj = _operands.at(kernel_index); - registerExternalInitializer(kernel_index, kernel_obj); - - const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS); - const auto &bias_obj = _operands.at(bias_index); - registerExternalInitializer(bias_index, bias_obj); -} - -void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) -{ - const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL); - const auto &kernel_obj = _operands.at(kernel_index); - registerExternalInitializer(kernel_index, kernel_obj); - - const auto &bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS); - const auto &bias_obj = _operands.at(bias_index); - registerExternalInitializer(bias_index, bias_obj); -} - -void ConstantInitializer::visit(const ir::operation::FullyConnected &node) -{ - const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT); - const auto &weight_obj = _operands.at(weight_index); - registerExternalInitializer(weight_index, weight_obj); - - const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS); - if (!bias_index.undefined()) - { - const auto &bias_obj = _operands.at(bias_index); - registerExternalInitializer(bias_index, bias_obj); - } -} - -} // namespace cpu +} // namespace cpu_common } // namespace backend } // namespace onert diff --git a/runtime/onert/core/src/backend/IConstantInitializer.cc b/runtime/onert/core/src/backend/cpu_common/ConstantInitializerBase.cc similarity index 86% rename from runtime/onert/core/src/backend/IConstantInitializer.cc rename to runtime/onert/core/src/backend/cpu_common/ConstantInitializerBase.cc index 6fb9757..15c2dfe 100644 --- a/runtime/onert/core/src/backend/IConstantInitializer.cc +++ b/runtime/onert/core/src/backend/cpu_common/ConstantInitializerBase.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "backend/IConstantInitializer.h" +#include "backend/cpu_common/ConstantInitializerBase.h" #include @@ -24,9 +24,11 @@ namespace onert { namespace backend { +namespace cpu_common +{ -void IConstantInitializer::registerCopyInitializer(const ir::OperandIndex &index, - const ir::Operand &obj) +void ConstantInitializerBase::registerCopyInitializer(const ir::OperandIndex &index, + const ir::Operand &obj) { // For only CONSTANTS // TODO Add to check if tensor has been allocated @@ -67,8 +69,8 @@ void IConstantInitializer::registerCopyInitializer(const ir::OperandIndex &index } } -void IConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &index, - const ir::Operand &obj) +void ConstantInitializerBase::registerPermuteInitializer(const ir::OperandIndex &index, + const ir::Operand &obj) { // For only CONSTANTS // TODO Add to check if tensor has been allocated @@ -82,27 +84,27 @@ void IConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &in switch (type) { case DataType::FLOAT32: - _init_map[index] = std::bind(permuteInit, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit, _1, _2, _current_layout); break; case DataType::INT32: - _init_map[index] = std::bind(permuteInit, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit, _1, _2, _current_layout); break; case DataType::UINT32: - _init_map[index] = std::bind(permuteInit, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit, _1, _2, _current_layout); break; case DataType::BOOL8: case DataType::QUANT_UINT8_ASYMM: - _init_map[index] = std::bind(permuteInit, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit, _1, _2, _current_layout); break; case DataType::QUANT_INT8_SYMM: case DataType::QUANT_INT8_ASYMM: - _init_map[index] = std::bind(permuteInit, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit, _1, _2, _current_layout); break; case DataType::FLOAT16: - _init_map[index] = std::bind(permuteInit, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit, _1, _2, _current_layout); break; case DataType::INT64: - _init_map[index] = std::bind(permuteInit, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit, _1, _2, _current_layout); break; default: throw std::runtime_error("Not supported, yet"); @@ -110,5 +112,6 @@ void IConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &in } } +} // namespace cpu_common } // namespace backend } // namespace onert diff --git a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc b/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc index cac43ba..8c5c46a 100644 --- a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc +++ b/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc @@ -17,6 +17,7 @@ #include "backend/cpu_common/StaticTensorManager.h" #include "backend/cpu_common/DynamicTensorManager.h" +#include "backend/cpu_common/Tensor.h" #include namespace onert @@ -27,31 +28,13 @@ namespace cpu_common { StaticTensorManager::StaticTensorManager(const std::shared_ptr ®, - DynamicMemoryManager *dynamic_mem_mgr) - : _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg}, - _dynamic_mem_mgr{dynamic_mem_mgr} + DynamicTensorManager *dynamic_tensor_manager) + : _nonconst_mgr{new MemoryManager()}, _tensors{reg}, + _dynamic_tensor_manager{dynamic_tensor_manager} { // DO NOTHING } -void 
StaticTensorManager::allocateConsts(void) -{ - for (auto &pair : _tensors->native_tensors()) - { - const auto &ind = pair.first; - auto tensor = pair.second.get(); - if (_as_constants[ind]) - { - auto mem_alloc = _const_mgr->allocate(_tensors->getITensor(ind), tensor->total_size()); - tensor->setBuffer(mem_alloc); - auto buffer = mem_alloc->base(); - VERBOSE(CPU_COMMON_StaticTensorManager) << "CONSTANT TENSOR(#" << ind.value() - << "): " << static_cast(buffer) - << "size : " << tensor->total_size() << std::endl; - } - } -} - void StaticTensorManager::allocateNonconsts(void) { _nonconst_mgr->allocate(); @@ -65,14 +48,12 @@ void StaticTensorManager::allocateNonconsts(void) auto *buffer = _nonconst_mgr->getBuffer(ind); tensor->setBuffer(buffer); - VERBOSE(CPU_COMMON_StaticTensorManager) << "TENSOR(#" << ind.value() - << "): " << static_cast(buffer) << std::endl; + VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value() + << "): " << static_cast(buffer) << std::endl; } } } -void StaticTensorManager::deallocateConsts(void) { _const_mgr->deallocate(); } - void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); } void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, @@ -80,8 +61,17 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, bool as_const) { assert(!_tensors->getNativeTensor(ind)); - auto tensor = std::make_unique(tensor_info, backend_layout, _dynamic_mem_mgr); - _tensors->setNativeTensor(ind, std::move(tensor)); + if (as_const) + { + auto tensor = std::make_unique(tensor_info, backend_layout); + _tensors->setNativeTensor(ind, std::move(tensor)); + } + else + { + auto tensor = std::make_unique(tensor_info, backend_layout, + _dynamic_tensor_manager->dynamic_mem_mgr().get()); + _tensors->setNativeTensor(ind, std::move(tensor)); + } _as_constants[ind] = as_const; } diff --git a/runtime/onert/core/src/backend/cpu_common/Tensor.cc b/runtime/onert/core/src/backend/cpu_common/Tensor.cc index d3dcf9a..e412cb7 100644 --- a/runtime/onert/core/src/backend/cpu_common/Tensor.cc +++ b/runtime/onert/core/src/backend/cpu_common/Tensor.cc @@ -95,3 +95,20 @@ bool Tensor::applyShape(const ir::Shape &new_shape) } // namespace cpu_common } // namespace backend } // namespace onert + +// ExternalTensor + +namespace onert +{ +namespace backend +{ +namespace cpu_common +{ + +// `dynamic_cast` not working across library boundaries on NDK +// With this as a key function, `dynamic_cast` works across dl +ExternalTensor::~ExternalTensor() {} + +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/compiler/BackendManager.cc b/runtime/onert/core/src/compiler/BackendManager.cc index 0093f50..ea45cbe 100644 --- a/runtime/onert/core/src/compiler/BackendManager.cc +++ b/runtime/onert/core/src/compiler/BackendManager.cc @@ -69,55 +69,73 @@ void BackendManager::loadBackend(const std::string &backend) return; } - // TODO Remove indentation + const std::string backend_so = "libbackend_" + backend + SHARED_LIB_EXT; + void *handle = dlopen(backend_so.c_str(), RTLD_LAZY | RTLD_LOCAL); + + if (handle == nullptr) { - const std::string backend_so = "libbackend_" + backend + SHARED_LIB_EXT; - void *handle = dlopen(backend_so.c_str(), RTLD_LAZY | RTLD_LOCAL); + VERBOSE(BackendManager) << "Failed to load backend '" << backend << "' - " << dlerror() << "\n"; + return; + } - if (handle == nullptr) + VERBOSE(BackendManager) << "Successfully loaded '" << backend << "'(" << backend_so << ")\n"; + + { + // load object creator 
function + auto backend_create = (backend_create_t)dlsym(handle, "onert_backend_create"); + if (backend_create == nullptr) { - VERBOSE_F() << "Failed to load backend '" << backend << "' - " << dlerror() << std::endl; + // TODO replace `fprintf` with `VERBOSE` + fprintf(stderr, "BackendManager: unable to find function `onert_backend_create` : %s\n", + dlerror()); + dlclose(handle); return; } - VERBOSE_F() << "Successfully loaded '" << backend << "' - " << backend_so << "\n"; - + // load object creator function + auto backend_destroy = (backend_destroy_t)dlsym(handle, "onert_backend_destroy"); + if (backend_destroy == nullptr) { - // load object creator function - auto backend_create = (backend_create_t)dlsym(handle, "onert_backend_create"); - if (backend_create == nullptr) - { - fprintf(stderr, "BackendManager: unable to open function onert_backend_create : %s\n", - dlerror()); - abort(); - } - - // load object creator function - auto backend_destroy = (backend_destroy_t)dlsym(handle, "onert_backend_destroy"); - if (backend_destroy == nullptr) - { - fprintf(stderr, "BackendManager: unable to open function onert_backend_destroy : %s\n", - dlerror()); - abort(); - } - - auto backend_object = - std::unique_ptr(backend_create(), backend_destroy); - bool initialized = backend_object->config()->initialize(); // Call initialize here? - if (!initialized) - { - VERBOSE_F() << backend.c_str() << " backend initialization failed. Don't use this backend" - << std::endl; - dlclose(handle); - return; - } - _gen_map.emplace(backend_object->config()->id(), std::move(backend_object)); + // TODO replace `fprintf` with `VERBOSE` + fprintf(stderr, "BackendManager: unable to find `function onert_backend_destroy` : %s\n", + dlerror()); + dlclose(handle); + return; } - // Save backend handle (avoid warning by handle lost without dlclose()) - auto u_handle = std::unique_ptr{handle, [](void *h) { dlclose(h); }}; - _handle_map.emplace(backend, std::move(u_handle)); + auto backend_object = + std::unique_ptr(backend_create(), backend_destroy); + bool initialized = backend_object->config()->initialize(); // Call initialize here? + if (!initialized) + { + VERBOSE(BackendManager) << backend.c_str() + << " backend initialization failed. 
Don't use this backend" + << std::endl; + dlclose(handle); + return; + } + _gen_map.emplace(backend_object->config()->id(), std::move(backend_object)); } + + // Save backend handle (avoid warning by handle lost without dlclose()) + + // NOTE This is a workaround for clang-format3.9 (seems like it does not understand + // "by-copy capture with an initializer" + // clang-format off + auto u_handle = std::unique_ptr{ + handle, [id = backend, filename = backend_so](void *h) { + if (dlclose(h) == 0) + { + VERBOSE(BackendManager) << "Successfully unloaded '" << id << "'(" << filename << ")\n"; + } + else + { + VERBOSE(BackendManager) + << "Failed to unload backend '" << id << "'- " << dlerror() << "\n"; + } + }}; +// clang-format on +_handle_map.emplace(backend, std::move(u_handle)); } backend::Backend *BackendManager::get(const std::string &key) diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc index c2844bd..7eeb14a 100644 --- a/runtime/onert/core/src/compiler/Compiler.cc +++ b/runtime/onert/core/src/compiler/Compiler.cc @@ -41,6 +41,30 @@ #include "ir/OperationDumper.h" #include "misc/string_helpers.h" +namespace +{ + +using namespace onert; + +std::string getOpBackends(std::unordered_map &opcode_to_backend) +{ + std::unordered_map::iterator it; + std::string opbackends; + + for (it = opcode_to_backend.begin(); it != opcode_to_backend.end(); ++it) + { + if (!opbackends.empty()) + opbackends = opbackends + ", "; + + auto opcode = it->first; + const std::string opname = ir::toString(opcode); + opbackends += opname + "=" + it->second; + } + return opbackends; +} + +} // namespace + namespace onert { @@ -51,7 +75,6 @@ CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs) { CompilerOptions options; options.backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';'); - options.is_primary_subgraph = false; options.trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH); options.graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP); options.op_seq_max_node = util::getConfigInt(util::config::OP_SEQ_MAX_NODE); @@ -108,13 +131,15 @@ CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs) return options; } -Compiler::Compiler(const std::shared_ptr &subgs) +Compiler::Compiler(const std::shared_ptr &subgs, util::TracingCtx *tracing_ctx) : _subgraphs{subgs}, _state{State::CREATED} { // Set default values for CompilerOptions // All these default values should not be fetched from Env, when we stop supporting Android NN // API. 
_options = fetchCompilerOptionsFromGlobalConfig(*subgs); + + _options.tracing_ctx = tracing_ctx; } void Compiler::enableToFp16() { _options.fp16_enable = true; } @@ -132,12 +157,10 @@ std::shared_ptr Compiler::compile(void) { // Set control flow backend for control flow operators { - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = - backend::controlflow::Config::ID; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = - backend::controlflow::Config::ID; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = - backend::controlflow::Config::ID; + auto &cfid = backend::controlflow::Config::ID; + _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = cfid; + _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = cfid; + _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = cfid; } // FIXME This is a workaround for bcq operations, should remove it @@ -157,7 +180,11 @@ std::shared_ptr Compiler::compile(void) VERBOSE(Compiler) << "graph_dump_level : " << _options.graph_dump_level << std::endl; VERBOSE(Compiler) << "op_seq_max_node : " << _options.op_seq_max_node << std::endl; VERBOSE(Compiler) << "executor : " << _options.executor << std::endl; - VERBOSE(Compiler) << "manual_scheduler_options : (Too many things to print)" << std::endl; + VERBOSE(Compiler) << "manual backend_for_all : " + << _options.manual_scheduler_options.backend_for_all << std::endl; + VERBOSE(Compiler) << "manual_scheduler_options : " + << getOpBackends(_options.manual_scheduler_options.opcode_to_backend) + << std::endl; VERBOSE(Compiler) << "he_scheduler : " << _options.he_scheduler << std::endl; VERBOSE(Compiler) << "he_profiling_mode : " << _options.he_profiling_mode << std::endl; VERBOSE(Compiler) << "disable_compile : " << _options.disable_compile << std::endl; @@ -202,7 +229,6 @@ std::shared_ptr Compiler::compile(void) // Lower: Assign backend std::unordered_map> lowered_subgs; _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) { - _options.is_primary_subgraph = (index == ir::SubgraphIndex{0}); onert::dumper::dot::DotDumper dot_dumper(subg, dump_level); dot_dumper.dump(nnfw::misc::str("before_lower_subg-", index.value())); @@ -230,6 +256,14 @@ std::shared_ptr Compiler::compile(void) _subgraphs.reset(); + for (auto &pair : lowered_subgs) + { + const auto &subg_index = pair.first; + auto &lowered_subg = pair.second; + onert::dumper::dot::DotDumper dot_dumper_lowered(lowered_subg.get(), dump_level); + dot_dumper_lowered.dump("after_lower_subg-" + std::to_string(subg_index.value())); + } + // Shape inference. 
{ const auto primary_subg_idx = ir::SubgraphIndex{0}; @@ -266,12 +300,8 @@ std::shared_ptr Compiler::compile(void) auto &lowered_subg = pair.second; auto indexed_ranks = lowered_subg->indexed_ranks(); - _options.is_primary_subgraph = (subg_index == ir::SubgraphIndex{0}); - - onert::dumper::dot::DotDumper dot_dumper_lowered(lowered_subg.get(), dump_level); - dot_dumper_lowered.dump("after_lower_subg-" + std::to_string(subg_index.value())); - - ir::OperationDumper dumper("START SUBGRAPH " + std::to_string(subg_index.value())); + ir::OperationDumper dumper("Executor generation of Subgraph " + + std::to_string(subg_index.value())); lowered_subg->graph().operations().iterate( [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); }); auto executor = std::unique_ptr{ diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc index bb325ff..356feed 100644 --- a/runtime/onert/core/src/compiler/ExecutorFactory.cc +++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc @@ -16,6 +16,7 @@ #include "ExecutorFactory.h" +#include #include #include "exec/ExecutionObservers.h" #include "exec/LinearExecutor.h" @@ -25,16 +26,13 @@ #include "compiler/ExecutionBuilder.h" #include "exec/ExecTime.h" #include "compiler/Linear.h" -#include "compiler/TensorBuilders.h" -#include "backend/IConstantInitializer.h" -#include "backend/IKernelGenerator.h" -#include "backend/IOptimizer.h" #include "backend/IPortableTensor.h" -#include "backend/ITensorRegister.h" #include "backend/controlflow/Config.h" #include "backend/controlflow/KernelGenerator.h" #include "backend/controlflow/UserTensor.h" #include "backend/controlflow/TensorBuilder.h" +#include "util/TracingCtx.h" + #include namespace onert @@ -66,6 +64,36 @@ private: std::shared_ptr _config; }; +void initializeSubgraphIOTensors(compiler::LoweredGraph &lowered_graph, + const ir::OperandIndexSequence &indices) +{ + // TODO Store controlflow backend in BackendContext + std::shared_ptr cf_tensor_reg; + for (const auto &e : lowered_graph.backend_contexts()) + { + auto backend = e.first; + auto &context = e.second; + if (backend->config()->id() == backend::controlflow::Config::ID) + { + cf_tensor_reg = + std::dynamic_pointer_cast(context->tensor_registry); + } + } + assert(cf_tensor_reg); + + for (auto ind : indices) + { + const auto &operand = lowered_graph.graph().operands().at(ind); + auto tensor = std::make_unique( + operand.info(), + ir::Layout::NHWC /* FIXME find op_seq for this operand and use frontend_layout */ + ); + + // Add tensor to controlflow TensorRegistry. 
+ cf_tensor_reg->setNativeIOTensor(ind, std::move(tensor)); + } +} + } // namespace } // namespace onert @@ -134,97 +162,6 @@ void ExecutorFactory::initializeBackendContext(compiler::LoweredGraph *lowered_g } } -void ExecutorFactory::runTensorRegistration(compiler::LoweredGraph *lowered_graph, - const std::vector &order) -{ - for (const auto index : order) - { - const auto &op_seq = lowered_graph->op_seqs().at(index); - const auto backend = lowered_graph->getLowerInfo(index)->backend(); - const auto tensor_register = lowered_graph->backend_contexts().at(backend)->tensor_register; - auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder; - auto model_io = lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs(); - - if (tensor_register) - { - // Custom registration - tensor_register->registerTensors(op_seq, lowered_graph->getLowerInfo()); - } - else - { - // Default registration - for (const auto op_idx : op_seq) - { - const auto &op = lowered_graph->graph().operations().at(op_idx); - for (const auto &index : - (op.getInputs() | ir::Remove::UNDEFINED) + (op.getOutputs() | ir::Remove::UNDEFINED)) - { - if (!tensor_builder->isRegistered(index) && !model_io.contains(index)) - { - const auto &operand_lower_info = - lowered_graph->getLowerInfo(index)->def_factors().getOnlyElement(); - - // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl) - // op.getOutputs() of permute (CPU) returns tensor A - // but tensor A belongs to the backend of acl_cl. - // So, we have to make this tensor NOT registered for CPU. - if (operand_lower_info.backend() != backend) - continue; - - const auto &obj = lowered_graph->graph().operands().at(index); - const auto frontend_layout = op_seq.getLayout(); - const auto backend_layout = operand_lower_info.layout(); - ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), - obj.typeInfo(), obj.info().memAllocType(), - obj.isConstant()}; - tensor_builder->registerTensorInfo(index, backend_info, backend_layout); - } - } - } - } - } -} - -std::vector -ExecutorFactory::initializeModelIOTensors(compiler::LoweredGraph &lowered_graph, - const ir::OperandIndexSequence &indices) -{ - std::vector ret; - - // TODO Store controlflow backend in BackendContext - std::shared_ptr cf_tensor_builder; - std::shared_ptr cf_tensor_reg; - for (const auto &e : lowered_graph.backend_contexts()) - { - auto backend = e.first; - auto &context = e.second; - if (backend->config()->id() == backend::controlflow::Config::ID) - { - cf_tensor_builder = - std::dynamic_pointer_cast(context->tensor_builder); - cf_tensor_reg = - std::dynamic_pointer_cast(context->tensor_registry); - } - } - assert(cf_tensor_builder); - assert(cf_tensor_reg); - - for (auto ind : indices) - { - const auto &operand = lowered_graph.graph().operands().at(ind); - auto tensor = std::make_unique( - operand.info(), - ir::Layout::NHWC /* FIXME find op_seq for this operand and use frontend_layout */ - ); - - // Add tensor to controlflow TensorRegistry. 
- cf_tensor_reg->setNativeUserTensor(ind, std::move(tensor)); - auto *itensor = cf_tensor_reg->getITensor(ind); - ret.push_back(itensor); - } - return ret; -} - void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph) { TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true}; @@ -260,110 +197,78 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr lo initializeBackendContext(lowered_graph.get()); - // linearize - assert(!lowered_graph->graph().isBuildingPhase()); - - /************************************************* - * Backend dependent analysis & optimization phase - *************************************************/ - - for (auto &pair : backend_contexts) - { - auto &optimizer = pair.second->optimizer; - if (optimizer) - optimizer->optimize(); - } + TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true}; - /********************************************************** - * Backend dependent analysis & optimization phase finished - **********************************************************/ + assert(!lowered_graph->graph().isBuildingPhase()); - /*********************** - * Code generation phase - ***********************/ + initializeSubgraphIOTensors( + *lowered_graph, (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) | + ir::Remove::DUPLICATED | ir::Remove::UNDEFINED); + // linearize auto order = Linear::linearize(*lowered_graph); - runTensorRegistration(lowered_graph.get(), order); - - std::vector input_tensors; - std::vector output_tensors; - if (options.is_primary_subgraph) - { - input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs()); - output_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getOutputs()); - } - Linear::dump(*lowered_graph, order); - Linear::planTensors(*lowered_graph, order); - TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true}; - TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true}; - - for (auto &tensor_builder : tensor_builders) + for (auto &pair : backend_contexts) { - tensor_builder->prepare(); + pair.second->genTensors(order, lowered_graph->op_seqs(), *lowered_graph->getLowerInfo()); } prepareMigrantTensors(*lowered_graph); - ExecutionBuilder builder; - - // Generate kernels - lowered_graph->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &op_seq_index, - const ir::OpSequence &op_seq) { - auto lower_info = lowered_graph->getLowerInfo(op_seq_index); - auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen; - // Set TensorBuilderSet and ExecutorMap to kernel_gen of control flow - auto cf_kernel_gen = dynamic_cast(kernel_gen.get()); - if (cf_kernel_gen != nullptr) + // Give some runtime objects to controlflow KernelGenerator + for (auto &pair : backend_contexts) + { + auto cf_context = dynamic_cast(pair.second.get()); + if (cf_context != nullptr) { + auto cf_kernel_gen = cf_context->kernel_gen; cf_kernel_gen->setTensorRegistries(tensor_regs); cf_kernel_gen->setExecutorMap(executor_map); } - auto fn_seq = kernel_gen->generate(op_seq); - if (options.he_profiling_mode) - { - fn_seq->wrap(lower_info->backend()->config()); - } - builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)}); - }); - - for (auto &tensor_builder : tensor_builders) - { - tensor_builder->allocate(); } + ExecutionBuilder builder; + + // Adjust the order of backends for the upcoming iteration + std::deque> ordered_contexts; for (auto &pair : backend_contexts) { - 
pair.second->initConsts(); + // NOTE controlflow backend must be processed lastly. + // This is because of Permute layer's specialty which is the only operation that could have + // different ITensor objects for the input and the output. And it requires all other backends' + // tensors are ready to use. + if (pair.first->config()->id() == "controlflow") + ordered_contexts.emplace_back(pair.first, pair.second.get()); + else + ordered_contexts.emplace_front(pair.first, pair.second.get()); } - lowered_graph->graph().operands().iterate( - [](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); }); - - auto code_map = builder.releaseCodeMap(); - - for (auto &it : code_map) + // Generate kernels + for (auto &pair : ordered_contexts) { - auto op_seq_index = it.first; - auto &fn_seq = it.second.fn_seq; - - fn_seq->iterate([&](exec::IFunction &ifunc) { - ifunc.prepare(); - auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend(); - auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder; - tensor_builder->postFunctionPrepare(); - }); + auto codes = pair.second->genKernels(order, lowered_graph->op_seqs()); + for (auto &pair : codes) + { + auto &op_seq_ind = pair.first; + auto &fn_seq = pair.second; + auto &op_seq = lowered_graph->op_seqs().at(op_seq_ind); + auto lower_info = lowered_graph->getLowerInfo(op_seq_ind); + if (options.he_profiling_mode) + fn_seq->wrap(lower_info->backend()->config()); + builder.append(op_seq_ind, {&op_seq, lower_info, std::move(fn_seq)}); + } } - auto exec = - new exec::LinearExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, - std::move(code_map), order}; + auto code_map = builder.releaseCodeMap(); + + auto exec = new exec::LinearExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map), + order, options.tracing_ctx}; if (!options.trace_filepath.empty()) { - std::unique_ptr ctp = - std::make_unique(options.trace_filepath, exec->graph()); + std::unique_ptr ctp = std::make_unique( + options.trace_filepath, exec->graph(), options.tracing_ctx); exec->addObserver(std::move(ctp)); } @@ -378,100 +283,81 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor( initializeBackendContext(lowered_graph.get()); - auto order = Linear::linearize(*lowered_graph); - runTensorRegistration(lowered_graph.get(), order); - - std::vector input_tensors; - std::vector output_tensors; - if (options.is_primary_subgraph) - { - input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs()); - output_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getOutputs()); - } - - TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true}; TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true}; - // To make tensors never be deallocated, this is a workaround to use static memory planner - for (auto &tensor_builder : tensor_builders) - { - lowered_graph->graph().operands().iterate( - [&](const ir::OperandIndex &ind, const ir::Operand &) { - if (tensor_builder->isRegistered(ind)) - { - tensor_builder->notifyFirstUse(ind); - } - }); - } + assert(!lowered_graph->graph().isBuildingPhase()); + + initializeSubgraphIOTensors( + *lowered_graph, (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) | + ir::Remove::DUPLICATED | ir::Remove::UNDEFINED); - for (auto &tensor_builder : tensor_builders) + // linearize + // This order is just for giving topological order info to the backens + // TODO When we pass a partial graph to a 
backend, we can remove this + auto order = Linear::linearize(*lowered_graph); + for (auto &pair : backend_contexts) { - tensor_builder->prepare(); + pair.second->genTensors(order, lowered_graph->op_seqs(), *lowered_graph->getLowerInfo()); } prepareMigrantTensors(*lowered_graph); - ExecutionBuilder builder; - - // Generate kernels - lowered_graph->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &op_seq_index, - const ir::OpSequence &op_seq) { - auto lower_info = lowered_graph->getLowerInfo(op_seq_index); - auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen; - // Set TensorBuilderSet and ExecutorMap to kernel_gen of control flow - auto cf_kernel_gen = dynamic_cast(kernel_gen.get()); - if (cf_kernel_gen != nullptr) + // Give some runtime objects to controlflow KernelGenerator + for (auto &pair : backend_contexts) + { + auto cf_context = dynamic_cast(pair.second.get()); + if (cf_context != nullptr) { - assert(cf_kernel_gen != nullptr); + auto cf_kernel_gen = cf_context->kernel_gen; cf_kernel_gen->setTensorRegistries(tensor_regs); cf_kernel_gen->setExecutorMap(executor_map); } - auto fn_seq = kernel_gen->generate(op_seq); - if (options.he_profiling_mode) - { - fn_seq->wrap(lower_info->backend()->config()); - } - builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)}); - }); - - for (const auto &tensor_builder : tensor_builders) - { - tensor_builder->allocate(); } + ExecutionBuilder builder; + + // Adjust the order of backends for the upcoming iteration + std::deque> ordered_contexts; for (auto &pair : backend_contexts) { - pair.second->initConsts(); + // NOTE controlflow backend must be processed lastly. + // This is because of Permute layer's specialty which is the only operation that could have + // different ITensor objects for the input and the output. And it requires all other backends' + // tensors are ready to use. 
+ if (pair.first->config()->id() == "controlflow") + ordered_contexts.emplace_back(pair.first, pair.second.get()); + else + ordered_contexts.emplace_front(pair.first, pair.second.get()); } - lowered_graph->graph().operands().iterate( - [](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); }); - - auto code_map = builder.releaseCodeMap(); - - for (auto &it : code_map) + // Generate kernels + for (auto &pair : ordered_contexts) { - auto op_seq_index = it.first; - auto &fn_seq = it.second.fn_seq; - - fn_seq->iterate([&](exec::IFunction &ifunc) { - ifunc.prepare(); - auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend(); - auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder; - tensor_builder->postFunctionPrepare(); - }); + auto codes = pair.second->genKernels(order, lowered_graph->op_seqs()); + for (auto &pair : codes) + { + auto &op_seq_ind = pair.first; + auto &fn_seq = pair.second; + auto &op_seq = lowered_graph->op_seqs().at(op_seq_ind); + auto lower_info = lowered_graph->getLowerInfo(op_seq_ind); + if (options.he_profiling_mode) + fn_seq->wrap(lower_info->backend()->config()); + builder.append(op_seq_ind, {&op_seq, lower_info, std::move(fn_seq)}); + } } + auto code_map = builder.releaseCodeMap(); + exec::ExecutorBase *exec = nullptr; if (parallel) { - exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors, output_tensors, - tensor_regs, std::move(code_map)}; + exec = new exec::ParallelExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map), + options.tracing_ctx}; } else { - auto dataflow_exec = new exec::DataflowExecutor{ - std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, std::move(code_map)}; + auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), tensor_regs, + std::move(code_map), options.tracing_ctx}; if (options.he_profiling_mode) { std::vector backends; @@ -489,8 +375,8 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor( if (!options.trace_filepath.empty()) { - std::unique_ptr ctp = - std::make_unique(options.trace_filepath, exec->graph()); + std::unique_ptr ctp = std::make_unique( + options.trace_filepath, exec->graph(), options.tracing_ctx); exec->addObserver(std::move(ctp)); } diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h index e76b721..06dc691 100644 --- a/runtime/onert/core/src/compiler/ExecutorFactory.h +++ b/runtime/onert/core/src/compiler/ExecutorFactory.h @@ -46,9 +46,6 @@ private: static void initializeBackendContext(compiler::LoweredGraph *lowered_graph); static void runTensorRegistration(compiler::LoweredGraph *lowered_graph, const std::vector &order); - static std::vector - initializeModelIOTensors(compiler::LoweredGraph &lowered_graph, - const ir::OperandIndexSequence &indices); static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph); static exec::IExecutor * createLinearExecutor(std::unique_ptr lowered_graph, diff --git a/runtime/onert/core/src/compiler/Linear.cc b/runtime/onert/core/src/compiler/Linear.cc index 30c8f72..fdd2a76 100644 --- a/runtime/onert/core/src/compiler/Linear.cc +++ b/runtime/onert/core/src/compiler/Linear.cc @@ -19,8 +19,6 @@ #include "Linear.h" #include "backend/IConfig.h" -#include "backend/IConstantInitializer.h" -#include "backend/ITensorRegister.h" #include "backend/Backend.h" #include "util/logging.h" @@ -62,190 +60,5 @@ void Linear::dump(const compiler::LoweredGraph &lowered_graph, } } -void 
Linear::planTensors(const compiler::LoweredGraph &lowered_graph, - const std::vector &order) -{ - const auto &graph = lowered_graph.graph(); - ir::OperandIndexMap> tensor_builder_map; - - ir::OperandIndexMap uses_map; - ir::OperandIndexMap def_map; - ir::OperandIndexSequence constants; - - // Prepare scanning - graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { - const auto lower_info = lowered_graph.getLowerInfo(ind); - // TODO Remove if onert doesn't support anymore such as - // GeneratedTests.reshape_quant8_weights_as_inputs - if (lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0 && - !graph.getInputs().contains(ind)) - { - VERBOSE(LINEAR) << "Operand #" << ind.value() << " will not be used. no more process." - << std::endl; - return; - } - - // Unused input of subgraph - // TODO Register unused input as nullptr in tensor_builder - if (lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0 && - graph.getInputs().contains(ind)) - { - VERBOSE(LINEAR) << "Operand #" << ind.value() << " will not be used. no more process." - << std::endl; - return; - } - - uses_map[ind] = obj.getUses().size(); - def_map[ind] = obj.getDef().valid() ? 1 : 0; - - bool is_const = obj.isConstant(); - if (is_const) - { - constants.append(ind); - } - - auto factor = lower_info->def_factors().getOnlyElement(); - auto backend = factor.backend(); - auto tensor_builder = lowered_graph.backend_contexts().at(backend)->tensor_builder; - if (!tensor_builder->isRegistered(ind)) - { - // These tensors do not exist in any op_seq (No use and def) - const auto info = obj.info(); - const auto backend_layout = factor.layout(); - // TODO Change tensor info to have permuted shape - tensor_builder->registerTensorInfo(ind, info, backend_layout); - } - - tensor_builder_map[ind] = tensor_builder; - }); - - const auto io_tensors = - (graph.getInputs() + graph.getOutputs()) | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; - - // If a tensor is model output, increase the use of the tensor. - // This aim is same to above one. - for (const auto &ind : io_tensors) - { - uses_map[ind]++; - } - - // Start scanning to do notify{First|Last}Use for each tensor - - // If a tensor is a constant, increase the use of the tensor. - // It makes the tensor not be dealloced. It means these will be deallocated last. - // And allocate constant operands first - VERBOSE(LINEAR) << "TENSORS as CONSTANT" << std::endl; - for (const auto &ind : constants) - { - uses_map[ind]++; - tensor_builder_map[ind]->notifyFirstUse(ind); - } - - // Allocate Model's inputs - VERBOSE(LINEAR) << "TENSORS as MODEL INPUT" << std::endl; - for (const auto &ind : graph.getInputs() | ir::Remove::DUPLICATED) - { - auto tensor_builder = tensor_builder_map[ind]; - if (!tensor_builder) // for GeneratedTests.xxx_weights_as_inputs - continue; - tensor_builder->notifyFirstUse(ind); - } - - // At each operation, - // 1. Scan DEF of outputs. If the DEF, allocate it - // 2. Scan DEF of inputs. If variable tensor, allocate it - // 3. Scan USE of inputs. 
Decrease the USE and deallocate if the USE is 0 - VERBOSE(LINEAR) << "TENSORS" << std::endl; - for (const auto op_seq_ind : order) - { - const auto &op_seq = lowered_graph.op_seqs().at(op_seq_ind); - for (const auto &op_idx : op_seq.operations()) - { - for (const auto &ind : graph.operations().at(op_idx).getOutputs() | ir::Remove::DUPLICATED | - ir::Remove::UNDEFINED) - { - assert(def_map.find(ind) != def_map.end()); - if (def_map[ind]) - { - def_map[ind] = 0; - tensor_builder_map[ind]->notifyFirstUse(ind); - } - } - - // Scan variable tensors - // This tensor has features like constant. But OperandInfo and LowerInfo treat them as - // non-constant because of less memory usage by memory planning in here - for (const auto &ind : graph.operations().at(op_idx).getInputs() | ir::Remove::DUPLICATED | - ir::Remove::UNDEFINED) - { - const auto &operand = graph.operands().at(ind); - if (operand.info().isVariable()) - { - // The variable tensor with buffer is not supported yet - assert(operand.data() == nullptr); - assert(operand.getUses().size() == 1 && !operand.getDef().valid()); - assert(lowered_graph.getLowerInfo(ind)->def_factors().size() == 1 && - lowered_graph.getLowerInfo(ind)->use_factors().size() == 1); - assert(uses_map[ind] == 1 && def_map[ind] == 0); - tensor_builder_map[ind]->notifyFirstUse(ind); - } - } - - for (const auto &ind : graph.operations().at(op_idx).getInputs() | ir::Remove::DUPLICATED | - ir::Remove::UNDEFINED) - { - assert(uses_map.find(ind) != uses_map.end()); - assert(uses_map[ind] > 0); - uses_map[ind]--; - if (uses_map[ind] == 0) - { - // plan for deallocation of static tensornode - tensor_builder_map[ind]->notifyLastUse(ind); - - // plan for deallocation of dynamic tensor - auto dyn_tensor_manager = tensor_builder_map[ind]->dynamicTensorManager(); - if (dyn_tensor_manager) - { - const auto *backend = - lowered_graph.getLowerInfo(ind)->def_factors().getOnlyElement().backend(); - auto &tensor_registry = lowered_graph.backend_contexts().at(backend)->tensor_registry; - auto *tensor = tensor_registry->getITensor(ind); - assert(tensor); - if (!io_tensors.contains(ind)) // I/O tensors cannot be deallocated - dyn_tensor_manager->planDealloc(op_idx, tensor); - } - } - } - } - } - - // Dispose and validate - for (const auto &ind : io_tensors) - { - --uses_map[ind]; - if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice - { - tensor_builder_map[ind]->notifyLastUse(ind); - } - } - - for (const auto &ind : constants) - { - --uses_map[ind]; - if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice - { - tensor_builder_map[ind]->notifyLastUse(ind); - } - } - - assert( - std::all_of(uses_map.begin(), uses_map.end(), - [](std::pair it) { return it.second == 0; })); - - assert( - std::all_of(def_map.begin(), def_map.end(), - [](std::pair it) { return it.second == 0; })); -} - } // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/compiler/Linear.h b/runtime/onert/core/src/compiler/Linear.h index 1e24cf9..56b42cc 100644 --- a/runtime/onert/core/src/compiler/Linear.h +++ b/runtime/onert/core/src/compiler/Linear.h @@ -22,7 +22,6 @@ #include "ir/OpSequences.h" #include "ir/Index.h" -#include "backend/ITensorBuilder.h" #include "compiler/LoweredGraph.h" namespace onert @@ -44,8 +43,6 @@ public: static std::vector linearize(const compiler::LoweredGraph &lowered_graph); static void dump(const compiler::LoweredGraph &lowered_graph, const std::vector &order); - static void planTensors(const compiler::LoweredGraph 
&lowered_graph, - const std::vector &order); }; } // namespace compiler diff --git a/runtime/onert/core/src/compiler/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc index 673d7d3..6d5210d 100644 --- a/runtime/onert/core/src/compiler/LoweredGraph.cc +++ b/runtime/onert/core/src/compiler/LoweredGraph.cc @@ -32,6 +32,7 @@ #include "compiler/BackendResolver.h" #include "compiler/ManualScheduler.h" #include "compiler/HEScheduler.h" +#include "util/TracingCtx.h" namespace onert { @@ -40,6 +41,13 @@ namespace compiler LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph} { + // set tracing_ctx for copied graph + if (options.tracing_ctx) + { + auto subgraph_index = options.tracing_ctx->getSubgraphIndex(&graph); + options.tracing_ctx->setSubgraphIndex(&_graph, subgraph_index.value()); + } + bool linear_executor = (options.executor == "Linear"); // Build backend contexts @@ -112,7 +120,7 @@ LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &option .run(); // Set LowerInfo for each operand from the operand::LowerInfo holder - manipulateLowerInfo(operands_lower_info, options.is_primary_subgraph); + manipulateLowerInfo(operands_lower_info); dumpLowerInfo(); } @@ -126,7 +134,11 @@ LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &option // Optimization passes pass::PassRunner{}.append(std::make_unique(*this)).run(); - VERBOSE(OpSequences) << "Dump after permutation insertion" << std::endl; + VERBOSE(LoweredGraph) << "Dump after permutation insertion" << std::endl; + for (auto operand : _graph.getInputs()) + VERBOSE(LoweredGraph) << "Graph Input : " << operand << std::endl; + for (auto operand : _graph.getOutputs()) + VERBOSE(LoweredGraph) << "Graph Output : " << operand << std::endl; dumpOpSequences(_op_seqs, _graph.operations()); // Graph verifications @@ -322,50 +334,22 @@ void LoweredGraph::makeOpSequences( } void LoweredGraph::manipulateLowerInfo( - ir::OperandIndexMap> &operands_lower_info, - bool is_primary) + ir::OperandIndexMap> &operands_lower_info) { const auto controlflow_backend = BackendManager::get().getControlflow(); - // TODO Rather than handling primary graph specially, - // let the permute inserted and remove it later - if (is_primary) + // TODO Rather than using NHWC Get frontend layout of this node from IR + auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC}; + for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED) { - // TODO Rather than using NHWC Get frontend layout of this node from IR - auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC}; - for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED) - { - auto &&lower_info = operands_lower_info.at(index); - assert(lower_info->def_factors().empty()); - lower_info->addDefPermuteFactor(factor); - } - for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED) - { - auto &&lower_info = operands_lower_info.at(index); - lower_info->addUsePermuteFactor(factor); - } + auto &&lower_info = operands_lower_info.at(index); + assert(lower_info->def_factors().empty()); + lower_info->addDefPermuteFactor(factor); } - else + for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED) { - for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED) - { - auto &&lower_info = operands_lower_info.at(index); - if (!(lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0)) - { - // In case of not that Graph's input is not used in any operation and 
not the graph's - // output. - // In other words, it is not unused input in Graph. - lower_info->addDefPermuteFactor(*lower_info->use_factors().begin()); - } - else - { - // In case of that an operand is Graph's input and not input or output of any operation - lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{ - controlflow_backend, - ir::Layout::NHWC // TODO Get frontend layout of this node from IR - }); - } - } + auto &&lower_info = operands_lower_info.at(index); + lower_info->addUsePermuteFactor(factor); } for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED) { @@ -446,8 +430,11 @@ void LoweredGraph::dumpLowerInfo() sstream << (shape.dim(i)) << " "; } sstream << "}" << std::endl; - sstream << " - Def ir::Operations : " << def_ops << std::endl; - sstream << " - Use ir::Operations : " << use_ops << std::endl; + sstream << " - Def Operations : " << def_ops << std::endl; + sstream << " - Use Operations : " << use_ops << std::endl; + sstream << " - Data : " + << (object.data() ? (std::to_string(object.data()->size()) + " bytes") : "N/A") + << std::endl; sstream << " - Lower Info" << std::endl; sstream << " - Def Backends : " << def_layouts << std::endl; sstream << " - Use Backends : " << use_layouts << std::endl; diff --git a/runtime/onert/core/src/compiler/ManualScheduler.cc b/runtime/onert/core/src/compiler/ManualScheduler.cc index ed49ee5..1f4a478 100644 --- a/runtime/onert/core/src/compiler/ManualScheduler.cc +++ b/runtime/onert/core/src/compiler/ManualScheduler.cc @@ -100,10 +100,11 @@ std::unique_ptr ManualScheduler::schedule(const ir::Graph &grap } // Dump final assignment - backend_resolver->iterate([&](const ir::OperationIndex &index, const backend::Backend &backend) { - VERBOSE(ManualScheduler) << "backend for operation #" << index.value() << ": " - << backend.config()->id() << std::endl; - }); + WHEN_LOG_ENABLED(backend_resolver->iterate( + [&](const ir::OperationIndex &index, const backend::Backend &backend) { + VERBOSE(ManualScheduler) << "backend for operation #" << index.value() << ": " + << backend.config()->id() << std::endl; + })); return backend_resolver; } diff --git a/runtime/onert/core/src/compiler/ShapeValidator.cc b/runtime/onert/core/src/compiler/ShapeValidator.cc index c18178d..e0c9f52 100644 --- a/runtime/onert/core/src/compiler/ShapeValidator.cc +++ b/runtime/onert/core/src/compiler/ShapeValidator.cc @@ -37,7 +37,7 @@ namespace compiler { ShapeValidator::ShapeValidator(const ir::Graph &graph) - : _graph{graph}, _ctx{graph.operands()}, _current_op_seq_layout{ir::Layout::UNKNOWN} + : _graph{graph}, _ctx{graph.operands()}, _current_layout{ir::Layout::UNKNOWN} { } @@ -59,7 +59,7 @@ void ShapeValidator::operator()() // creating Compiler assert(_graph.subgraphs() == nullptr); - _current_op_seq_layout = _graph.layout(); + _current_layout = _graph.layout(); _graph.operations().iterate( [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); }); @@ -90,7 +90,7 @@ void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node) const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); @@ -101,6 +101,14 @@ void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node) OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) 
== 2); + if (node.getInputs().size() != 2) + { + const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)}; + OP_REQUIRES(_ctx.at(crops_index).shape().rank() == 2); + OP_REQUIRES(_ctx.at(crops_index).shape().dim(0) == (_ctx.at(ifm_index).shape().rank() - 2)); + OP_REQUIRES(_ctx.at(crops_index).shape().dim(1) == 2); + } + OP_REQUIRES(input_shape.C == output_shape.C); } @@ -330,7 +338,7 @@ void ShapeValidator::visit(const ir::operation::SpaceToBatchND &node) node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)}; const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)}; - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); @@ -355,7 +363,7 @@ void ShapeValidator::visit(const ir::operation::SpaceToDepth &node) const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); const auto block_size = node.param().block_size; @@ -471,7 +479,7 @@ void ShapeValidator::visit(const ir::operation::TransposeConv &node) OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank()); OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); // The kernel has only IHWO layout on frontend @@ -516,7 +524,7 @@ void ShapeValidator::visit(const ir::operation::DepthToSpace &node) const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)}; - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto output_shape = _ctx.at(output_index).shape().asFeature(frontend_layout); const auto input_shape = _ctx.at(input_index).shape().asFeature(frontend_layout); diff --git a/runtime/onert/core/src/compiler/ShapeValidator.h b/runtime/onert/core/src/compiler/ShapeValidator.h index f40c098..763cf7c 100644 --- a/runtime/onert/core/src/compiler/ShapeValidator.h +++ b/runtime/onert/core/src/compiler/ShapeValidator.h @@ -93,7 +93,7 @@ private: // TODO Remove _ctx field const ir::Graph &_graph; const ir::Operands &_ctx; - ir::Layout _current_op_seq_layout; + ir::Layout _current_layout; }; } // namespace compiler diff --git a/runtime/onert/core/src/compiler/StaticShapeInferer.cc b/runtime/onert/core/src/compiler/StaticShapeInferer.cc index d3b083b..1f2c6f3 100644 --- a/runtime/onert/core/src/compiler/StaticShapeInferer.cc +++ b/runtime/onert/core/src/compiler/StaticShapeInferer.cc @@ -142,12 +142,12 @@ void StaticShapeInferer::dump() } } -void StaticShapeInferer::visit(const ir::operation::ArgMax &op) +void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op) { - const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; + const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)}; const auto &input = _operands.at(input_idx); - 
const auto axis_idx{op.getInputs().at(ir::operation::ArgMax::Input::AXIS)}; + const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)}; const auto &axis = _operands.at(axis_idx); // get mutable output operand @@ -166,7 +166,8 @@ void StaticShapeInferer::visit(const ir::operation::ArgMax &op) axis_value = axis_value < 0 ? axis_value + rank : axis_value; // re-sizing output shape - ir::Shape new_shape = shape_inference::inferArgMaxShape(input.info().shape(), axis_value, rank); + ir::Shape new_shape = + shape_inference::inferArgMinMaxShape(input.info().shape(), axis_value, rank); output.info().shape(new_shape); } @@ -335,35 +336,47 @@ void StaticShapeInferer::visit(const ir::operation::ExpandDims &op) // even when axis is constant, output shape should be recalculated since user might call // nnfw_set_input_tensorinfo(input, some_new_shape) - auto axis_buf = reinterpret_cast(axis.data()->base()); - assert(axis_buf); + auto axis_type = axis.typeInfo().type(); + assert(axis_type == ir::DataType::INT32 || axis_type == ir::DataType::INT64); + + assert(axis.data()->base()); + int32_t axis_value = + (axis_type == ir::DataType::INT32) + ? reinterpret_cast(axis.data()->base())[0] + : static_cast(reinterpret_cast(axis.data()->base())[0]); // re-sizing output shape - ir::Shape new_shape = shape_inference::inferExpandDimsShape(input.info().shape(), axis_buf[0]); + ir::Shape new_shape = shape_inference::inferExpandDimsShape(input.info().shape(), axis_value); output.info().shape(new_shape); } void StaticShapeInferer::visit(const ir::operation::Fill &op) { - const auto input_idx{op.getInputs().at(ir::operation::Fill::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto shape_idx{op.getInputs().at(ir::operation::Fill::Input::SHAPE)}; + const auto &shape = _operands.at(shape_idx); const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - if (!input.isConstant()) + if (!shape.isConstant()) { output.info().setDynamic(); _return_has_dynamic_tensor = true; return; } - assert(input.typeInfo().type() == ir::DataType::INT32); + const auto dims_type = shape.typeInfo().type(); + assert(dims_type == ir::DataType::INT32 || dims_type == ir::DataType::INT64); - auto input_buf = reinterpret_cast(input.data()->base()); - assert(input_buf); + auto dims_buf = shape.data()->base(); + assert(dims_buf); + + const auto &dims_shape = shape.info().shape(); + auto new_shape = ((dims_type == ir::DataType::INT32) + ? shape_inference::inferFillShape( + dims_shape, reinterpret_cast(dims_buf)) + : shape_inference::inferFillShape( + dims_shape, reinterpret_cast(dims_buf))); - // re-sizing output shape - ir::Shape new_shape = shape_inference::inferFillShape(input.info().shape(), input_buf); output.info().shape(new_shape); } diff --git a/runtime/onert/core/src/compiler/TensorBuilders.h b/runtime/onert/core/src/compiler/TensorBuilders.h deleted file mode 100644 index 3b0360b..0000000 --- a/runtime/onert/core/src/compiler/TensorBuilders.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_COMPILER_TENSOR_BUILDERS_H__ -#define __ONERT_COMPILER_TENSOR_BUILDERS_H__ - -#include -#include -#include "backend/BackendContext.h" -#include "backend/Backend.h" -#include "backend/controlflow/Config.h" -#include "backend/controlflow/TensorBuilder.h" -#include "util/logging.h" - -namespace onert -{ -namespace compiler -{ - -class TensorBuilders -{ -public: - TensorBuilders() = default; - - TensorBuilders(const onert::backend::BackendContexts &backend_contexts, bool include_controlflow) - { - for (const auto &e : backend_contexts) - { - if (e.first->config()->id() == backend::controlflow::Config::ID) - { - _cf_tensor_builder = std::dynamic_pointer_cast( - e.second->tensor_builder); - if (include_controlflow) - _tensor_builders.insert(e.second->tensor_builder); - } - else - { - _tensor_builders.insert(e.second->tensor_builder); - } - } - } - - std::unordered_set>::const_iterator begin() const - { - return _tensor_builders.cbegin(); - } - std::unordered_set>::const_iterator end() const - { - return _tensor_builders.cend(); - } - - std::shared_ptr getControlflowTensorBuilder() const - { - return _cf_tensor_builder; - } - -private: - std::unordered_set> _tensor_builders; - std::shared_ptr _cf_tensor_builder; -}; - -} // namespace compiler -} // namespace onert - -#endif // __ONERT_COMPILER_TENSOR_BUILDERS_H__ diff --git a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc index c83a72a..8467d51 100644 --- a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc +++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc @@ -130,9 +130,11 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde // Generate output operand and permute operation auto out_operand_index = _graph.addOperand(operand.shape(), operand.typeInfo()); - // change model output if operand_index is model output index + // change model output if operand_index is model output index and the out operand is controlflow + // backend auto &model_outputs = _graph.getOutputs(); - if (model_outputs.contains(operand_index)) + const backend::Backend *cf_backend = compiler::BackendManager::get().getControlflow(); + if (model_outputs.contains(operand_index) && factor.backend() == cf_backend) { model_outputs.replace(operand_index, out_operand_index); } @@ -191,8 +193,10 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde const auto &node = _graph.operations().at(node_index); VERBOSE_F() << "Permute Op inserted, node index : " << node_index << std::endl; - VERBOSE_F() << " - Input (original) Operand : " << operand_index << std::endl; - VERBOSE_F() << " - Output(inserted) Operand : " << out_operand_index << std::endl; + VERBOSE_F() << " - Input (original) Operand : " << operand_index << "(" + << input_factor.backend()->config()->id() << ")" << std::endl; + VERBOSE_F() << " - Output(inserted) Operand : " << out_operand_index << "(" + << factor.backend()->config()->id() << ")" << std::endl; // OpSequence { diff --git 
a/runtime/onert/core/src/exec/DataflowExecutor.cc b/runtime/onert/core/src/exec/DataflowExecutor.cc index 53bc3c2..b81a757 100644 --- a/runtime/onert/core/src/exec/DataflowExecutor.cc +++ b/runtime/onert/core/src/exec/DataflowExecutor.cc @@ -78,11 +78,10 @@ bool DataflowExecutor::noWaitingJobs() } DataflowExecutor::DataflowExecutor(std::unique_ptr lowered_graph, - const std::vector &input_tensors, - const std::vector &output_tensors, const compiler::TensorRegistries &tensor_regs, - compiler::CodeMap &&code_map) - : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs}, + compiler::CodeMap &&code_map, + const util::TracingCtx *tracing_ctx) + : ExecutorBase{std::move(lowered_graph), tensor_regs, tracing_ctx}, _code_map{std::move(code_map)} { VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl; @@ -143,7 +142,9 @@ void DataflowExecutor::executeImpl() } assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs - _subject.notifyModelBegin(this); + auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph); + + _subject.notifySubgraphBegin(profiling_subg_index); while (!_ready_jobs.empty()) { @@ -157,7 +158,7 @@ void DataflowExecutor::executeImpl() const backend::Backend *backend = _lowered_graph->getLowerInfo()->op_seq.at(op_seq_index)->backend(); - _subject.notifyJobBegin(this, op_seq, backend); + _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend); job->fn_seq()->initRunning(); @@ -167,13 +168,13 @@ void DataflowExecutor::executeImpl() job->run(); - _subject.notifyJobEnd(this, op_seq, backend); + _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend); notify(job_index); _finished_jobs[job_index] = std::move(job); } assert(noWaitingJobs()); - _subject.notifyModelEnd(this); + _subject.notifySubgraphEnd(profiling_subg_index); // Reset input info for the next execution _input_info = _initial_input_info; diff --git a/runtime/onert/core/src/exec/DataflowExecutor.h b/runtime/onert/core/src/exec/DataflowExecutor.h index 69dfda1..b72c0d0 100644 --- a/runtime/onert/core/src/exec/DataflowExecutor.h +++ b/runtime/onert/core/src/exec/DataflowExecutor.h @@ -28,6 +28,7 @@ #include #include "exec/ExecutorBase.h" #include "compiler/CodeMap.h" +#include "util/TracingCtx.h" namespace onert { @@ -50,9 +51,8 @@ public: * @param code_map OpSequence and its code map */ DataflowExecutor(std::unique_ptr lowered_graph, - const std::vector &input_tensors, - const std::vector &output_tensors, - const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map); + const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map, + const util::TracingCtx *tracing_ctx); void executeImpl() override; diff --git a/runtime/onert/core/src/exec/DynamicShapeInferer.cc b/runtime/onert/core/src/exec/DynamicShapeInferer.cc index 1666d3f..2d9d534 100644 --- a/runtime/onert/core/src/exec/DynamicShapeInferer.cc +++ b/runtime/onert/core/src/exec/DynamicShapeInferer.cc @@ -92,12 +92,12 @@ void DynamicShapeInferer::handleSimpleUnaryOp(const ir::Operation &op, assert(output->buffer() != nullptr); } -void DynamicShapeInferer::visit(const ir::operation::ArgMax &op) +void DynamicShapeInferer::visit(const ir::operation::ArgMinMax &op) { - const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; + const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)}; const auto input = _tensor_registry->getITensor(input_idx); - const auto 
axis_idx{op.getInputs().at(ir::operation::ArgMax::Input::AXIS)}; + const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)}; const auto axis = _tensor_registry->getITensor(axis_idx); auto output_ind = op.getOutputs().at(0); @@ -111,7 +111,7 @@ void DynamicShapeInferer::visit(const ir::operation::ArgMax &op) const auto rank = input_shape.rank(); axis_value = axis_value < 0 ? axis_value + rank : axis_value; - ir::Shape new_shape = shape_inference::inferArgMaxShape(input_shape, axis_value, rank); + ir::Shape new_shape = shape_inference::inferArgMinMaxShape(input_shape, axis_value, rank); output->applyShape(new_shape); assert(output->buffer() != nullptr); @@ -388,10 +388,16 @@ void DynamicShapeInferer::visit(const ir::operation::ExpandDims &op) auto axis_ind = op.getInputs().at(ir::operation::ExpandDims::AXIS); auto axis = _tensor_registry->getITensor(axis_ind); - auto axis_buf = reinterpret_cast(axis->buffer()); - assert(axis_buf); + auto axis_type = axis->data_type(); + assert(axis_type == ir::DataType::INT32 || axis_type == ir::DataType::INT64); - auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_buf[0]); + assert(axis->buffer()); + int32_t axis_value = + (axis_type == ir::DataType::INT32) + ? reinterpret_cast(axis->buffer())[0] + : static_cast(reinterpret_cast(axis->buffer())[0]); + + auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_value); output->applyShape(output_shape); assert(output->buffer() != nullptr); @@ -402,19 +408,24 @@ void DynamicShapeInferer::visit(const ir::operation::Fill &op) // check if output is not dynamic auto output_ind = op.getOutputs().at(0); auto output = _tensor_registry->getITensor(output_ind); - auto input_ind = op.getInputs().at(ir::operation::Fill::Input::INPUT); - auto input = _tensor_registry->getITensor(input_ind); - ir::Shape input_shape = input->getShape(); + auto shape_ind = op.getInputs().at(ir::operation::Fill::Input::SHAPE); + auto shape = _tensor_registry->getITensor(shape_ind); - if ((!input->is_dynamic()) && (!output->is_dynamic())) + if ((!shape->is_dynamic()) && (!output->is_dynamic())) return; - assert(input->data_type() == ir::DataType::INT32); + const auto dims_type = shape->data_type(); + assert(dims_type == ir::DataType::INT32 || dims_type == ir::DataType::INT64); - auto input_buf = reinterpret_cast(input->buffer()); - assert(input_buf); + auto dims_buf = shape->buffer(); + assert(dims_buf); - auto output_shape = shape_inference::inferFillShape(input_shape, input_buf); + const auto &dims_shape = shape->getShape(); + auto output_shape = ((dims_type == ir::DataType::INT32) + ? shape_inference::inferFillShape( + dims_shape, reinterpret_cast(dims_buf)) + : shape_inference::inferFillShape( + dims_shape, reinterpret_cast(dims_buf))); output->applyShape(output_shape); assert(output->buffer() != nullptr); diff --git a/runtime/onert/core/src/exec/ExecTime.h b/runtime/onert/core/src/exec/ExecTime.h index 846d093..d2ddbad 100644 --- a/runtime/onert/core/src/exec/ExecTime.h +++ b/runtime/onert/core/src/exec/ExecTime.h @@ -94,7 +94,7 @@ public: /** * @brief Update metrics file with new data. 
*/ - void uploadOperationsExecTime() const { _json.uploadOperationsExecTime(); } + void storeOperationsExecTime() const { _json.storeOperationsExecTime(); } static const int64_t NOT_FOUND = -1; private: diff --git a/runtime/onert/core/src/exec/ExecutionObservee.cc b/runtime/onert/core/src/exec/ExecutionObservee.cc index ddb1fb6..d5003b1 100644 --- a/runtime/onert/core/src/exec/ExecutionObservee.cc +++ b/runtime/onert/core/src/exec/ExecutionObservee.cc @@ -26,37 +26,38 @@ void ExecutionObservee::add(std::unique_ptr observer) _observers.emplace_back(std::move(observer)); } -void ExecutionObservee::notifyModelBegin(IExecutor *executor) +void ExecutionObservee::notifySubgraphBegin(ir::SubgraphIndex ind) { for (auto &o : _observers) { - o->handleBegin(executor); + o->handleSubgraphBegin(ind); } } -void ExecutionObservee::notifyModelEnd(IExecutor *executor) +void ExecutionObservee::notifySubgraphEnd(ir::SubgraphIndex ind) { for (auto &o : _observers) { - o->handleEnd(executor); + o->handleSubgraphEnd(ind); } } -void ExecutionObservee::notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq, +void ExecutionObservee::notifyJobBegin(IExecutor *executor, ir::SubgraphIndex index, + const ir::OpSequence *op_seq, const backend::Backend *backend) { for (auto &o : _observers) { - o->handleBegin(executor, op_seq, backend); + o->handleJobBegin(executor, index, op_seq, backend); } } -void ExecutionObservee::notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq, - const backend::Backend *backend) +void ExecutionObservee::notifyJobEnd(IExecutor *executor, ir::SubgraphIndex index, + const ir::OpSequence *op_seq, const backend::Backend *backend) { for (auto &o : _observers) { - o->handleEnd(executor, op_seq, backend); + o->handleJobEnd(executor, index, op_seq, backend); } } diff --git a/runtime/onert/core/src/exec/ExecutionObservee.h b/runtime/onert/core/src/exec/ExecutionObservee.h index 49d409a..62b3f62 100644 --- a/runtime/onert/core/src/exec/ExecutionObservee.h +++ b/runtime/onert/core/src/exec/ExecutionObservee.h @@ -20,6 +20,7 @@ #include #include "exec/ExecutionObservers.h" +#include "ir/Index.h" namespace onert { @@ -39,11 +40,11 @@ public: * @param observer Observer to be added */ void add(std::unique_ptr observer); - void notifyModelBegin(IExecutor *executor); - void notifyModelEnd(IExecutor *executor); - void notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq, + void notifySubgraphBegin(ir::SubgraphIndex ind); + void notifySubgraphEnd(ir::SubgraphIndex ind); + void notifyJobBegin(IExecutor *executor, ir::SubgraphIndex index, const ir::OpSequence *op_seq, const backend::Backend *backend); - void notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq, + void notifyJobEnd(IExecutor *executor, ir::SubgraphIndex index, const ir::OpSequence *op_seq, const backend::Backend *backend); private: diff --git a/runtime/onert/core/src/exec/ExecutionObservers.cc b/runtime/onert/core/src/exec/ExecutionObservers.cc index 066b52e..18c0c1d 100644 --- a/runtime/onert/core/src/exec/ExecutionObservers.cc +++ b/runtime/onert/core/src/exec/ExecutionObservers.cc @@ -17,12 +17,62 @@ #include "exec/ExecutionObservers.h" #include +#include #include "util/logging.h" #include "exec/IExecutor.h" #include "misc/polymorphic_downcast.h" #include "ir/OpSequence.h" #include "util/EventWriter.h" +#include "util/Utils.h" + +namespace +{ + +void setUserData(const onert::ir::Graph &g, const onert::ir::OpSequence *op_seq, + decltype(EventCollector::Event::userData) &data) +{ + if (op_seq->size() == 0) 
+ return; + + // From a tensor of shape [a, b, c], this will return a string "shape(a b c)". + // String like "[1, 2, 3]" looks better but this will be considered as a list in Json + // so text search (e.g., Ctrl-F in Chrome Tracing) could be difficult + auto build_shape_str = [&](onert::ir::OperandIndex operand_idx) { + std::string shape_str; + auto &shape = g.operands().at(operand_idx).info().shape(); + for (int i = 0; i < shape.rank(); i++) + { + if (i == 0) + shape_str = "shape(" + std::to_string(shape.dim(i)); + else + shape_str += " " + std::to_string(shape.dim(i)); + } + shape_str += ")"; + + return shape_str; + }; + + const auto &first_op_idx = op_seq->operations().at(0); + const auto &first_op_node = g.operations().at(first_op_idx); + + auto &inputs = first_op_node.getInputs(); + auto size = inputs.size(); + for (size_t i = 0; i < size; i++) + { + auto operand_idx = inputs.at(i); + if (operand_idx.undefined()) + continue; + + std::string key("input_shape_" + std::to_string(i)); + std::string value = build_shape_str(operand_idx); + data.emplace_back(std::make_pair(key, value)); + } + + // add other userData as needed +} + +} // namespace namespace onert { @@ -30,8 +80,8 @@ namespace onert namespace exec { -void ProfileObserver::handleBegin(onert::exec::IExecutor *, const ir::OpSequence *, - const onert::backend::Backend *backend) +void ProfileObserver::handleJobBegin(onert::exec::IExecutor *, ir::SubgraphIndex, + const ir::OpSequence *, const onert::backend::Backend *backend) { _timer = backend->config()->timer(); if (_timer == nullptr) @@ -39,8 +89,8 @@ void ProfileObserver::handleBegin(onert::exec::IExecutor *, const ir::OpSequence _timer->handleBegin(); } -void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq, - const backend::Backend *backend) +void ProfileObserver::handleJobEnd(IExecutor *exec, ir::SubgraphIndex, const ir::OpSequence *op_seq, + const backend::Backend *backend) { _timer->handleEnd(); const auto timer_res = _timer->getTime(); @@ -70,51 +120,74 @@ void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq, } }; -ChromeTracingObserver::ChromeTracingObserver(const std::string &filepath, const ir::Graph &graph) - : _base_filepath(filepath), _recorder{}, _collector{&_recorder}, _graph{graph} +TracingObserver::TracingObserver(const std::string &filepath, const ir::Graph &graph, + const util::TracingCtx *tracing_ctx) + : _recorder{std::make_unique()}, _collector{_recorder.get()}, _graph{graph}, + _tracing_ctx{tracing_ctx} { + // TODO Remove below after using _tracing_ctx + UNUSED_RELEASE(_tracing_ctx); + + _event_writer = EventWriter::get(filepath); + _event_writer->startToUse(); } -ChromeTracingObserver::~ChromeTracingObserver() +TracingObserver::~TracingObserver() { try { - EventWriter{_recorder}.writeToFiles(_base_filepath); + _event_writer->readyToFlush(std::move(_recorder)); } catch (const std::exception &e) { - std::cerr << "E: Fail to record event in ChromeTracingObserver: " << e.what() << std::endl; + std::cerr << "E: Fail to record event in TracingObserver: " << e.what() << std::endl; } } -void ChromeTracingObserver::handleBegin(IExecutor *) +void TracingObserver::handleSubgraphBegin(ir::SubgraphIndex subg_ind) { + // TODO Write subg_ind into profling result + UNUSED_RELEASE(subg_ind); _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "runtime", "Graph"}); } -void ChromeTracingObserver::handleBegin(IExecutor *, const ir::OpSequence *op_seq, - const backend::Backend *backend) +void 
TracingObserver::handleJobBegin(IExecutor *, ir::SubgraphIndex subg_ind, + const ir::OpSequence *op_seq, const backend::Backend *backend) { + // TODO Write subg_ind into profling result + UNUSED_RELEASE(subg_ind); + std::string backend_id = backend->config()->id(); - _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, backend_id, - opSequenceTag(op_seq, _graph.operations())}); + + auto ev = EventCollector::Event{EventCollector::Edge::BEGIN, backend_id, + opSequenceTag(op_seq, _graph.operations())}; + // add shape of inputs + setUserData(_graph, op_seq, ev.userData); + + _collector.onEvent(ev); } -void ChromeTracingObserver::handleEnd(IExecutor *, const ir::OpSequence *op_seq, - const backend::Backend *backend) +void TracingObserver::handleJobEnd(IExecutor *, ir::SubgraphIndex subg_ind, + const ir::OpSequence *op_seq, const backend::Backend *backend) { + // TODO Write subg_ind into profling result + UNUSED_RELEASE(subg_ind); + std::string backend_id = backend->config()->id(); _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, backend_id, opSequenceTag(op_seq, _graph.operations())}); } -void ChromeTracingObserver::handleEnd(IExecutor *) +void TracingObserver::handleSubgraphEnd(ir::SubgraphIndex subg_ind) { + // TODO Write subg_ind into profling result + UNUSED_RELEASE(subg_ind); + _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, "runtime", "Graph"}); } -std::string ChromeTracingObserver::opSequenceTag(const ir::OpSequence *op_seq, - const ir::Operations &operations) +std::string TracingObserver::opSequenceTag(const ir::OpSequence *op_seq, + const ir::Operations &operations) { if (op_seq->size() == 0) return "Empty OpSequence"; diff --git a/runtime/onert/core/src/exec/ExecutionObservers.h b/runtime/onert/core/src/exec/ExecutionObservers.h index f8c2acc..a9eebfe 100644 --- a/runtime/onert/core/src/exec/ExecutionObservers.h +++ b/runtime/onert/core/src/exec/ExecutionObservers.h @@ -18,12 +18,16 @@ #define __ONERT_EXEC_OBSREVERS_H__ #include "exec/IFunction.h" +#include "ir/Index.h" #include "ir/OpSequence.h" #include "ExecTime.h" #include "util/ITimer.h" #include "exec/IExecutor.h" #include "util/EventCollector.h" #include "util/EventRecorder.h" +#include "util/EventWriter.h" +#include "util/TracingCtx.h" +#include "util/EventWriter.h" namespace onert { @@ -33,13 +37,15 @@ class IExecutionObserver { public: /// @brief Invoked just before model (not individual operation) execution begins - virtual void handleBegin(IExecutor *) { return; } + virtual void handleSubgraphBegin(ir::SubgraphIndex) { return; } - virtual void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0; - virtual void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0; + virtual void handleJobBegin(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *, + const backend::Backend *) = 0; + virtual void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *, + const backend::Backend *) = 0; /// @brief Invoked just after model (not individual operation) execution ends - virtual void handleEnd(IExecutor *) { return; } + virtual void handleSubgraphEnd(ir::SubgraphIndex) { return; } virtual ~IExecutionObserver() = default; }; @@ -51,10 +57,12 @@ public: : _et(std::move(et)), _graph(graph) { } - void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override; - void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override; + void handleJobBegin(IExecutor *, 
ir::SubgraphIndex, const ir::OpSequence *, + const backend::Backend *) override; + void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *, + const backend::Backend *) override; - void handleEnd(IExecutor *) override { _et->uploadOperationsExecTime(); } + void handleSubgraphEnd(ir::SubgraphIndex) override { _et->storeOperationsExecTime(); } private: std::unique_ptr _timer; @@ -62,24 +70,28 @@ private: const ir::Graph &_graph; }; -class ChromeTracingObserver : public IExecutionObserver +class TracingObserver : public IExecutionObserver { public: - ChromeTracingObserver(const std::string &filepath, const ir::Graph &graph); - ~ChromeTracingObserver(); - void handleBegin(IExecutor *) override; - void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override; - void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override; - void handleEnd(IExecutor *) override; + TracingObserver(const std::string &filepath, const ir::Graph &graph, + const util::TracingCtx *tracing_ctx); + ~TracingObserver(); + void handleSubgraphBegin(ir::SubgraphIndex) override; + void handleJobBegin(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *, + const backend::Backend *) override; + void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *, + const backend::Backend *) override; + void handleSubgraphEnd(ir::SubgraphIndex) override; private: static std::string opSequenceTag(const ir::OpSequence *op_seq, const ir::Operations &operations); private: - const std::string &_base_filepath; - EventRecorder _recorder; + std::unique_ptr _recorder; EventCollector _collector; const ir::Graph &_graph; + EventWriter *_event_writer; + const util::TracingCtx *_tracing_ctx; }; } // namespace exec diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc index 018a0bb..588a325 100644 --- a/runtime/onert/core/src/exec/ExecutorBase.cc +++ b/runtime/onert/core/src/exec/ExecutorBase.cc @@ -15,11 +15,11 @@ */ #include "ExecutorBase.h" +#include "ShapeConverter.h" -#include "backend/ITensor.h" #include "backend/controlflow/UserTensor.h" -#include "backend/cpu_common/Tensor.h" #include "util/logging.h" +#include "misc/polymorphic_downcast.h" namespace onert { @@ -27,43 +27,27 @@ namespace exec { ExecutorBase::ExecutorBase(std::unique_ptr &&lowered_graph, - const std::vector &input_tensors, - const std::vector &output_tensors, - const compiler::TensorRegistries &tensor_regs) - : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()}, - _input_tensors{input_tensors}, _output_tensors{output_tensors}, _mutex() + const compiler::TensorRegistries &tensor_regs, + const util::TracingCtx *tracing_ctx) + : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()}, _mutex(), + _tracing_ctx(tracing_ctx) { - // TODO Fix the way of knowing whether it is primary or not - bool primary_executor = !(_input_tensors.empty() && _output_tensors.empty()); - if (!primary_executor) - { - auto build_input_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) { - std::vector list; - for (auto ind : ind_seq) - { - backend::ITensor *tensor = tensor_regs.getITensor(ind); - assert(tensor != nullptr); - list.push_back(tensor); - } - return list; - }; - auto build_output_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) { - std::vector list; - for (auto ind : ind_seq) - { - backend::ITensor *tensor = tensor_regs.getITensor(ind); - assert(tensor != nullptr); - list.push_back(tensor); - 
} - return list; - }; - _input_tensors = build_input_tensor_list(_graph.getInputs()); - _output_tensors = build_output_tensor_list(_graph.getOutputs()); - } + auto build_tensor_list = [&](const auto &ind_seq, auto &tensors) { + assert(tensors.empty()); + for (auto ind : ind_seq) + { + backend::ITensor *tensor = tensor_regs.getITensor(ind); + assert(tensor != nullptr); + auto io_tensor = nnfw::misc::polymorphic_downcast(tensor); + tensors.push_back(io_tensor); + } + }; + build_tensor_list(_graph.getInputs(), _input_tensors); + build_tensor_list(_graph.getOutputs(), _output_tensors); } -void ExecutorBase::execute(const std::vector &src_tensors, - const std::shared_ptr &pre_fn) +void ExecutorBase::execute(const std::vector &inputs, + const std::vector &outputs) { // For thread-safe, use mutex // TODO: if all used backends on this executor are thread-safe, @@ -71,31 +55,37 @@ void ExecutorBase::execute(const std::vector &src_tensors, // Deadlock occurs when an Executor is called recursively. std::lock_guard lock(_mutex); - assert(src_tensors.size() == _graph.getInputs().size()); - assert(src_tensors.size() == _input_tensors.size()); - for (uint32_t n = 0; n < _graph.getInputs().size(); ++n) + assert(inputs.size() == _graph.getInputs().size()); + assert(inputs.size() == _input_tensors.size()); + for (uint32_t n = 0; n < inputs.size(); ++n) { - // when user changes input shape, the input tensor is dynamic and its memory is not allocated. - // This code find the info to allocate dynamic tensor, and allocate memory based on the source - // tensor's shape set by caller. - const auto src_tensor = src_tensors[n]; + const auto input = inputs[n]; + assert(input->buffer() != nullptr); auto input_tensor = _input_tensors[n]; - // If src_tensor or input_tensor is nullptr, pre_fn does not copy the tensors - if (src_tensor != nullptr && input_tensor != nullptr) + assert(input_tensor != nullptr); + if (input != nullptr) { - const auto orig_input_shape = input_tensor->getShape(); + const auto orig_input_shape = input_tensor->orig_info().shape(); const auto changed_input_shape = - convertShape(src_tensor->getShape(), src_tensor->layout(), input_tensor->layout()); + convertShape(input->getShape(), input->layout(), input_tensor->orig_layout()); if (orig_input_shape != changed_input_shape) { input_tensor->set_dynamic(); } } + input_tensor->setTensor(input); } - // TODO Move calling permute_fn.run() into executeImpl() - assert(pre_fn); - pre_fn->run(); + assert(outputs.size() == _graph.getOutputs().size()); + assert(outputs.size() == _output_tensors.size()); + for (uint32_t n = 0; n < outputs.size(); ++n) + { + const auto output = outputs[n]; + // assert(dst_tensor->buffer() != nullptr); + auto output_tensor = _output_tensors[n]; + assert(output_tensor != nullptr); + output_tensor->setTensor(output); + } executeImpl(); } @@ -111,19 +101,19 @@ void ExecutorBase::execute(const IODescription &desc) assert(_input_tensors.size() == desc.inputs.size()); for (uint32_t i = 0; i < _input_tensors.size(); ++i) { - // TODO Remove dynamic_cast - auto *tensor = dynamic_cast(_input_tensors[i]); - assert(tensor); + auto tensor = _input_tensors[i]; + + // TODO Check if (desc.inputs[i] == nullptr) + // TODO Better design for ITensor? 
(we need const_cast as ITensor is writable) + tensor->setUserTensor(static_cast(const_cast(desc.inputs[i]->buffer)), + desc.inputs[i]->size); + auto input_shape = desc.dynamic_input_shapes.find(ir::IOIndex{i}); if (input_shape != desc.dynamic_input_shapes.end()) { tensor->set_dynamic(); tensor->setShape(input_shape->second); } - // TODO Check if (desc.inputs[i] == nullptr) - // TODO Better design for ITensor? (we need const_cast as ITensor is writable) - tensor->setBuffer(static_cast(const_cast(desc.inputs[i]->buffer)), - desc.inputs[i]->size); handleDynamicInputTensor(ir::IOIndex{i}, desc); } @@ -131,13 +121,12 @@ void ExecutorBase::execute(const IODescription &desc) assert(_output_tensors.size() == desc.outputs.size()); for (uint32_t i = 0; i < _output_tensors.size(); ++i) { - // TODO Remove dynamic_cast - auto *tensor = dynamic_cast(_output_tensors[i]); - assert(tensor); - tensor->set_dynamic(); // It can't be resized but shape could change + auto tensor = _output_tensors[i]; + if (desc.outputs[i] == nullptr) throw std::runtime_error{"Output " + std::to_string(i) + "'s buffer is not set."}; - tensor->setBuffer(static_cast(desc.outputs[i]->buffer), desc.outputs[i]->size); + tensor->setUserTensor(static_cast(desc.outputs[i]->buffer), desc.outputs[i]->size); + tensor->set_dynamic(); // It can't be resized but shape could change } executeImpl(); diff --git a/runtime/onert/core/src/exec/ExecutorBase.h b/runtime/onert/core/src/exec/ExecutorBase.h index 8a6ec91..5d95c10 100644 --- a/runtime/onert/core/src/exec/ExecutorBase.h +++ b/runtime/onert/core/src/exec/ExecutorBase.h @@ -17,23 +17,25 @@ #ifndef __ONERT_EXEC_EXECUTOR_BASE_H__ #define __ONERT_EXEC_EXECUTOR_BASE_H__ -#include - #include "IPermuteFunction.h" -#include "exec/ExecutionObservers.h" -#include "ShapeConverter.h" #include "exec/IExecutor.h" -#include "compiler/LoweredGraph.h" -#include "ir/LowerInfoMap.h" -#include "backend/IConfig.h" -#include "backend/Backend.h" #include "exec/ExecTime.h" -#include "exec/IFunction.h" -#include "backend/IDynamicTensorManager.h" -#include "backend/ITensorManager.h" #include "exec/ExecutionObservee.h" +#include "exec/IFunction.h" +#include "exec/IODescription.h" +#include "ir/Graph.h" +#include "ir/Index.h" +#include "ir/LowerInfoMap.h" +#include "ir/OperationIndexMap.h" +#include "compiler/LoweredGraph.h" #include "compiler/TensorRegistries.h" -#include +#include "backend/controlflow/IOTensor.h" +#include "util/TracingCtx.h" + +#include +#include +#include +#include namespace onert { @@ -49,25 +51,17 @@ public: * @param tensor_builders Tensor builders that are currently used */ ExecutorBase(std::unique_ptr &&lowered_graph, - const std::vector &input_tensors, - const std::vector &output_tensors, - const compiler::TensorRegistries &tensor_regs); + const compiler::TensorRegistries &tensor_regs, const util::TracingCtx *tracing_ctx); virtual ~ExecutorBase() = default; const ir::Graph &graph() final { return _graph; } - /** - * @brief Execute without IODescription - * - * @param src_tensor Tensor list that will be copied to input tensors of this - * @param pre_fn The permutation function that copy from src_tensor to input tensors of this - */ - void execute(const std::vector &src_tensors, - const std::shared_ptr &pre_fn); - void execute(const IODescription &desc) final; + void execute(const std::vector &inputs, + const std::vector &outputs) override; + // Used only in Dataflow and Parallel Executors void setIndexedRanks(std::shared_ptr> ranks) final { @@ -78,9 +72,10 @@ public: void 
addObserver(std::unique_ptr ref) { _subject.add(std::move(ref)); }; - const std::vector &getInputTensors() const { return _input_tensors; } - - const std::vector &getOutputTensors() const { return _output_tensors; } + const std::vector &getOutputTensors() const override + { + return _output_tensors; + } protected: /** @@ -93,9 +88,10 @@ protected: std::shared_ptr> _indexed_ranks; std::unique_ptr _lowered_graph; const ir::Graph &_graph; - std::vector _input_tensors; - std::vector _output_tensors; + std::vector _input_tensors; + std::vector _output_tensors; std::mutex _mutex; + const util::TracingCtx *_tracing_ctx; private: void handleDynamicInputTensor(ir::IOIndex input_index, const IODescription &desc); diff --git a/runtime/onert/core/src/exec/IPermuteFunction.h b/runtime/onert/core/src/exec/IPermuteFunction.h index 11017ed..8f62156 100644 --- a/runtime/onert/core/src/exec/IPermuteFunction.h +++ b/runtime/onert/core/src/exec/IPermuteFunction.h @@ -120,7 +120,8 @@ protected: } assert(src_tensor != dst_tensor); - assert(underlying_type(src_tensor->data_type()) == underlying_type(dst_tensor->data_type())); + if (underlying_type(src_tensor->data_type()) != underlying_type(dst_tensor->data_type())) + throw std::runtime_error("data type does not match"); switch (src_tensor->data_type()) { case ir::DataType::FLOAT32: diff --git a/runtime/onert/core/src/exec/JSONExecTime.cc b/runtime/onert/core/src/exec/JSONExecTime.cc index 72a18de..b29216a 100644 --- a/runtime/onert/core/src/exec/JSONExecTime.cc +++ b/runtime/onert/core/src/exec/JSONExecTime.cc @@ -135,7 +135,7 @@ void JSON::printOperation(const std::map &operation_info, stream.seekp(-2, std::ofstream::end); } -void JSON::uploadOperationsExecTime() const +void JSON::storeOperationsExecTime() const { std::ofstream stream(_measurement_file); if (!stream.is_open()) diff --git a/runtime/onert/core/src/exec/JSONExecTime.h b/runtime/onert/core/src/exec/JSONExecTime.h index a64cb31..8987d72 100644 --- a/runtime/onert/core/src/exec/JSONExecTime.h +++ b/runtime/onert/core/src/exec/JSONExecTime.h @@ -54,18 +54,16 @@ public: loadOperationsExecTime(); }; /** - * @brief Update _operations_exec_time_file with new data. + * @brief Update _measurement_file with new data. */ - void uploadOperationsExecTime() const; + void storeOperationsExecTime() const; private: ///@brief file containing measurements std::string _measurement_file; std::unordered_map _backends; - std::unordered_map< - const backend::Backend *, - std::unordered_map>>> - &_measurements; + MeasurementData &_measurements; + /** * @brief Helper function for inserting data to OperationExecTimes * @@ -86,7 +84,7 @@ private: void printOperation(const std::map &operation_info, std::ofstream &stream) const; /** - * @brief Parse and load operations_exec_time from _operations_exec_time_file. + * @brief Parse and load _measurements from _measurement_file. 
*/ void loadOperationsExecTime(); }; diff --git a/runtime/onert/core/src/exec/LinearExecutor.cc b/runtime/onert/core/src/exec/LinearExecutor.cc index 6e6ca11..a6d4473 100644 --- a/runtime/onert/core/src/exec/LinearExecutor.cc +++ b/runtime/onert/core/src/exec/LinearExecutor.cc @@ -39,7 +39,9 @@ char *seq_to_label(const onert::ir::OpSequence *op_seq, const onert::ir::Operati void LinearExecutor::executeImpl() { - _subject.notifyModelBegin(this); + auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph); + + _subject.notifySubgraphBegin(profiling_subg_index); for (auto &&code : _code) { const auto op_seq = code.op_seq; @@ -48,7 +50,7 @@ void LinearExecutor::executeImpl() #ifdef RUY_PROFILER ruy::profiler::ScopeLabel label(seq_to_label(op_seq, _graph.operations())); #endif - _subject.notifyJobBegin(this, op_seq, backend); + _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend); auto &fn_seq = code.fn_seq; @@ -58,9 +60,9 @@ void LinearExecutor::executeImpl() fn_seq->enableDynamicShapeInferer(handle_dynamic_tensor); fn_seq->run(); - _subject.notifyJobEnd(this, op_seq, backend); + _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend); } - _subject.notifyModelEnd(this); + _subject.notifySubgraphEnd(profiling_subg_index); } } // namespace exec diff --git a/runtime/onert/core/src/exec/LinearExecutor.h b/runtime/onert/core/src/exec/LinearExecutor.h index 22d00ec..d43c970 100644 --- a/runtime/onert/core/src/exec/LinearExecutor.h +++ b/runtime/onert/core/src/exec/LinearExecutor.h @@ -27,6 +27,7 @@ #include "compiler/Linear.h" #include "exec/FunctionSequence.h" #include "compiler/CodeMap.h" +#include "util/TracingCtx.h" namespace onert { @@ -47,11 +48,9 @@ public: * @param code_map OpSequence and its code map */ LinearExecutor(std::unique_ptr lowered_graph, - const std::vector &input_tensors, - const std::vector &output_tensors, const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map, - const std::vector &order) - : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs} + const std::vector &order, const util::TracingCtx *tracing_ctx) + : ExecutorBase{std::move(lowered_graph), tensor_regs, tracing_ctx} { for (auto index : order) { diff --git a/runtime/onert/core/src/exec/ParallelExecutor.cc b/runtime/onert/core/src/exec/ParallelExecutor.cc index 676bdb5..e9e576c 100644 --- a/runtime/onert/core/src/exec/ParallelExecutor.cc +++ b/runtime/onert/core/src/exec/ParallelExecutor.cc @@ -60,12 +60,10 @@ void ParallelExecutor::notify(uint32_t finished_job_id) } ParallelExecutor::ParallelExecutor(std::unique_ptr lowered_graph, - const std::vector &input_tensors, - const std::vector &output_tensors, const compiler::TensorRegistries &tensor_regs, - compiler::CodeMap &&code_map) - : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, - std::move(code_map)} + compiler::CodeMap &&code_map, + const util::TracingCtx *tracing_ctx) + : DataflowExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map), tracing_ctx} { VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl; } @@ -100,7 +98,10 @@ void ParallelExecutor::executeImpl() VERBOSE(ParallelExecutor) << "INITIAL JOBS : " << _ready_jobs.size() << std::endl; - _subject.notifyModelBegin(this); + auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph); + + _subject.notifySubgraphBegin(profiling_subg_index); + while (true) { std::unique_lock lock{_mu_jobs}; @@ -126,9 +127,11 @@ void 
ParallelExecutor::executeImpl() auto op_sequence_index = _job_to_op_seq[job_index]; auto op_seq = &_lowered_graph->op_seqs().at(op_sequence_index); auto backend = _lowered_graph->getLowerInfo()->op_seq.at(op_sequence_index)->backend(); - auto setup = [&, op_seq, backend]() { _subject.notifyJobBegin(this, op_seq, backend); }; + auto setup = [&, op_seq, backend]() { + _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend); + }; auto teardown = [&, job_index, op_seq, backend]() { - _subject.notifyJobEnd(this, op_seq, backend); + _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend); notify(job_index); }; @@ -146,7 +149,7 @@ void ParallelExecutor::executeImpl() // Wait for all the jobs done _scheduler->finish(); - _subject.notifyModelEnd(this); + _subject.notifySubgraphEnd(profiling_subg_index); // Reset input info for the next execution _input_info = _initial_input_info; diff --git a/runtime/onert/core/src/exec/ParallelExecutor.h b/runtime/onert/core/src/exec/ParallelExecutor.h index 111c20c..fd9db42 100644 --- a/runtime/onert/core/src/exec/ParallelExecutor.h +++ b/runtime/onert/core/src/exec/ParallelExecutor.h @@ -28,6 +28,7 @@ #include #include "exec/DataflowExecutor.h" #include "ParallelScheduler.h" +#include "util/TracingCtx.h" namespace onert { @@ -51,9 +52,8 @@ public: * @param code_map OpSequence and its code map */ ParallelExecutor(std::unique_ptr lowered_graph, - const std::vector &input_tensors, - const std::vector &output_tensors, - const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map); + const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map, + const util::TracingCtx *tracing_ctx); void executeImpl() override; diff --git a/runtime/onert/core/src/interp/InterpExecutor.h b/runtime/onert/core/src/interp/InterpExecutor.h index 2e3f3ca..99d7b3a 100644 --- a/runtime/onert/core/src/interp/InterpExecutor.h +++ b/runtime/onert/core/src/interp/InterpExecutor.h @@ -58,6 +58,15 @@ public: * @note It should be called after setting input and output buffer */ void execute(const exec::IODescription &desc) final; + void execute(const std::vector &, + const std::vector &) final + { + throw new std::runtime_error{"Interpreter does not support subgraph calls(control flow ops)"}; + } + const std::vector &getOutputTensors() const final + { + throw new std::runtime_error{"Interpreter does not support this function."}; + } private: const ir::Graph &_graph; diff --git a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc index 0473855..e1fb767 100644 --- a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc +++ b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc @@ -116,7 +116,7 @@ void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor float *ofm_ptr = reinterpret_cast(ofm_tensor->buffer()); nnfw::cker::DepthwiseConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, - cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr); + cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr, nullptr); } void invokeDepthwiseConv(const ExecEnv *env, const ir::Operation &node) diff --git a/runtime/onert/core/src/ir/DataType.cc b/runtime/onert/core/src/ir/DataType.cc index 9eedcd2..8e75c4f 100644 --- a/runtime/onert/core/src/ir/DataType.cc +++ b/runtime/onert/core/src/ir/DataType.cc @@ -42,6 +42,7 @@ size_t sizeOfDataType(DataType data_type) return sizeof(uint8_t); case DataType::QUANT_INT8_SYMM: case DataType::QUANT_INT8_ASYMM: + case 
DataType::QUANT_INT8_SYMM_PER_CHANNEL: return sizeof(int8_t); case DataType::FLOAT16: return sizeof(float16); diff --git a/runtime/onert/core/src/ir/OperationDumper.cc b/runtime/onert/core/src/ir/OperationDumper.cc index eecfe81..a8578b4 100644 --- a/runtime/onert/core/src/ir/OperationDumper.cc +++ b/runtime/onert/core/src/ir/OperationDumper.cc @@ -72,7 +72,14 @@ OperationDumper::OperationDumper(const std::string &start_msg) VERBOSE(LIR) << start_msg << std::endl; } -void OperationDumper::visit(const ArgMax &node) { dumpBinaryInputOp(node); } +void OperationDumper::visit(const ArgMinMax &node) +{ + std::string min_max = node.param().is_arg_max ? "(Max)" : "(Min)"; + VERBOSE(LIR) << "* " << node.name() << min_max << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ArgMinMax::INPUT) << ") Axis(" + << node.getInputs().at(ArgMinMax::AXIS) << ") " << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; +} void OperationDumper::visit(const BatchToSpaceND &node) { @@ -159,6 +166,14 @@ void OperationDumper::visit(const ExpandDims &node) dumpUnaryInputOp(node, axis); } +void OperationDumper::visit(const Fill &node) +{ + VERBOSE(LIR) << "* " << node.name() << std::endl; + VERBOSE(LIR) << " - Inputs : Shape(" << node.getInputs().at(Fill::Input::SHAPE) << ") Value(" + << node.getInputs().at(Fill::Input::VALUE) << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; +} + void OperationDumper::visit(const FullyConnected &node) { std::string inputs = @@ -505,7 +520,7 @@ void OperationDumper::visit(const While &node) } VERBOSE(LIR) << " - Inputs : " << "Cond subgraph (" << node.param().cond_subg_index << ") Body subgraph (" - << node.param().cond_subg_index << ") Inputs(" << inputs << ")" << std::endl; + << node.param().body_subg_index << ") Inputs(" << inputs << ")" << std::endl; std::string outputs; const auto &output_indices = node.getOutputs(); for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it) diff --git a/runtime/onert/core/src/ir/OperationDumper.h b/runtime/onert/core/src/ir/OperationDumper.h index 91642ab..fe18307 100644 --- a/runtime/onert/core/src/ir/OperationDumper.h +++ b/runtime/onert/core/src/ir/OperationDumper.h @@ -31,7 +31,7 @@ public: OperationDumper(const std::string &start_msg); public: - void visit(const operation::ArgMax &) override; + void visit(const operation::ArgMinMax &) override; void visit(const operation::BatchToSpaceND &node) override; void visit(const operation::BCQFullyConnected &node) override; void visit(const operation::BinaryArithmetic &node) override; @@ -48,6 +48,7 @@ public: void visit(const operation::ElementwiseUnary &) override; void visit(const operation::EmbeddingLookup &) override; void visit(const operation::ExpandDims &) override; + void visit(const operation::Fill &) override; void visit(const operation::FullyConnected &node) override; void visit(const operation::Gather &) override; void visit(const operation::HashtableLookup &) override; diff --git a/runtime/onert/core/src/ir/OperationValidator.cc b/runtime/onert/core/src/ir/OperationValidator.cc index da08e81..6f81c2a 100644 --- a/runtime/onert/core/src/ir/OperationValidator.cc +++ b/runtime/onert/core/src/ir/OperationValidator.cc @@ -55,6 +55,17 @@ bool OperationValidator::isSameType(const OperandIndex &idx1, const OperandIndex return operandType(idx1) == operandType(idx2); } +bool OperationValidator::isSameQuantParam(const OperandIndex &idx1, const 
OperandIndex &idx2) +{ + if (_operands.at(idx1).typeInfo().scale() != _operands.at(idx2).typeInfo().scale()) + return false; + + if (_operands.at(idx1).typeInfo().offset() != _operands.at(idx2).typeInfo().offset()) + return false; + + return true; +} + bool OperationValidator::isValidType(const OperandIndex &idx, const DataType &type) { return operandType(idx) == type; @@ -76,29 +87,54 @@ bool OperationValidator::isValidType(const OperandIndex &idx, void OperationValidator::visit(const operation::AddN &node) { + const auto output_index(node.getOutputs().at(0)); + int size = node.getInputs().size(); for (int i = 0; i < size; i++) { const auto input_index(node.getInputs().at(i)); OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32})); + OP_REQUIRES(isSameType(input_index, output_index)); } } +void OperationValidator::visit(const operation::ArgMinMax &node) +{ + const auto input_index(node.getInputs().at(operation::ArgMinMax::Input::INPUT)); + const auto axis_index(node.getInputs().at(operation::ArgMinMax::Input::AXIS)); + const auto output_index(node.getOutputs().at(0)); + const auto output_type = node.param().output_type; + + OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32, DataType::UINT8, + DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM})); + OP_REQUIRES(isValidType(axis_index, {DataType::INT32, DataType::INT64})); + OP_REQUIRES(isValidType(output_index, {DataType::INT32, DataType::INT64})); + OP_REQUIRES(isValidType(output_index, output_type)); +} + void OperationValidator::visit(const operation::BatchMatMul &node) { const auto lhs_index(node.getInputs().at(operation::BatchMatMul::Input::LHS)); const auto rhs_index(node.getInputs().at(operation::BatchMatMul::Input::RHS)); + const auto output_index(node.getOutputs().at(0)); // Constant lhs and rhs is not implemented yet OP_REQUIRES(!isConstant(lhs_index) && !isConstant(rhs_index)); + + // Allow hybrid quantization (lhs: float / rhs: qint8 / out: float) + OP_REQUIRES(isValidType(lhs_index, {DataType::FLOAT32, DataType::QUANT_INT8_ASYMM})); + OP_REQUIRES(isSameType(lhs_index, rhs_index) || + ((operandType(lhs_index) == DataType::FLOAT32) && + (operandType(rhs_index) == DataType::QUANT_INT8_ASYMM))); + OP_REQUIRES(isSameType(lhs_index, output_index)); } void OperationValidator::visit(const operation::BatchToSpaceND &node) { - const auto block_size_index{node.getInputs().at(operation::BatchToSpaceND::Input::BLOCK_SIZE)}; + const auto input_index{node.getInputs().at(operation::BatchToSpaceND::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; - // Non-constant block_size is not implemented yet - OP_REQUIRES(isConstant(block_size_index)); + OP_REQUIRES(isSameType(input_index, output_index)); } void OperationValidator::visit(const operation::BinaryArithmetic &node) @@ -122,10 +158,48 @@ void OperationValidator::visit(const operation::Comparison &node) OP_REQUIRES(isValidType(output_index, DataType::BOOL8)); } +void OperationValidator::visit(const operation::Concat &node) +{ + const auto output_index{node.getOutputs().at(0)}; + + for (auto input_index : node.getInputs()) + { + OP_REQUIRES(isSameType(input_index, output_index)); + + // Int8 quantization requires same scale and zero point + if (isValidType(output_index, DataType::QUANT_INT8_ASYMM)) + { + OP_REQUIRES(isSameQuantParam(input_index, output_index)); + } + } +} + +void OperationValidator::visit(const operation::Conv2D &node) +{ + const auto input_index{node.getInputs().at(operation::Conv2D::Input::INPUT)}; + const 
auto output_index{node.getOutputs().at(0)}; + + uint32_t stride_horizontal = node.param().stride.horizontal; + uint32_t stride_vertical = node.param().stride.vertical; + uint32_t dilation_width = node.param().dilation.width_factor; + uint32_t dilation_height = node.param().dilation.height_factor; + + OP_REQUIRES((stride_horizontal > 0) && (stride_vertical > 0)); + OP_REQUIRES((dilation_width > 0) && (dilation_height > 0)); + OP_REQUIRES(isSameType(input_index, output_index)); +} + void OperationValidator::visit(const operation::DepthToSpace &node) { + const auto input_index{node.getInputs().at(operation::DepthToSpace::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; + int32_t block_size = node.param().block_size; + OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32, DataType::INT64, + DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM})); + OP_REQUIRES(isSameType(input_index, output_index)); + OP_REQUIRES(block_size > 0); } @@ -151,6 +225,32 @@ void OperationValidator::visit(const operation::ElementwiseActivation &node) // Check if I/O types match OP_REQUIRES(isSameType(output_index, input_index)); + + switch (node.param().op_type) + { + case operation::ElementwiseActivation::Type::ELU: + OP_REQUIRES(isValidType(input_index, DataType::FLOAT32)); + break; + case operation::ElementwiseActivation::Type::LEAKY_RELU: + OP_REQUIRES( + isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, + DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM})); + break; + case operation::ElementwiseActivation::Type::LOGISTIC: + OP_REQUIRES( + isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, + DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM})); + break; + case operation::ElementwiseActivation::Type::RELU: + OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, + DataType::QUANT_INT8_ASYMM})); + break; + case operation::ElementwiseActivation::Type::TANH: + OP_REQUIRES( + isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, + DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM})); + break; + } } void OperationValidator::visit(const operation::ElementwiseBinary &node) @@ -161,6 +261,13 @@ void OperationValidator::visit(const operation::ElementwiseBinary &node) OP_REQUIRES(isSameType(lhs_index, rhs_index)); OP_REQUIRES(isSameType(lhs_index, output_index)); + + const auto op_type = node.param().op_type; + if (op_type == operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND || + op_type == operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR) + { + OP_REQUIRES(isValidType(lhs_index, DataType::BOOL8)); + } } void OperationValidator::visit(const operation::ElementwiseUnary &node) @@ -195,8 +302,17 @@ void OperationValidator::visit(const operation::ElementwiseUnary &node) void OperationValidator::visit(const operation::EmbeddingLookup &node) { const auto lookups_index{node.getInputs().at(operation::EmbeddingLookup::Input::LOOKUPS)}; + const auto values_index{node.getInputs().at(operation::EmbeddingLookup::Input::VALUES)}; + const auto output_index{node.getOutputs().at(0)}; OP_REQUIRES(isValidType(lookups_index, DataType::INT32)); + + // TFLite: Allow hybrid type - value table & output + // NNAPI: Require same value table and output type + OP_REQUIRES( + isSameType(values_index, output_index) || + (isValidType(output_index, DataType::FLOAT32) && + (isValidType(values_index, {DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT8_SYMM})))); } void 
OperationValidator::visit(const operation::ExpandDims &node) @@ -206,7 +322,19 @@ void OperationValidator::visit(const operation::ExpandDims &node) const auto axis_index{node.getInputs().at(operation::ExpandDims::Input::AXIS)}; OP_REQUIRES(isSameType(output_index, input_index)); - OP_REQUIRES(isValidType(axis_index, DataType::INT32)); + OP_REQUIRES(isValidType(axis_index, {DataType::INT32, DataType::INT64})); +} + +void OperationValidator::visit(const operation::Fill &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(operation::Fill::Input::SHAPE)}; + const auto value_index{node.getInputs().at(operation::Fill::Input::VALUE)}; + + OP_REQUIRES(isSameType(output_index, value_index)); + OP_REQUIRES(isValidType(input_index, {DataType::INT32, DataType::INT64})); + OP_REQUIRES(isValidType(output_index, + {DataType::FLOAT32, DataType::INT32, DataType::INT64, DataType::BOOL8})); } void OperationValidator::visit(const operation::HashtableLookup &node) diff --git a/runtime/onert/core/src/ir/OperationValidator.h b/runtime/onert/core/src/ir/OperationValidator.h index 2ea8000..5b95b16 100644 --- a/runtime/onert/core/src/ir/OperationValidator.h +++ b/runtime/onert/core/src/ir/OperationValidator.h @@ -44,10 +44,13 @@ public: public: void visit(const operation::AddN &node) override; + void visit(const operation::ArgMinMax &node) override; void visit(const operation::BatchMatMul &node) override; void visit(const operation::BatchToSpaceND &node) override; void visit(const operation::BinaryArithmetic &node) override; void visit(const operation::Comparison &node) override; + void visit(const operation::Concat &node) override; + void visit(const operation::Conv2D &node) override; void visit(const operation::DepthToSpace &node) override; void visit(const operation::DepthwiseConv2D &node) override; void visit(const operation::ElementwiseActivation &node) override; @@ -55,6 +58,7 @@ public: void visit(const operation::ElementwiseUnary &node) override; void visit(const operation::EmbeddingLookup &node) override; void visit(const operation::ExpandDims &node) override; + void visit(const operation::Fill &node) override; void visit(const operation::HashtableLookup &node) override; void visit(const operation::Pack &node) override; void visit(const operation::Pad &node) override; @@ -76,6 +80,7 @@ private: DataType operandType(const OperandIndex &idx); bool isConstant(const OperandIndex &idx); bool isSameType(const OperandIndex &idx1, const OperandIndex &idx2); + bool isSameQuantParam(const OperandIndex &idx1, const OperandIndex &idx2); bool isValidType(const OperandIndex &idx, const DataType &type); bool isValidType(const OperandIndex &idx, std::initializer_list valid_types); diff --git a/runtime/onert/core/src/ir/operation/ArgMax.cc b/runtime/onert/core/src/ir/operation/ArgMinMax.cc similarity index 78% rename from runtime/onert/core/src/ir/operation/ArgMax.cc rename to runtime/onert/core/src/ir/operation/ArgMinMax.cc index f3bd8fd..989d905 100644 --- a/runtime/onert/core/src/ir/operation/ArgMax.cc +++ b/runtime/onert/core/src/ir/operation/ArgMinMax.cc @@ -14,10 +14,7 @@ * limitations under the License. 
*/ -#include "ir/operation/ArgMax.h" - -#include - +#include "ir/operation/ArgMinMax.h" #include "ir/OperationVisitor.h" namespace onert @@ -27,10 +24,10 @@ namespace ir namespace operation { -void ArgMax::accept(OperationVisitor &v) const { v.visit(*this); } +void ArgMinMax::accept(OperationVisitor &v) const { v.visit(*this); } -ArgMax::ArgMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param ¶m) +ArgMinMax::ArgMinMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m) : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc index 6a0be7e..20b6fa1 100644 --- a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc +++ b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc @@ -57,7 +57,7 @@ std::string ElementwiseUnary::name() const {ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}}, {ElementwiseUnaryType::SIN, std::string{"Sin"}}, {ElementwiseUnaryType::SQRT, std::string{"Sqrt"}}, - {ElementwiseUnaryType::SQURE, std::string{"Squre"}}, + {ElementwiseUnaryType::SQUARE, std::string{"Square"}}, {ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}}; return name_map.at(_param.op_type); } diff --git a/runtime/onert/core/src/util/ConfigSource.cc b/runtime/onert/core/src/util/ConfigSource.cc index 45cce66..9da93f6 100644 --- a/runtime/onert/core/src/util/ConfigSource.cc +++ b/runtime/onert/core/src/util/ConfigSource.cc @@ -30,8 +30,10 @@ namespace util { static std::unique_ptr _source; +static std::unique_ptr _source_ext; void config_source(std::unique_ptr &&source) { _source = std::move(source); } +void config_source_ext(std::unique_ptr &&source) { _source_ext = std::move(source); } static IConfigSource *config_source() { @@ -67,6 +69,15 @@ static std::string getConfigOrDefault(const std::string &key) auto ret = config_source()->get(key); if (ret.empty()) { + // if env is not set, search from external + if (_source_ext.get()) + { + ret = _source_ext.get()->get(key); + } + } + // if not found search from defaults + if (ret.empty()) + { auto itr = defaults.find(key); if (itr != defaults.end()) { diff --git a/runtime/onert/core/src/util/EventCollector.cc b/runtime/onert/core/src/util/EventCollector.cc index de37276..fd56187 100644 --- a/runtime/onert/core/src/util/EventCollector.cc +++ b/runtime/onert/core/src/util/EventCollector.cc @@ -38,15 +38,17 @@ class DurationEventBuilder public: DurationEventBuilder(const std::string &ts) : _ts{ts} {} - DurationEvent build(const std::string &tid, const std::string &name, const std::string &ph) const + DurationEvent build(const EventCollector::Event &evt_collected, const std::string &ph) const { DurationEvent evt; - evt.name = name; - evt.tid = tid; + evt.name = evt_collected.label; + evt.tid = evt_collected.backend; evt.ph = ph; evt.ts = _ts; + evt.args = evt_collected.userData; + return evt; } @@ -93,11 +95,11 @@ void EventCollector::onEvent(const Event &event) switch (event.edge) { case Edge::BEGIN: - _rec->emit(DurationEventBuilder(ts).build(event.backend, event.label, "B")); + _rec->emit(DurationEventBuilder(ts).build(event, "B")); break; case Edge::END: - _rec->emit(DurationEventBuilder(ts).build(event.backend, event.label, "E")); + _rec->emit(DurationEventBuilder(ts).build(event, "E")); break; } diff --git a/runtime/onert/core/src/util/EventCollector.h b/runtime/onert/core/src/util/EventCollector.h index 
8154be5..7daa485 100644 --- a/runtime/onert/core/src/util/EventCollector.h +++ b/runtime/onert/core/src/util/EventCollector.h @@ -19,6 +19,10 @@ #include "util/EventRecorder.h" +#include +#include +#include + class EventCollector { public: @@ -31,8 +35,24 @@ public: struct Event { Edge edge; + uint32_t session_index; + uint32_t subg_index; std::string backend; + uint32_t op_index; + std::string op_name; + uint32_t op_seq_size; // if this event is for an operation sequence of multiple operations + + // TODO Deprecate this. The label can differ by writer, so let the writer decide the label. std::string label; + + // user-defined data: pairs of (key, value) + std::vector> userData; + + Event(Edge a_edge, const std::string &a_backend, const std::string &a_label) + : edge(a_edge), session_index(0), subg_index(0), backend(a_backend), op_index(0), + op_seq_size(0), label(a_label) + { /* empty */ + } }; public: diff --git a/runtime/onert/core/src/util/EventCollectorGlobal.cc b/runtime/onert/core/src/util/EventCollectorGlobal.cc deleted file mode 100644 index 6c03a5b..0000000 --- a/runtime/onert/core/src/util/EventCollectorGlobal.cc +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "util/EventCollectorGlobal.h" - -#include -#include -#include - -#include "util/ConfigSource.h" -#include "util/EventWriter.h" - -namespace onert -{ -namespace util -{ - -EventCollectorGlobal::EventCollectorGlobal() : _recorder{}, _collector{&_recorder} -{ - // DO NOTHING -} - -EventCollectorGlobal::~EventCollectorGlobal() -{ - if (!_recorder.empty()) - { - try - { - // TODO Need better way for saved file path than the hardcoded path - EventWriter{_recorder}.writeToFile("trace.global.json", - EventWriter::WriteFormat::CHROME_TRACING); - } - catch (const std::exception &e) - { - std::cerr << "E: Fail to record event in EventCollectorGlobal: " << e.what() << std::endl; - } - } -} - -EventCollectorGlobal &EventCollectorGlobal::get() -{ - static EventCollectorGlobal instance; - return instance; -} - -EventDurationBlock::EventDurationBlock(const std::string &tag) : _tag{tag} -{ - auto &glob = EventCollectorGlobal::get(); - glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag}); -} -EventDurationBlock::~EventDurationBlock() -{ - auto &glob = EventCollectorGlobal::get(); - glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag}); -} - -EventDurationManual::EventDurationManual(const std::string &tag) : _tag{tag}, _pair{true} {} - -EventDurationManual::~EventDurationManual() -{ - // Check if it has called begin-end pair - assert(_pair); -} - -void EventDurationManual::begin() -{ - _pair = false; - auto &glob = EventCollectorGlobal::get(); - glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag}); -} - -void EventDurationManual::end() -{ - assert(!_pair); - _pair = true; - auto &glob = EventCollectorGlobal::get(); - glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag}); -} - -} // namespace util -} // namespace onert diff --git a/runtime/onert/core/src/util/EventCollectorGlobal.h b/runtime/onert/core/src/util/EventCollectorGlobal.h deleted file mode 100644 index 1027ec8..0000000 --- a/runtime/onert/core/src/util/EventCollectorGlobal.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__ -#define __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__ - -#include "util/EventRecorder.h" -#include "util/EventCollector.h" - -namespace onert -{ -namespace util -{ - -/** - * @brief Singleton class for event collection from anywhere in code - * - */ -class EventCollectorGlobal -{ -public: - /** - * @brief Get the singleton object of this class - * - * @return EventCollectorGlobal& Singleton object - */ - static EventCollectorGlobal &get(); - -public: - /** - * @brief Getter for event collector object - * - * @return EventCollector& Collector object - */ - EventCollector &collector() { return _collector; } - -private: - EventCollectorGlobal(); - ~EventCollectorGlobal(); - -private: - EventRecorder _recorder; - EventCollector _collector; -}; - -/** - * @brief Helper class for emitting duration event which is handled automatically with ctor/dtor - * - */ -class EventDurationBlock -{ -public: - /** - * @brief Raise a duration event with type of BEGIN - * - * @param tag A label for the duration event - */ - EventDurationBlock(const std::string &tag); - /** - * @brief Raise a duration event with type of END - * - */ - ~EventDurationBlock(); - -private: - std::string _tag; -}; - -/** - * @brief Helper class for emitting duration event which is handled manually - * - * Usage: - * { - * ... - * EventDurationManual duration("some tag"); - * duration.begin(); - * ... - * ... // Code for duration - * ... - * duration.end(); - * } - * - */ -class EventDurationManual -{ -public: - /** - * @brief Construct a new Event Duration Manual object - * - * @param tag A label for the duration object - */ - EventDurationManual(const std::string &tag); - /** - * @brief Destroy the Event Duration Manual object - * - */ - ~EventDurationManual(); - - /** - * @brief Raise a duration event with type of BEGIN - * - */ - void begin(); - /** - * @brief Raise a duration event with type of END - * - */ - void end(); - -private: - std::string _tag; - bool _pair; -}; - -} // namespace util -} // namespace onert - -/** - * Helper Macro Definitions - * - * HOW TO USE - * - * void f(args) - * { - * EVENT_DURATION_FUNCTION(); - * ... - * if(cond) - * { - * EVENT_DURATION_REGION("if branch"); - * ... - * } - * ... 
- * } - */ - -#define EVENT_DURATION_FUNCTION() \ - ::onert::util::EventDurationBlock __event_duration__##__LINE__ { __FUNCTION__ } - -#define EVENT_DURATION_REGION(tag) \ - ::onert::util::EventDurationBlock __event_duration__##__LINE__ { tag } - -#endif // __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__ diff --git a/runtime/onert/core/src/util/EventRecorder.h b/runtime/onert/core/src/util/EventRecorder.h index 7af4c7d..3ed4087 100644 --- a/runtime/onert/core/src/util/EventRecorder.h +++ b/runtime/onert/core/src/util/EventRecorder.h @@ -27,8 +27,9 @@ struct Event { std::string name; std::string tid; - std::string ph; /* REQUIRED */ - std::string ts; /* REQUIRED */ + std::string ph; /* REQUIRED */ + std::string ts; /* REQUIRED */ + std::vector> args; // user-defined data: pairs of (key, value) }; struct DurationEvent : public Event diff --git a/runtime/onert/core/src/util/EventWriter.cc b/runtime/onert/core/src/util/EventWriter.cc index dacb40e..8760a16 100644 --- a/runtime/onert/core/src/util/EventWriter.cc +++ b/runtime/onert/core/src/util/EventWriter.cc @@ -89,6 +89,7 @@ void fill(Content &content, const Event &evt) content.flds.emplace_back("tid", evt.tid); content.flds.emplace_back("ph", evt.ph); content.flds.emplace_back("ts", evt.ts); + content.args = evt.args; } std::string object(const DurationEvent &evt) @@ -418,40 +419,7 @@ struct MDTableBuilder } // namespace -EventWriter::EventWriter(const EventRecorder &recorder) : _recorder(recorder) -{ - // DO NOTHING -} - -void EventWriter::writeToFiles(const std::string &base_filepath) -{ - // Note. According to an internal issue, let snpe json as just file name not '.snpe.json' - writeToFile(base_filepath, WriteFormat::SNPE_BENCHMARK); - writeToFile(base_filepath + ".chrome.json", WriteFormat::CHROME_TRACING); - writeToFile(base_filepath + ".table.md", WriteFormat::MD_TABLE); -} - -void EventWriter::writeToFile(const std::string &filepath, WriteFormat write_format) -{ - std::ofstream os{filepath, std::ofstream::out}; - switch (write_format) - { - case WriteFormat::CHROME_TRACING: - writeChromeTrace(os); - break; - case WriteFormat::SNPE_BENCHMARK: - writeSNPEBenchmark(os); - break; - case WriteFormat::MD_TABLE: - writeMDTable(os); - break; - default: - assert(!"Invalid value"); - break; - } -} - -void EventWriter::writeSNPEBenchmark(std::ostream &os) +void SNPEWriter::flush(const std::vector> &recorders) { Json::Value root; auto &exec_data = root["Execution_Data"] = Json::Value{Json::objectValue}; @@ -475,11 +443,14 @@ void EventWriter::writeSNPEBenchmark(std::ostream &os) // Memory { std::unordered_map mem_stats; - for (auto &evt : _recorder.counter_events()) + for (auto &recorder : recorders) { - auto &mem_stat = mem_stats[evt.name]; - uint64_t val = std::stoull(evt.values.at("value")); - mem_stat.accumulate(val); + for (auto &evt : recorder->counter_events()) + { + auto &mem_stat = mem_stats[evt.name]; + uint64_t val = std::stoull(evt.values.at("value")); + mem_stat.accumulate(val); + } } auto &mem = exec_data["memory"] = Json::Value{Json::objectValue}; @@ -501,26 +472,29 @@ void EventWriter::writeSNPEBenchmark(std::ostream &os) // 2D keys : stats[tid][name] std::unordered_map> stats; std::unordered_map> begin_timestamps; - for (auto &evt : _recorder.duration_events()) + for (auto &recorder : recorders) { - auto &stat = stats[evt.tid][evt.name]; - auto &begin_ts = begin_timestamps[evt.tid][evt.name]; - uint64_t timestamp = std::stoull(evt.ts); - if (evt.ph == "B") + for (auto &evt : recorder->duration_events()) { - if (begin_ts != 0) - throw 
std::runtime_error{"Invalid Data"}; - begin_ts = timestamp; - } - else if (evt.ph == "E") - { - if (begin_ts == 0 || timestamp < begin_ts) - throw std::runtime_error{"Invalid Data"}; - stat.accumulate(timestamp - begin_ts); - begin_ts = 0; + auto &stat = stats[evt.tid][evt.name]; + auto &begin_ts = begin_timestamps[evt.tid][evt.name]; + uint64_t timestamp = std::stoull(evt.ts); + if (evt.ph == "B") + { + if (begin_ts != 0) + throw std::runtime_error{"Invalid Data"}; + begin_ts = timestamp; + } + else if (evt.ph == "E") + { + if (begin_ts == 0 || timestamp < begin_ts) + throw std::runtime_error{"Invalid Data"}; + stat.accumulate(timestamp - begin_ts); + begin_ts = 0; + } + else + throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt.ph + "\""}; } - else - throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt.ph + "\""}; } for (auto &kv : begin_timestamps) @@ -545,30 +519,71 @@ void EventWriter::writeSNPEBenchmark(std::ostream &os) } } - os << root; + _os << root; } -void EventWriter::writeChromeTrace(std::ostream &os) +void ChromeTracingWriter::flush(const std::vector> &recorders) { - os << "{\n"; - os << " " << quote("traceEvents") << ": [\n"; + _os << "{\n"; + _os << " " << quote("traceEvents") << ": [\n"; - for (auto &evt : _recorder.duration_events()) + for (auto &recorder : recorders) { - os << " " << object(evt) << ",\n"; + flushOneRecord(*recorder); } - for (auto &evt : _recorder.counter_events()) + _os << " { }\n"; + _os << " ]\n"; + _os << "}\n"; +} + +void ChromeTracingWriter::flushOneRecord(const EventRecorder &recorder) +{ + for (auto &evt : recorder.duration_events()) { - os << " " << object(evt) << ",\n"; + _os << " " << object(evt) << ",\n"; } - os << " { }\n"; - os << " ]\n"; - os << "}\n"; + for (auto &evt : recorder.counter_events()) + { + _os << " " << object(evt) << ",\n"; + } } -void EventWriter::writeMDTable(std::ostream &os) +void MDTableWriter::flush(const std::vector> &records) +{ + for (auto &recorder : records) + { + MDTableBuilder(recorder->duration_events(), recorder->counter_events()).build().write(_os); + } +} + +// initialization +std::mutex EventWriter::_mutex; + +void EventWriter::readyToFlush(std::unique_ptr &&recorder) { - MDTableBuilder(_recorder.duration_events(), _recorder.counter_events()).build().write(os); + { + std::unique_lock lock{_mutex}; + + _recorders.emplace_back(std::move(recorder)); + + if (--_ref_count > 0) + return; + } + // The caller of this method is the last instance that uses EventWriter. + // Let's write log files. + + // Note. 
According to an internal issue, write the SNPE JSON as just the file name, not '.snpe.json' + flush(WriteFormat::SNPE_BENCHMARK); + flush(WriteFormat::CHROME_TRACING); + flush(WriteFormat::MD_TABLE); +} + +void EventWriter::flush(WriteFormat write_format) +{ + auto *writer = _actual_writers[write_format].get(); + assert(writer); + + writer->flush(_recorders); } diff --git a/runtime/onert/core/src/util/EventWriter.h b/runtime/onert/core/src/util/EventWriter.h index 7e838ca..0dcd00b 100644 --- a/runtime/onert/core/src/util/EventWriter.h +++ b/runtime/onert/core/src/util/EventWriter.h @@ -20,7 +20,49 @@ #include "EventRecorder.h" #include -#include +#include +#include +#include +#include + +class EventFormatWriter +{ +public: + EventFormatWriter(const std::string &filepath) : _os{filepath, std::ofstream::out} {} + virtual ~EventFormatWriter() { /* empty */} + + virtual void flush(const std::vector> &) = 0; + +protected: + std::ofstream _os; +}; + +class SNPEWriter : public EventFormatWriter +{ +public: + SNPEWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */} + void flush(const std::vector> &) override; +}; + +class ChromeTracingWriter : public EventFormatWriter +{ +public: + ChromeTracingWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */} + void flush(const std::vector> &) override; + +private: + void flushOneRecord(const EventRecorder &); +}; + +class MDTableWriter : public EventFormatWriter +{ +public: + MDTableWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */} + void flush(const std::vector> &) override; + +private: + void flushOneRecord(const EventRecorder &); +}; class EventWriter { @@ -32,20 +74,58 @@ public: MD_TABLE, }; -public: - EventWriter(const EventRecorder &recorder); + /** + * @brief Returns a singleton object + */ + static EventWriter *get(const std::string &filename) + { + std::unique_lock lock{_mutex}; -public: - void writeToFiles(const std::string &base_filepath); - void writeToFile(const std::string &filepath, WriteFormat write_format); + static EventWriter singleton(filename); + return &singleton; + } + + /** + * @brief Call this when an observer that uses EventWriter starts + */ + void startToUse() + { + std::unique_lock lock{_mutex}; + _ref_count++; + } + + /** + * @brief Call this when an observer that uses EventWriter finishes. + * After all observers have called this method, the reference count eventually reaches 0. + * Then, EventWriter writes the profiling result files. 
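+ *
+ * A minimal usage sketch (added for illustration, not part of the upstream header; the
+ * observer flow and the "trace" base file name below are assumptions, while the EventWriter
+ * and EventRecorder calls are the ones declared in this codebase):
+ *
+ *   auto *writer = EventWriter::get("trace");          // shared writer for one trace base name
+ *   writer->startToUse();                              // each observer bumps the reference count
+ *   auto recorder = std::make_unique<EventRecorder>(); // one recorder per executor/observer
+ *   // ... events are emitted into the recorder while the executor runs ...
+ *   writer->readyToFlush(std::move(recorder));         // the last observer triggers the file writes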
+ */ + void readyToFlush(std::unique_ptr &&recorder); private: - void writeSNPEBenchmark(std::ostream &os); - void writeChromeTrace(std::ostream &os); - void writeMDTable(std::ostream &os); + EventWriter(const std::string &filepath) : _ref_count(0) + { + std::string snpe_log_name(filepath); + std::string chrome_tracing_log_name(filepath + ".chrome.json"); + std::string md_table_log_name(filepath + ".table.md"); + + _actual_writers[WriteFormat::SNPE_BENCHMARK] = std::make_unique(snpe_log_name); + _actual_writers[WriteFormat::CHROME_TRACING] = + std::make_unique(chrome_tracing_log_name); + _actual_writers[WriteFormat::MD_TABLE] = std::make_unique(md_table_log_name); + }; + + void flush(WriteFormat write_format); private: - const EventRecorder &_recorder; + static std::mutex _mutex; + + // number of observer of an executor that want to write profiling data + int32_t _ref_count; + + // one recorder object per executor + std::vector> _recorders; + + std::unordered_map> _actual_writers; }; #endif // __ONERT_UTIL_EVENT_WRITER_H__ diff --git a/runtime/onert/core/src/util/ShapeInference.cc b/runtime/onert/core/src/util/ShapeInference.cc index 1f468a8..3ed3080 100644 --- a/runtime/onert/core/src/util/ShapeInference.cc +++ b/runtime/onert/core/src/util/ShapeInference.cc @@ -128,11 +128,11 @@ ir::Shape inferEltwiseShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_sha return broadcastShapes(lhs_shape, rhs_shape); } -ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank) +ir::Shape inferArgMinMaxShape(const ir::Shape &input_shape, int axis, int rank) { if (axis < 0 || axis >= rank) { - throw std::runtime_error("ArgMax shape inference: Wrong axis value " + std::to_string(axis)); + throw std::runtime_error("ArgMinMax shape inference: Wrong axis value " + std::to_string(axis)); } ir::Shape out_shape; @@ -385,18 +385,22 @@ ir::Shape inferExpandDimsShape(const ir::Shape &in_shape, int32_t axis) return out_shape; } -ir::Shape inferFillShape(const ir::Shape &in_shape, const int32_t *in_buf) +template ir::Shape inferFillShape(const ir::Shape &fill_shape, const T *shape_buf) { - ir::Shape out_shape(in_shape.dim(0)); + ir::Shape out_shape(fill_shape.dim(0)); for (int out_x = 0; out_x < out_shape.rank(); ++out_x) { - out_shape.dim(out_x) = in_buf[out_x]; + out_shape.dim(out_x) = static_cast(shape_buf[out_x]); } return out_shape; } +// template instantiation +template ir::Shape inferFillShape(const ir::Shape &fill_shape, const int32_t *shape_buf); +template ir::Shape inferFillShape(const ir::Shape &fill_shape, const int64_t *shape_buf); + ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape) { assert(in_shape.rank() >= 2); diff --git a/runtime/onert/core/src/util/TracingCtx.cc b/runtime/onert/core/src/util/TracingCtx.cc new file mode 100644 index 0000000..08a1b32 --- /dev/null +++ b/runtime/onert/core/src/util/TracingCtx.cc @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/TracingCtx.h" + +namespace onert +{ +namespace util +{ + +// initializing static member var +std::mutex TracingCtx::_session_id_mutex; + +} // namespace util +} // namespace onert diff --git a/runtime/onert/frontend/.clang-format b/runtime/onert/frontend/.clang-format new file mode 120000 index 0000000..83185fe --- /dev/null +++ b/runtime/onert/frontend/.clang-format @@ -0,0 +1 @@ +../../../.clang-format.8 \ No newline at end of file diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h index c0003e4..f9c97b4 100644 --- a/runtime/onert/frontend/base_loader/include/base_loader.h +++ b/runtime/onert/frontend/base_loader/include/base_loader.h @@ -68,7 +68,7 @@ public: * @param graph reference on subgraphs */ explicit BaseLoader(std::unique_ptr &subgs) - : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _subgraphs(subgs), _model{nullptr} + : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _subgraphs(subgs), _model{nullptr} { _use_mmaped_data = util::getConfigBool(util::config::USE_MMAPED_DATA); } @@ -114,23 +114,19 @@ private: // Operations template const OpIR *loadOperationTo(const Operator *op, ir::Graph &subg, Args &&... args); - void loadConv2D(const Operator *op, ir::Graph &subg); - void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg); - void loadTransposeConv(const Operator *op, ir::Graph &subg); - void loadPool2D(const Operator *op, ir::Graph &subg, ir::operation::Pool2D::PoolType op_type); - void loadReshape(const Operator *op, ir::Graph &subg); - void loadSoftmax(const Operator *op, ir::Graph &subg); - void loadConcatenation(const Operator *op, ir::Graph &subg); - void loadFC(const Operator *op, ir::Graph &subg); + + void loadAddV2(const Operator *op, ir::Graph &subg); + void loadArgMinMax(const Operator *op, ir::Graph &subg, bool is_argmax); + void loadBatchMatMul(const Operator *op, ir::Graph &subg); void loadBinaryArithmetic(const Operator *op, ir::Graph &subg, ir::operation::BinaryArithmetic::ArithmeticType op_type); - void loadAddV2(const Operator *op, ir::Graph &subg); - void loadPack(const Operator *op, ir::Graph &subg); - void loadResizeBilinear(const Operator *op, ir::Graph &subg); - void loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg); - void loadReduce(const Operator *op, ir::Graph &subg, - ir::operation::Reduce::ReduceType reduce_type); - void loadReduceAll(const Operator *op, ir::Graph &subg); + void loadComparison(const Operator *op, ir::Graph &subg); + void loadConcatenation(const Operator *op, ir::Graph &subg); + void loadConv2D(const Operator *op, ir::Graph &subg); + void loadCustom(const Operator *op, ir::Graph &subg); + void loadDepthToSpace(const Operator *op, ir::Graph &subg); + void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg); + void loadEinsum(const Operator *op, ir::Graph &subg); void loadElementwiseActivation(const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type, float alpha = 0.f, float beta = 0.f); @@ -138,25 +134,31 @@ private: ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type); void loadElementwiseUnary(const Operator *op, ir::Graph &subg, ir::operation::ElementwiseUnary::Type op_type); + void loadFC(const Operator *op, ir::Graph &subg); + void loadFusedBatchNorm(const Operator *op, ir::Graph &subg); void loadGather(const Operator *op, ir::Graph &subg); - void 
loadCustom(const Operator *op, ir::Graph &subg); - void loadBatchMatMul(const Operator *op, ir::Graph &subg); - void loadSqueeze(const Operator *op, ir::Graph &subg); + void loadIf(const Operator *op, ir::Graph &subg); + void loadLeakyRelu(const Operator *op, ir::Graph &subg); + void loadLogSoftmax(const Operator *op, ir::Graph &subg); + void loadOneHot(const Operator *op, ir::Graph &subg); + void loadPack(const Operator *op, ir::Graph &subg); + void loadPool2D(const Operator *op, ir::Graph &subg, ir::operation::Pool2D::PoolType op_type); + void loadReduce(const Operator *op, ir::Graph &subg, + ir::operation::Reduce::ReduceType reduce_type); + void loadReduceAll(const Operator *op, ir::Graph &subg); + void loadReshape(const Operator *op, ir::Graph &subg); + void loadResizeBilinear(const Operator *op, ir::Graph &subg); + void loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg); + void loadSoftmax(const Operator *op, ir::Graph &subg); + void loadSpaceToDepth(const Operator *op, ir::Graph &subg); void loadSplit(const Operator *op, ir::Graph &subg); void loadSplitV(const Operator *op, ir::Graph &subg); + void loadSqueeze(const Operator *op, ir::Graph &subg); void loadStridedSlice(const Operator *op, ir::Graph &subg); + void loadTransposeConv(const Operator *op, ir::Graph &subg); + void loadUnidirectionalSequenceLSTM(const Operator *op, ir::Graph &subg); void loadUnpack(const Operator *op, ir::Graph &subg); - void loadComparison(const Operator *op, ir::Graph &subg); - void loadEinsum(const Operator *op, ir::Graph &subg); - void loadOneHot(const Operator *op, ir::Graph &subg); - void loadIf(const Operator *op, ir::Graph &subg); void loadWhile(const Operator *op, ir::Graph &subg); - void loadArgMax(const Operator *op, ir::Graph &subg); - void loadFusedBatchNorm(const Operator *op, ir::Graph &subg); - void loadLogSoftmax(const Operator *op, ir::Graph &subg); - void loadSpaceToDepth(const Operator *op, ir::Graph &subg); - void loadLeakyRelu(const Operator *op, ir::Graph &subg); - void loadUnidirectionalSequenceLSTM(const Operator *op, ir::Graph &subg); void verifySubgraphIndex(int subg_index) { @@ -255,19 +257,26 @@ ir::DataType BaseLoader::BaseLoader::tensorTypeToDataType(const Te { case TensorType::TensorType_FLOAT32: return ir::DataType::FLOAT32; + case TensorType::TensorType_FLOAT16: + return ir::DataType::FLOAT16; case TensorType::TensorType_INT32: return ir::DataType::INT32; - case TensorType::TensorType_BOOL: - return ir::DataType::BOOL8; case TensorType::TensorType_UINT8: return ir::DataType::QUANT_UINT8_ASYMM; - case TensorType::TensorType_INT8: - return ir::DataType::QUANT_INT8_ASYMM; case TensorType::TensorType_INT64: return ir::DataType::INT64; + // case TensorType::TensorType_STRING: + case TensorType::TensorType_BOOL: + return ir::DataType::BOOL8; + case TensorType::TensorType_INT16: + return ir::DataType::QUANT_INT16_ASYMM; + // case TensorType::TensorType_COMPLEX64 + case TensorType::TensorType_INT8: + return ir::DataType::QUANT_INT8_ASYMM; + // case TensorType::TensorType_FLOAT64 default: throw std::runtime_error( - std::string("Unsupported tensor type: ").append(EnumNameTensorType(type))); + std::string("Unsupported tensor type: ").append(EnumNameTensorType(type))); } } @@ -385,7 +394,7 @@ ir::OperandIndex BaseLoader::loadOperand(const Tensor *tensor, ir: { size_t offset = unaligned_offset_start - aligned_offset_start; uint8_t *mmap_base = static_cast( - mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, _fd, aligned_offset_start)); + mmap(NULL, mmap_size, PROT_READ, 
MAP_PRIVATE, _fd, aligned_offset_start)); data_obj = std::make_unique(mmap_base + offset, data_size); munmap(mmap_base, mmap_size); } @@ -446,7 +455,7 @@ void BaseLoader::loadSparsity(const Tensor *tensor, const ir::Shap bool block2D_sparsity = dim_metadata_size == 4 && block_rank == 2; if (dim_metadata_size != !random_sparsity && !block2D_sparsity) throw std::runtime_error( - "sparsity is supported only for 2D tensor with random or 16x1 block sparsity."); + "sparsity is supported only for 2D tensor with random or 16x1 block sparsity."); const auto *src_metadata = src_sparsity->dim_metadata()->Get(0); if (src_metadata->format() != DimensionType::DimensionType_DENSE) @@ -514,8 +523,8 @@ void BaseLoader::loadOperationIO(const Operator *op, ir::OperandIn auto builtin_code = _model->operator_codes()->Get(op->opcode_index())->builtin_code(); if (isOptionalInputTensor(idx) && !allowOptionalInputTensor(builtin_code)) throw std::runtime_error( - std::string("loader doesn't support optional input tensor yet for ") - .append(EnumNameBuiltinOperator(builtin_code))); + std::string("loader doesn't support optional input tensor yet for ") + .append(EnumNameBuiltinOperator(builtin_code))); }; check_optional_input(); inputs.append(tensorIdxToOperandIdx(idx)); @@ -691,9 +700,9 @@ void BaseLoader::loadFC(const Operator *op, ir::Graph &subg) const auto fc = loadOperationTo(op, subg, param); const auto &input_operand = - subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::INPUT)); + subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::INPUT)); auto &weights_operand = - subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::WEIGHT)); + subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::WEIGHT)); if (input_operand.typeInfo().type() == ir::DataType::FLOAT32 && ((weights_operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM) || weights_operand.typeInfo().type() == ir::DataType::QUANT_INT8_ASYMM)) @@ -719,7 +728,7 @@ void BaseLoader::loadAddV2(const Operator *op, ir::Graph &subg) auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size); auto attr_map = data_root.AsMap(); const auto fused_activation_func = static_cast( - attr_map["fused_activation_function"].AsInt8()); + attr_map["fused_activation_function"].AsInt8()); param.activation = convertActivation(fused_activation_func); } @@ -727,8 +736,18 @@ void BaseLoader::loadAddV2(const Operator *op, ir::Graph &subg) } template +void BaseLoader::loadDepthToSpace(const Operator *op, ir::Graph &subg) +{ + ir::operation::DepthToSpace::Param param; + const auto *options = op->builtin_options_as_DepthToSpaceOptions(); + param.block_size = options->block_size(); + + loadOperationTo(op, subg, param); +} + +template void BaseLoader::loadBinaryArithmetic( - const Operator *op, ir::Graph &subg, ir::operation::BinaryArithmetic::ArithmeticType op_type) + const Operator *op, ir::Graph &subg, ir::operation::BinaryArithmetic::ArithmeticType op_type) { ir::operation::BinaryArithmetic::Param param; param.arithmetic_type = op_type; @@ -780,8 +799,8 @@ void BaseLoader::loadPack(const Operator *op, ir::Graph &subg) template void BaseLoader::loadElementwiseActivation( - const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type, - float alpha, float beta) + const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type, + float alpha, float beta) { ir::operation::ElementwiseActivation::Param param; param.op_type = op_type; @@ -844,8 +863,8 @@ void 
BaseLoader::loadReduceAll(const Operator *op, ir::Graph &subg template void BaseLoader::loadElementwiseBinary( - const Operator *op, ir::Graph &subg, - ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type) + const Operator *op, ir::Graph &subg, + ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type) { ir::operation::ElementwiseBinary::Param param; param.op_type = op_type; @@ -870,7 +889,7 @@ void BaseLoader::loadElementwiseUnary(const Operator *op, ir::Grap } }; qasymm8ToUint8( - subg.operands().at(eu->getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT))); + subg.operands().at(eu->getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT))); qasymm8ToUint8(subg.operands().at(eu->getOutputs().at(0))); } } @@ -915,8 +934,8 @@ void BaseLoader::loadBatchMatMul(const Operator *op, ir::Graph &su break; default: throw std::runtime_error( - std::string("Wrong loaded operation: ").append(EnumNameBuiltinOperator(builtin_op)) + - " as " + EnumNameBuiltinOperator(BuiltinOperator::BuiltinOperator_BATCH_MATMUL)); + std::string("Wrong loaded operation: ").append(EnumNameBuiltinOperator(builtin_op)) + + " as " + EnumNameBuiltinOperator(BuiltinOperator::BuiltinOperator_BATCH_MATMUL)); } loadOperationTo(op, subg, param); @@ -959,15 +978,15 @@ void BaseLoader::loadCustom(const Operator *op, ir::Graph &subg) // Mapping from custom op name string to BuiltinOP enum std::map builtin_map = { - {"AddV2", BuiltinOP::AddV2}, - {"All", BuiltinOP::ReduceAll}, - {"MatrixBandPart", BuiltinOP::MatrixBandPart}, - {"BatchMatMulV2", BuiltinOP::BatchMatMul}, - {"Einsum", BuiltinOP::Einsum}, - {"FusedBatchNormV3", BuiltinOP::FusedBatchNorm}, - {"BroadcastTo", BuiltinOP::BroadcastTo}, - {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform}, - {"Erf", BuiltinOP::Erf}, + {"AddV2", BuiltinOP::AddV2}, + {"All", BuiltinOP::ReduceAll}, + {"MatrixBandPart", BuiltinOP::MatrixBandPart}, + {"BatchMatMulV2", BuiltinOP::BatchMatMul}, + {"Einsum", BuiltinOP::Einsum}, + {"FusedBatchNormV3", BuiltinOP::FusedBatchNorm}, + {"BroadcastTo", BuiltinOP::BroadcastTo}, + {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform}, + {"Erf", BuiltinOP::Erf}, }; try @@ -1005,7 +1024,7 @@ void BaseLoader::loadCustom(const Operator *op, ir::Graph &subg) break; default: throw std::runtime_error{ - "Loader: Custom OP map is defined but operation loader function is not defined"}; + "Loader: Custom OP map is defined but operation loader function is not defined"}; } return; @@ -1120,7 +1139,7 @@ void BaseLoader::loadComparison(const Operator *op, ir::Graph &sub break; default: throw std::runtime_error( - std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op))); + std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op))); } loadOperationTo(op, subg, param); @@ -1224,25 +1243,15 @@ void BaseLoader::loadWhile(const Operator *op, ir::Graph &subg) } template -void BaseLoader::loadArgMax(const Operator *op, ir::Graph &subg) +void BaseLoader::loadArgMinMax(const Operator *op, ir::Graph &subg, bool is_argmax) { - ir::operation::ArgMax::Param param; - const auto output_type = op->builtin_options_as_ArgMaxOptions()->output_type(); - switch (output_type) - { - case TensorType::TensorType_INT32: - case TensorType::TensorType_INT64: - param.output_type = tensorTypeToDataType(output_type); - break; - default: - throw std::runtime_error("ArgMax: `output_type` must be either int32 or int64."); - } - auto am = loadOperationTo(op, subg, param); + ir::operation::ArgMinMax::Param 
param; + const auto output_type = is_argmax ? op->builtin_options_as_ArgMaxOptions()->output_type() + : op->builtin_options_as_ArgMinOptions()->output_type(); + param.output_type = tensorTypeToDataType(output_type); + param.is_arg_max = is_argmax; - auto &axisOperand = subg.operands().at(am->getInputs().at(ir::operation::ArgMax::Input::AXIS)); - if (!(axisOperand.operandSize() == 4 && (axisOperand.typeInfo().type() == ir::DataType::INT32 || - axisOperand.typeInfo().type() == ir::DataType::INT64))) - throw std::runtime_error("ArgMax: `axis` with an int32 or int64 element is only supported."); + loadOperationTo(op, subg, param); } template @@ -1287,7 +1296,7 @@ void BaseLoader::loadUnidirectionalSequenceLSTM(const Operator *op { auto builtin_code = _model->operator_codes()->Get(op->opcode_index())->builtin_code(); throw std::runtime_error(std::string("loader doesn't support optional output tensor yet for ") - .append(EnumNameBuiltinOperator(builtin_code))); + .append(EnumNameBuiltinOperator(builtin_code))); } for (size_t i = 0; i < ir::operation::LSTM::Output::OUTPUT; ++i) { @@ -1355,6 +1364,9 @@ void BaseLoader::loadOperation(const Operator *op, ir::Graph &subg case BuiltinOperator::BuiltinOperator_PACK: loadPack(op, subg); return; + case BuiltinOperator::BuiltinOperator_ELU: + loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::ELU); + return; case BuiltinOperator::BuiltinOperator_RELU: loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU, ir::operation::ElementwiseActivation::infinity, 0.f); @@ -1383,6 +1395,9 @@ void BaseLoader::loadOperation(const Operator *op, ir::Graph &subg case BuiltinOperator::BuiltinOperator_SQRT: loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SQRT); return; + case BuiltinOperator::BuiltinOperator_SQUARE: + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SQUARE); + return; case BuiltinOperator::BuiltinOperator_SQUARED_DIFFERENCE: loadOperationTo(op, subg); return; @@ -1499,7 +1514,10 @@ void BaseLoader::loadOperation(const Operator *op, ir::Graph &subg loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::NEG); return; case BuiltinOperator::BuiltinOperator_ARG_MAX: - loadArgMax(op, subg); + loadArgMinMax(op, subg, true); + return; + case BuiltinOperator::BuiltinOperator_ARG_MIN: + loadArgMinMax(op, subg, false); return; case BuiltinOperator::BuiltinOperator_LOG: loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOG); @@ -1513,6 +1531,10 @@ void BaseLoader::loadOperation(const Operator *op, ir::Graph &subg case BuiltinOperator::BuiltinOperator_LOGICAL_NOT: loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOGICAL_NOT); return; + case BuiltinOperator::BuiltinOperator_LOGICAL_AND: + loadElementwiseBinary(op, subg, + ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND); + return; case BuiltinOperator::BuiltinOperator_LOGICAL_OR: loadElementwiseBinary(op, subg, ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR); @@ -1556,9 +1578,12 @@ void BaseLoader::loadOperation(const Operator *op, ir::Graph &subg case BuiltinOperator::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: loadUnidirectionalSequenceLSTM(op, subg); return; + case BuiltinOperator::BuiltinOperator_DEPTH_TO_SPACE: + loadDepthToSpace(op, subg); + return; default: throw std::runtime_error( - std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op))); + std::string("Unsupported operation: 
").append(EnumNameBuiltinOperator(builtin_op))); } } diff --git a/runtime/onert/frontend/circle/src/circle_loader.cc b/runtime/onert/frontend/circle/src/circle_loader.cc index 33e1709..0d7b3ea 100644 --- a/runtime/onert/frontend/circle/src/circle_loader.cc +++ b/runtime/onert/frontend/circle/src/circle_loader.cc @@ -196,7 +196,7 @@ void CircleLoader::loadBCQFullyConnected(const Operator *op, ir::Graph &subg) param.activation = convertActivation(options->fused_activation_function()); std::unique_ptr new_op( - new ir::operation::BCQFullyConnected(inputs, outputs, param)); + new ir::operation::BCQFullyConnected(inputs, outputs, param)); subg.addOperation(std::move(new_op)); } diff --git a/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h b/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h index 0ff1f72..eb17752 100644 --- a/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h +++ b/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h @@ -2155,9 +2155,8 @@ enum ActivationFunctionType inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6] { static const ActivationFunctionType values[] = { - ActivationFunctionType_NONE, ActivationFunctionType_RELU, - ActivationFunctionType_RELU_N1_TO_1, ActivationFunctionType_RELU6, - ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT}; + ActivationFunctionType_NONE, ActivationFunctionType_RELU, ActivationFunctionType_RELU_N1_TO_1, + ActivationFunctionType_RELU6, ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT}; return values; } @@ -2218,9 +2217,8 @@ enum FullyConnectedOptionsWeightsFormat inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[3] { static const FullyConnectedOptionsWeightsFormat values[] = { - FullyConnectedOptionsWeightsFormat_DEFAULT, - FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8, - FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32}; + FullyConnectedOptionsWeightsFormat_DEFAULT, FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8, + FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32}; return values; } @@ -2478,8 +2476,8 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab const circle::CustomQuantization *details_as_CustomQuantization() const { return details_type() == circle::QuantizationDetails_CustomQuantization - ? static_cast(details()) - : nullptr; + ? 
static_cast(details()) + : nullptr; } int32_t quantized_dimension() const { return GetField(VT_QUANTIZED_DIMENSION, 0); } bool Verify(flatbuffers::Verifier &verifier) const @@ -2551,12 +2549,12 @@ struct QuantizationParametersBuilder }; inline flatbuffers::Offset CreateQuantizationParameters( - flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset> min = 0, - flatbuffers::Offset> max = 0, - flatbuffers::Offset> scale = 0, - flatbuffers::Offset> zero_point = 0, - circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE, - flatbuffers::Offset details = 0, int32_t quantized_dimension = 0) + flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset> min = 0, + flatbuffers::Offset> max = 0, + flatbuffers::Offset> scale = 0, + flatbuffers::Offset> zero_point = 0, + circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE, + flatbuffers::Offset details = 0, int32_t quantized_dimension = 0) { QuantizationParametersBuilder builder_(_fbb); builder_.add_quantized_dimension(quantized_dimension); @@ -2570,11 +2568,11 @@ inline flatbuffers::Offset CreateQuantizationParameters( } inline flatbuffers::Offset CreateQuantizationParametersDirect( - flatbuffers::FlatBufferBuilder &_fbb, const std::vector *min = nullptr, - const std::vector *max = nullptr, const std::vector *scale = nullptr, - const std::vector *zero_point = nullptr, - circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE, - flatbuffers::Offset details = 0, int32_t quantized_dimension = 0) + flatbuffers::FlatBufferBuilder &_fbb, const std::vector *min = nullptr, + const std::vector *max = nullptr, const std::vector *scale = nullptr, + const std::vector *zero_point = nullptr, + circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE, + flatbuffers::Offset details = 0, int32_t quantized_dimension = 0) { auto min__ = min ? _fbb.CreateVector(*min) : 0; auto max__ = max ? _fbb.CreateVector(*max) : 0; @@ -2789,20 +2787,20 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const circle::Int32Vector *array_segments_as_Int32Vector() const { return array_segments_type() == circle::SparseIndexVector_Int32Vector - ? static_cast(array_segments()) - : nullptr; + ? static_cast(array_segments()) + : nullptr; } const circle::Uint16Vector *array_segments_as_Uint16Vector() const { return array_segments_type() == circle::SparseIndexVector_Uint16Vector - ? static_cast(array_segments()) - : nullptr; + ? static_cast(array_segments()) + : nullptr; } const circle::Uint8Vector *array_segments_as_Uint8Vector() const { return array_segments_type() == circle::SparseIndexVector_Uint8Vector - ? static_cast(array_segments()) - : nullptr; + ? static_cast(array_segments()) + : nullptr; } circle::SparseIndexVector array_indices_type() const { @@ -2813,20 +2811,20 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const circle::Int32Vector *array_indices_as_Int32Vector() const { return array_indices_type() == circle::SparseIndexVector_Int32Vector - ? static_cast(array_indices()) - : nullptr; + ? static_cast(array_indices()) + : nullptr; } const circle::Uint16Vector *array_indices_as_Uint16Vector() const { return array_indices_type() == circle::SparseIndexVector_Uint16Vector - ? static_cast(array_indices()) - : nullptr; + ? static_cast(array_indices()) + : nullptr; } const circle::Uint8Vector *array_indices_as_Uint8Vector() const { return array_indices_type() == circle::SparseIndexVector_Uint8Vector - ? 
static_cast(array_indices()) - : nullptr; + ? static_cast(array_indices()) + : nullptr; } bool Verify(flatbuffers::Verifier &verifier) const { @@ -2924,12 +2922,12 @@ struct DimensionMetadataBuilder }; inline flatbuffers::Offset CreateDimensionMetadata( - flatbuffers::FlatBufferBuilder &_fbb, - circle::DimensionType format = circle::DimensionType_DENSE, int32_t dense_size = 0, - circle::SparseIndexVector array_segments_type = circle::SparseIndexVector_NONE, - flatbuffers::Offset array_segments = 0, - circle::SparseIndexVector array_indices_type = circle::SparseIndexVector_NONE, - flatbuffers::Offset array_indices = 0) + flatbuffers::FlatBufferBuilder &_fbb, circle::DimensionType format = circle::DimensionType_DENSE, + int32_t dense_size = 0, + circle::SparseIndexVector array_segments_type = circle::SparseIndexVector_NONE, + flatbuffers::Offset array_segments = 0, + circle::SparseIndexVector array_indices_type = circle::SparseIndexVector_NONE, + flatbuffers::Offset array_indices = 0) { DimensionMetadataBuilder builder_(_fbb); builder_.add_array_indices(array_indices); @@ -2961,7 +2959,7 @@ struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const flatbuffers::Vector> *dim_metadata() const { return GetPointer> *>( - VT_DIM_METADATA); + VT_DIM_METADATA); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -2987,8 +2985,8 @@ struct SparsityParametersBuilder fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map); } void add_dim_metadata( - flatbuffers::Offset>> - dim_metadata) + flatbuffers::Offset>> + dim_metadata) { fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata); } @@ -3006,11 +3004,11 @@ struct SparsityParametersBuilder }; inline flatbuffers::Offset CreateSparsityParameters( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset> traversal_order = 0, - flatbuffers::Offset> block_map = 0, - flatbuffers::Offset>> - dim_metadata = 0) + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset> traversal_order = 0, + flatbuffers::Offset> block_map = 0, + flatbuffers::Offset>> + dim_metadata = 0) { SparsityParametersBuilder builder_(_fbb); builder_.add_dim_metadata(dim_metadata); @@ -3020,16 +3018,15 @@ inline flatbuffers::Offset CreateSparsityParameters( } inline flatbuffers::Offset CreateSparsityParametersDirect( - flatbuffers::FlatBufferBuilder &_fbb, const std::vector *traversal_order = nullptr, - const std::vector *block_map = nullptr, - const std::vector> *dim_metadata = nullptr) + flatbuffers::FlatBufferBuilder &_fbb, const std::vector *traversal_order = nullptr, + const std::vector *block_map = nullptr, + const std::vector> *dim_metadata = nullptr) { auto traversal_order__ = traversal_order ? _fbb.CreateVector(*traversal_order) : 0; auto block_map__ = block_map ? _fbb.CreateVector(*block_map) : 0; auto dim_metadata__ = - dim_metadata - ? _fbb.CreateVector>(*dim_metadata) - : 0; + dim_metadata ? 
_fbb.CreateVector>(*dim_metadata) + : 0; return circle::CreateSparsityParameters(_fbb, traversal_order__, block_map__, dim_metadata__); } @@ -3155,12 +3152,11 @@ CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, } inline flatbuffers::Offset CreateTensorDirect( - flatbuffers::FlatBufferBuilder &_fbb, const std::vector *shape = nullptr, - circle::TensorType type = circle::TensorType_FLOAT32, uint32_t buffer = 0, - const char *name = nullptr, - flatbuffers::Offset quantization = 0, bool is_variable = false, - flatbuffers::Offset sparsity = 0, - const std::vector *shape_signature = nullptr) + flatbuffers::FlatBufferBuilder &_fbb, const std::vector *shape = nullptr, + circle::TensorType type = circle::TensorType_FLOAT32, uint32_t buffer = 0, + const char *name = nullptr, flatbuffers::Offset quantization = 0, + bool is_variable = false, flatbuffers::Offset sparsity = 0, + const std::vector *shape_signature = nullptr) { auto shape__ = shape ? _fbb.CreateVector(*shape) : 0; auto name__ = name ? _fbb.CreateString(name) : 0; @@ -3190,7 +3186,7 @@ struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } int32_t dilation_w_factor() const { return GetField(VT_DILATION_W_FACTOR, 1); } int32_t dilation_h_factor() const { return GetField(VT_DILATION_H_FACTOR, 1); } @@ -3249,10 +3245,10 @@ struct Conv2DOptionsBuilder }; inline flatbuffers::Offset CreateConv2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME, - int32_t stride_w = 0, int32_t stride_h = 0, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) + flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME, + int32_t stride_w = 0, int32_t stride_h = 0, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) { Conv2DOptionsBuilder builder_(_fbb); builder_.add_dilation_h_factor(dilation_h_factor); @@ -3287,7 +3283,7 @@ struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -3344,9 +3340,9 @@ struct Pool2DOptionsBuilder }; inline flatbuffers::Offset CreatePool2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME, - int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, int32_t filter_height = 0, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME, + int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, int32_t filter_height = 0, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { Pool2DOptionsBuilder builder_(_fbb); builder_.add_filter_height(filter_height); @@ -3381,7 +3377,7 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab circle::ActivationFunctionType fused_activation_function() const { return static_cast( - 
GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } int32_t dilation_w_factor() const { return GetField(VT_DILATION_W_FACTOR, 1); } int32_t dilation_h_factor() const { return GetField(VT_DILATION_H_FACTOR, 1); } @@ -3445,10 +3441,10 @@ struct DepthwiseConv2DOptionsBuilder }; inline flatbuffers::Offset CreateDepthwiseConv2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME, - int32_t stride_w = 0, int32_t stride_h = 0, int32_t depth_multiplier = 0, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) + flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME, + int32_t stride_w = 0, int32_t stride_h = 0, int32_t depth_multiplier = 0, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) { DepthwiseConv2DOptionsBuilder builder_(_fbb); builder_.add_dilation_h_factor(dilation_h_factor); @@ -3499,12 +3495,12 @@ struct ConcatEmbeddingsOptionsBuilder fbb_.AddElement(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0); } void add_num_columns_per_channel( - flatbuffers::Offset> num_columns_per_channel) + flatbuffers::Offset> num_columns_per_channel) { fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel); } void add_embedding_dim_per_channel( - flatbuffers::Offset> embedding_dim_per_channel) + flatbuffers::Offset> embedding_dim_per_channel) { fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL, embedding_dim_per_channel); @@ -3523,9 +3519,9 @@ struct ConcatEmbeddingsOptionsBuilder }; inline flatbuffers::Offset CreateConcatEmbeddingsOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0, - flatbuffers::Offset> num_columns_per_channel = 0, - flatbuffers::Offset> embedding_dim_per_channel = 0) + flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0, + flatbuffers::Offset> num_columns_per_channel = 0, + flatbuffers::Offset> embedding_dim_per_channel = 0) { ConcatEmbeddingsOptionsBuilder builder_(_fbb); builder_.add_embedding_dim_per_channel(embedding_dim_per_channel); @@ -3540,9 +3536,9 @@ CreateConcatEmbeddingsOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, int32_ const std::vector *embedding_dim_per_channel = nullptr) { auto num_columns_per_channel__ = - num_columns_per_channel ? _fbb.CreateVector(*num_columns_per_channel) : 0; + num_columns_per_channel ? _fbb.CreateVector(*num_columns_per_channel) : 0; auto embedding_dim_per_channel__ = - embedding_dim_per_channel ? _fbb.CreateVector(*embedding_dim_per_channel) : 0; + embedding_dim_per_channel ? 
_fbb.CreateVector(*embedding_dim_per_channel) : 0; return circle::CreateConcatEmbeddingsOptions(_fbb, num_channels, num_columns_per_channel__, embedding_dim_per_channel__); } @@ -3609,7 +3605,7 @@ struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool asymmetric_quantize_inputs() const { @@ -3653,9 +3649,9 @@ struct SVDFOptionsBuilder }; inline flatbuffers::Offset CreateSVDFOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + bool asymmetric_quantize_inputs = false) { SVDFOptionsBuilder builder_(_fbb); builder_.add_rank(rank); @@ -3675,7 +3671,7 @@ struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool asymmetric_quantize_inputs() const { @@ -3718,9 +3714,9 @@ struct RNNOptionsBuilder }; inline flatbuffers::Offset CreateRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + bool asymmetric_quantize_inputs = false) { RNNOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -3741,7 +3737,7 @@ struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool asymmetric_quantize_inputs() const { @@ -3789,9 +3785,9 @@ struct SequenceRNNOptionsBuilder }; inline flatbuffers::Offset CreateSequenceRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + bool asymmetric_quantize_inputs = false) { SequenceRNNOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -3814,7 +3810,7 @@ struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuf circle::ActivationFunctionType fused_activation_function() const { return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool merge_outputs() const { return GetField(VT_MERGE_OUTPUTS, 0) != 0; } bool asymmetric_quantize_inputs() const @@ -3869,9 +3865,9 @@ struct BidirectionalSequenceRNNOptionsBuilder }; inline flatbuffers::Offset CreateBidirectionalSequenceRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, - circle::ActivationFunctionType fused_activation_function = 
circle::ActivationFunctionType_NONE, - bool merge_outputs = false, bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + bool merge_outputs = false, bool asymmetric_quantize_inputs = false) { BidirectionalSequenceRNNOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -3894,12 +3890,12 @@ struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl circle::ActivationFunctionType fused_activation_function() const { return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } circle::FullyConnectedOptionsWeightsFormat weights_format() const { return static_cast( - GetField(VT_WEIGHTS_FORMAT, 0)); + GetField(VT_WEIGHTS_FORMAT, 0)); } bool keep_num_dims() const { return GetField(VT_KEEP_NUM_DIMS, 0) != 0; } bool asymmetric_quantize_inputs() const @@ -3955,11 +3951,11 @@ struct FullyConnectedOptionsBuilder }; inline flatbuffers::Offset CreateFullyConnectedOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - circle::FullyConnectedOptionsWeightsFormat weights_format = - circle::FullyConnectedOptionsWeightsFormat_DEFAULT, - bool keep_num_dims = false, bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + circle::FullyConnectedOptionsWeightsFormat weights_format = + circle::FullyConnectedOptionsWeightsFormat_DEFAULT, + bool keep_num_dims = false, bool asymmetric_quantize_inputs = false) { FullyConnectedOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -4023,7 +4019,7 @@ struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -4057,8 +4053,8 @@ struct ConcatenationOptionsBuilder }; inline flatbuffers::Offset CreateConcatenationOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { ConcatenationOptionsBuilder builder_(_fbb); builder_.add_axis(axis); @@ -4076,7 +4072,7 @@ struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -4109,8 +4105,8 @@ struct AddOptionsBuilder }; inline flatbuffers::Offset CreateAddOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { AddOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); @@ 
-4127,7 +4123,7 @@ struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -4160,8 +4156,8 @@ struct MulOptionsBuilder }; inline flatbuffers::Offset CreateMulOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { MulOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); @@ -4178,7 +4174,7 @@ struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -4211,8 +4207,8 @@ struct L2NormOptionsBuilder }; inline flatbuffers::Offset CreateL2NormOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { L2NormOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); @@ -4263,7 +4259,7 @@ struct LocalResponseNormalizationOptionsBuilder fbb_.AddElement(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f); } explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } @@ -4303,7 +4299,7 @@ struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } float cell_clip() const { return GetField(VT_CELL_CLIP, 0.0f); } float proj_clip() const { return GetField(VT_PROJ_CLIP, 0.0f); } @@ -4367,11 +4363,11 @@ struct LSTMOptionsBuilder }; inline flatbuffers::Offset CreateLSTMOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - float cell_clip = 0.0f, float proj_clip = 0.0f, - circle::LSTMKernelType kernel_type = circle::LSTMKernelType_FULL, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, + circle::LSTMKernelType kernel_type = circle::LSTMKernelType_FULL, + bool asymmetric_quantize_inputs = false) { LSTMOptionsBuilder builder_(_fbb); builder_.add_proj_clip(proj_clip); @@ -4396,7 +4392,7 @@ struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatb circle::ActivationFunctionType fused_activation_function() const { return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } float cell_clip() const { return GetField(VT_CELL_CLIP, 0.0f); } float proj_clip() const { return GetField(VT_PROJ_CLIP, 0.0f); } @@ -4445,7 +4441,7 @@ struct 
UnidirectionalSequenceLSTMOptionsBuilder static_cast(asymmetric_quantize_inputs), 0); } explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } @@ -4461,10 +4457,10 @@ struct UnidirectionalSequenceLSTMOptionsBuilder inline flatbuffers::Offset CreateUnidirectionalSequenceLSTMOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false, + bool asymmetric_quantize_inputs = false) { UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); builder_.add_proj_clip(proj_clip); @@ -4490,7 +4486,7 @@ struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbu circle::ActivationFunctionType fused_activation_function() const { return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } float cell_clip() const { return GetField(VT_CELL_CLIP, 0.0f); } float proj_clip() const { return GetField(VT_PROJ_CLIP, 0.0f); } @@ -4546,7 +4542,7 @@ struct BidirectionalSequenceLSTMOptionsBuilder static_cast(asymmetric_quantize_inputs), 0); } explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } @@ -4561,10 +4557,10 @@ struct BidirectionalSequenceLSTMOptionsBuilder }; inline flatbuffers::Offset CreateBidirectionalSequenceLSTMOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false, - bool time_major = true, bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false, + bool time_major = true, bool asymmetric_quantize_inputs = false) { BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); builder_.add_proj_clip(proj_clip); @@ -5075,7 +5071,7 @@ struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -5108,8 +5104,8 @@ struct SubOptionsBuilder }; inline flatbuffers::Offset CreateSubOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { SubOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); @@ -5126,7 +5122,7 @@ struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool 
Verify(flatbuffers::Verifier &verifier) const { @@ -5159,8 +5155,8 @@ struct DivOptionsBuilder }; inline flatbuffers::Offset CreateDivOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { DivOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); @@ -7976,7 +7972,7 @@ struct BCQFullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::T circle::ActivationFunctionType fused_activation_function() const { return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -8014,8 +8010,8 @@ struct BCQFullyConnectedOptionsBuilder }; inline flatbuffers::Offset CreateBCQFullyConnectedOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t weights_hidden_size = 0, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, int32_t weights_hidden_size = 0, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { BCQFullyConnectedOptionsBuilder builder_(_fbb); builder_.add_weights_hidden_size(weights_hidden_size); @@ -8035,7 +8031,7 @@ struct InstanceNormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -8072,8 +8068,8 @@ struct InstanceNormOptionsBuilder }; inline flatbuffers::Offset CreateInstanceNormOptions( - flatbuffers::FlatBufferBuilder &_fbb, float epsilon = 0.0f, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, float epsilon = 0.0f, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { InstanceNormOptionsBuilder builder_(_fbb); builder_.add_epsilon(epsilon); @@ -8191,632 +8187,632 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const circle::Conv2DOptions *builtin_options_as_Conv2DOptions() const { return builtin_options_type() == circle::BuiltinOptions_Conv2DOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const { return builtin_options_type() == circle::BuiltinOptions_DepthwiseConv2DOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const { return builtin_options_type() == circle::BuiltinOptions_ConcatEmbeddingsOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const { return builtin_options_type() == circle::BuiltinOptions_LSHProjectionOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::Pool2DOptions *builtin_options_as_Pool2DOptions() const { return builtin_options_type() == circle::BuiltinOptions_Pool2DOptions - ? 
static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::SVDFOptions *builtin_options_as_SVDFOptions() const { return builtin_options_type() == circle::BuiltinOptions_SVDFOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::RNNOptions *builtin_options_as_RNNOptions() const { return builtin_options_type() == circle::BuiltinOptions_RNNOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const { return builtin_options_type() == circle::BuiltinOptions_FullyConnectedOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::SoftmaxOptions *builtin_options_as_SoftmaxOptions() const { return builtin_options_type() == circle::BuiltinOptions_SoftmaxOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::ConcatenationOptions *builtin_options_as_ConcatenationOptions() const { return builtin_options_type() == circle::BuiltinOptions_ConcatenationOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::AddOptions *builtin_options_as_AddOptions() const { return builtin_options_type() == circle::BuiltinOptions_AddOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::L2NormOptions *builtin_options_as_L2NormOptions() const { return builtin_options_type() == circle::BuiltinOptions_L2NormOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::LocalResponseNormalizationOptions * builtin_options_as_LocalResponseNormalizationOptions() const { return builtin_options_type() == circle::BuiltinOptions_LocalResponseNormalizationOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::LSTMOptions *builtin_options_as_LSTMOptions() const { return builtin_options_type() == circle::BuiltinOptions_LSTMOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const { return builtin_options_type() == circle::BuiltinOptions_ResizeBilinearOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::CallOptions *builtin_options_as_CallOptions() const { return builtin_options_type() == circle::BuiltinOptions_CallOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::ReshapeOptions *builtin_options_as_ReshapeOptions() const { return builtin_options_type() == circle::BuiltinOptions_ReshapeOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::SkipGramOptions *builtin_options_as_SkipGramOptions() const { return builtin_options_type() == circle::BuiltinOptions_SkipGramOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const { return builtin_options_type() == circle::BuiltinOptions_SpaceToDepthOptions - ? static_cast(builtin_options()) - : nullptr; + ? 
static_cast(builtin_options()) + : nullptr; } const circle::EmbeddingLookupSparseOptions * builtin_options_as_EmbeddingLookupSparseOptions() const { return builtin_options_type() == circle::BuiltinOptions_EmbeddingLookupSparseOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::MulOptions *builtin_options_as_MulOptions() const { return builtin_options_type() == circle::BuiltinOptions_MulOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::PadOptions *builtin_options_as_PadOptions() const { return builtin_options_type() == circle::BuiltinOptions_PadOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::GatherOptions *builtin_options_as_GatherOptions() const { return builtin_options_type() == circle::BuiltinOptions_GatherOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const { return builtin_options_type() == circle::BuiltinOptions_BatchToSpaceNDOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const { return builtin_options_type() == circle::BuiltinOptions_SpaceToBatchNDOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::TransposeOptions *builtin_options_as_TransposeOptions() const { return builtin_options_type() == circle::BuiltinOptions_TransposeOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::ReducerOptions *builtin_options_as_ReducerOptions() const { return builtin_options_type() == circle::BuiltinOptions_ReducerOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::SubOptions *builtin_options_as_SubOptions() const { return builtin_options_type() == circle::BuiltinOptions_SubOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::DivOptions *builtin_options_as_DivOptions() const { return builtin_options_type() == circle::BuiltinOptions_DivOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::SqueezeOptions *builtin_options_as_SqueezeOptions() const { return builtin_options_type() == circle::BuiltinOptions_SqueezeOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const { return builtin_options_type() == circle::BuiltinOptions_SequenceRNNOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::StridedSliceOptions *builtin_options_as_StridedSliceOptions() const { return builtin_options_type() == circle::BuiltinOptions_StridedSliceOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::ExpOptions *builtin_options_as_ExpOptions() const { return builtin_options_type() == circle::BuiltinOptions_ExpOptions - ? static_cast(builtin_options()) - : nullptr; + ? 
static_cast(builtin_options()) + : nullptr; } const circle::TopKV2Options *builtin_options_as_TopKV2Options() const { return builtin_options_type() == circle::BuiltinOptions_TopKV2Options - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::SplitOptions *builtin_options_as_SplitOptions() const { return builtin_options_type() == circle::BuiltinOptions_SplitOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const { return builtin_options_type() == circle::BuiltinOptions_LogSoftmaxOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::CastOptions *builtin_options_as_CastOptions() const { return builtin_options_type() == circle::BuiltinOptions_CastOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::DequantizeOptions *builtin_options_as_DequantizeOptions() const { return builtin_options_type() == circle::BuiltinOptions_DequantizeOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const { return builtin_options_type() == circle::BuiltinOptions_MaximumMinimumOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::ArgMaxOptions *builtin_options_as_ArgMaxOptions() const { return builtin_options_type() == circle::BuiltinOptions_ArgMaxOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::LessOptions *builtin_options_as_LessOptions() const { return builtin_options_type() == circle::BuiltinOptions_LessOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::NegOptions *builtin_options_as_NegOptions() const { return builtin_options_type() == circle::BuiltinOptions_NegOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::PadV2Options *builtin_options_as_PadV2Options() const { return builtin_options_type() == circle::BuiltinOptions_PadV2Options - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::GreaterOptions *builtin_options_as_GreaterOptions() const { return builtin_options_type() == circle::BuiltinOptions_GreaterOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const { return builtin_options_type() == circle::BuiltinOptions_GreaterEqualOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::LessEqualOptions *builtin_options_as_LessEqualOptions() const { return builtin_options_type() == circle::BuiltinOptions_LessEqualOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::SelectOptions *builtin_options_as_SelectOptions() const { return builtin_options_type() == circle::BuiltinOptions_SelectOptions - ? static_cast(builtin_options()) - : nullptr; + ? 
static_cast(builtin_options()) + : nullptr; } const circle::SliceOptions *builtin_options_as_SliceOptions() const { return builtin_options_type() == circle::BuiltinOptions_SliceOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::TransposeConvOptions *builtin_options_as_TransposeConvOptions() const { return builtin_options_type() == circle::BuiltinOptions_TransposeConvOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const { return builtin_options_type() == circle::BuiltinOptions_SparseToDenseOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::TileOptions *builtin_options_as_TileOptions() const { return builtin_options_type() == circle::BuiltinOptions_TileOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const { return builtin_options_type() == circle::BuiltinOptions_ExpandDimsOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::EqualOptions *builtin_options_as_EqualOptions() const { return builtin_options_type() == circle::BuiltinOptions_EqualOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::NotEqualOptions *builtin_options_as_NotEqualOptions() const { return builtin_options_type() == circle::BuiltinOptions_NotEqualOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::ShapeOptions *builtin_options_as_ShapeOptions() const { return builtin_options_type() == circle::BuiltinOptions_ShapeOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::PowOptions *builtin_options_as_PowOptions() const { return builtin_options_type() == circle::BuiltinOptions_PowOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::ArgMinOptions *builtin_options_as_ArgMinOptions() const { return builtin_options_type() == circle::BuiltinOptions_ArgMinOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::FakeQuantOptions *builtin_options_as_FakeQuantOptions() const { return builtin_options_type() == circle::BuiltinOptions_FakeQuantOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::PackOptions *builtin_options_as_PackOptions() const { return builtin_options_type() == circle::BuiltinOptions_PackOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::LogicalOrOptions *builtin_options_as_LogicalOrOptions() const { return builtin_options_type() == circle::BuiltinOptions_LogicalOrOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::OneHotOptions *builtin_options_as_OneHotOptions() const { return builtin_options_type() == circle::BuiltinOptions_OneHotOptions - ? static_cast(builtin_options()) - : nullptr; + ? 
static_cast(builtin_options()) + : nullptr; } const circle::LogicalAndOptions *builtin_options_as_LogicalAndOptions() const { return builtin_options_type() == circle::BuiltinOptions_LogicalAndOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::LogicalNotOptions *builtin_options_as_LogicalNotOptions() const { return builtin_options_type() == circle::BuiltinOptions_LogicalNotOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::UnpackOptions *builtin_options_as_UnpackOptions() const { return builtin_options_type() == circle::BuiltinOptions_UnpackOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::FloorDivOptions *builtin_options_as_FloorDivOptions() const { return builtin_options_type() == circle::BuiltinOptions_FloorDivOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::SquareOptions *builtin_options_as_SquareOptions() const { return builtin_options_type() == circle::BuiltinOptions_SquareOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const { return builtin_options_type() == circle::BuiltinOptions_ZerosLikeOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::FillOptions *builtin_options_as_FillOptions() const { return builtin_options_type() == circle::BuiltinOptions_FillOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::BidirectionalSequenceLSTMOptions * builtin_options_as_BidirectionalSequenceLSTMOptions() const { return builtin_options_type() == circle::BuiltinOptions_BidirectionalSequenceLSTMOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::BidirectionalSequenceRNNOptions * builtin_options_as_BidirectionalSequenceRNNOptions() const { return builtin_options_type() == circle::BuiltinOptions_BidirectionalSequenceRNNOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::UnidirectionalSequenceLSTMOptions * builtin_options_as_UnidirectionalSequenceLSTMOptions() const { return builtin_options_type() == circle::BuiltinOptions_UnidirectionalSequenceLSTMOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::FloorModOptions *builtin_options_as_FloorModOptions() const { return builtin_options_type() == circle::BuiltinOptions_FloorModOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::RangeOptions *builtin_options_as_RangeOptions() const { return builtin_options_type() == circle::BuiltinOptions_RangeOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::ResizeNearestNeighborOptions * builtin_options_as_ResizeNearestNeighborOptions() const { return builtin_options_type() == circle::BuiltinOptions_ResizeNearestNeighborOptions - ? static_cast(builtin_options()) - : nullptr; + ? 
static_cast(builtin_options()) + : nullptr; } const circle::LeakyReluOptions *builtin_options_as_LeakyReluOptions() const { return builtin_options_type() == circle::BuiltinOptions_LeakyReluOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const { return builtin_options_type() == circle::BuiltinOptions_SquaredDifferenceOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::MirrorPadOptions *builtin_options_as_MirrorPadOptions() const { return builtin_options_type() == circle::BuiltinOptions_MirrorPadOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::AbsOptions *builtin_options_as_AbsOptions() const { return builtin_options_type() == circle::BuiltinOptions_AbsOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::SplitVOptions *builtin_options_as_SplitVOptions() const { return builtin_options_type() == circle::BuiltinOptions_SplitVOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::UniqueOptions *builtin_options_as_UniqueOptions() const { return builtin_options_type() == circle::BuiltinOptions_UniqueOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::ReverseV2Options *builtin_options_as_ReverseV2Options() const { return builtin_options_type() == circle::BuiltinOptions_ReverseV2Options - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::AddNOptions *builtin_options_as_AddNOptions() const { return builtin_options_type() == circle::BuiltinOptions_AddNOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::GatherNdOptions *builtin_options_as_GatherNdOptions() const { return builtin_options_type() == circle::BuiltinOptions_GatherNdOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::CosOptions *builtin_options_as_CosOptions() const { return builtin_options_type() == circle::BuiltinOptions_CosOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::WhereOptions *builtin_options_as_WhereOptions() const { return builtin_options_type() == circle::BuiltinOptions_WhereOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::RankOptions *builtin_options_as_RankOptions() const { return builtin_options_type() == circle::BuiltinOptions_RankOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const { return builtin_options_type() == circle::BuiltinOptions_ReverseSequenceOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const { return builtin_options_type() == circle::BuiltinOptions_MatrixDiagOptions - ? static_cast(builtin_options()) - : nullptr; + ? 
static_cast(builtin_options()) + : nullptr; } const circle::QuantizeOptions *builtin_options_as_QuantizeOptions() const { return builtin_options_type() == circle::BuiltinOptions_QuantizeOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const { return builtin_options_type() == circle::BuiltinOptions_MatrixSetDiagOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::HardSwishOptions *builtin_options_as_HardSwishOptions() const { return builtin_options_type() == circle::BuiltinOptions_HardSwishOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::IfOptions *builtin_options_as_IfOptions() const { return builtin_options_type() == circle::BuiltinOptions_IfOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::WhileOptions *builtin_options_as_WhileOptions() const { return builtin_options_type() == circle::BuiltinOptions_WhileOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const { return builtin_options_type() == circle::BuiltinOptions_DepthToSpaceOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const { return builtin_options_type() == circle::BuiltinOptions_NonMaxSuppressionV4Options - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const { return builtin_options_type() == circle::BuiltinOptions_NonMaxSuppressionV5Options - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::ScatterNdOptions *builtin_options_as_ScatterNdOptions() const { return builtin_options_type() == circle::BuiltinOptions_ScatterNdOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::SelectV2Options *builtin_options_as_SelectV2Options() const { return builtin_options_type() == circle::BuiltinOptions_SelectV2Options - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::DensifyOptions *builtin_options_as_DensifyOptions() const { return builtin_options_type() == circle::BuiltinOptions_DensifyOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::SegmentSumOptions *builtin_options_as_SegmentSumOptions() const { return builtin_options_type() == circle::BuiltinOptions_SegmentSumOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const { return builtin_options_type() == circle::BuiltinOptions_BatchMatMulOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::BCQGatherOptions *builtin_options_as_BCQGatherOptions() const { return builtin_options_type() == circle::BuiltinOptions_BCQGatherOptions - ? static_cast(builtin_options()) - : nullptr; + ? 
static_cast(builtin_options()) + : nullptr; } const circle::BCQFullyConnectedOptions *builtin_options_as_BCQFullyConnectedOptions() const { return builtin_options_type() == circle::BuiltinOptions_BCQFullyConnectedOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const circle::InstanceNormOptions *builtin_options_as_InstanceNormOptions() const { return builtin_options_type() == circle::BuiltinOptions_InstanceNormOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const flatbuffers::Vector *custom_options() const { @@ -9558,7 +9554,7 @@ struct OperatorBuilder static_cast(custom_options_format), 0); } void add_mutating_variable_inputs( - flatbuffers::Offset> mutating_variable_inputs) + flatbuffers::Offset> mutating_variable_inputs) { fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs); } @@ -9580,15 +9576,15 @@ struct OperatorBuilder }; inline flatbuffers::Offset CreateOperator( - flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, - flatbuffers::Offset> inputs = 0, - flatbuffers::Offset> outputs = 0, - circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE, - flatbuffers::Offset builtin_options = 0, - flatbuffers::Offset> custom_options = 0, - circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS, - flatbuffers::Offset> mutating_variable_inputs = 0, - flatbuffers::Offset> intermediates = 0) + flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, + flatbuffers::Offset> inputs = 0, + flatbuffers::Offset> outputs = 0, + circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE, + flatbuffers::Offset builtin_options = 0, + flatbuffers::Offset> custom_options = 0, + circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS, + flatbuffers::Offset> mutating_variable_inputs = 0, + flatbuffers::Offset> intermediates = 0) { OperatorBuilder builder_(_fbb); builder_.add_intermediates(intermediates); @@ -9604,20 +9600,20 @@ inline flatbuffers::Offset CreateOperator( } inline flatbuffers::Offset CreateOperatorDirect( - flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, - const std::vector *inputs = nullptr, const std::vector *outputs = nullptr, - circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE, - flatbuffers::Offset builtin_options = 0, - const std::vector *custom_options = nullptr, - circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS, - const std::vector *mutating_variable_inputs = nullptr, - const std::vector *intermediates = nullptr) + flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, + const std::vector *inputs = nullptr, const std::vector *outputs = nullptr, + circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE, + flatbuffers::Offset builtin_options = 0, + const std::vector *custom_options = nullptr, + circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS, + const std::vector *mutating_variable_inputs = nullptr, + const std::vector *intermediates = nullptr) { auto inputs__ = inputs ? _fbb.CreateVector(*inputs) : 0; auto outputs__ = outputs ? _fbb.CreateVector(*outputs) : 0; auto custom_options__ = custom_options ? _fbb.CreateVector(*custom_options) : 0; auto mutating_variable_inputs__ = - mutating_variable_inputs ? 
_fbb.CreateVector(*mutating_variable_inputs) : 0; + mutating_variable_inputs ? _fbb.CreateVector(*mutating_variable_inputs) : 0; auto intermediates__ = intermediates ? _fbb.CreateVector(*intermediates) : 0; return circle::CreateOperator(_fbb, opcode_index, inputs__, outputs__, builtin_options_type, builtin_options, custom_options__, custom_options_format, @@ -9651,7 +9647,7 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const flatbuffers::Vector> *operators() const { return GetPointer> *>( - VT_OPERATORS); + VT_OPERATORS); } const flatbuffers::String *name() const { @@ -9693,7 +9689,7 @@ struct SubGraphBuilder fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs); } void add_operators( - flatbuffers::Offset>> operators) + flatbuffers::Offset>> operators) { fbb_.AddOffset(SubGraph::VT_OPERATORS, operators); } @@ -9719,13 +9715,13 @@ struct SubGraphBuilder }; inline flatbuffers::Offset CreateSubGraph( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset>> tensors = 0, - flatbuffers::Offset> inputs = 0, - flatbuffers::Offset> outputs = 0, - flatbuffers::Offset>> operators = 0, - flatbuffers::Offset name = 0, - circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST) + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset>> tensors = 0, + flatbuffers::Offset> inputs = 0, + flatbuffers::Offset> outputs = 0, + flatbuffers::Offset>> operators = 0, + flatbuffers::Offset name = 0, + circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST) { SubGraphBuilder builder_(_fbb); builder_.add_name(name); @@ -9738,17 +9734,17 @@ inline flatbuffers::Offset CreateSubGraph( } inline flatbuffers::Offset CreateSubGraphDirect( - flatbuffers::FlatBufferBuilder &_fbb, - const std::vector> *tensors = nullptr, - const std::vector *inputs = nullptr, const std::vector *outputs = nullptr, - const std::vector> *operators = nullptr, - const char *name = nullptr, circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST) + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector> *tensors = nullptr, + const std::vector *inputs = nullptr, const std::vector *outputs = nullptr, + const std::vector> *operators = nullptr, + const char *name = nullptr, circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST) { auto tensors__ = tensors ? _fbb.CreateVector>(*tensors) : 0; auto inputs__ = inputs ? _fbb.CreateVector(*inputs) : 0; auto outputs__ = outputs ? _fbb.CreateVector(*outputs) : 0; auto operators__ = - operators ? _fbb.CreateVector>(*operators) : 0; + operators ? _fbb.CreateVector>(*operators) : 0; auto name__ = name ? 
_fbb.CreateString(name) : 0; return circle::CreateSubGraph(_fbb, tensors__, inputs__, outputs__, operators__, name__, data_format); @@ -9893,12 +9889,12 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const flatbuffers::Vector> *operator_codes() const { return GetPointer> *>( - VT_OPERATOR_CODES); + VT_OPERATOR_CODES); } const flatbuffers::Vector> *subgraphs() const { return GetPointer> *>( - VT_SUBGRAPHS); + VT_SUBGRAPHS); } const flatbuffers::String *description() const { @@ -9915,7 +9911,7 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const flatbuffers::Vector> *metadata() const { return GetPointer> *>( - VT_METADATA); + VT_METADATA); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -9939,13 +9935,13 @@ struct ModelBuilder flatbuffers::uoffset_t start_; void add_version(uint32_t version) { fbb_.AddElement(Model::VT_VERSION, version, 0); } void add_operator_codes( - flatbuffers::Offset>> - operator_codes) + flatbuffers::Offset>> + operator_codes) { fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes); } void add_subgraphs( - flatbuffers::Offset>> subgraphs) + flatbuffers::Offset>> subgraphs) { fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs); } @@ -9963,7 +9959,7 @@ struct ModelBuilder fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer); } void add_metadata( - flatbuffers::Offset>> metadata) + flatbuffers::Offset>> metadata) { fbb_.AddOffset(Model::VT_METADATA, metadata); } @@ -9981,14 +9977,14 @@ struct ModelBuilder }; inline flatbuffers::Offset CreateModel( - flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, - flatbuffers::Offset>> - operator_codes = 0, - flatbuffers::Offset>> subgraphs = 0, - flatbuffers::Offset description = 0, - flatbuffers::Offset>> buffers = 0, - flatbuffers::Offset> metadata_buffer = 0, - flatbuffers::Offset>> metadata = 0) + flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, + flatbuffers::Offset>> + operator_codes = 0, + flatbuffers::Offset>> subgraphs = 0, + flatbuffers::Offset description = 0, + flatbuffers::Offset>> buffers = 0, + flatbuffers::Offset> metadata_buffer = 0, + flatbuffers::Offset>> metadata = 0) { ModelBuilder builder_(_fbb); builder_.add_metadata(metadata); @@ -10002,24 +9998,24 @@ inline flatbuffers::Offset CreateModel( } inline flatbuffers::Offset CreateModelDirect( - flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, - const std::vector> *operator_codes = nullptr, - const std::vector> *subgraphs = nullptr, - const char *description = nullptr, - const std::vector> *buffers = nullptr, - const std::vector *metadata_buffer = nullptr, - const std::vector> *metadata = nullptr) + flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, + const std::vector> *operator_codes = nullptr, + const std::vector> *subgraphs = nullptr, + const char *description = nullptr, + const std::vector> *buffers = nullptr, + const std::vector *metadata_buffer = nullptr, + const std::vector> *metadata = nullptr) { auto operator_codes__ = - operator_codes ? _fbb.CreateVector>(*operator_codes) - : 0; + operator_codes ? _fbb.CreateVector>(*operator_codes) + : 0; auto subgraphs__ = - subgraphs ? _fbb.CreateVector>(*subgraphs) : 0; + subgraphs ? _fbb.CreateVector>(*subgraphs) : 0; auto description__ = description ? _fbb.CreateString(description) : 0; auto buffers__ = buffers ? _fbb.CreateVector>(*buffers) : 0; auto metadata_buffer__ = metadata_buffer ? _fbb.CreateVector(*metadata_buffer) : 0; auto metadata__ = - metadata ? _fbb.CreateVector>(*metadata) : 0; + metadata ? 
_fbb.CreateVector>(*metadata) : 0; return circle::CreateModel(_fbb, version, operator_codes__, subgraphs__, description__, buffers__, metadata_buffer__, metadata__); } diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc index 81cd38f..63036a3 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc @@ -20,7 +20,9 @@ // TODO Support multiple subgraphs ANeuralNetworksCompilation::ANeuralNetworksCompilation(const ANeuralNetworksModel *model) noexcept - : _subgraphs{model->getSubGraphs()}, _compiler{new onert::compiler::Compiler{_subgraphs}} + : _subgraphs{model->getSubGraphs()}, _tracing_ctx{std::make_unique( + _subgraphs.get())}, + _compiler{new onert::compiler::Compiler{_subgraphs, _tracing_ctx.get()}} { if (model->allowedToFp16()) { diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h index 5f0650b..bd61f9d 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h @@ -23,6 +23,7 @@ #include "ir/Graph.h" #include "ir/Subgraphs.h" #include "exec/IExecutor.h" +#include "util/TracingCtx.h" struct ANeuralNetworksCompilation { @@ -40,6 +41,14 @@ public: private: std::shared_ptr _subgraphs; + // TODO Refine the ownership of TracingCtx + // In case of nnfw API, nnfw_session has ownership of TracingCtx. + // In case of nnapi, there is no concept of session and primary model might have the ownership + // of TracingCtx. + // Since we don't support multiple models yet with nnapi in ONE, let's implement this later + // and let's make it work with one model for now. + std::unique_ptr _tracing_ctx; + std::shared_ptr _compiler; std::shared_ptr _executors; }; diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc index 2bea729..b0ea519 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc @@ -20,7 +20,7 @@ #include "util/logging.h" ANeuralNetworksEvent::ANeuralNetworksEvent(const std::shared_ptr &execution) - : _execution{execution} + : _execution{execution} { // DO NOTHING } diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc index 6114b74..21c7cdd 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc @@ -140,8 +140,8 @@ bool ANeuralNetworksExecution::setInput(uint32_t index, const ANeuralNetworksOpe const auto type_info = _execution->primary_subgraph().operands().at(operand_index).typeInfo(); const auto shape = (type != nullptr) - ? NNAPIConvert::getShape(type) - : _execution->primary_subgraph().operands().at(operand_index).shape(); + ? NNAPIConvert::getShape(type) + : _execution->primary_subgraph().operands().at(operand_index).shape(); // NOTE The nnapi does not provide setting io_layout and not support changing layout. 
In other // words, we can assume that io_layout from nnapi always is the same as layout of the used @@ -173,8 +173,8 @@ bool ANeuralNetworksExecution::setOptionalInput(uint32_t index, const auto type_info = _execution->primary_subgraph().operands().at(operand_index).typeInfo(); const auto shape = (type != nullptr) - ? NNAPIConvert::getShape(type) - : _execution->primary_subgraph().operands().at(operand_index).shape(); + ? NNAPIConvert::getShape(type) + : _execution->primary_subgraph().operands().at(operand_index).shape(); // ANeuralNetworksExecution::setInput() uses only shape information ANeuralNetworksOperandType optional_input_type; @@ -208,8 +208,8 @@ bool ANeuralNetworksExecution::setOutput(uint32_t index, const ANeuralNetworksOp const auto type_info = _execution->primary_subgraph().operands().at(operand_index).typeInfo(); const auto shape = (type != nullptr) - ? NNAPIConvert::getShape(type) - : _execution->primary_subgraph().operands().at(operand_index).shape(); + ? NNAPIConvert::getShape(type) + : _execution->primary_subgraph().operands().at(operand_index).shape(); // NOTE The nnapi does not provide setting io_layout and not support changing layout. In other // words, we can assume that io_layout from nnapi always is the same as layout of the used diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h index 1f4b868..70c5d2a 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h @@ -27,7 +27,7 @@ struct ANeuralNetworksExecution { public: ANeuralNetworksExecution(const std::shared_ptr &executors) - : _execution{std::make_shared(executors)} + : _execution{std::make_shared(executors)} { // DO NOTHING } diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc index 97b820a..3e2bea1 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc @@ -27,7 +27,7 @@ // ANeuralNetworksModel // ANeuralNetworksModel::ANeuralNetworksModel() noexcept - : _optional_operands{}, _operand_usages{}, _allowFloat32toFloat16{false} + : _optional_operands{}, _operand_usages{}, _allowFloat32toFloat16{false} { _graph = std::make_shared(); } @@ -72,12 +72,12 @@ bool ANeuralNetworksModel::setOperandValue(uint32_t index, const void *buffer, s if (copy) { _graph->operands().at(ind).data( - std::make_unique(reinterpret_cast(buffer), length)); + std::make_unique(reinterpret_cast(buffer), length)); } else { _graph->operands().at(ind).data( - std::make_unique(reinterpret_cast(buffer), length)); + std::make_unique(reinterpret_cast(buffer), length)); } } catch (const std::exception &e) @@ -111,9 +111,9 @@ bool ANeuralNetworksModel::addOperation(ANeuralNetworksOperationType type, uint3 if (type == ANEURALNETWORKS_FULLY_CONNECTED) { const auto &input_operand = - _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::INPUT)); + _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::INPUT)); auto &weights_operand = - _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::WEIGHT)); + _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::WEIGHT)); if (input_operand.typeInfo().type() == onert::ir::DataType::FLOAT32 && weights_operand.typeInfo().type() == 
onert::ir::DataType::QUANT_UINT8_ASYMM) { diff --git a/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc b/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc index 63d4e3c..94b8f02 100644 --- a/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc +++ b/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc @@ -39,6 +39,13 @@ DataType NNAPIConvert::getDataType(OperandCode type) case ANEURALNETWORKS_BOOL: case ANEURALNETWORKS_TENSOR_BOOL8: return DataType::BOOL8; + case ANEURALNETWORKS_TENSOR_FLOAT16: + case ANEURALNETWORKS_FLOAT16: + return DataType::FLOAT16; + case ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL: + return DataType::QUANT_INT8_SYMM_PER_CHANNEL; + case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED: + return DataType::QUANT_INT8_ASYMM; default: throw std::runtime_error("Unsupported type"); } diff --git a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc index a84ce1b..9ecb7d1 100644 --- a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc +++ b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc @@ -107,7 +107,7 @@ getElementwiseActivationGenerator(const onert::ir::operation::ElementwiseActivat } OperationFactory::Generator getElementwiseBinaryGenerator( - const onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type) + const onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type) { return [op_type](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 2); @@ -182,7 +182,7 @@ getBinaryArithmeticGenerator(const onert::ir::operation::BinaryArithmetic::Arith param.arithmetic_type = op_type; const auto activation_index = OperandIndex{init_param.inputs[2]}; param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar()); return new operation::BinaryArithmetic{inputs, outputs, param}; }; @@ -221,12 +221,12 @@ getPool2DGenerator(const onert::ir::operation::Pool2D::PoolType pool_type) const auto activation_index = OperandIndex{init_param.inputs[6]}; param.padding.type = - NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar()); + NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar()); param.stride = makeStride(operands, hstride_index, vstride_index); param.kw = getUint32Scalar(operands, kw_index); param.kh = operands.at(kh_index).asScalar(); param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar()); } else // support explicit padding { @@ -259,7 +259,7 @@ getPool2DGenerator(const onert::ir::operation::Pool2D::PoolType pool_type) param.kw = getUint32Scalar(operands, kw_index); param.kh = getUint32Scalar(operands, kh_index); param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar()); } return new operation::Pool2D{inputs, outputs, param}; }; @@ -382,11 +382,11 @@ OperationFactory::OperationFactory() const auto activation_index = OperandIndex{init_param.inputs[7]}; param.padding.type = - NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar()); + NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar()); param.stride = makeStride(operands, hstride_index, vstride_index); param.multiplier = getUint32Scalar(operands, multiplier_index); param.activation = - 
NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar()); } else { @@ -417,7 +417,7 @@ OperationFactory::OperationFactory() param.stride = makeStride(operands, hstride_index, vstride_index); param.multiplier = getUint32Scalar(operands, multiplier_index); param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar()); } // TODO set dilation @@ -490,7 +490,7 @@ OperationFactory::OperationFactory() operation::FullyConnected::Param param; const auto activation_index = OperandIndex{init_param.inputs[3]}; param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar()); param.weights_format = FullyConnectedWeightsFormat::Default; return new operation::FullyConnected{inputs, outputs, param}; @@ -517,7 +517,7 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_CAST] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST); // ANEURALNETWORKS_CAST_EX is deprecated // TODO Remove ANEURALNETWORKS_CAST_EX @@ -557,14 +557,14 @@ OperationFactory::OperationFactory() const auto activation_index = OperandIndex{init_param.inputs[6]}; param.padding.type = - NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar()); + NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar()); param.stride = makeStride(operands, hstride_index, vstride_index); param.dilation.width_factor = 1; param.dilation.height_factor = 1; param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar()); } else if (init_param.input_count == 10) // support explicit padding { @@ -595,7 +595,7 @@ OperationFactory::OperationFactory() param.dilation.height_factor = 1; param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar()); } else if (init_param.input_count == 13) // support dilation { @@ -633,7 +633,7 @@ OperationFactory::OperationFactory() param.dilation.height_factor = height_factor; param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar()); } else { @@ -644,19 +644,19 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_ADD] = - getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::ADD); + getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::ADD); _map[ANEURALNETWORKS_ADDV2_EX] = _map[ANEURALNETWORKS_ADD]; _map[ANEURALNETWORKS_REDUCE_SUM] = - getReduceGenerator(onert::ir::operation::Reduce::ReduceType::SUM); + getReduceGenerator(onert::ir::operation::Reduce::ReduceType::SUM); // ANEURALNETWORKS_REDUCE_SUM_EX is deprecated // TODO Remove ANEURALNETWORKS_REDUCE_SUM_EX _map[ANEURALNETWORKS_REDUCE_SUM_EX] = _map[ANEURALNETWORKS_REDUCE_SUM]; _map[ANEURALNETWORKS_SUB] = - getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB); + getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB); _map[ANEURALNETWORKS_SLICE] = [](const 
OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 3 && init_param.output_count == 1); @@ -708,7 +708,7 @@ OperationFactory::OperationFactory() param.begin_mask = operands.at(OperandIndex{init_param.inputs[4]}).asScalar(); param.end_mask = operands.at(OperandIndex{init_param.inputs[5]}).asScalar(); param.shrink_axis_mask = - operands.at(OperandIndex{init_param.inputs[6]}).asScalar(); + operands.at(OperandIndex{init_param.inputs[6]}).asScalar(); return new operation::StridedSlice{inputs, outputs, param}; }; @@ -716,7 +716,7 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_TRANSPOSE] = createSimpleBinaryOp; _map[ANEURALNETWORKS_MUL] = - getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::MUL); + getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::MUL); _map[ANEURALNETWORKS_SQUEEZE] = [](const OperationFactory::Param &init_param, Operands &operands) { @@ -758,15 +758,15 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_TANH] = getElementwiseActivationGenerator( - onert::ir::operation::ElementwiseActivation::Type::TANH, 1.f, 1.f); + onert::ir::operation::ElementwiseActivation::Type::TANH, 1.f, 1.f); _map[ANEURALNETWORKS_LOG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOG); - _map[ANEURALNETWORKS_LOGISTIC] = getElementwiseActivationGenerator( - onert::ir::operation::ElementwiseActivation::Type::LOGISTIC); + _map[ANEURALNETWORKS_LOGISTIC] = + getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::LOGISTIC); _map[ANEURALNETWORKS_DIV] = - getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::DIV); + getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::DIV); _map[ANEURALNETWORKS_EXP] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::EXP); @@ -780,16 +780,16 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_EXPAND_DIMS] = createSimpleBinaryOp; _map[ANEURALNETWORKS_GREATER] = - getComparisonGenerator(operation::Comparison::ComparisonType::Greater); + getComparisonGenerator(operation::Comparison::ComparisonType::Greater); _map[ANEURALNETWORKS_GREATER_EQUAL] = - getComparisonGenerator(operation::Comparison::ComparisonType::GreaterEqual); + getComparisonGenerator(operation::Comparison::ComparisonType::GreaterEqual); _map[ANEURALNETWORKS_LESS] = getComparisonGenerator(operation::Comparison::ComparisonType::Less); _map[ANEURALNETWORKS_LESS_EQUAL] = - getComparisonGenerator(operation::Comparison::ComparisonType::LessEqual); + getComparisonGenerator(operation::Comparison::ComparisonType::LessEqual); _map[ANEURALNETWORKS_NOT_EQUAL] = - getComparisonGenerator(operation::Comparison::ComparisonType::NotEqual); + getComparisonGenerator(operation::Comparison::ComparisonType::NotEqual); _map[ANEURALNETWORKS_EQUAL] = - getComparisonGenerator(operation::Comparison::ComparisonType::Equal); + getComparisonGenerator(operation::Comparison::ComparisonType::Equal); // ANEURALNETWORKS_GREATER_EQUAL_EX is deprecated // TODO Remove ANEURALNETWORKS_GREATER_EQUAL_EX @@ -838,13 +838,13 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_REDUCE_ALL] = - getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ALL); + getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ALL); _map[ANEURALNETWORKS_REDUCE_ANY] = - getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ANY); + 
getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ANY); _map[ANEURALNETWORKS_REDUCE_MAX] = - getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MAX); + getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MAX); // ANEURALNETWORKS_REDUCE_MAX_EX is deprecated // TODO Remove ANEURALNETWORKS_REDUCE_MAX_EX @@ -873,8 +873,8 @@ OperationFactory::OperationFactory() return new operation::Comparison{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_LOGICAL_AND] = getElementwiseBinaryGenerator( - operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND); + _map[ANEURALNETWORKS_LOGICAL_AND] = + getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND); // ANEURALNETWORKS_LOGICAL_AND_EX is deprecated // TODO Remove ANEURALNETWORKS_LOGICAL_AND_EX @@ -902,7 +902,7 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_RSQRT] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT); _map[ANEURALNETWORKS_SELECT] = [](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 3 && init_param.output_count == 1); @@ -939,8 +939,8 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_RSQRT_EX] = _map[ANEURALNETWORKS_RSQRT]; _map[ANEURALNETWORKS_RELU] = - getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU, - onert::ir::operation::ElementwiseActivation::infinity, 0); + getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU, + onert::ir::operation::ElementwiseActivation::infinity, 0); _map[ANEURALNETWORKS_RESIZE_BILINEAR] = [](const OperationFactory::Param &init_param, Operands &operands) { @@ -986,10 +986,10 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_RELU1] = getElementwiseActivationGenerator( - onert::ir::operation::ElementwiseActivation::Type::RELU, 1.f, -1.f); + onert::ir::operation::ElementwiseActivation::Type::RELU, 1.f, -1.f); _map[ANEURALNETWORKS_RELU6] = getElementwiseActivationGenerator( - onert::ir::operation::ElementwiseActivation::Type::RELU, 6.f, 0.f); + onert::ir::operation::ElementwiseActivation::Type::RELU, 6.f, 0.f); _map[ANEURALNETWORKS_REVERSE_EX] = [](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 2 && init_param.output_count == 1); @@ -1031,13 +1031,13 @@ OperationFactory::OperationFactory() operation::RNN::Param param; const auto activation_index = OperandIndex{init_param.inputs[5]}; param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar()); return new operation::RNN{inputs, outputs, param}; }; _map[ANEURALNETWORKS_FLOOR] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::FLOOR); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::FLOOR); _map[ANEURALNETWORKS_SPACE_TO_BATCH_ND] = [](const OperationFactory::Param &init_param, Operands &) { @@ -1169,21 +1169,21 @@ OperationFactory::OperationFactory() const auto vstride_index = OperandIndex{init_param.inputs[5]}; param.padding.type = - NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar()); + NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar()); param.stride = makeStride(operands, hstride_index, vstride_index); return new operation::TransposeConv{inputs, outputs, param}; }; _map[ANEURALNETWORKS_SQRT] = - 
getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT); // ANEURALNETWORKS_SQRT_EX is deprecated // TODO Remove ANEURALNETWORKS_SQRT_EX _map[ANEURALNETWORKS_SQRT_EX] = _map[ANEURALNETWORKS_SQRT]; - _map[ANEURALNETWORKS_LOGICAL_OR] = getElementwiseBinaryGenerator( - operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR); + _map[ANEURALNETWORKS_LOGICAL_OR] = + getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR); // ANEURALNETWORKS_LOGICAL_OR_EX is deprecated // TODO Remove ANEURALNETWORKS_LOGICAL_OR_EX @@ -1211,7 +1211,7 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_LOGICAL_NOT] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT); // ANEURALNETWORKS_LOGICAL_NOT_EX is deprecated // TODO Remove ANEURALNETWORKS_LOGICAL_NOT_EX @@ -1370,9 +1370,9 @@ OperationFactory::OperationFactory() // 2 -> Cell State Out Tensor Index const OperandIndex scratch_buffer_index; OperandIndex output_state_index = - init_param.output_count >= 2 ? OperandIndex{init_param.outputs[1]} : OperandIndex(); + init_param.output_count >= 2 ? OperandIndex{init_param.outputs[1]} : OperandIndex(); OperandIndex cell_state_index = - init_param.output_count >= 3 ? OperandIndex{init_param.outputs[2]} : OperandIndex(); + init_param.output_count >= 3 ? OperandIndex{init_param.outputs[2]} : OperandIndex(); const OperandIndex output_index = OperandIndex{init_param.outputs[0]}; OperandIndexSequence outputs{scratch_buffer_index, output_state_index, cell_state_index, output_index}; @@ -1519,19 +1519,39 @@ OperationFactory::OperationFactory() // 1 -> Axis Tensor Index OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - operation::ArgMax::Param param; + operation::ArgMinMax::Param param; // NNAPI ARGMAX output type is always int32 param.output_type = DataType::INT32; + param.is_arg_max = true; - return new operation::ArgMax{inputs, outputs, param}; + return new operation::ArgMinMax{inputs, outputs, param}; }; // ANEURALNETWORKS_ARGMAX_EX is deprecated // TODO Remove ANEURALNETWORKS_ARGMAX_EX _map[ANEURALNETWORKS_ARGMAX_EX] = _map[ANEURALNETWORKS_ARGMAX]; + _map[ANEURALNETWORKS_ARGMIN] = [](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 2 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Axis Tensor Index + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + + operation::ArgMinMax::Param param; + // NNAPI ARGMIN output type is always int32 + param.output_type = DataType::INT32; + param.is_arg_max = false; + + return new operation::ArgMinMax{inputs, outputs, param}; + }; + _map[ANEURALNETWORKS_DEQUANTIZE] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::DEQUANTIZE); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::DEQUANTIZE); _map[ANEURALNETWORKS_MEAN] = [](const OperationFactory::Param &init_param, Operands &operands) { assert(init_param.input_count == 3 && init_param.output_count == 1); @@ -1608,7 +1628,7 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_REDUCE_MIN] = - getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MIN); + getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MIN); 
// ANEURALNETWORKS_REDUCE_MIN_EX is deprecated // TODO Remove ANEURALNETWORKS_REDUCE_MIN_EX @@ -1689,10 +1709,10 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_PAD_V2] = _map[ANEURALNETWORKS_PAD]; _map[ANEURALNETWORKS_MINIMUM] = - getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MIN); + getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MIN); _map[ANEURALNETWORKS_MAXIMUM] = - getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MAX); + getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MAX); _map[ANEURALNETWORKS_ONE_HOT_EX] = [](const OperationFactory::Param &init_param, Operands &operands) { @@ -1719,7 +1739,7 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_COS_EX] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::COS); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::COS); _map[ANEURALNETWORKS_SIN] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SIN); @@ -1733,10 +1753,10 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_REDUCE_PROD] = - getReduceGenerator(onert::ir::operation::Reduce::ReduceType::PROD); + getReduceGenerator(onert::ir::operation::Reduce::ReduceType::PROD); _map[ANEURALNETWORKS_ROUND_EX] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ROUND); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ROUND); _map[ANEURALNETWORKS_RANGE_EX] = [](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 3 && init_param.output_count == 1); @@ -1764,7 +1784,7 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_FILL_EX] = createSimpleBinaryOp; _map[ANEURALNETWORKS_ZEROS_LIKE_EX] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ZEROS_LIKE); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ZEROS_LIKE); // Each input should be interpreted as follows: // 0 -> Input Tensor Index // 1 -> Multiple Tensor Index @@ -1904,7 +1924,7 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_QUANTIZE] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::QUANTIZE); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::QUANTIZE); } Operation *OperationFactory::create(ANeuralNetworksOperationType type, diff --git a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h index 367cf74..74e1874 100644 --- a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h +++ b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h @@ -40,7 +40,7 @@ public: public: using Generator = - std::function; + std::function; public: static OperationFactory &get(); diff --git a/runtime/onert/frontend/tflite/src/tflite_schema_generated.h b/runtime/onert/frontend/tflite/src/tflite_schema_generated.h index c6e9147..8e1b84e 100644 --- a/runtime/onert/frontend/tflite/src/tflite_schema_generated.h +++ b/runtime/onert/frontend/tflite/src/tflite_schema_generated.h @@ -1710,9 +1710,8 @@ enum ActivationFunctionType inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6] { static const ActivationFunctionType values[] = { - ActivationFunctionType_NONE, ActivationFunctionType_RELU, - ActivationFunctionType_RELU_N1_TO_1, ActivationFunctionType_RELU6, - ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT}; + ActivationFunctionType_NONE, ActivationFunctionType_RELU, 
ActivationFunctionType_RELU_N1_TO_1, + ActivationFunctionType_RELU6, ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT}; return values; } @@ -1768,8 +1767,8 @@ enum FullyConnectedOptionsWeightsFormat inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[2] { static const FullyConnectedOptionsWeightsFormat values[] = { - FullyConnectedOptionsWeightsFormat_DEFAULT, - FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8}; + FullyConnectedOptionsWeightsFormat_DEFAULT, + FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8}; return values; } @@ -1981,8 +1980,8 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab const CustomQuantization *details_as_CustomQuantization() const { return details_type() == QuantizationDetails_CustomQuantization - ? static_cast(details()) - : nullptr; + ? static_cast(details()) + : nullptr; } int32_t quantized_dimension() const { return GetField(VT_QUANTIZED_DIMENSION, 0); } bool Verify(flatbuffers::Verifier &verifier) const @@ -2072,17 +2071,17 @@ CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb, } inline flatbuffers::Offset CreateQuantizationParametersDirect( - flatbuffers::FlatBufferBuilder &_fbb, const std::vector *min = nullptr, - const std::vector *max = nullptr, const std::vector *scale = nullptr, - const std::vector *zero_point = nullptr, - QuantizationDetails details_type = QuantizationDetails_NONE, - flatbuffers::Offset details = 0, int32_t quantized_dimension = 0) + flatbuffers::FlatBufferBuilder &_fbb, const std::vector *min = nullptr, + const std::vector *max = nullptr, const std::vector *scale = nullptr, + const std::vector *zero_point = nullptr, + QuantizationDetails details_type = QuantizationDetails_NONE, + flatbuffers::Offset details = 0, int32_t quantized_dimension = 0) { return onert_tflite::CreateQuantizationParameters( - _fbb, min ? _fbb.CreateVector(*min) : 0, max ? _fbb.CreateVector(*max) : 0, - scale ? _fbb.CreateVector(*scale) : 0, - zero_point ? _fbb.CreateVector(*zero_point) : 0, details_type, details, - quantized_dimension); + _fbb, min ? _fbb.CreateVector(*min) : 0, max ? _fbb.CreateVector(*max) : 0, + scale ? _fbb.CreateVector(*scale) : 0, + zero_point ? _fbb.CreateVector(*zero_point) : 0, details_type, details, + quantized_dimension); } struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -2272,20 +2271,20 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const Int32Vector *array_segments_as_Int32Vector() const { return array_segments_type() == SparseIndexVector_Int32Vector - ? static_cast(array_segments()) - : nullptr; + ? static_cast(array_segments()) + : nullptr; } const Uint16Vector *array_segments_as_Uint16Vector() const { return array_segments_type() == SparseIndexVector_Uint16Vector - ? static_cast(array_segments()) - : nullptr; + ? static_cast(array_segments()) + : nullptr; } const Uint8Vector *array_segments_as_Uint8Vector() const { return array_segments_type() == SparseIndexVector_Uint8Vector - ? static_cast(array_segments()) - : nullptr; + ? static_cast(array_segments()) + : nullptr; } SparseIndexVector array_indices_type() const { @@ -2296,20 +2295,20 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const Int32Vector *array_indices_as_Int32Vector() const { return array_indices_type() == SparseIndexVector_Int32Vector - ? static_cast(array_indices()) - : nullptr; + ? 
static_cast(array_indices()) + : nullptr; } const Uint16Vector *array_indices_as_Uint16Vector() const { return array_indices_type() == SparseIndexVector_Uint16Vector - ? static_cast(array_indices()) - : nullptr; + ? static_cast(array_indices()) + : nullptr; } const Uint8Vector *array_indices_as_Uint8Vector() const { return array_indices_type() == SparseIndexVector_Uint8Vector - ? static_cast(array_indices()) - : nullptr; + ? static_cast(array_indices()) + : nullptr; } bool Verify(flatbuffers::Verifier &verifier) const { @@ -2435,7 +2434,7 @@ struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const flatbuffers::Vector> *dim_metadata() const { return GetPointer> *>( - VT_DIM_METADATA); + VT_DIM_METADATA); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -2460,7 +2459,7 @@ struct SparsityParametersBuilder fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map); } void add_dim_metadata( - flatbuffers::Offset>> dim_metadata) + flatbuffers::Offset>> dim_metadata) { fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata); } @@ -2478,11 +2477,10 @@ struct SparsityParametersBuilder }; inline flatbuffers::Offset CreateSparsityParameters( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset> traversal_order = 0, - flatbuffers::Offset> block_map = 0, - flatbuffers::Offset>> dim_metadata = - 0) + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset> traversal_order = 0, + flatbuffers::Offset> block_map = 0, + flatbuffers::Offset>> dim_metadata = 0) { SparsityParametersBuilder builder_(_fbb); builder_.add_dim_metadata(dim_metadata); @@ -2492,14 +2490,14 @@ inline flatbuffers::Offset CreateSparsityParameters( } inline flatbuffers::Offset CreateSparsityParametersDirect( - flatbuffers::FlatBufferBuilder &_fbb, const std::vector *traversal_order = nullptr, - const std::vector *block_map = nullptr, - const std::vector> *dim_metadata = nullptr) + flatbuffers::FlatBufferBuilder &_fbb, const std::vector *traversal_order = nullptr, + const std::vector *block_map = nullptr, + const std::vector> *dim_metadata = nullptr) { return onert_tflite::CreateSparsityParameters( - _fbb, traversal_order ? _fbb.CreateVector(*traversal_order) : 0, - block_map ? _fbb.CreateVector(*block_map) : 0, - dim_metadata ? _fbb.CreateVector>(*dim_metadata) : 0); + _fbb, traversal_order ? _fbb.CreateVector(*traversal_order) : 0, + block_map ? _fbb.CreateVector(*block_map) : 0, + dim_metadata ? _fbb.CreateVector>(*dim_metadata) : 0); } struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -2619,16 +2617,16 @@ CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, } inline flatbuffers::Offset CreateTensorDirect( - flatbuffers::FlatBufferBuilder &_fbb, const std::vector *shape = nullptr, - TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr, - flatbuffers::Offset quantization = 0, bool is_variable = false, - flatbuffers::Offset sparsity = 0, - const std::vector *shape_signature = nullptr) + flatbuffers::FlatBufferBuilder &_fbb, const std::vector *shape = nullptr, + TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr, + flatbuffers::Offset quantization = 0, bool is_variable = false, + flatbuffers::Offset sparsity = 0, + const std::vector *shape_signature = nullptr) { return onert_tflite::CreateTensor( - _fbb, shape ? _fbb.CreateVector(*shape) : 0, type, buffer, - name ? _fbb.CreateString(name) : 0, quantization, is_variable, sparsity, - shape_signature ? 
_fbb.CreateVector(*shape_signature) : 0); + _fbb, shape ? _fbb.CreateVector(*shape) : 0, type, buffer, + name ? _fbb.CreateString(name) : 0, quantization, is_variable, sparsity, + shape_signature ? _fbb.CreateVector(*shape_signature) : 0); } struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -2890,10 +2888,10 @@ struct DepthwiseConv2DOptionsBuilder }; inline flatbuffers::Offset CreateDepthwiseConv2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0, - int32_t stride_h = 0, int32_t depth_multiplier = 0, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, - int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) + flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0, + int32_t stride_h = 0, int32_t depth_multiplier = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) { DepthwiseConv2DOptionsBuilder builder_(_fbb); builder_.add_dilation_h_factor(dilation_h_factor); @@ -2942,12 +2940,12 @@ struct ConcatEmbeddingsOptionsBuilder fbb_.AddElement(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0); } void add_num_columns_per_channel( - flatbuffers::Offset> num_columns_per_channel) + flatbuffers::Offset> num_columns_per_channel) { fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel); } void add_embedding_dim_per_channel( - flatbuffers::Offset> embedding_dim_per_channel) + flatbuffers::Offset> embedding_dim_per_channel) { fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL, embedding_dim_per_channel); @@ -2966,9 +2964,9 @@ struct ConcatEmbeddingsOptionsBuilder }; inline flatbuffers::Offset CreateConcatEmbeddingsOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0, - flatbuffers::Offset> num_columns_per_channel = 0, - flatbuffers::Offset> embedding_dim_per_channel = 0) + flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0, + flatbuffers::Offset> num_columns_per_channel = 0, + flatbuffers::Offset> embedding_dim_per_channel = 0) { ConcatEmbeddingsOptionsBuilder builder_(_fbb); builder_.add_embedding_dim_per_channel(embedding_dim_per_channel); @@ -2983,9 +2981,9 @@ CreateConcatEmbeddingsOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, int32_ const std::vector *embedding_dim_per_channel = nullptr) { return onert_tflite::CreateConcatEmbeddingsOptions( - _fbb, num_channels, - num_columns_per_channel ? _fbb.CreateVector(*num_columns_per_channel) : 0, - embedding_dim_per_channel ? _fbb.CreateVector(*embedding_dim_per_channel) : 0); + _fbb, num_channels, + num_columns_per_channel ? _fbb.CreateVector(*num_columns_per_channel) : 0, + embedding_dim_per_channel ? 
_fbb.CreateVector(*embedding_dim_per_channel) : 0); } struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -3219,9 +3217,9 @@ struct SequenceRNNOptionsBuilder }; inline flatbuffers::Offset CreateSequenceRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + bool asymmetric_quantize_inputs = false) { SequenceRNNOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -3296,9 +3294,9 @@ struct BidirectionalSequenceRNNOptionsBuilder }; inline flatbuffers::Offset CreateBidirectionalSequenceRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, - bool merge_outputs = false, bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + bool merge_outputs = false, bool asymmetric_quantize_inputs = false) { BidirectionalSequenceRNNOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -3378,10 +3376,10 @@ struct FullyConnectedOptionsBuilder }; inline flatbuffers::Offset CreateFullyConnectedOptions( - flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, - FullyConnectedOptionsWeightsFormat weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT, - bool keep_num_dims = false, bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + FullyConnectedOptionsWeightsFormat weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT, + bool keep_num_dims = false, bool asymmetric_quantize_inputs = false) { FullyConnectedOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -3474,8 +3472,8 @@ struct ConcatenationOptionsBuilder }; inline flatbuffers::Offset CreateConcatenationOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { ConcatenationOptionsBuilder builder_(_fbb); builder_.add_axis(axis); @@ -3669,7 +3667,7 @@ struct LocalResponseNormalizationOptionsBuilder fbb_.AddElement(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f); } explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } @@ -3845,7 +3843,7 @@ struct UnidirectionalSequenceLSTMOptionsBuilder static_cast(asymmetric_quantize_inputs), 0); } explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } @@ -3861,10 +3859,10 @@ struct UnidirectionalSequenceLSTMOptionsBuilder inline flatbuffers::Offset CreateUnidirectionalSequenceLSTMOptions( - flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, - float 
cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false, + bool asymmetric_quantize_inputs = false) { UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); builder_.add_proj_clip(proj_clip); @@ -3943,7 +3941,7 @@ struct BidirectionalSequenceLSTMOptionsBuilder static_cast(asymmetric_quantize_inputs), 0); } explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } @@ -3958,10 +3956,10 @@ struct BidirectionalSequenceLSTMOptionsBuilder }; inline flatbuffers::Offset CreateBidirectionalSequenceLSTMOptions( - flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, - float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false, - bool time_major = true, bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false, + bool time_major = true, bool asymmetric_quantize_inputs = false) { BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); builder_.add_proj_clip(proj_clip); @@ -4844,7 +4842,7 @@ CreateSqueezeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, const std::vector *squeeze_dims = nullptr) { return onert_tflite::CreateSqueezeOptions( - _fbb, squeeze_dims ? _fbb.CreateVector(*squeeze_dims) : 0); + _fbb, squeeze_dims ? _fbb.CreateVector(*squeeze_dims) : 0); } struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -7206,7 +7204,7 @@ CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb, const char *custom_code = nullptr, int32_t version = 1) { return onert_tflite::CreateOperatorCode( - _fbb, builtin_code, custom_code ? _fbb.CreateString(custom_code) : 0, version); + _fbb, builtin_code, custom_code ? _fbb.CreateString(custom_code) : 0, version); } struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -7241,611 +7239,611 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const Conv2DOptions *builtin_options_as_Conv2DOptions() const { return builtin_options_type() == BuiltinOptions_Conv2DOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const { return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const { return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const { return builtin_options_type() == BuiltinOptions_LSHProjectionOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const Pool2DOptions *builtin_options_as_Pool2DOptions() const { return builtin_options_type() == BuiltinOptions_Pool2DOptions - ? static_cast(builtin_options()) - : nullptr; + ? 
static_cast(builtin_options()) + : nullptr; } const SVDFOptions *builtin_options_as_SVDFOptions() const { return builtin_options_type() == BuiltinOptions_SVDFOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const RNNOptions *builtin_options_as_RNNOptions() const { return builtin_options_type() == BuiltinOptions_RNNOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const { return builtin_options_type() == BuiltinOptions_FullyConnectedOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const SoftmaxOptions *builtin_options_as_SoftmaxOptions() const { return builtin_options_type() == BuiltinOptions_SoftmaxOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const ConcatenationOptions *builtin_options_as_ConcatenationOptions() const { return builtin_options_type() == BuiltinOptions_ConcatenationOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const AddOptions *builtin_options_as_AddOptions() const { return builtin_options_type() == BuiltinOptions_AddOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const L2NormOptions *builtin_options_as_L2NormOptions() const { return builtin_options_type() == BuiltinOptions_L2NormOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const LocalResponseNormalizationOptions * builtin_options_as_LocalResponseNormalizationOptions() const { return builtin_options_type() == BuiltinOptions_LocalResponseNormalizationOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const LSTMOptions *builtin_options_as_LSTMOptions() const { return builtin_options_type() == BuiltinOptions_LSTMOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const { return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const CallOptions *builtin_options_as_CallOptions() const { return builtin_options_type() == BuiltinOptions_CallOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const ReshapeOptions *builtin_options_as_ReshapeOptions() const { return builtin_options_type() == BuiltinOptions_ReshapeOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const SkipGramOptions *builtin_options_as_SkipGramOptions() const { return builtin_options_type() == BuiltinOptions_SkipGramOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const { return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const { return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions - ? static_cast(builtin_options()) - : nullptr; + ? 
static_cast(builtin_options()) + : nullptr; } const MulOptions *builtin_options_as_MulOptions() const { return builtin_options_type() == BuiltinOptions_MulOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const PadOptions *builtin_options_as_PadOptions() const { return builtin_options_type() == BuiltinOptions_PadOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const GatherOptions *builtin_options_as_GatherOptions() const { return builtin_options_type() == BuiltinOptions_GatherOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const { return builtin_options_type() == BuiltinOptions_BatchToSpaceNDOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const { return builtin_options_type() == BuiltinOptions_SpaceToBatchNDOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const TransposeOptions *builtin_options_as_TransposeOptions() const { return builtin_options_type() == BuiltinOptions_TransposeOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const ReducerOptions *builtin_options_as_ReducerOptions() const { return builtin_options_type() == BuiltinOptions_ReducerOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const SubOptions *builtin_options_as_SubOptions() const { return builtin_options_type() == BuiltinOptions_SubOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const DivOptions *builtin_options_as_DivOptions() const { return builtin_options_type() == BuiltinOptions_DivOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const SqueezeOptions *builtin_options_as_SqueezeOptions() const { return builtin_options_type() == BuiltinOptions_SqueezeOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const { return builtin_options_type() == BuiltinOptions_SequenceRNNOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const StridedSliceOptions *builtin_options_as_StridedSliceOptions() const { return builtin_options_type() == BuiltinOptions_StridedSliceOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const ExpOptions *builtin_options_as_ExpOptions() const { return builtin_options_type() == BuiltinOptions_ExpOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const TopKV2Options *builtin_options_as_TopKV2Options() const { return builtin_options_type() == BuiltinOptions_TopKV2Options - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const SplitOptions *builtin_options_as_SplitOptions() const { return builtin_options_type() == BuiltinOptions_SplitOptions - ? static_cast(builtin_options()) - : nullptr; + ? 
static_cast(builtin_options()) + : nullptr; } const LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const { return builtin_options_type() == BuiltinOptions_LogSoftmaxOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const CastOptions *builtin_options_as_CastOptions() const { return builtin_options_type() == BuiltinOptions_CastOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const DequantizeOptions *builtin_options_as_DequantizeOptions() const { return builtin_options_type() == BuiltinOptions_DequantizeOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const { return builtin_options_type() == BuiltinOptions_MaximumMinimumOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const ArgMaxOptions *builtin_options_as_ArgMaxOptions() const { return builtin_options_type() == BuiltinOptions_ArgMaxOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const LessOptions *builtin_options_as_LessOptions() const { return builtin_options_type() == BuiltinOptions_LessOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const NegOptions *builtin_options_as_NegOptions() const { return builtin_options_type() == BuiltinOptions_NegOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const PadV2Options *builtin_options_as_PadV2Options() const { return builtin_options_type() == BuiltinOptions_PadV2Options - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const GreaterOptions *builtin_options_as_GreaterOptions() const { return builtin_options_type() == BuiltinOptions_GreaterOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const { return builtin_options_type() == BuiltinOptions_GreaterEqualOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const LessEqualOptions *builtin_options_as_LessEqualOptions() const { return builtin_options_type() == BuiltinOptions_LessEqualOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const SelectOptions *builtin_options_as_SelectOptions() const { return builtin_options_type() == BuiltinOptions_SelectOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const SliceOptions *builtin_options_as_SliceOptions() const { return builtin_options_type() == BuiltinOptions_SliceOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const TransposeConvOptions *builtin_options_as_TransposeConvOptions() const { return builtin_options_type() == BuiltinOptions_TransposeConvOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const { return builtin_options_type() == BuiltinOptions_SparseToDenseOptions - ? static_cast(builtin_options()) - : nullptr; + ? 
static_cast(builtin_options()) + : nullptr; } const TileOptions *builtin_options_as_TileOptions() const { return builtin_options_type() == BuiltinOptions_TileOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const { return builtin_options_type() == BuiltinOptions_ExpandDimsOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const EqualOptions *builtin_options_as_EqualOptions() const { return builtin_options_type() == BuiltinOptions_EqualOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const NotEqualOptions *builtin_options_as_NotEqualOptions() const { return builtin_options_type() == BuiltinOptions_NotEqualOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const ShapeOptions *builtin_options_as_ShapeOptions() const { return builtin_options_type() == BuiltinOptions_ShapeOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const PowOptions *builtin_options_as_PowOptions() const { return builtin_options_type() == BuiltinOptions_PowOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const ArgMinOptions *builtin_options_as_ArgMinOptions() const { return builtin_options_type() == BuiltinOptions_ArgMinOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const FakeQuantOptions *builtin_options_as_FakeQuantOptions() const { return builtin_options_type() == BuiltinOptions_FakeQuantOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const PackOptions *builtin_options_as_PackOptions() const { return builtin_options_type() == BuiltinOptions_PackOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const LogicalOrOptions *builtin_options_as_LogicalOrOptions() const { return builtin_options_type() == BuiltinOptions_LogicalOrOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const OneHotOptions *builtin_options_as_OneHotOptions() const { return builtin_options_type() == BuiltinOptions_OneHotOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const LogicalAndOptions *builtin_options_as_LogicalAndOptions() const { return builtin_options_type() == BuiltinOptions_LogicalAndOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const LogicalNotOptions *builtin_options_as_LogicalNotOptions() const { return builtin_options_type() == BuiltinOptions_LogicalNotOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const UnpackOptions *builtin_options_as_UnpackOptions() const { return builtin_options_type() == BuiltinOptions_UnpackOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const FloorDivOptions *builtin_options_as_FloorDivOptions() const { return builtin_options_type() == BuiltinOptions_FloorDivOptions - ? static_cast(builtin_options()) - : nullptr; + ? 
static_cast(builtin_options()) + : nullptr; } const SquareOptions *builtin_options_as_SquareOptions() const { return builtin_options_type() == BuiltinOptions_SquareOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const { return builtin_options_type() == BuiltinOptions_ZerosLikeOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const FillOptions *builtin_options_as_FillOptions() const { return builtin_options_type() == BuiltinOptions_FillOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const BidirectionalSequenceLSTMOptions * builtin_options_as_BidirectionalSequenceLSTMOptions() const { return builtin_options_type() == BuiltinOptions_BidirectionalSequenceLSTMOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const { return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const UnidirectionalSequenceLSTMOptions * builtin_options_as_UnidirectionalSequenceLSTMOptions() const { return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const FloorModOptions *builtin_options_as_FloorModOptions() const { return builtin_options_type() == BuiltinOptions_FloorModOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const RangeOptions *builtin_options_as_RangeOptions() const { return builtin_options_type() == BuiltinOptions_RangeOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const ResizeNearestNeighborOptions *builtin_options_as_ResizeNearestNeighborOptions() const { return builtin_options_type() == BuiltinOptions_ResizeNearestNeighborOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const LeakyReluOptions *builtin_options_as_LeakyReluOptions() const { return builtin_options_type() == BuiltinOptions_LeakyReluOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const { return builtin_options_type() == BuiltinOptions_SquaredDifferenceOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const MirrorPadOptions *builtin_options_as_MirrorPadOptions() const { return builtin_options_type() == BuiltinOptions_MirrorPadOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const AbsOptions *builtin_options_as_AbsOptions() const { return builtin_options_type() == BuiltinOptions_AbsOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const SplitVOptions *builtin_options_as_SplitVOptions() const { return builtin_options_type() == BuiltinOptions_SplitVOptions - ? static_cast(builtin_options()) - : nullptr; + ? 
static_cast(builtin_options()) + : nullptr; } const UniqueOptions *builtin_options_as_UniqueOptions() const { return builtin_options_type() == BuiltinOptions_UniqueOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const ReverseV2Options *builtin_options_as_ReverseV2Options() const { return builtin_options_type() == BuiltinOptions_ReverseV2Options - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const AddNOptions *builtin_options_as_AddNOptions() const { return builtin_options_type() == BuiltinOptions_AddNOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const GatherNdOptions *builtin_options_as_GatherNdOptions() const { return builtin_options_type() == BuiltinOptions_GatherNdOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const CosOptions *builtin_options_as_CosOptions() const { return builtin_options_type() == BuiltinOptions_CosOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const WhereOptions *builtin_options_as_WhereOptions() const { return builtin_options_type() == BuiltinOptions_WhereOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const RankOptions *builtin_options_as_RankOptions() const { return builtin_options_type() == BuiltinOptions_RankOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const { return builtin_options_type() == BuiltinOptions_ReverseSequenceOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const { return builtin_options_type() == BuiltinOptions_MatrixDiagOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const QuantizeOptions *builtin_options_as_QuantizeOptions() const { return builtin_options_type() == BuiltinOptions_QuantizeOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const { return builtin_options_type() == BuiltinOptions_MatrixSetDiagOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const HardSwishOptions *builtin_options_as_HardSwishOptions() const { return builtin_options_type() == BuiltinOptions_HardSwishOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const IfOptions *builtin_options_as_IfOptions() const { return builtin_options_type() == BuiltinOptions_IfOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const WhileOptions *builtin_options_as_WhileOptions() const { return builtin_options_type() == BuiltinOptions_WhileOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const { return builtin_options_type() == BuiltinOptions_DepthToSpaceOptions - ? static_cast(builtin_options()) - : nullptr; + ? 
static_cast(builtin_options()) + : nullptr; } const NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const { return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV4Options - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const { return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV5Options - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const ScatterNdOptions *builtin_options_as_ScatterNdOptions() const { return builtin_options_type() == BuiltinOptions_ScatterNdOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const SelectV2Options *builtin_options_as_SelectV2Options() const { return builtin_options_type() == BuiltinOptions_SelectV2Options - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const DensifyOptions *builtin_options_as_DensifyOptions() const { return builtin_options_type() == BuiltinOptions_DensifyOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const SegmentSumOptions *builtin_options_as_SegmentSumOptions() const { return builtin_options_type() == BuiltinOptions_SegmentSumOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const { return builtin_options_type() == BuiltinOptions_BatchMatMulOptions - ? static_cast(builtin_options()) - : nullptr; + ? static_cast(builtin_options()) + : nullptr; } const flatbuffers::Vector *custom_options() const { @@ -8457,7 +8455,7 @@ struct OperatorBuilder static_cast(custom_options_format), 0); } void add_mutating_variable_inputs( - flatbuffers::Offset> mutating_variable_inputs) + flatbuffers::Offset> mutating_variable_inputs) { fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs); } @@ -8514,11 +8512,11 @@ CreateOperatorDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index const std::vector *intermediates = nullptr) { return onert_tflite::CreateOperator( - _fbb, opcode_index, inputs ? _fbb.CreateVector(*inputs) : 0, - outputs ? _fbb.CreateVector(*outputs) : 0, builtin_options_type, builtin_options, - custom_options ? _fbb.CreateVector(*custom_options) : 0, custom_options_format, - mutating_variable_inputs ? _fbb.CreateVector(*mutating_variable_inputs) : 0, - intermediates ? _fbb.CreateVector(*intermediates) : 0); + _fbb, opcode_index, inputs ? _fbb.CreateVector(*inputs) : 0, + outputs ? _fbb.CreateVector(*outputs) : 0, builtin_options_type, builtin_options, + custom_options ? _fbb.CreateVector(*custom_options) : 0, custom_options_format, + mutating_variable_inputs ? _fbb.CreateVector(*mutating_variable_inputs) : 0, + intermediates ? 
_fbb.CreateVector(*intermediates) : 0); } struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -8602,12 +8600,12 @@ struct SubGraphBuilder }; inline flatbuffers::Offset CreateSubGraph( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset>> tensors = 0, - flatbuffers::Offset> inputs = 0, - flatbuffers::Offset> outputs = 0, - flatbuffers::Offset>> operators = 0, - flatbuffers::Offset name = 0) + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset>> tensors = 0, + flatbuffers::Offset> inputs = 0, + flatbuffers::Offset> outputs = 0, + flatbuffers::Offset>> operators = 0, + flatbuffers::Offset name = 0) { SubGraphBuilder builder_(_fbb); builder_.add_name(name); @@ -8618,20 +8616,18 @@ inline flatbuffers::Offset CreateSubGraph( return builder_.Finish(); } -inline flatbuffers::Offset -CreateSubGraphDirect(flatbuffers::FlatBufferBuilder &_fbb, - const std::vector> *tensors = nullptr, - const std::vector *inputs = nullptr, - const std::vector *outputs = nullptr, - const std::vector> *operators = nullptr, - const char *name = nullptr) +inline flatbuffers::Offset CreateSubGraphDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector> *tensors = nullptr, + const std::vector *inputs = nullptr, const std::vector *outputs = nullptr, + const std::vector> *operators = nullptr, const char *name = nullptr) { return onert_tflite::CreateSubGraph( - _fbb, tensors ? _fbb.CreateVector>(*tensors) : 0, - inputs ? _fbb.CreateVector(*inputs) : 0, - outputs ? _fbb.CreateVector(*outputs) : 0, - operators ? _fbb.CreateVector>(*operators) : 0, - name ? _fbb.CreateString(name) : 0); + _fbb, tensors ? _fbb.CreateVector>(*tensors) : 0, + inputs ? _fbb.CreateVector(*inputs) : 0, + outputs ? _fbb.CreateVector(*outputs) : 0, + operators ? _fbb.CreateVector>(*operators) : 0, + name ? _fbb.CreateString(name) : 0); } struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -8762,7 +8758,7 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const flatbuffers::Vector> *operator_codes() const { return GetPointer> *>( - VT_OPERATOR_CODES); + VT_OPERATOR_CODES); } const flatbuffers::Vector> *subgraphs() const { @@ -8805,7 +8801,7 @@ struct ModelBuilder flatbuffers::uoffset_t start_; void add_version(uint32_t version) { fbb_.AddElement(Model::VT_VERSION, version, 0); } void add_operator_codes( - flatbuffers::Offset>> operator_codes) + flatbuffers::Offset>> operator_codes) { fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes); } @@ -8845,13 +8841,13 @@ struct ModelBuilder }; inline flatbuffers::Offset CreateModel( - flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, - flatbuffers::Offset>> operator_codes = 0, - flatbuffers::Offset>> subgraphs = 0, - flatbuffers::Offset description = 0, - flatbuffers::Offset>> buffers = 0, - flatbuffers::Offset> metadata_buffer = 0, - flatbuffers::Offset>> metadata = 0) + flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, + flatbuffers::Offset>> operator_codes = 0, + flatbuffers::Offset>> subgraphs = 0, + flatbuffers::Offset description = 0, + flatbuffers::Offset>> buffers = 0, + flatbuffers::Offset> metadata_buffer = 0, + flatbuffers::Offset>> metadata = 0) { ModelBuilder builder_(_fbb); builder_.add_metadata(metadata); @@ -8874,13 +8870,13 @@ CreateModelDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, const std::vector> *metadata = nullptr) { return onert_tflite::CreateModel( - _fbb, version, - operator_codes ? _fbb.CreateVector>(*operator_codes) : 0, - subgraphs ? 
_fbb.CreateVector>(*subgraphs) : 0, - description ? _fbb.CreateString(description) : 0, - buffers ? _fbb.CreateVector>(*buffers) : 0, - metadata_buffer ? _fbb.CreateVector(*metadata_buffer) : 0, - metadata ? _fbb.CreateVector>(*metadata) : 0); + _fbb, version, + operator_codes ? _fbb.CreateVector>(*operator_codes) : 0, + subgraphs ? _fbb.CreateVector>(*subgraphs) : 0, + description ? _fbb.CreateString(description) : 0, + buffers ? _fbb.CreateVector>(*buffers) : 0, + metadata_buffer ? _fbb.CreateVector(*metadata_buffer) : 0, + metadata ? _fbb.CreateVector>(*metadata) : 0); } inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj, diff --git a/runtime/onert/sample/.clang-format b/runtime/onert/sample/.clang-format new file mode 120000 index 0000000..83185fe --- /dev/null +++ b/runtime/onert/sample/.clang-format @@ -0,0 +1 @@ +../../../.clang-format.8 \ No newline at end of file diff --git a/runtime/onert/test/.clang-format b/runtime/onert/test/.clang-format new file mode 120000 index 0000000..83185fe --- /dev/null +++ b/runtime/onert/test/.clang-format @@ -0,0 +1 @@ +../../../.clang-format.8 \ No newline at end of file diff --git a/runtime/onert/test/core/compiler/Scheduler.cc b/runtime/onert/test/core/compiler/HEScheduler.cc similarity index 95% rename from runtime/onert/test/core/compiler/Scheduler.cc rename to runtime/onert/test/core/compiler/HEScheduler.cc index 50f3964..c77ebb8 100644 --- a/runtime/onert/test/core/compiler/Scheduler.cc +++ b/runtime/onert/test/core/compiler/HEScheduler.cc @@ -55,8 +55,7 @@ struct MockBackendCPU : public Backend std::unique_ptr newContext(const Graph &, const std::shared_ptr &, bool) const override { - return std::unique_ptr( - new BackendContext{this, nullptr, nullptr, nullptr, nullptr}); + return std::unique_ptr(new BackendContext{this, nullptr}); } }; @@ -79,8 +78,7 @@ struct MockBackendGPU : public Backend std::unique_ptr newContext(const Graph &, const std::shared_ptr &, bool) const override { - return std::unique_ptr( - new BackendContext{this, nullptr, nullptr, nullptr, nullptr}); + return std::unique_ptr(new BackendContext{this, nullptr}); } }; @@ -103,8 +101,7 @@ struct MockBackendNPU : public Backend std::unique_ptr newContext(const Graph &, const std::shared_ptr &, bool) const override { - return std::unique_ptr( - new BackendContext{this, nullptr, nullptr, nullptr, nullptr}); + return std::unique_ptr(new BackendContext{this, nullptr}); } }; @@ -165,7 +162,7 @@ void setOperationsExecutionTime(const std::vector &backends, for (auto &backend : backends) setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time); } - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); } // Set permute time from one backend to another. 
This method is needed since ExecutionTime has only @@ -195,7 +192,7 @@ void setPermutationsExecutionTime(const std::vector &backends, setPermutationTime(et, backend, other_backend, false, operand_size, exec_time); } } - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); } // @@ -304,7 +301,7 @@ std::shared_ptr createBranchedGraph() // // SetUp/TearDown methods runs before/after each test and performs actions common for each test -class SchedulerTest : public ::testing::Test +class HESchedulerTest : public ::testing::Test { protected: void SetUp() override @@ -359,8 +356,8 @@ protected: std::string _original_profiling_mode; }; -class SchedulerTestWithExecutorParam : public SchedulerTest, - public testing::WithParamInterface +class HESchedulerTestWithExecutorParam : public HESchedulerTest, + public testing::WithParamInterface { }; @@ -369,7 +366,7 @@ class SchedulerTestWithExecutorParam : public SchedulerTest, // // Test scheduler behavior for straight graph with known execution time of all nodes and permutes. -TEST_P(SchedulerTestWithExecutorParam, straight_graph_known_exec_time) +TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time) { setExecutor(GetParam()); @@ -392,7 +389,7 @@ TEST_P(SchedulerTestWithExecutorParam, straight_graph_known_exec_time) setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1); setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1); setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); // Test scheduler auto backend_contexts = buildBackendContexts(*graph); @@ -422,7 +419,7 @@ TEST_P(SchedulerTestWithExecutorParam, straight_graph_known_exec_time) } // Test scheduler behavior for branched graph with known execution time of all nodes and permutes -TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time) +TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time) { const int64_t NPU_ET = 5000; setExecutor(GetParam()); @@ -432,7 +429,7 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time) auto graph(createBranchedGraph()); subgs.push(ir::SubgraphIndex{0}, graph); OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4), - sub_op_idx(5); + sub_op_idx(5); // Set default execution and transfer time setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000); @@ -451,7 +448,7 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time) setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET); setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000); setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); // Test scheduler auto backend_contexts = buildBackendContexts(*graph); @@ -463,7 +460,7 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time) if (GetParam() == PARALLEL) { branch1_expected_backend = - br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu"; + br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu"; branch2_expected_backend = branch1_expected_backend == "npu" ? 
"gpu" : "npu"; } @@ -486,7 +483,7 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time) * branching or scheduler assigns another backend to a node*/ setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1); setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); // Test scheduler auto backend_contexts = buildBackendContexts(*graph); @@ -504,11 +501,11 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time) // SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times - // one time for each executor -INSTANTIATE_TEST_CASE_P(AllExecutors, SchedulerTestWithExecutorParam, +INSTANTIATE_TEST_CASE_P(AllExecutors, HESchedulerTestWithExecutorParam, testing::Values(LINEAR, DATAFLOW, PARALLEL)); // Test scheduler behavior for branched graph and enabled profiling mode -TEST_F(SchedulerTest, branched_graph_profiling_mode) +TEST_F(HESchedulerTest, branched_graph_profiling_mode) { const int ET = 1e5; @@ -521,7 +518,7 @@ TEST_F(SchedulerTest, branched_graph_profiling_mode) auto graph(createBranchedGraph()); subgs.push(ir::SubgraphIndex{0}, graph); OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4), - sub_op_idx(5); + sub_op_idx(5); // Test 1 // Expected behaviour: scheduler assigns backends to nodes with unknown execution time @@ -537,7 +534,7 @@ TEST_F(SchedulerTest, branched_graph_profiling_mode) setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET); setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1); setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); // Test scheduler auto backend_contexts = buildBackendContexts(*graph); @@ -560,7 +557,7 @@ TEST_F(SchedulerTest, branched_graph_profiling_mode) setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET); setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1); setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); // Test scheduler auto backend_contexts = buildBackendContexts(*graph); diff --git a/runtime/onert/test/core/exec/ExecInstance.cc b/runtime/onert/test/core/exec/ExecInstance.cc index 806b47e..0e742e1 100644 --- a/runtime/onert/test/core/exec/ExecInstance.cc +++ b/runtime/onert/test/core/exec/ExecInstance.cc @@ -21,6 +21,7 @@ #include "compiler/Compiler.h" #include "exec/Execution.h" #include "ir/operation/BinaryArithmetic.h" +#include "util/TracingCtx.h" namespace { @@ -51,8 +52,8 @@ public: auto operand_rhs2 = graph->addOperand(shape, type); auto operand_result2 = graph->addOperand(shape, type); graph->operands() - .at(operand_rhs2) - .data(std::make_unique(reinterpret_cast(&rhs2_data), 16)); + .at(operand_rhs2) + .data(std::make_unique(reinterpret_cast(&rhs2_data), 16)); // 2nd add operations (result2 <= result1 + rhs2) operation::BinaryArithmetic::Param param1; param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; @@ -60,14 +61,14 @@ public: auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1}; auto output_set1 = OperandIndexSequence{operand_result1}; graph->addOperation( - std::make_unique(input_set1, output_set1, param1)); + std::make_unique(input_set1, output_set1, param1)); 
operation::BinaryArithmetic::Param param2; param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; param2.activation = Activation::NONE; auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2}; auto output_set2 = OperandIndexSequence{operand_result2}; graph->addOperation( - std::make_unique(input_set2, output_set2, param2)); + std::make_unique(input_set2, output_set2, param2)); // Identify model inputs and outputs graph->addInput(operand_lhs); graph->addInput(operand_rhs1); @@ -77,13 +78,15 @@ public: // Compile auto subgs = std::make_shared(); subgs->push(onert::ir::SubgraphIndex{0}, graph); - onert::compiler::Compiler compiler{subgs}; + tracing_ctx = std::make_unique(subgs.get()); + onert::compiler::Compiler compiler{subgs, tracing_ctx.get()}; executors = compiler.compile(); } public: std::shared_ptr graph; std::shared_ptr executors; + std::unique_ptr tracing_ctx; }; TEST(ExecInstance, simple) @@ -137,7 +140,8 @@ TEST(ExecInstance, twoCompile) // Make new executor: compile again auto subgs = std::make_shared(); subgs->push(onert::ir::SubgraphIndex{0}, graph); - onert::compiler::Compiler compiler{subgs}; + auto tracing_ctx = std::make_unique(subgs.get()); + onert::compiler::Compiler compiler{subgs, tracing_ctx.get()}; std::shared_ptr executors2 = compiler.compile(); onert::exec::Execution execution2{executors2}; @@ -205,7 +209,7 @@ class Inference public: Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4], std::shared_ptr &executors) - : _input1{input1}, _input2{input2}, _output{output}, _executors{executors} + : _input1{input1}, _input2{input2}, _output{output}, _executors{executors} { // DO NOTHING } diff --git a/runtime/onert/test/core/exec/ExecTime.test.cc b/runtime/onert/test/core/exec/ExecTime.test.cc index 8c2e34d..6b0c35a 100644 --- a/runtime/onert/test/core/exec/ExecTime.test.cc +++ b/runtime/onert/test/core/exec/ExecTime.test.cc @@ -62,7 +62,7 @@ TEST(ExecTime, roundtrip_ok) et.updateOperationExecTime(b, "op1", true, 100, 100); et.updateOperationExecTime(b, "op1", true, 200, 200); et.updateOperationExecTime(b, "op1", false, 100, 888); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); } { ExecTime et(bs); @@ -73,7 +73,7 @@ TEST(ExecTime, roundtrip_ok) ASSERT_EQ(time, 150); time = et.getOperationExecTime(b, "op1", false, 100); ASSERT_EQ(time, 888); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); } // clean up EXPECT_EQ(remove("exec_time.json"), 0); @@ -88,7 +88,7 @@ TEST(ExecTime, structure) ExecTime et(bs); et.updateOperationExecTime(b, "op1", true, 100, 100); et.updateOperationExecTime(b, "op1", true, 200, 200); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); } { ExecTime et(bs); @@ -97,7 +97,7 @@ TEST(ExecTime, structure) // Check interpolation time = et.getOperationExecTime(b, "op1", true, 200); ASSERT_EQ(time, 200); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); } // clean up EXPECT_EQ(remove("exec_time.json"), 0); diff --git a/runtime/onert/test/core/interp/ExecManager.cc b/runtime/onert/test/core/interp/ExecManager.cc index 0c7b1b7..327c38f 100644 --- a/runtime/onert/test/core/interp/ExecManager.cc +++ b/runtime/onert/test/core/interp/ExecManager.cc @@ -63,7 +63,7 @@ protected: auto input_set = OperandIndexSequence{operand_lhs, operand_rhs}; auto output_set = OperandIndexSequence{operand_result}; _graph->addOperation( - std::make_unique(input_set, output_set, param)); + std::make_unique(input_set, output_set, param)); // Identify model inputs and 
outputs @@ -79,7 +79,7 @@ protected: _executors = std::make_shared(); _executors->insert( - std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique(*_graph))); + std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique(*_graph))); } void CreateTwoStepModel() @@ -109,8 +109,8 @@ protected: auto operand_rhs2 = _graph->addOperand(shape, type); auto operand_result2 = _graph->addOperand(shape, type); _graph->operands() - .at(operand_rhs2) - .data(std::make_unique(reinterpret_cast(&rhs2_data), 16)); + .at(operand_rhs2) + .data(std::make_unique(reinterpret_cast(&rhs2_data), 16)); // 2nd add operations (result2 <= result1 + rhs2) @@ -120,7 +120,7 @@ protected: auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1}; auto output_set1 = OperandIndexSequence{operand_result1}; _graph->addOperation( - std::make_unique(input_set1, output_set1, param1)); + std::make_unique(input_set1, output_set1, param1)); operation::BinaryArithmetic::Param param2; param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; @@ -128,7 +128,7 @@ protected: auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2}; auto output_set2 = OperandIndexSequence{operand_result2}; _graph->addOperation( - std::make_unique(input_set2, output_set2, param2)); + std::make_unique(input_set2, output_set2, param2)); // Identify model inputs and outputs @@ -144,7 +144,7 @@ protected: _executors = std::make_shared(); _executors->insert( - std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique(*_graph))); + std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique(*_graph))); } void CreateUnspecifiedDimensionsModel() @@ -168,9 +168,8 @@ protected: auto operand_activation = _graph->addOperand(shape_scalar, type_scalar); _graph->operands() - .at(operand_activation) - .data( - std::make_unique(reinterpret_cast(&_activation_value), 4)); + .at(operand_activation) + .data(std::make_unique(reinterpret_cast(&_activation_value), 4)); auto operand_result = _graph->addOperand(shape, type); @@ -182,7 +181,7 @@ protected: auto input_set = OperandIndexSequence{operand_lhs, operand_rhs}; auto output_set = OperandIndexSequence{operand_result}; _graph->addOperation( - std::make_unique(input_set, output_set, param)); + std::make_unique(input_set, output_set, param)); // Identify model inputs and outputs @@ -198,7 +197,7 @@ protected: _executors = std::make_shared(); _executors->insert( - std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique(*_graph))); + std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique(*_graph))); } void createExecution() { _execution = std::make_unique(_executors); } diff --git a/runtime/onert/test/graph/MockNode.h b/runtime/onert/test/graph/MockNode.h index 60b4719..0e7ed97 100644 --- a/runtime/onert/test/graph/MockNode.h +++ b/runtime/onert/test/graph/MockNode.h @@ -30,7 +30,7 @@ class SimpleMock : public onert::ir::Operation public: SimpleMock(const onert::ir::OperandIndexSequence &inputs, const onert::ir::OperandIndexSequence &outputs) - : Operation{onert::ir::OperandConstraint::createAny()} + : Operation{onert::ir::OperandConstraint::createAny()} { setInputs(inputs); setOutputs(outputs); diff --git a/runtime/onert/test/graph/operand/UseDef.cc b/runtime/onert/test/graph/operand/UseDef.cc index 206e402..5ef1002 100644 --- a/runtime/onert/test/graph/operand/UseDef.cc +++ b/runtime/onert/test/graph/operand/UseDef.cc @@ -49,16 +49,16 @@ TEST(ir_Operand, neg_usedef) // MockNode1 auto operand_index1 = graph.addOperand(shape, type); auto mocknode_index1 = - 
graph.addOperation(std::make_unique(IndexSet{input_operand}, IndexSet{operand_index1})); + graph.addOperation(std::make_unique(IndexSet{input_operand}, IndexSet{operand_index1})); // MockNode2 auto operand_index2 = graph.addOperand(shape, type); auto mocknode_index2 = - graph.addOperation(std::make_unique(IndexSet{input_operand}, IndexSet{operand_index2})); + graph.addOperation(std::make_unique(IndexSet{input_operand}, IndexSet{operand_index2})); // MockNode3(two input) auto multiinput_index = graph.addOperation( - std::make_unique(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand})); + std::make_unique(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand})); graph.finishBuilding(); diff --git a/runtime/onert/test/util/ShapeInference.cc b/runtime/onert/test/util/ShapeInference.cc index f1cbfd6..2ecaa28 100644 --- a/runtime/onert/test/util/ShapeInference.cc +++ b/runtime/onert/test/util/ShapeInference.cc @@ -48,7 +48,7 @@ TEST(ShapeInference, Pool2DNodeSame) Padding padding{PaddingType::SAME}; operation::Pool2D::Param avg_pool_param{ - operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE}; + operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE}; auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param); ASSERT_EQ(infered_out_shape.rank(), 4); @@ -58,7 +58,7 @@ TEST(ShapeInference, Pool2DNodeSame) ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); operation::Pool2D::Param max_pool_param{ - operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE}; + operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE}; infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param); ASSERT_EQ(infered_out_shape.rank(), 4); @@ -75,7 +75,7 @@ TEST(ShapeInference, Pool2DNodeValid) Padding padding{PaddingType::VALID}; operation::Pool2D::Param avg_pool_param{ - operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE}; + operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE}; auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param); ASSERT_EQ(infered_out_shape.rank(), 4); @@ -85,7 +85,7 @@ TEST(ShapeInference, Pool2DNodeValid) ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); operation::Pool2D::Param max_pool_param{ - operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE}; + operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE}; infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param); ASSERT_EQ(infered_out_shape.rank(), 4); @@ -103,7 +103,7 @@ TEST(ShapeInference, Pool2DNodeExplicit) Padding padding{4, 3, 2, 1}; operation::Pool2D::Param avg_pool_param{ - operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE}; + operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE}; auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param); ASSERT_EQ(infered_out_shape.rank(), 4); @@ -113,7 +113,7 @@ TEST(ShapeInference, Pool2DNodeExplicit) ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20); operation::Pool2D::Param max_pool_param{ - operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE}; + operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE}; infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param); ASSERT_EQ(infered_out_shape.rank(), 4); @@ -130,7 +130,7 @@ 
TEST(ShapeInference, neg_Pool2DNode_InvalidStride) Padding padding{PaddingType::SAME}; operation::Pool2D::Param avg_pool_param{ - operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE}; + operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE}; ASSERT_THROW(onert::shape_inference::inferPoolShape(in_shape, avg_pool_param), std::runtime_error); } @@ -161,7 +161,7 @@ TEST(ShapeInference, Conv2D) ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30); param = - operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}}; + operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}}; infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param); ASSERT_EQ(infered_out_shape.rank(), 4); @@ -190,7 +190,7 @@ TEST(ShapeInference, DepthwiseConv2D) operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3, Activation::NONE, Dilation{1, 1}}; auto infered_out_shape = - onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param); + onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param); ASSERT_EQ(infered_out_shape.rank(), 4); ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10); @@ -364,7 +364,7 @@ TEST(ShapeInference, Transpose) ASSERT_EQ(in_shape.rank(), perm.size()); ASSERT_EQ(expected.rank(), perm.size()); auto inferred_out_shape = - onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()); + onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()); // post-conditions ASSERT_EQ(inferred_out_shape.rank(), perm.size()); for (int32_t dim = 0; dim < expected.rank(); dim++) @@ -479,8 +479,8 @@ TEST(ShapeInference, BCQFullyConnected) { auto check = [&](Shape &in_shape, Shape &cluster_shape, std::vector cluster, Shape &expected) { - auto actual = onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape, - cluster.data()); + auto actual = + onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape, cluster.data()); ASSERT_EQ(actual.rank(), expected.rank()); for (int32_t dim = 0; dim < expected.rank(); dim++) diff --git a/tests/.clang-format b/tests/.clang-format new file mode 120000 index 0000000..0ff66f3 --- /dev/null +++ b/tests/.clang-format @@ -0,0 +1 @@ +../.clang-format.8 \ No newline at end of file diff --git a/tests/custom_op/FillFrom/FillFrom_runner.cc b/tests/custom_op/FillFrom/FillFrom_runner.cc index 7313086..6b09d5d 100644 --- a/tests/custom_op/FillFrom/FillFrom_runner.cc +++ b/tests/custom_op/FillFrom/FillFrom_runner.cc @@ -87,7 +87,7 @@ std::vector genData(uint64_t size) template static auto findMaxDifference(InIter1 first1, InIter1 last1, InIter2 first2) - -> decltype(*first1 - *first2) + -> decltype(*first1 - *first2) { auto max_difference = std::abs(*first1 - *first2); for (; first1 != last1; ++first1, ++first2) @@ -227,7 +227,7 @@ int main(const int argc, char **argv) const float tolerance = 0.01f; auto max_difference = - findMaxDifference(outputs[0].begin(), outputs[0].end(), std::begin(ref_data)); + findMaxDifference(outputs[0].begin(), outputs[0].end(), std::begin(ref_data)); int exit_code = 0; if (max_difference > tolerance) diff --git a/tests/nnapi/CMakeLists.txt b/tests/nnapi/CMakeLists.txt index b1215d8..67ac90f 100644 --- a/tests/nnapi/CMakeLists.txt +++ b/tests/nnapi/CMakeLists.txt @@ -7,6 +7,16 @@ if (NOT BUILD_ONERT) return() endif(NOT BUILD_ONERT) +# GCC Compiler under 6.2 is not support 
this test build +if (CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.2) + return() +endif() + +if (ANDROID_BOOST_ROOT) + set(BOOST_ROOT ${ANDROID_BOOST_ROOT}) +endif (ANDROID_BOOST_ROOT) + +nnfw_find_package(Boost REQUIRED) nnfw_find_package(GTest) @@ -46,6 +56,7 @@ endif(GENERATE_RUNTIME_NNAPI_TESTS) set(RUNTIME_NNAPI_TEST_SRC_INC ${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/src) target_include_directories(${RUNTIME_NNAPI_TEST} PRIVATE ${RUNTIME_NNAPI_TEST_SRC_INC}) +target_include_directories(${RUNTIME_NNAPI_TEST} PRIVATE ${Boost_INCLUDE_DIRS}) # Define NNTEST_ONLY_PUBLIC_API to avoid android dependency target_compile_definitions(${RUNTIME_NNAPI_TEST} PRIVATE NNTEST_ONLY_PUBLIC_API) diff --git a/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_cl b/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_cl new file mode 100644 index 0000000..4e4d688 --- /dev/null +++ b/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_cl @@ -0,0 +1,305 @@ +GeneratedTests.abs_ +GeneratedTests.abs_dynamic_nnfw +GeneratedTests.add_dynamic_nnfw +GeneratedTests.argmax_3_axis_as_input_nnfw +GeneratedTests.argmax_3_axis_as_input_nnfw_quant8 +GeneratedTests.argmax_dynamic_nnfw +GeneratedTests.batch_matmul_ex_dynamic_nnfw +GeneratedTests.batch_matmul_ex_float_adj_x +GeneratedTests.batch_matmul_ex_float_adj_y +GeneratedTests.batch_matmul_ex_float_batch2 +GeneratedTests.batch_matmul_ex_float_broadcast +GeneratedTests.batch_matmul_ex_float_broadcast2_adj_xy +GeneratedTests.batch_matmul_ex_float_broadcast_adj_x +GeneratedTests.batch_matmul_ex_float_simple +GeneratedTests.broadcast_to_ex_1D_nnfw +GeneratedTests.broadcast_to_ex_2D_nnfw +GeneratedTests.broadcast_to_ex_dynamic_2D_nnfw +GeneratedTests.broadcast_to_ex_dynamic_3D_nnfw +GeneratedTests.cast_dynamic_float32_to_int32_nnfw +GeneratedTests.cast_float16_to_float16 +GeneratedTests.cast_float16_to_float32 +GeneratedTests.cast_float16_to_float32_relaxed +GeneratedTests.cast_float16_to_int32 +GeneratedTests.cast_float16_to_quant8 +GeneratedTests.cast_float16_to_quant8_overflow +GeneratedTests.cast_float32_to_float16 +GeneratedTests.cast_float32_to_float16_relaxed +GeneratedTests.cast_float32_to_int32_nnfw +GeneratedTests.cast_int32_to_float16 +GeneratedTests.cast_quant8_to_float16 +GeneratedTests.concat_dynamic_nnfw +GeneratedTests.conv_dynamic_nnfw +GeneratedTests.conv_float_channels_weights_as_inputs +GeneratedTests.conv_float_channels_weights_as_inputs_relaxed +GeneratedTests.conv_float_large_weights_as_inputs +GeneratedTests.conv_float_large_weights_as_inputs_relaxed +GeneratedTests.conv_float_weights_as_inputs +GeneratedTests.conv_float_weights_as_inputs_relaxed +GeneratedTests.conv_quant8_channels_weights_as_inputs +GeneratedTests.conv_quant8_large_weights_as_inputs +GeneratedTests.conv_quant8_overflow_weights_as_inputs +GeneratedTests.conv_quant8_weights_as_inputs +GeneratedTests.conv2d_dilation_nnfw +GeneratedTests.conv2d_dilation_nnfw_quant8 +GeneratedTests.conv2d_dilation_nnfw_weight_as_input +GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8 +GeneratedTests.conv2d_dilation_nnfw_2 +GeneratedTests.conv2d_dilation_nnfw_quant8_2 +GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2 +GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2 +GeneratedTests.cos_ex_1D_float_nnfw +GeneratedTests.cos_ex_4D_float_nnfw +GeneratedTests.cos_ex_dynamic_nnfw +GeneratedTests.dequantize_v1_2_3d_quant8_symm +GeneratedTests.dequantize_v1_2_4d_quant8_symm +GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim 
+GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim +GeneratedTests.dequantize_v1_2 +GeneratedTests.dequantize_v1_2_zero_sized +GeneratedTests.dequantize_v1_2_zero_sized_float16 +GeneratedTests.div_dynamic_nnfw +GeneratedTests.einsum_ex_float_matmul_2x2_2 +GeneratedTests.einsum_ex_float_matmul_3x2_3 +GeneratedTests.einsum_ex_float_matmul_3x3_4 +GeneratedTests.einsum_ex_float_matmul_4x4_4 +GeneratedTests.einsum_ex_float_matmul_4x4_4_2 +GeneratedTests.equal_dynamic_float_nnfw +GeneratedTests.exp_ +GeneratedTests.exp_dynamic_nnfw +GeneratedTests.expand_dims_dynamic_nnfw_1 +GeneratedTests.expand_dims_dynamic_nnfw_2 +GeneratedTests.fill_ex_1D_float +GeneratedTests.fill_ex_4D_float +GeneratedTests.fill_ex_dynamic_nnfw +GeneratedTests.fully_connected_dynamic_nnfw +GeneratedTests.fully_connected_float_2_weights_as_inputs +GeneratedTests.fusedbatchnorm_ex_dynamic_nnfw +GeneratedTests.fusedbatchnorm_ex_float_fusedbatchnorm_1141 +GeneratedTests.gather_dynamic_nnfw +GeneratedTests.gather_float16 +GeneratedTests.gather_float16_2 +GeneratedTests.gather_float16_3 +GeneratedTests.gather_float16_4 +GeneratedTests.gather_float16_5 +GeneratedTests.gather_float16_6 +GeneratedTests.gather_float16_7 +GeneratedTests.gather_float16_8 +GeneratedTests.greater_dynamic_float_nnfw +GeneratedTests.greater_equal_dynamic_float_nnfw +GeneratedTests.l2_normalization_quant8_nnfw +GeneratedTests.less_dynamic_float_nnfw +GeneratedTests.less_equal_dynamic_float_nnfw +GeneratedTests.log_4D_float_nnfw +GeneratedTests.log_dynamic_nnfw +GeneratedTests.log_softmax_nnfw +GeneratedTests.log_softmax_nnfw_2 +GeneratedTests.log_softmax_nnfw_3 +GeneratedTests.log_softmax_nnfw_4 +GeneratedTests.log_softmax_nnfw_5 +GeneratedTests.log_softmax_nnfw_quant8 +GeneratedTests.logical_not +GeneratedTests.logical_not_1D_nnfw +GeneratedTests.logical_not_4D_nnfw +GeneratedTests.logical_not_dynamic_nnfw +GeneratedTests.logical_or_broadcast +GeneratedTests.logical_or_dynamic_nnfw +GeneratedTests.logistic_dynamic_nnfw +GeneratedTests.lsh_projection +GeneratedTests.lsh_projection_2 +GeneratedTests.lsh_projection_weights_as_inputs +GeneratedTests.lstm +GeneratedTests.lstm2 +GeneratedTests.lstm2_state +GeneratedTests.lstm2_state2 +GeneratedTests.lstm3 +GeneratedTests.lstm3_state +GeneratedTests.lstm3_state2 +GeneratedTests.lstm3_state3 +GeneratedTests.lstm_state +GeneratedTests.lstm_state2 +GeneratedTests.matrix_band_part_ex_4D_float +GeneratedTests.matrix_band_part_ex_dynamic_nnfw +GeneratedTests.maximum_dynamic_nnfw +GeneratedTests.minimum_dynamic_nnfw +GeneratedTests.minimum_int32 +GeneratedTests.mul_dynamic_nnfw +GeneratedTests.neg +GeneratedTests.neg_dynamic_nnfw +GeneratedTests.not_equal_dynamic_float_nnfw +GeneratedTests.one_hot_ex_dynamic_nnfw +GeneratedTests.pack_ex_dynamic_nnfw +GeneratedTests.pad_dynamic_nnfw +GeneratedTests.pad_v2_1_float +GeneratedTests.pad_v2_1_quant8 +GeneratedTests.pad_v2_all_dims +GeneratedTests.pad_v2_all_dims_quant8 +GeneratedTests.pad_v2_low_rank +GeneratedTests.pad_v2_low_rank_quant8 +GeneratedTests.pow_2D_float_nnfw +GeneratedTests.pow_broadcast_float_nnfw +GeneratedTests.pow_broadcast_float_nnfw_2 +GeneratedTests.pow_broadcast_float_nnfw_3 +GeneratedTests.pow_dynamic_nnfw +GeneratedTests.quantize_quant8 +GeneratedTests.quantize_quant8_2 +GeneratedTests.quantize_quant8_3 +GeneratedTests.quantize_quant8_4 +GeneratedTests.quantize_quant8_5 +GeneratedTests.quantize_quant8_6 +GeneratedTests.quantize_quant8_7 +GeneratedTests.quantize_quant8_8 +GeneratedTests.quantize_zero_sized +GeneratedTests.range_ex_float_1 
+GeneratedTests.range_ex_float_1_all_constant_inputs +GeneratedTests.range_ex_float_1_dynamic_nnfw +GeneratedTests.range_ex_float_2 +GeneratedTests.range_ex_float_2_dynamic_nnfw +GeneratedTests.reduce_all +GeneratedTests.reduce_all_2 +GeneratedTests.reduce_all_2D_nnfw +GeneratedTests.reduce_all_3 +GeneratedTests.reduce_all_4D_nnfw +GeneratedTests.reduce_all_dynamic_nnfw +GeneratedTests.reduce_any +GeneratedTests.reduce_any_2 +GeneratedTests.reduce_any_2D_nnfw +GeneratedTests.reduce_any_3 +GeneratedTests.reduce_any_4D_nnfw +GeneratedTests.reduce_mean_dynamic_1_nnfw +GeneratedTests.reduce_mean_dynamic_2_nnfw +GeneratedTests.reduce_min_dynamic_nnfw +GeneratedTests.reduce_prod +GeneratedTests.reduce_prod_2 +GeneratedTests.reduce_prod_2D_float_nnfw +GeneratedTests.reduce_prod_3 +GeneratedTests.reduce_prod_4 +GeneratedTests.reduce_prod_4D_float_nnfw +GeneratedTests.reduce_prod_4D_float_reducing_C_nnfw +GeneratedTests.reduce_prod_4D_float_reducing_HW_nnfw +GeneratedTests.reduce_prod_dynamic_1_nnfw +GeneratedTests.reduce_prod_dynamic_2_nnfw +GeneratedTests.reduce_sum_dynamic_1_nnfw +GeneratedTests.reduce_sum_dynamic_2_nnfw +GeneratedTests.reshape_dynamic_nnfw +GeneratedTests.resize_nearest_neighbor_shape_nchw_2 +GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_2 +GeneratedTests.resize_nearest_neighbor_scale_nchw_2 +GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_2 +GeneratedTests.resize_nearest_neighbor_shape_nchw_4 +GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_4 +GeneratedTests.resize_nearest_neighbor_scale_nchw_4 +GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_4 +GeneratedTests.resize_nearest_neighbor_shape_nchw_7 +GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_7 +GeneratedTests.resize_nearest_neighbor_scale_nchw_7 +GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_7 +GeneratedTests.resize_nearest_neighbor_shape_nchw_8 +GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_8 +GeneratedTests.resize_nearest_neighbor_scale_nchw_8 +GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_8 +GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc +GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8 +GeneratedTests.resize_nearest_neighbor_zero_sized_nchw +GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8 +GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_2 +GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8_2 +GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_2 +GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8_2 +GeneratedTests.reverse_ex_1d +GeneratedTests.reverse_ex_3d +GeneratedTests.reverse_ex_dynamic_1D +GeneratedTests.reverse_ex_dynamic_3D +GeneratedTests.rnn +GeneratedTests.rnn_state +GeneratedTests.round_ex_1D_float +GeneratedTests.round_ex_4D_float +GeneratedTests.round_ex_dynamic_nnfw +GeneratedTests.rsqrt +GeneratedTests.rsqrt_dynamic_nnfw +GeneratedTests.select_v1_2_five_dim +GeneratedTests.select_v1_2_five_dim_quant8 +GeneratedTests.select_v1_2_one_dim +GeneratedTests.select_v1_2_one_dim_quant8 +GeneratedTests.select_v1_2_two_dim +GeneratedTests.select_v1_2_two_dim_quant8 +GeneratedTests.select_v2_ex_broadcast_1d_single_value +GeneratedTests.select_v2_ex_broadcast_2d_one +GeneratedTests.select_v2_ex_broadcast_2d_two +GeneratedTests.select_v2_ex_broadcast_2d_two_dynamic_nnfw +GeneratedTests.select_v2_ex_broadcast_less_4d +GeneratedTests.select_v2_ex_float +GeneratedTests.shape_ex_dynamic_nnfw +GeneratedTests.sin_1D_float_nnfw +GeneratedTests.sin_4D_float_nnfw 
+GeneratedTests.sin_dynamic_nnfw +GeneratedTests.slice +GeneratedTests.slice_2 +GeneratedTests.slice_3 +GeneratedTests.slice_4 +GeneratedTests.slice_5 +GeneratedTests.slice_6 +GeneratedTests.slice_7 +GeneratedTests.slice_8 +GeneratedTests.slice_dynamic_nnfw +GeneratedTests.slice_zero_sized +GeneratedTests.slice_zero_sized_quant8 +GeneratedTests.softmax_dynamic_nnfw +GeneratedTests.space_to_batch_dynamic_float_nnfw +GeneratedTests.split_dynamic_float_nnfw +GeneratedTests.split_float_5_axis_as_input_nnfw +GeneratedTests.split_v_ex_1D_float_1_nnfw +GeneratedTests.split_v_ex_1D_float_2_nnfw +GeneratedTests.split_v_ex_1D_int32_nnfw +GeneratedTests.split_v_ex_4D_float_1_nnfw +GeneratedTests.split_v_ex_4D_float_2_nnfw +GeneratedTests.split_v_ex_4D_float_3_nnfw +GeneratedTests.split_v_ex_4D_float_4_nnfw +GeneratedTests.split_v_ex_4D_int32_1_nnfw +GeneratedTests.split_v_ex_4D_int32_2_nnfw +GeneratedTests.split_v_ex_4D_int32_3_nnfw +GeneratedTests.split_v_ex_4D_int32_4_nnfw +GeneratedTests.sqrt_ +GeneratedTests.squared_difference_ex_dynamic_nnfw +GeneratedTests.squeeze_dynamic_float_nnfw +GeneratedTests.stateless_random_uniform_ex_nnfw +GeneratedTests.strided_slice_dynamic_nnfw +GeneratedTests.sub_dynamic_nnfw +GeneratedTests.sub_v1_2_zero_sized +GeneratedTests.sub_v1_2_zero_sized_quant8 +GeneratedTests.svdf +GeneratedTests.svdf2 +GeneratedTests.svdf_bias_present +GeneratedTests.svdf_state +GeneratedTests.tanh_v1_2 +GeneratedTests.tanh_v1_2_zero_sized +GeneratedTests.tanh_v1_2_zero_sized_quant8 +GeneratedTests.tanh_v1_dynamic_nnfw +GeneratedTests.tile_1 +GeneratedTests.tile_1_dynamic_float32_nnfw +GeneratedTests.tile_1_float16 +GeneratedTests.tile_1_quant8 +GeneratedTests.tile_2 +GeneratedTests.tile_2_dynamic_float32_nnfw +GeneratedTests.tile_2_float16 +GeneratedTests.tile_2_int32 +GeneratedTests.tile_2_quant8 +GeneratedTests.tile_3 +GeneratedTests.tile_3_dynamic_float32_nnfw +GeneratedTests.tile_3_float16 +GeneratedTests.tile_3_int32 +GeneratedTests.tile_3_quant8 +GeneratedTests.transpose_dynamic_nnfw +GeneratedTests.transpose_float_1_perms_as_input_nnfw +GeneratedTests.transpose_v1_2_zero_sized +GeneratedTests.transpose_v1_2_zero_sized_quant8 +GeneratedTests.unidirectional_sequence_lstm_1step +GeneratedTests.unidirectional_sequence_lstm_batch_major_norm_peephole_projection +GeneratedTests.unidirectional_sequence_lstm_batch_major_peephole_projection_bias +GeneratedTests.unidirectional_sequence_lstm_dynamic_nnfw +GeneratedTests.unidirectional_sequence_lstm_layer_norm_cifg_peephole +GeneratedTests.unidirectional_sequence_lstm_norm_peephole_projection +GeneratedTests.unpack_ex_dynamic_nnfw +GeneratedTests.zeros_like_ex_2D_float +GeneratedTests.zeros_like_ex_4D_int32 +GeneratedTests.zeros_like_ex_dynamic_float32 diff --git a/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_neon b/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_neon new file mode 100644 index 0000000..d443eba --- /dev/null +++ b/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_neon @@ -0,0 +1,376 @@ +GeneratedTests.abs_ +GeneratedTests.abs_dynamic_nnfw +GeneratedTests.add_dynamic_nnfw +GeneratedTests.argmax_3_axis_as_input_nnfw +GeneratedTests.argmax_3_axis_as_input_nnfw_quant8 +GeneratedTests.argmax_dynamic_nnfw +GeneratedTests.batch_matmul_ex_dynamic_nnfw +GeneratedTests.batch_matmul_ex_float_adj_x +GeneratedTests.batch_matmul_ex_float_adj_y +GeneratedTests.batch_matmul_ex_float_batch2 +GeneratedTests.batch_matmul_ex_float_broadcast +GeneratedTests.batch_matmul_ex_float_broadcast2_adj_xy 
+GeneratedTests.batch_matmul_ex_float_broadcast_adj_x +GeneratedTests.batch_matmul_ex_float_simple +GeneratedTests.broadcast_to_ex_1D_nnfw +GeneratedTests.broadcast_to_ex_2D_nnfw +GeneratedTests.broadcast_to_ex_dynamic_2D_nnfw +GeneratedTests.broadcast_to_ex_dynamic_3D_nnfw +GeneratedTests.cast_dynamic_float32_to_int32_nnfw +GeneratedTests.cast_float16_to_float16 +GeneratedTests.cast_float16_to_float32 +GeneratedTests.cast_float16_to_float32_relaxed +GeneratedTests.cast_float16_to_int32 +GeneratedTests.cast_float16_to_quant8 +GeneratedTests.cast_float16_to_quant8_overflow +GeneratedTests.cast_float32_to_float16 +GeneratedTests.cast_float32_to_float16_relaxed +GeneratedTests.cast_int32_to_float16 +GeneratedTests.cast_quant8_to_float16 +GeneratedTests.concat_dynamic_nnfw +GeneratedTests.conv_dynamic_nnfw +GeneratedTests.conv_float_channels_weights_as_inputs +GeneratedTests.conv_float_channels_weights_as_inputs_relaxed +GeneratedTests.conv_float_large_weights_as_inputs +GeneratedTests.conv_float_large_weights_as_inputs_relaxed +GeneratedTests.conv_float_weights_as_inputs +GeneratedTests.conv_float_weights_as_inputs_relaxed +GeneratedTests.conv_quant8_channels_weights_as_inputs +GeneratedTests.conv_quant8_large_weights_as_inputs +GeneratedTests.conv_quant8_overflow_weights_as_inputs +GeneratedTests.conv_quant8_weights_as_inputs +GeneratedTests.conv2d_dilation_nnfw +GeneratedTests.conv2d_dilation_nnfw_quant8 +GeneratedTests.conv2d_dilation_nnfw_weight_as_input +GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8 +GeneratedTests.conv2d_dilation_nnfw_2 +GeneratedTests.conv2d_dilation_nnfw_quant8_2 +GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2 +GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2 +GeneratedTests.cos_ex_1D_float_nnfw +GeneratedTests.cos_ex_4D_float_nnfw +GeneratedTests.cos_ex_dynamic_nnfw +GeneratedTests.dequantize_v1_2_3d_quant8_symm +GeneratedTests.dequantize_v1_2_4d_quant8_symm +GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim +GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim +GeneratedTests.dequantize_v1_2 +GeneratedTests.dequantize_v1_2_zero_sized +GeneratedTests.dequantize_v1_2_zero_sized_float16 +GeneratedTests.div_dynamic_nnfw +GeneratedTests.einsum_ex_float_matmul_2x2_2 +GeneratedTests.einsum_ex_float_matmul_3x2_3 +GeneratedTests.einsum_ex_float_matmul_3x3_4 +GeneratedTests.einsum_ex_float_matmul_4x4_4 +GeneratedTests.einsum_ex_float_matmul_4x4_4_2 +GeneratedTests.equal_boolean +GeneratedTests.equal_dynamic_float_nnfw +GeneratedTests.exp_ +GeneratedTests.exp_2D_float_nnfw +GeneratedTests.exp_dynamic_nnfw +GeneratedTests.expand_dims_dynamic_nnfw_1 +GeneratedTests.expand_dims_dynamic_nnfw_2 +GeneratedTests.fill_ex_1D_float +GeneratedTests.fill_ex_4D_float +GeneratedTests.fill_ex_dynamic_nnfw +GeneratedTests.fully_connected_dynamic_nnfw +GeneratedTests.fully_connected_hybrid_1_nnfw +GeneratedTests.fusedbatchnorm_ex_dynamic_nnfw +GeneratedTests.fusedbatchnorm_ex_float_fusedbatchnorm_1141 +GeneratedTests.gather_dynamic_nnfw +GeneratedTests.gather_float16 +GeneratedTests.gather_float16_2 +GeneratedTests.gather_float16_3 +GeneratedTests.gather_float16_4 +GeneratedTests.gather_float16_5 +GeneratedTests.gather_float16_6 +GeneratedTests.gather_float16_7 +GeneratedTests.gather_float16_8 +GeneratedTests.greater_dynamic_float_nnfw +GeneratedTests.greater_equal_boolean +GeneratedTests.greater_equal_dynamic_float_nnfw +GeneratedTests.l2_normalization_quant8_nnfw +GeneratedTests.less_boolean +GeneratedTests.less_dynamic_float_nnfw 
+GeneratedTests.less_equal_dynamic_float_nnfw +GeneratedTests.log_4D_float_nnfw +GeneratedTests.log_dynamic_nnfw +GeneratedTests.log_softmax_nnfw +GeneratedTests.log_softmax_nnfw_2 +GeneratedTests.log_softmax_nnfw_3 +GeneratedTests.log_softmax_nnfw_4 +GeneratedTests.log_softmax_nnfw_5 +GeneratedTests.log_softmax_nnfw_quant8 +GeneratedTests.logical_not +GeneratedTests.logical_not_1D_nnfw +GeneratedTests.logical_not_4D_nnfw +GeneratedTests.logical_not_dynamic_nnfw +GeneratedTests.logical_or_dynamic_nnfw +GeneratedTests.logistic_dynamic_nnfw +GeneratedTests.lsh_projection +GeneratedTests.lsh_projection_2 +GeneratedTests.lsh_projection_weights_as_inputs +GeneratedTests.lstm +GeneratedTests.lstm2 +GeneratedTests.lstm2_state +GeneratedTests.lstm2_state2 +GeneratedTests.lstm3 +GeneratedTests.lstm3_state +GeneratedTests.lstm3_state2 +GeneratedTests.lstm3_state3 +GeneratedTests.lstm_state +GeneratedTests.lstm_state2 +GeneratedTests.matrix_band_part_ex_4D_float +GeneratedTests.matrix_band_part_ex_dynamic_nnfw +GeneratedTests.maximum_dynamic_nnfw +GeneratedTests.minimum_dynamic_nnfw +GeneratedTests.mul_dynamic_nnfw +GeneratedTests.neg +GeneratedTests.neg_dynamic_nnfw +GeneratedTests.not_equal_boolean +GeneratedTests.not_equal_dynamic_float_nnfw +GeneratedTests.one_hot_ex_dynamic_nnfw +GeneratedTests.pack_ex_dynamic_nnfw +GeneratedTests.pad_dynamic_nnfw +GeneratedTests.pad_v2_1_float +GeneratedTests.pad_v2_1_quant8 +GeneratedTests.pad_v2_all_dims +GeneratedTests.pad_v2_all_dims_quant8 +GeneratedTests.pad_v2_low_rank +GeneratedTests.pad_v2_low_rank_quant8 +GeneratedTests.pow_2D_float_nnfw +GeneratedTests.pow_broadcast_float_nnfw +GeneratedTests.pow_broadcast_float_nnfw_2 +GeneratedTests.pow_broadcast_float_nnfw_3 +GeneratedTests.pow_dynamic_nnfw +GeneratedTests.quantize_quant8 +GeneratedTests.quantize_quant8_2 +GeneratedTests.quantize_quant8_3 +GeneratedTests.quantize_quant8_4 +GeneratedTests.quantize_quant8_5 +GeneratedTests.quantize_quant8_6 +GeneratedTests.quantize_quant8_7 +GeneratedTests.quantize_quant8_8 +GeneratedTests.quantize_zero_sized +GeneratedTests.range_ex_float_1 +GeneratedTests.range_ex_float_1_all_constant_inputs +GeneratedTests.range_ex_float_1_dynamic_nnfw +GeneratedTests.range_ex_float_2 +GeneratedTests.range_ex_float_2_dynamic_nnfw +GeneratedTests.reduce_all +GeneratedTests.reduce_all_2 +GeneratedTests.reduce_all_2D_nnfw +GeneratedTests.reduce_all_3 +GeneratedTests.reduce_all_4D_nnfw +GeneratedTests.reduce_all_dynamic_nnfw +GeneratedTests.reduce_any +GeneratedTests.reduce_any_2 +GeneratedTests.reduce_any_2D_nnfw +GeneratedTests.reduce_any_3 +GeneratedTests.reduce_any_4D_nnfw +GeneratedTests.reduce_max_2D_int32_nnfw +GeneratedTests.reduce_max_quant8 +GeneratedTests.reduce_mean_dynamic_1_nnfw +GeneratedTests.reduce_mean_dynamic_2_nnfw +GeneratedTests.reduce_min_dynamic_nnfw +GeneratedTests.reduce_prod +GeneratedTests.reduce_prod_2 +GeneratedTests.reduce_prod_2D_float_nnfw +GeneratedTests.reduce_prod_3 +GeneratedTests.reduce_prod_4 +GeneratedTests.reduce_prod_4D_float_nnfw +GeneratedTests.reduce_prod_4D_float_reducing_C_nnfw +GeneratedTests.reduce_prod_4D_float_reducing_HW_nnfw +GeneratedTests.reduce_prod_dynamic_1_nnfw +GeneratedTests.reduce_prod_dynamic_2_nnfw +GeneratedTests.reduce_sum_dynamic_1_nnfw +GeneratedTests.reduce_sum_dynamic_2_nnfw +GeneratedTests.reshape_dynamic_nnfw +GeneratedTests.resize_nearest_neighbor_shape_nhwc +GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8 +GeneratedTests.resize_nearest_neighbor_shape_nchw 
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8 +GeneratedTests.resize_nearest_neighbor_scale_nhwc +GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8 +GeneratedTests.resize_nearest_neighbor_scale_nchw +GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_2 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_2 +GeneratedTests.resize_nearest_neighbor_shape_nchw_2 +GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_2 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_2 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_2 +GeneratedTests.resize_nearest_neighbor_scale_nchw_2 +GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_2 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_3 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_3 +GeneratedTests.resize_nearest_neighbor_shape_nchw_3 +GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_3 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_3 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_3 +GeneratedTests.resize_nearest_neighbor_scale_nchw_3 +GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_3 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_4 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_4 +GeneratedTests.resize_nearest_neighbor_shape_nchw_4 +GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_4 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_4 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_4 +GeneratedTests.resize_nearest_neighbor_scale_nchw_4 +GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_4 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_5 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_5 +GeneratedTests.resize_nearest_neighbor_shape_nchw_5 +GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_5 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_5 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_5 +GeneratedTests.resize_nearest_neighbor_scale_nchw_5 +GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_5 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_6 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_6 +GeneratedTests.resize_nearest_neighbor_shape_nchw_6 +GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_6 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_6 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_6 +GeneratedTests.resize_nearest_neighbor_scale_nchw_6 +GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_6 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_7 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_7 +GeneratedTests.resize_nearest_neighbor_shape_nchw_7 +GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_7 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_7 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_7 +GeneratedTests.resize_nearest_neighbor_scale_nchw_7 +GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_7 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_8 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_8 +GeneratedTests.resize_nearest_neighbor_shape_nchw_8 +GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_8 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_8 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_8 +GeneratedTests.resize_nearest_neighbor_scale_nchw_8 +GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_8 +GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc 
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8 +GeneratedTests.resize_nearest_neighbor_zero_sized_nchw +GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8 +GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_2 +GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8_2 +GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_2 +GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8_2 +GeneratedTests.reverse_ex_1d +GeneratedTests.reverse_ex_3d +GeneratedTests.reverse_ex_dynamic_1D +GeneratedTests.reverse_ex_dynamic_3D +GeneratedTests.rnn +GeneratedTests.rnn_state +GeneratedTests.round_ex_1D_float +GeneratedTests.round_ex_4D_float +GeneratedTests.round_ex_dynamic_nnfw +GeneratedTests.rsqrt +GeneratedTests.rsqrt_dynamic_nnfw +GeneratedTests.select_v1_2_five_dim +GeneratedTests.select_v1_2_five_dim_quant8 +GeneratedTests.select_v1_2_one_dim +GeneratedTests.select_v1_2_one_dim_quant8 +GeneratedTests.select_v1_2_two_dim +GeneratedTests.select_v1_2_two_dim_quant8 +GeneratedTests.select_v2_ex_broadcast_1d_single_value +GeneratedTests.select_v2_ex_broadcast_2d_one +GeneratedTests.select_v2_ex_broadcast_2d_two +GeneratedTests.select_v2_ex_broadcast_2d_two_dynamic_nnfw +GeneratedTests.select_v2_ex_broadcast_less_4d +GeneratedTests.select_v2_ex_float +GeneratedTests.shape_ex_dynamic_nnfw +GeneratedTests.sin_1D_float_nnfw +GeneratedTests.sin_4D_float_nnfw +GeneratedTests.sin_dynamic_nnfw +GeneratedTests.slice +GeneratedTests.slice_2 +GeneratedTests.slice_3 +GeneratedTests.slice_4 +GeneratedTests.slice_5 +GeneratedTests.slice_6 +GeneratedTests.slice_7 +GeneratedTests.slice_8 +GeneratedTests.slice_dynamic_nnfw +GeneratedTests.slice_zero_sized +GeneratedTests.slice_zero_sized_quant8 +GeneratedTests.softmax_dynamic_nnfw +GeneratedTests.space_to_batch_float_1_nnfw +GeneratedTests.space_to_batch_float_2 +GeneratedTests.space_to_batch_float_3 +GeneratedTests.space_to_batch_dynamic_float_nnfw +GeneratedTests.space_to_batch_quant8_1_nnfw +GeneratedTests.space_to_batch_quant8_2 +GeneratedTests.space_to_batch_quant8_2_nnfw +GeneratedTests.space_to_batch_quant8_3 +GeneratedTests.split_dynamic_float_nnfw +GeneratedTests.split_float_5_axis_as_input_nnfw +GeneratedTests.split_v_ex_1D_float_1_nnfw +GeneratedTests.split_v_ex_1D_float_2_nnfw +GeneratedTests.split_v_ex_1D_int32_nnfw +GeneratedTests.split_v_ex_4D_float_1_nnfw +GeneratedTests.split_v_ex_4D_float_2_nnfw +GeneratedTests.split_v_ex_4D_float_3_nnfw +GeneratedTests.split_v_ex_4D_float_4_nnfw +GeneratedTests.split_v_ex_4D_int32_1_nnfw +GeneratedTests.split_v_ex_4D_int32_2_nnfw +GeneratedTests.split_v_ex_4D_int32_3_nnfw +GeneratedTests.split_v_ex_4D_int32_4_nnfw +GeneratedTests.sqrt_ +GeneratedTests.squared_difference_ex_dynamic_nnfw +GeneratedTests.squeeze_dynamic_float_nnfw +GeneratedTests.stateless_random_uniform_ex_nnfw +GeneratedTests.strided_slice_dynamic_nnfw +GeneratedTests.sub_dynamic_nnfw +GeneratedTests.sub_v1_2_zero_sized +GeneratedTests.sub_v1_2_zero_sized_quant8 +GeneratedTests.svdf +GeneratedTests.svdf2 +GeneratedTests.svdf_bias_present +GeneratedTests.svdf_state +GeneratedTests.tanh_v1_2 +GeneratedTests.tanh_v1_2_zero_sized +GeneratedTests.tanh_v1_2_zero_sized_quant8 +GeneratedTests.tanh_v1_dynamic_nnfw +GeneratedTests.tile_1 +GeneratedTests.tile_1_dynamic_float32_nnfw +GeneratedTests.tile_1_float16 +GeneratedTests.tile_1_quant8 +GeneratedTests.tile_2 +GeneratedTests.tile_2_dynamic_float32_nnfw +GeneratedTests.tile_2_float16 +GeneratedTests.tile_2_int32 +GeneratedTests.tile_2_quant8 +GeneratedTests.tile_3 
+GeneratedTests.tile_3_dynamic_float32_nnfw +GeneratedTests.tile_3_float16 +GeneratedTests.tile_3_int32 +GeneratedTests.tile_3_quant8 +GeneratedTests.topk_v2 +GeneratedTests.topk_v2_1D_float_nnfw +GeneratedTests.topk_v2_1D_int32_nnfw +GeneratedTests.topk_v2_1D_quant8_nnfw +GeneratedTests.topk_v2_2 +GeneratedTests.topk_v2_2D_float_nnfw +GeneratedTests.topk_v2_2D_int32_nnfw +GeneratedTests.topk_v2_2D_quant8_nnfw +GeneratedTests.topk_v2_3 +GeneratedTests.topk_v2_4 +GeneratedTests.topk_v2_5 +GeneratedTests.topk_v2_6 +GeneratedTests.transpose_dynamic_nnfw +GeneratedTests.transpose_float_1_perms_as_input_nnfw +GeneratedTests.transpose_v1_2_zero_sized +GeneratedTests.transpose_v1_2_zero_sized_quant8 +GeneratedTests.unidirectional_sequence_lstm_1step +GeneratedTests.unidirectional_sequence_lstm_batch_major_norm_peephole_projection +GeneratedTests.unidirectional_sequence_lstm_batch_major_peephole_projection_bias +GeneratedTests.unidirectional_sequence_lstm_dynamic_nnfw +GeneratedTests.unidirectional_sequence_lstm_layer_norm_cifg_peephole +GeneratedTests.unidirectional_sequence_lstm_norm_peephole_projection +GeneratedTests.unpack_ex_dynamic_nnfw +GeneratedTests.zeros_like_ex_2D_float +GeneratedTests.zeros_like_ex_4D_int32 +GeneratedTests.zeros_like_ex_dynamic_float32 diff --git a/tests/nnapi/nnapi_gtest.skip.aarch64-android.cpu b/tests/nnapi/nnapi_gtest.skip.aarch64-android.cpu new file mode 100644 index 0000000..a64ffca --- /dev/null +++ b/tests/nnapi/nnapi_gtest.skip.aarch64-android.cpu @@ -0,0 +1,231 @@ +GeneratedTests.abs_ +GeneratedTests.cast_float16_to_float16 +GeneratedTests.cast_float16_to_float32 +GeneratedTests.cast_float16_to_float32_relaxed +GeneratedTests.cast_float16_to_int32 +GeneratedTests.cast_float16_to_quant8 +GeneratedTests.cast_float16_to_quant8_overflow +GeneratedTests.cast_float32_to_float16 +GeneratedTests.cast_float32_to_float16_relaxed +GeneratedTests.cast_float32_to_quant8_overflow +GeneratedTests.cast_float32_to_quant8_overflow_relaxed +GeneratedTests.cast_int32_to_float16 +GeneratedTests.cast_int32_to_quant8_overflow +GeneratedTests.cast_quant8_to_float16 +GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim +GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim +GeneratedTests.dequantize_v1_2 +GeneratedTests.dequantize_v1_2_zero_sized +GeneratedTests.dequantize_v1_2_zero_sized_float16 +GeneratedTests.embedding_lookup +GeneratedTests.embedding_lookup_2d_nnfw +GeneratedTests.embedding_lookup_4d_nnfw +GeneratedTests.equal_broadcast_float_nnfw +GeneratedTests.exp_ +GeneratedTests.floor_ +GeneratedTests.gather_float16 +GeneratedTests.gather_float16_2 +GeneratedTests.gather_float16_3 +GeneratedTests.gather_float16_4 +GeneratedTests.gather_float16_5 +GeneratedTests.gather_float16_6 +GeneratedTests.gather_float16_7 +GeneratedTests.gather_float16_8 +GeneratedTests.hashtable_lookup_float +GeneratedTests.hashtable_lookup_float_4D_nnfw +GeneratedTests.hashtable_lookup_quant8 +GeneratedTests.l2_pool_float +GeneratedTests.l2_pool_float_2 +GeneratedTests.l2_pool_float_large +GeneratedTests.local_response_norm_float_1 +GeneratedTests.local_response_norm_float_2 +GeneratedTests.local_response_norm_float_3 +GeneratedTests.local_response_norm_float_4 +GeneratedTests.logical_not +GeneratedTests.lsh_projection +GeneratedTests.lsh_projection_2 +GeneratedTests.lsh_projection_weights_as_inputs +GeneratedTests.lstm2 +GeneratedTests.lstm2_state +GeneratedTests.lstm2_state2 +GeneratedTests.maximum_broadcast_quant8 +GeneratedTests.maximum_overflow +GeneratedTests.maximum_simple_quant8 
+GeneratedTests.minimum_broadcast_quant8 +GeneratedTests.minimum_overflow +GeneratedTests.minimum_simple_quant8 +GeneratedTests.neg +GeneratedTests.neg_3D_int_nnfw +GeneratedTests.neg_4D_int_nnfw +GeneratedTests.prelu +GeneratedTests.prelu_broadcast_float_1_nnfw +GeneratedTests.prelu_broadcast_quant8_1_nnfw +GeneratedTests.prelu_float_1_nnfw +GeneratedTests.prelu_quant8 +GeneratedTests.prelu_quant8_1_nnfw +GeneratedTests.prelu_quant8_2 +GeneratedTests.prelu_quant8_3 +GeneratedTests.prelu_quant8_4 +GeneratedTests.prelu_weight_as_input +GeneratedTests.prelu_weight_as_input_quant8 +GeneratedTests.prelu_weight_as_input_quant8_2 +GeneratedTests.prelu_weight_as_input_quant8_3 +GeneratedTests.prelu_weight_as_input_quant8_4 +GeneratedTests.quantize_quant8_5 +GeneratedTests.quantize_quant8_6 +GeneratedTests.quantize_quant8_7 +GeneratedTests.quantize_quant8_8 +GeneratedTests.quantize_zero_sized +GeneratedTests.reduce_max_quant8 +GeneratedTests.reduce_max_quant8_1_nnfw +GeneratedTests.reduce_max_quant8_2 +GeneratedTests.reduce_max_quant8_2_nnfw +GeneratedTests.reduce_max_quant8_3 +GeneratedTests.reduce_max_quant8_4 +GeneratedTests.reduce_min_quant8 +GeneratedTests.reduce_min_quant8_2 +GeneratedTests.reduce_min_quant8_3 +GeneratedTests.reduce_min_quant8_4 +GeneratedTests.relu1_float_1 +GeneratedTests.relu1_float_2 +GeneratedTests.relu1_quant8_1 +GeneratedTests.relu1_quant8_2 +GeneratedTests.relu6_quant8_1 +GeneratedTests.relu6_quant8_2 +GeneratedTests.relu_quant8_1 +GeneratedTests.relu_quant8_2 +GeneratedTests.resize_nearest_neighbor_shape_nhwc +GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8 +GeneratedTests.resize_nearest_neighbor_shape_nchw +GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8 +GeneratedTests.resize_nearest_neighbor_scale_nhwc +GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8 +GeneratedTests.resize_nearest_neighbor_scale_nchw +GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_2 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_2 +GeneratedTests.resize_nearest_neighbor_shape_nchw_2 +GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_2 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_2 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_2 +GeneratedTests.resize_nearest_neighbor_scale_nchw_2 +GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_2 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_3 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_3 +GeneratedTests.resize_nearest_neighbor_shape_nchw_3 +GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_3 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_3 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_3 +GeneratedTests.resize_nearest_neighbor_scale_nchw_3 +GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_3 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_4 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_4 +GeneratedTests.resize_nearest_neighbor_shape_nchw_4 +GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_4 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_4 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_4 +GeneratedTests.resize_nearest_neighbor_scale_nchw_4 +GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_4 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_5 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_5 +GeneratedTests.resize_nearest_neighbor_shape_nchw_5 +GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_5 
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_5 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_5 +GeneratedTests.resize_nearest_neighbor_scale_nchw_5 +GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_5 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_6 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_6 +GeneratedTests.resize_nearest_neighbor_shape_nchw_6 +GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_6 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_6 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_6 +GeneratedTests.resize_nearest_neighbor_scale_nchw_6 +GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_6 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_7 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_7 +GeneratedTests.resize_nearest_neighbor_shape_nchw_7 +GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_7 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_7 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_7 +GeneratedTests.resize_nearest_neighbor_scale_nchw_7 +GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_7 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_8 +GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_8 +GeneratedTests.resize_nearest_neighbor_shape_nchw_8 +GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_8 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_8 +GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_8 +GeneratedTests.resize_nearest_neighbor_scale_nchw_8 +GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_8 +GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc +GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8 +GeneratedTests.resize_nearest_neighbor_zero_sized_nchw +GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8 +GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_2 +GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8_2 +GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_2 +GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8_2 +GeneratedTests.rnn +GeneratedTests.rnn_state +GeneratedTests.rsqrt +GeneratedTests.select_v1_2_five_dim +GeneratedTests.select_v1_2_five_dim_quant8 +GeneratedTests.select_v1_2_one_dim_quant8 +GeneratedTests.select_v1_2_two_dim_quant8 +GeneratedTests.slice_5 +GeneratedTests.slice_6 +GeneratedTests.slice_8 +GeneratedTests.slice_zero_sized +GeneratedTests.slice_zero_sized_quant8 +GeneratedTests.sqrt_ +GeneratedTests.sqrt_1D_float_nnfw +GeneratedTests.sqrt_2D_float_nnfw +GeneratedTests.sqrt_3D_float_nnfw +GeneratedTests.sqrt_4D_float_nnfw +GeneratedTests.strided_slice_qaunt8_10 +GeneratedTests.strided_slice_qaunt8_11 +GeneratedTests.strided_slice_quant8_1 +GeneratedTests.strided_slice_quant8_2 +GeneratedTests.strided_slice_quant8_3 +GeneratedTests.strided_slice_quant8_4 +GeneratedTests.strided_slice_quant8_5 +GeneratedTests.strided_slice_quant8_6 +GeneratedTests.strided_slice_quant8_7 +GeneratedTests.strided_slice_quant8_8 +GeneratedTests.strided_slice_quant8_9 +GeneratedTests.sub_v1_2_zero_sized +GeneratedTests.sub_v1_2_zero_sized_quant8 +GeneratedTests.svdf +GeneratedTests.svdf2 +GeneratedTests.svdf_bias_present +GeneratedTests.svdf_state +GeneratedTests.tanh_v1_2 +GeneratedTests.tanh_v1_2_zero_sized +GeneratedTests.tanh_v1_2_zero_sized_quant8 +GeneratedTests.tile_1_float16 +GeneratedTests.tile_1_quant8 +GeneratedTests.tile_2_float16 +GeneratedTests.tile_2_int32 +GeneratedTests.tile_2_quant8 +GeneratedTests.tile_3_float16 +GeneratedTests.tile_3_int32 
+GeneratedTests.tile_3_quant8 +GeneratedTests.topk_v2 +GeneratedTests.topk_v2_1D_float_nnfw +GeneratedTests.topk_v2_1D_int32_nnfw +GeneratedTests.topk_v2_1D_quant8_nnfw +GeneratedTests.topk_v2_2 +GeneratedTests.topk_v2_2D_float_nnfw +GeneratedTests.topk_v2_2D_int32_nnfw +GeneratedTests.topk_v2_2D_quant8_nnfw +GeneratedTests.topk_v2_3 +GeneratedTests.topk_v2_4 +GeneratedTests.topk_v2_5 +GeneratedTests.topk_v2_6 +GeneratedTests.transpose_conv_ex_float_1 +GeneratedTests.transpose_conv_ex_float_2 +GeneratedTests.transpose_conv_ex_float_3 +GeneratedTests.transpose_conv_ex_float_4 +GeneratedTests.transpose_v1_2_zero_sized +GeneratedTests.transpose_v1_2_zero_sized_quant8 diff --git a/tests/nnapi/nnapi_gtest.skip.aarch64-linux.cpu b/tests/nnapi/nnapi_gtest.skip.aarch64-linux.cpu index 8d5428a..a64ffca 100644 --- a/tests/nnapi/nnapi_gtest.skip.aarch64-linux.cpu +++ b/tests/nnapi/nnapi_gtest.skip.aarch64-linux.cpu @@ -12,11 +12,6 @@ GeneratedTests.cast_float32_to_quant8_overflow_relaxed GeneratedTests.cast_int32_to_float16 GeneratedTests.cast_int32_to_quant8_overflow GeneratedTests.cast_quant8_to_float16 -GeneratedTests.depth_to_space_float_1 -GeneratedTests.depth_to_space_float_2 -GeneratedTests.depth_to_space_float_3 -GeneratedTests.depth_to_space_quant8_1 -GeneratedTests.depth_to_space_quant8_2 GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim GeneratedTests.dequantize_v1_2 @@ -46,14 +41,6 @@ GeneratedTests.local_response_norm_float_1 GeneratedTests.local_response_norm_float_2 GeneratedTests.local_response_norm_float_3 GeneratedTests.local_response_norm_float_4 -GeneratedTests.logical_and_1D_nnfw -GeneratedTests.logical_and_2D_nnfw -GeneratedTests.logical_and_3D_nnfw -GeneratedTests.logical_and_4D_nnfw -GeneratedTests.logical_and_broadcast -GeneratedTests.logical_and_broadcast_4D_2D_nnfw -GeneratedTests.logical_and_broadcast_nnfw -GeneratedTests.logical_and_simple GeneratedTests.logical_not GeneratedTests.lsh_projection GeneratedTests.lsh_projection_2 diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.cpu b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.cpu index 8d5428a..a64ffca 100644 --- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.cpu +++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.cpu @@ -12,11 +12,6 @@ GeneratedTests.cast_float32_to_quant8_overflow_relaxed GeneratedTests.cast_int32_to_float16 GeneratedTests.cast_int32_to_quant8_overflow GeneratedTests.cast_quant8_to_float16 -GeneratedTests.depth_to_space_float_1 -GeneratedTests.depth_to_space_float_2 -GeneratedTests.depth_to_space_float_3 -GeneratedTests.depth_to_space_quant8_1 -GeneratedTests.depth_to_space_quant8_2 GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim GeneratedTests.dequantize_v1_2 @@ -46,14 +41,6 @@ GeneratedTests.local_response_norm_float_1 GeneratedTests.local_response_norm_float_2 GeneratedTests.local_response_norm_float_3 GeneratedTests.local_response_norm_float_4 -GeneratedTests.logical_and_1D_nnfw -GeneratedTests.logical_and_2D_nnfw -GeneratedTests.logical_and_3D_nnfw -GeneratedTests.logical_and_4D_nnfw -GeneratedTests.logical_and_broadcast -GeneratedTests.logical_and_broadcast_4D_2D_nnfw -GeneratedTests.logical_and_broadcast_nnfw -GeneratedTests.logical_and_simple GeneratedTests.logical_not GeneratedTests.lsh_projection GeneratedTests.lsh_projection_2 diff --git a/tests/nnapi/nnapi_gtest.skip.noarch.interp b/tests/nnapi/nnapi_gtest.skip.noarch.interp index ba14120..e0ed8d7 100644 --- 
a/tests/nnapi/nnapi_gtest.skip.noarch.interp +++ b/tests/nnapi/nnapi_gtest.skip.noarch.interp @@ -23,6 +23,12 @@ GeneratedTests.argmax_neg_axis_float_nnfw GeneratedTests.argmax_neg_axis_int32_nnfw GeneratedTests.argmax_quant8_neg_axis_nnfw GeneratedTests.argmax_quant8_nnfw +GeneratedTests.argmin_1 +GeneratedTests.argmin_1_quant8 +GeneratedTests.argmin_2 +GeneratedTests.argmin_2_quant8 +GeneratedTests.argmin_3 +GeneratedTests.argmin_3_quant8 GeneratedTests.avg_pool_quant8_1 GeneratedTests.avg_pool_quant8_2 GeneratedTests.avg_pool_quant8_3 diff --git a/tests/nnapi/nnapi_gtest.skip.x86_64-linux.cpu b/tests/nnapi/nnapi_gtest.skip.x86_64-linux.cpu index cb0d07c..cad0729 100644 --- a/tests/nnapi/nnapi_gtest.skip.x86_64-linux.cpu +++ b/tests/nnapi/nnapi_gtest.skip.x86_64-linux.cpu @@ -12,11 +12,6 @@ GeneratedTests.cast_float32_to_quant8_overflow_relaxed GeneratedTests.cast_int32_to_float16 GeneratedTests.cast_int32_to_quant8_overflow GeneratedTests.cast_quant8_to_float16 -GeneratedTests.depth_to_space_float_1 -GeneratedTests.depth_to_space_float_2 -GeneratedTests.depth_to_space_float_3 -GeneratedTests.depth_to_space_quant8_1 -GeneratedTests.depth_to_space_quant8_2 GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim GeneratedTests.dequantize_v1_2 @@ -46,14 +41,6 @@ GeneratedTests.local_response_norm_float_1 GeneratedTests.local_response_norm_float_2 GeneratedTests.local_response_norm_float_3 GeneratedTests.local_response_norm_float_4 -GeneratedTests.logical_and_1D_nnfw -GeneratedTests.logical_and_2D_nnfw -GeneratedTests.logical_and_3D_nnfw -GeneratedTests.logical_and_4D_nnfw -GeneratedTests.logical_and_broadcast -GeneratedTests.logical_and_broadcast_4D_2D_nnfw -GeneratedTests.logical_and_broadcast_nnfw -GeneratedTests.logical_and_simple GeneratedTests.logical_not GeneratedTests.lsh_projection GeneratedTests.lsh_projection_2 diff --git a/tests/nnapi/specs/skip/V1_2/argmin_1.mod.py b/tests/nnapi/specs/V1_2/argmin_1.mod.py similarity index 100% rename from tests/nnapi/specs/skip/V1_2/argmin_1.mod.py rename to tests/nnapi/specs/V1_2/argmin_1.mod.py diff --git a/tests/nnapi/specs/skip/V1_2/argmin_2.mod.py b/tests/nnapi/specs/V1_2/argmin_2.mod.py similarity index 100% rename from tests/nnapi/specs/skip/V1_2/argmin_2.mod.py rename to tests/nnapi/specs/V1_2/argmin_2.mod.py diff --git a/tests/nnapi/specs/skip/V1_2/argmin_3.mod.py b/tests/nnapi/specs/V1_2/argmin_3.mod.py similarity index 100% rename from tests/nnapi/specs/skip/V1_2/argmin_3.mod.py rename to tests/nnapi/specs/V1_2/argmin_3.mod.py diff --git a/tests/nnapi/src/TestGenerated.cpp b/tests/nnapi/src/TestGenerated.cpp index 2347353..093e5a9 100644 --- a/tests/nnapi/src/TestGenerated.cpp +++ b/tests/nnapi/src/TestGenerated.cpp @@ -256,8 +256,11 @@ void GeneratedTests::SetUp() { mOldComputeMode = Execution::setComputeMode(GetParam()); #endif // Fix for onert: Fix file path for linux +#ifndef __ANDROID__ char cacheDirTemp[] = "/tmp/TestCompilationCachingXXXXXX"; - //char cacheDirTemp[] = "/data/local/tmp/TestCompilationCachingXXXXXX"; +#else + char cacheDirTemp[] = "/data/local/tmp/TestCompilationCachingXXXXXX"; +#endif char* cacheDir = mkdtemp(cacheDirTemp); ASSERT_NE(cacheDir, nullptr); mCacheDir = cacheDir; diff --git a/tests/nnapi/src/TestValidation.cpp b/tests/nnapi/src/TestValidation.cpp index 45432c0..3e749b8 100644 --- a/tests/nnapi/src/TestValidation.cpp +++ b/tests/nnapi/src/TestValidation.cpp @@ -29,13 +29,19 @@ // This file tests all the validations done by the Neural Networks 
API. namespace { +#ifndef PATH_MAX #define PATH_MAX 256 +#endif static int shmem_num = 0; static int shmem_create_region(size_t size) { char temp[PATH_MAX]; +#ifndef __ANDROID__ snprintf(temp, sizeof(temp), "/tmp/nn-shmem-%d-%d-XXXXXXXXX", getpid(), shmem_num++); +#else + snprintf(temp, sizeof(temp), "/data/local/tmp/nn-shmem-%d-%d-XXXXXXXXX", getpid(), shmem_num++); +#endif // Set umask and recover after generate temporary file to avoid security issue mode_t umaskPrev = umask(S_IRUSR|S_IWUSR); diff --git a/tests/nnfw_api/CMakeLists.txt b/tests/nnfw_api/CMakeLists.txt index aa3a942..40142dd 100644 --- a/tests/nnfw_api/CMakeLists.txt +++ b/tests/nnfw_api/CMakeLists.txt @@ -19,6 +19,11 @@ if(ARMCompute_FOUND) target_compile_definitions(${RUNTIME_NNFW_API_TEST} PRIVATE TEST_ACL_BACKEND) endif(ARMCompute_FOUND) +nnfw_find_package(Xnnpack QUIET) +if(Xnnpack_FOUND) + target_compile_definitions(${RUNTIME_NNFW_API_TEST} PRIVATE TEST_XNNPACK_BACKEND) +endif(Xnnpack_FOUND) + set(RUNTIME_NNFW_API_TEST_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/src) target_include_directories(${RUNTIME_NNFW_API_TEST} PRIVATE ${RUNTIME_NNFW_API_TEST_INCLUDE}) diff --git a/tests/nnfw_api/src/CircleGen.cc b/tests/nnfw_api/src/CircleGen.cc index 1dd3f9b..87b38f2 100644 --- a/tests/nnfw_api/src/CircleGen.cc +++ b/tests/nnfw_api/src/CircleGen.cc @@ -78,7 +78,7 @@ CircleBuffer CircleGen::finish() for (auto &ctx : _subgraph_contexts) subgraphs.push_back(buildSubGraph(ctx)); auto model = - circle::CreateModelDirect(_fbb, 3, &_opcodes, &subgraphs, "CircleGen generated", &_buffers); + circle::CreateModelDirect(_fbb, 3, &_opcodes, &subgraphs, "CircleGen generated", &_buffers); _fbb.Finish(model); return CircleBuffer{std::move(_fbb)}; } @@ -107,13 +107,20 @@ uint32_t CircleGen::addOperatorArgMax(const OperatorParams ¶ms, circle::Tens circle::BuiltinOptions_ArgMaxOptions, options); } +uint32_t CircleGen::addOperatorArgMin(const OperatorParams ¶ms, circle::TensorType output_type) +{ + auto options = circle::CreateArgMaxOptions(_fbb, output_type).Union(); + return addOperatorWithOptions(params, circle::BuiltinOperator_ARG_MIN, + circle::BuiltinOptions_ArgMinOptions, options); +} + uint32_t CircleGen::addOperatorAveragePool2D(const OperatorParams ¶ms, circle::Padding padding, int stride_w, int stride_h, int filter_w, int filter_h, circle::ActivationFunctionType actfn) { auto options = - circle::CreatePool2DOptions(_fbb, padding, stride_w, stride_h, filter_w, filter_h, actfn) - .Union(); + circle::CreatePool2DOptions(_fbb, padding, stride_w, stride_h, filter_w, filter_h, actfn) + .Union(); return addOperatorWithOptions(params, circle::BuiltinOperator_AVERAGE_POOL_2D, circle::BuiltinOptions_Pool2DOptions, options); } @@ -134,6 +141,18 @@ uint32_t CircleGen::addOperatorConcatenation(const OperatorParams ¶ms, int a circle::BuiltinOptions_ConcatenationOptions, options); } +uint32_t CircleGen::addOperatorConv2D(const OperatorParams ¶ms, circle::Padding padding, + int stride_w, int stride_h, + circle::ActivationFunctionType actfn, int dilation_w, + int dilation_h) +{ + auto options = + circle::CreateConv2DOptions(_fbb, padding, stride_w, stride_h, actfn, dilation_w, dilation_h) + .Union(); + return addOperatorWithOptions(params, circle::BuiltinOperator_CONV_2D, + circle::BuiltinOptions_Conv2DOptions, options); +} + uint32_t CircleGen::addOperatorCos(const OperatorParams ¶ms) { auto options = circle::CreateCosOptions(_fbb).Union(); @@ -141,6 +160,13 @@ uint32_t CircleGen::addOperatorCos(const OperatorParams ¶ms) 
circle::BuiltinOptions_CosOptions, options); } +uint32_t CircleGen::addOperatorDepthToSpace(const OperatorParams ¶ms, int32_t block_size) +{ + auto options = circle::CreateDepthToSpaceOptions(_fbb, block_size).Union(); + return addOperatorWithOptions(params, circle::BuiltinOperator_DEPTH_TO_SPACE, + circle::BuiltinOptions_DepthToSpaceOptions, options); +} + uint32_t CircleGen::addOperatorDepthwiseConv2D(const OperatorParams ¶ms, circle::Padding padding, int stride_w, int stride_h, int depth_multiplier, @@ -148,13 +174,19 @@ uint32_t CircleGen::addOperatorDepthwiseConv2D(const OperatorParams ¶ms, int dilation_h) { auto options = - circle::CreateDepthwiseConv2DOptions(_fbb, padding, stride_w, stride_h, depth_multiplier, - actfn, dilation_w, dilation_h) - .Union(); + circle::CreateDepthwiseConv2DOptions(_fbb, padding, stride_w, stride_h, depth_multiplier, actfn, + dilation_w, dilation_h) + .Union(); return addOperatorWithOptions(params, circle::BuiltinOperator_DEPTHWISE_CONV_2D, circle::BuiltinOptions_DepthwiseConv2DOptions, options); } +uint32_t CircleGen::addOperatorElu(const OperatorParams ¶ms) +{ + return addOperatorWithOptions(params, circle::BuiltinOperator_ELU, circle::BuiltinOptions_NONE, + 0); +} + uint32_t CircleGen::addOperatorEqual(const OperatorParams ¶ms) { auto options = circle::CreateEqualOptions(_fbb).Union(); @@ -162,13 +194,20 @@ uint32_t CircleGen::addOperatorEqual(const OperatorParams ¶ms) circle::BuiltinOptions_EqualOptions, options); } +uint32_t CircleGen::addOperatorExpandDims(const OperatorParams ¶ms) +{ + auto options = circle::CreateEqualOptions(_fbb).Union(); + return addOperatorWithOptions(params, circle::BuiltinOperator_EXPAND_DIMS, + circle::BuiltinOptions_ExpandDimsOptions, options); +} + uint32_t CircleGen::addOperatorFullyConnected(const OperatorParams ¶ms, circle::FullyConnectedOptionsWeightsFormat weights_format) { auto options = - circle::CreateFullyConnectedOptions(_fbb, circle::ActivationFunctionType_NONE, weights_format) - .Union(); + circle::CreateFullyConnectedOptions(_fbb, circle::ActivationFunctionType_NONE, weights_format) + .Union(); return addOperatorWithOptions(params, circle::BuiltinOperator_FULLY_CONNECTED, circle::BuiltinOptions_FullyConnectedOptions, options); } @@ -214,6 +253,13 @@ uint32_t CircleGen::addOperatorLogSoftmax(const OperatorParams ¶ms) circle::BuiltinOptions_LogSoftmaxOptions, options); } +uint32_t CircleGen::addOperatorMean(const OperatorParams ¶ms, bool keep_dims) +{ + auto options = circle::CreateReducerOptions(_fbb, keep_dims).Union(); + return addOperatorWithOptions(params, circle::BuiltinOperator_MEAN, + circle::BuiltinOptions_ReducerOptions, options); +} + uint32_t CircleGen::addOperatorNeg(const OperatorParams ¶ms) { auto options = circle::CreatePadOptions(_fbb).Union(); @@ -277,7 +323,7 @@ uint32_t CircleGen::addOperatorResizeBilinear(const OperatorParams ¶ms, bool bool half_pixel_centers) { auto options = - circle::CreateResizeBilinearOptions(_fbb, align_corners, half_pixel_centers).Union(); + circle::CreateResizeBilinearOptions(_fbb, align_corners, half_pixel_centers).Union(); return addOperatorWithOptions(params, circle::BuiltinOperator_RESIZE_BILINEAR, circle::BuiltinOptions_ResizeBilinearOptions, options); } @@ -329,7 +375,7 @@ uint32_t CircleGen::addOperatorStridedSlice(const OperatorParams ¶ms, int32_ { auto options = circle::CreateStridedSliceOptions(_fbb, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask) - .Union(); + .Union(); return addOperatorWithOptions(params, 
circle::BuiltinOperator_STRIDED_SLICE, circle::BuiltinOptions_StridedSliceOptions, options); } @@ -371,6 +417,19 @@ uint32_t CircleGen::addOperatorTranspose(const OperatorParams &params) circle::BuiltinOptions_TransposeOptions, options); } +uint32_t CircleGen::addOperatorSqrt(const OperatorParams &params) +{ + return addOperatorWithOptions(params, circle::BuiltinOperator_SQRT, circle::BuiltinOptions_NONE, + 0); +} + +uint32_t CircleGen::addOperatorSquare(const OperatorParams &params) +{ + auto options = circle::CreateSquareOptions(_fbb).Union(); + return addOperatorWithOptions(params, circle::BuiltinOperator_SQUARE, + circle::BuiltinOptions_SquareOptions, options); +} + // NOTE Please add addOperator functions ABOVE this line // // % How to add a new addOperatorXXX function @@ -379,6 +438,9 @@ uint32_t CircleGen::addOperatorTranspose(const OperatorParams &params) // 2. Change enum BuiltinOperator // 3. Change enum BuiltinOptions // 4. Change CreateXXXOptions accordingly +// +// If the operator doesn't have an options table, remove the CreateXXXOptions call, +// and call addOperatorWithOptions with options_type = circle::BuiltinOptions_NONE and options = 0 // ===== Add Operator methods end ===== @@ -440,7 +502,7 @@ CircleGen::buildSparsityParameters(const SparsityParams &sp) flatbuffers::Offset> traversal_order; flatbuffers::Offset> block_map; flatbuffers::Offset>> - dim_metadata; + dim_metadata; traversal_order = _fbb.CreateVector(sp.traversal_order); block_map = _fbb.CreateVector(sp.block_map); @@ -451,8 +513,8 @@ CircleGen::buildSparsityParameters(const SparsityParams &sp) auto fb_array_segments = circle::CreateUint16VectorDirect(_fbb, &it._array_segments.u16); auto fb_array_indices = circle::CreateUint16VectorDirect(_fbb, &it._array_indices.u16); auto dim_metadata = circle::CreateDimensionMetadata( - _fbb, it._format, it._dense_size, it._array_segments_type, fb_array_segments.Union(), - it._array_indices_type, fb_array_indices.Union()); + _fbb, it._format, it._dense_size, it._array_segments_type, fb_array_segments.Union(), + it._array_indices_type, fb_array_indices.Union()); dim_metadata_vec.emplace_back(dim_metadata); } dim_metadata = _fbb.CreateVector(dim_metadata_vec); diff --git a/tests/nnfw_api/src/CircleGen.h b/tests/nnfw_api/src/CircleGen.h index 7da2459..6662183 100644 --- a/tests/nnfw_api/src/CircleGen.h +++ b/tests/nnfw_api/src/CircleGen.h @@ -67,15 +67,15 @@ public: DimMetaData() = delete; DimMetaData(SparseDimensionType format, std::vector array_segments, std::vector array_indices) - : _format{format}, - _array_segments_type(SparseIndexVectorType::SparseIndexVector_Uint16Vector), - _array_indices_type(SparseIndexVectorType::SparseIndexVector_Uint16Vector) + : _format{format}, + _array_segments_type(SparseIndexVectorType::SparseIndexVector_Uint16Vector), + _array_indices_type(SparseIndexVectorType::SparseIndexVector_Uint16Vector) { _array_segments.u16 = array_segments; _array_indices.u16 = array_indices; } DimMetaData(SparseDimensionType format, int32_t dense_size) - : _format{format}, _dense_size{dense_size} + : _format{format}, _dense_size{dense_size} { } SparseDimensionType _format{circle::DimensionType_DENSE}; @@ -139,6 +139,8 @@ public: uint32_t addOperatorAddN(const OperatorParams &params); uint32_t addOperatorArgMax(const OperatorParams &params, circle::TensorType output_type = circle::TensorType::TensorType_INT32); + uint32_t addOperatorArgMin(const OperatorParams &params, + circle::TensorType output_type = circle::TensorType::TensorType_INT32); uint32_t addOperatorAveragePool2D(const OperatorParams &params,
circle::Padding padding, int stride_w, int stride_h, int filter_w, int filter_h, circle::ActivationFunctionType actfn); @@ -146,17 +148,23 @@ public: circle::TensorType output_type); uint32_t addOperatorConcatenation(const OperatorParams ¶ms, int axis, circle::ActivationFunctionType actfn); + uint32_t addOperatorConv2D(const OperatorParams ¶ms, circle::Padding padding, int stride_w, + int stride_h, circle::ActivationFunctionType actfn, int dilation_w = 1, + int dilation_h = 1); uint32_t addOperatorCos(const OperatorParams ¶ms); + uint32_t addOperatorDepthToSpace(const OperatorParams ¶ms, int32_t block_size); uint32_t addOperatorDepthwiseConv2D(const OperatorParams ¶ms, circle::Padding padding, int stride_w, int stride_h, int depth_multiplier, circle::ActivationFunctionType actfn, int dilation_w = 1, int dilation_h = 1); + uint32_t addOperatorElu(const OperatorParams ¶ms); uint32_t addOperatorEqual(const OperatorParams ¶ms); + uint32_t addOperatorExpandDims(const OperatorParams ¶ms); uint32_t addOperatorFill(const OperatorParams ¶ms); uint32_t addOperatorFloor(const OperatorParams ¶ms); uint32_t addOperatorFullyConnected(const OperatorParams ¶ms, circle::FullyConnectedOptionsWeightsFormat weights_format = - circle::FullyConnectedOptionsWeightsFormat_DEFAULT); + circle::FullyConnectedOptionsWeightsFormat_DEFAULT); uint32_t addOperatorIf(const OperatorParams ¶ms, uint32_t then_subg, uint32_t else_subg); uint32_t addOperatorInstanceNorm(const OperatorParams ¶ms, float epsilon, circle::ActivationFunctionType actfn); @@ -164,6 +172,7 @@ public: uint32_t addOperatorLeakyRelu(const OperatorParams ¶ms, float alpha); uint32_t addOperatorLess(const OperatorParams ¶ms); uint32_t addOperatorLogSoftmax(const OperatorParams ¶ms); + uint32_t addOperatorMean(const OperatorParams ¶ms, bool keep_dims); uint32_t addOperatorNeg(const OperatorParams ¶ms); uint32_t addOperatorOneHot(const OperatorParams ¶ms, int32_t axis); uint32_t addOperatorPad(const OperatorParams ¶ms); @@ -185,6 +194,8 @@ public: uint32_t addOperatorSelect(const OperatorParams ¶ms); uint32_t addOperatorSelectV2(const OperatorParams ¶ms); uint32_t addOperatorSplit(const OperatorParams ¶ms, int32_t num_split); + uint32_t addOperatorSqrt(const OperatorParams ¶ms); + uint32_t addOperatorSquare(const OperatorParams ¶ms); uint32_t addOperatorStridedSlice(const OperatorParams ¶ms, int32_t begin_mask = 0, int32_t end_mask = 0, int32_t ellipsis_mask = 0, int32_t new_axis_mask = 0, int32_t shrink_axis_mask = 0); diff --git a/tests/nnfw_api/src/GenModelTest.h b/tests/nnfw_api/src/GenModelTest.h index 144c379..3583ce0 100644 --- a/tests/nnfw_api/src/GenModelTest.h +++ b/tests/nnfw_api/src/GenModelTest.h @@ -14,6 +14,9 @@ * limitations under the License. 
*/ +#ifndef __NNFW_API_TEST_GEN_MODEL_TEST_H__ +#define __NNFW_API_TEST_GEN_MODEL_TEST_H__ + #include #include @@ -224,10 +227,16 @@ public: _backends.push_back(backend); } #endif - if (backend == "cpu") + if (backend == "cpu" || backend == "ruy") { _backends.push_back(backend); } +#ifdef TEST_XNNPACK_BACKEND + if (backend == "xnnpack") + { + _backends.push_back(backend); + } +#endif } } @@ -241,6 +250,11 @@ public: */ void expectFailCompile() { _expected_fail_compile = true; } + /** + * @brief Expect failure while execution + */ + void expectFailExecution() { _expected_fail_execution = true; } + private: CircleBuffer _cbuf; std::vector _test_cases; @@ -248,6 +262,7 @@ private: std::unordered_map _output_sizes; bool _expected_fail_model_load{false}; bool _expected_fail_compile{false}; + bool _expected_fail_execution{false}; }; /** @@ -277,7 +292,7 @@ protected: NNFW_ENSURE_SUCCESS(nnfw_create_session(&_so.session)); auto &cbuf = _context->cbuf(); auto model_load_result = - nnfw_load_circle_from_buffer(_so.session, cbuf.buffer(), cbuf.size()); + nnfw_load_circle_from_buffer(_so.session, cbuf.buffer(), cbuf.size()); if (_context->expected_fail_model_load()) { ASSERT_NE(model_load_result, NNFW_STATUS_NO_ERROR); @@ -290,7 +305,7 @@ protected: if (_context->expected_fail_compile()) { - ASSERT_EQ(nnfw_prepare(_so.session), NNFW_STATUS_ERROR); + ASSERT_NE(nnfw_prepare(_so.session), NNFW_STATUS_NO_ERROR); NNFW_ENSURE_SUCCESS(nnfw_close_session(_so.session)); continue; @@ -362,7 +377,7 @@ protected: if (test_case.expected_fail_run()) { - ASSERT_EQ(nnfw_run(_so.session), NNFW_STATUS_ERROR); + ASSERT_NE(nnfw_run(_so.session), NNFW_STATUS_NO_ERROR); continue; } @@ -447,3 +462,5 @@ protected: SessionObjectGeneric _so; std::unique_ptr _context; }; + +#endif // __NNFW_API_TEST_GEN_MODEL_TEST_H__ diff --git a/tests/nnfw_api/src/ModelTestDynamicTensor.cc b/tests/nnfw_api/src/ModelTestDynamicTensor.cc index 459c2e8..1ed8f95 100644 --- a/tests/nnfw_api/src/ModelTestDynamicTensor.cc +++ b/tests/nnfw_api/src/ModelTestDynamicTensor.cc @@ -21,6 +21,7 @@ #include "fixtures.h" #include "CircleGen.h" #include "GenModelTest.h" +#include "NNPackages.h" // This macro can be used instead of using NNFW_ENSURE_SUCCESS especially with negative test. 
// E.g., setInputOutput() is written with this macro and the following check is available to check diff --git a/tests/nnfw_api/src/NNPackages.cc b/tests/nnfw_api/src/NNPackages.cc index d9b2526..11e0c8e 100644 --- a/tests/nnfw_api/src/NNPackages.cc +++ b/tests/nnfw_api/src/NNPackages.cc @@ -25,11 +25,14 @@ // NOTE Must match `enum TestPackages` const char *TEST_PACKAGE_NAMES[] = { - // for validation test - "add", "add_no_manifest", "add_invalid_manifest", + // for validation test + "add", + "add_no_manifest", + "add_invalid_manifest", - // for dynamic tensor test - "while_dynamic", "if_dynamic", + // for dynamic tensor test + "while_dynamic", + "if_dynamic", }; NNPackages &NNPackages::get() @@ -43,11 +46,11 @@ void NNPackages::init(const char *argv0) char raw_dir[1024]; char cwd[1024]; strncpy(raw_dir, argv0, sizeof(raw_dir) - 1); - dirname(raw_dir); - if (raw_dir[0] == '/') + char *dir_path = dirname(raw_dir); + if (dir_path[0] == '/') { // If it is an absolute path, just use it - _base_path = raw_dir; + _base_path = dir_path; } else { @@ -55,7 +58,7 @@ void NNPackages::init(const char *argv0) getcwd(cwd, sizeof(cwd)); _base_path = cwd; _base_path += "/"; - _base_path += raw_dir; + _base_path += dir_path; } } diff --git a/tests/nnfw_api/src/RegressionTests.cc b/tests/nnfw_api/src/RegressionTests.cc index 10d6e5d..de23339 100644 --- a/tests/nnfw_api/src/RegressionTests.cc +++ b/tests/nnfw_api/src/RegressionTests.cc @@ -116,11 +116,11 @@ TEST_F(RegressionTest, github_11748) uint8_t input_buf[new_dim * sizeof(float)]; NNFW_ENSURE_SUCCESS( - nnfw_set_input(session, 0, t_input.dtype, &input_buf, new_dim * sizeof(float))); + nnfw_set_input(session, 0, t_input.dtype, &input_buf, new_dim * sizeof(float))); uint8_t output_buf[new_dim * sizeof(float)]; NNFW_ENSURE_SUCCESS( - nnfw_set_output(session, 0, t_output.dtype, &output_buf, new_dim * sizeof(float))); + nnfw_set_output(session, 0, t_output.dtype, &output_buf, new_dim * sizeof(float))); NNFW_ENSURE_SUCCESS(nnfw_run(session)); @@ -134,9 +134,9 @@ TEST_F(RegressionTest, github_11748) // seems weird calling but anyway nnstreamer people case calls this again. 
// Anyways, runtime should work NNFW_ENSURE_SUCCESS( - nnfw_set_input(session, 0, t_input.dtype, &input_buf, new_dim * sizeof(float))); + nnfw_set_input(session, 0, t_input.dtype, &input_buf, new_dim * sizeof(float))); NNFW_ENSURE_SUCCESS( - nnfw_set_output(session, 0, t_output.dtype, &output_buf, new_dim * sizeof(float))); + nnfw_set_output(session, 0, t_output.dtype, &output_buf, new_dim * sizeof(float))); NNFW_ENSURE_SUCCESS(nnfw_run(session)); } @@ -166,9 +166,9 @@ TEST_F(RegressionTest, github_4585) std::vector out_buf{-1, -1}; NNFW_ENSURE_SUCCESS( - nnfw_set_input(session, 0, ti_new.dtype, in_buf.data(), in_buf.size() * sizeof(float))); + nnfw_set_input(session, 0, ti_new.dtype, in_buf.data(), in_buf.size() * sizeof(float))); NNFW_ENSURE_SUCCESS( - nnfw_set_output(session, 0, ti_new.dtype, out_buf.data(), out_buf.size() * sizeof(float))); + nnfw_set_output(session, 0, ti_new.dtype, out_buf.data(), out_buf.size() * sizeof(float))); NNFW_ENSURE_SUCCESS(nnfw_run(session)); diff --git a/tests/nnfw_api/src/ValidationTestAddModelLoaded.cc b/tests/nnfw_api/src/ValidationTestAddModelLoaded.cc index fbe7214..5fbb844 100644 --- a/tests/nnfw_api/src/ValidationTestAddModelLoaded.cc +++ b/tests/nnfw_api/src/ValidationTestAddModelLoaded.cc @@ -102,7 +102,7 @@ TEST_F(ValidationTestAddModelLoaded, neg_load_model) { // load model twice ASSERT_EQ(nnfw_load_model_from_file( - _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()), + _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()), NNFW_STATUS_INVALID_STATE); } diff --git a/tests/nnfw_api/src/ValidationTestAddSessionPrepared.cc b/tests/nnfw_api/src/ValidationTestAddSessionPrepared.cc index 02bbd0e..d668a1c 100644 --- a/tests/nnfw_api/src/ValidationTestAddSessionPrepared.cc +++ b/tests/nnfw_api/src/ValidationTestAddSessionPrepared.cc @@ -173,7 +173,7 @@ TEST_F(ValidationTestAddSessionPrepared, neg_load_model) { // Load model twice ASSERT_EQ(nnfw_load_model_from_file( - _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()), + _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()), NNFW_STATUS_INVALID_STATE); } diff --git a/tests/nnfw_api/src/ValidationTestMultipleSessions.cc b/tests/nnfw_api/src/ValidationTestMultipleSessions.cc new file mode 100644 index 0000000..758e1db --- /dev/null +++ b/tests/nnfw_api/src/ValidationTestMultipleSessions.cc @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "fixtures.h" + +TEST_F(ValidationTestTwoSessions, neg_two_sessions_create) +{ + ASSERT_EQ(nnfw_create_session(&_session1), NNFW_STATUS_NO_ERROR); + ASSERT_EQ(nnfw_create_session(nullptr), NNFW_STATUS_UNEXPECTED_NULL); + + ASSERT_EQ(nnfw_close_session(_session1), NNFW_STATUS_NO_ERROR); +} + +class AveragePoolModel +{ +public: + AveragePoolModel(int N, int H, int W, int C) + { + CircleGen cgen; + int in = cgen.addTensor({{N, H, W, C}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{N, H / 2, W / 2, C}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + cbuf = cgen.finish(); + }; + + CircleBuffer cbuf; +}; + +TEST_F(ValidationTestTwoSessionsCreated, two_sessions_run_simple_model) +{ + constexpr int N = 64, H = 64, W = 64, C = 3; + AveragePoolModel model(N, H, W, C); + + NNFW_ENSURE_SUCCESS( + nnfw_load_circle_from_buffer(_session1, model.cbuf.buffer(), model.cbuf.size())); + NNFW_ENSURE_SUCCESS( + nnfw_load_circle_from_buffer(_session2, model.cbuf.buffer(), model.cbuf.size())); + + NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session1, "cpu")); + NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session2, "cpu")); + + NNFW_ENSURE_SUCCESS(nnfw_prepare(_session1)); + NNFW_ENSURE_SUCCESS(nnfw_prepare(_session2)); + + constexpr int input_count = N * H * W * C; + constexpr int output_count = N * H / 2 * W / 2 * C; + + std::vector in_buf1(input_count); // any value + std::vector out_buf1(output_count); + + NNFW_ENSURE_SUCCESS(nnfw_set_input(_session1, 0, NNFW_TYPE_TENSOR_FLOAT32, in_buf1.data(), + in_buf1.size() * sizeof(float))); + NNFW_ENSURE_SUCCESS(nnfw_set_output(_session1, 0, NNFW_TYPE_TENSOR_FLOAT32, out_buf1.data(), + out_buf1.size() * sizeof(float))); + + std::vector in_buf2(input_count); // any value + std::vector out_buf2(output_count); + + NNFW_ENSURE_SUCCESS(nnfw_set_input(_session2, 0, NNFW_TYPE_TENSOR_FLOAT32, in_buf2.data(), + in_buf2.size() * sizeof(float))); + NNFW_ENSURE_SUCCESS(nnfw_set_output(_session2, 0, NNFW_TYPE_TENSOR_FLOAT32, out_buf2.data(), + out_buf2.size() * sizeof(float))); + + NNFW_ENSURE_SUCCESS(nnfw_run_async(_session1)); + NNFW_ENSURE_SUCCESS(nnfw_run_async(_session2)); + + NNFW_ENSURE_SUCCESS(nnfw_await(_session1)); + NNFW_ENSURE_SUCCESS(nnfw_await(_session2)); + + SUCCEED(); +} + +// TODO Write two-session-test with large models run by threads diff --git a/tests/nnfw_api/src/ValidationTestSessionCreated.cc b/tests/nnfw_api/src/ValidationTestSessionCreated.cc index 40d3f93..cb07919 100644 --- a/tests/nnfw_api/src/ValidationTestSessionCreated.cc +++ b/tests/nnfw_api/src/ValidationTestSessionCreated.cc @@ -21,7 +21,7 @@ TEST_F(ValidationTestSessionCreated, load_session_001) { // Existing model must ASSERT_EQ(nnfw_load_model_from_file( - _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()), + _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()), NNFW_STATUS_NO_ERROR); } @@ -36,7 +36,7 @@ TEST_F(ValidationTestSessionCreated, close_and_create_again) TEST_F(ValidationTestSessionCreated, neg_load_session_1) { ASSERT_EQ(nnfw_load_model_from_file( - _session, NNPackages::get().getModelAbsolutePath("nonexisting_directory").c_str()), + _session, NNPackages::get().getModelAbsolutePath("nonexisting_directory").c_str()), NNFW_STATUS_ERROR); } @@ -50,25 +50,25 @@ TEST_F(ValidationTestSessionCreated, neg_load_session_3) // Too long path 
const std::string long_path(1024, 'x'); ASSERT_EQ(nnfw_load_model_from_file( - _session, NNPackages::get().getModelAbsolutePath(long_path.c_str()).c_str()), + _session, NNPackages::get().getModelAbsolutePath(long_path.c_str()).c_str()), NNFW_STATUS_ERROR); } TEST_F(ValidationTestSessionCreated, neg_load_invalid_package_1) { ASSERT_EQ( - nnfw_load_model_from_file( - _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD_NO_MANIFEST).c_str()), - NNFW_STATUS_ERROR); + nnfw_load_model_from_file( + _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD_NO_MANIFEST).c_str()), + NNFW_STATUS_ERROR); ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_INVALID_STATE); } TEST_F(ValidationTestSessionCreated, neg_load_invalid_package_2) { - ASSERT_EQ(nnfw_load_model_from_file( - _session, - NNPackages::get().getModelAbsolutePath(NNPackages::ADD_INVALID_MANIFEST).c_str()), - NNFW_STATUS_ERROR); + ASSERT_EQ( + nnfw_load_model_from_file( + _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD_INVALID_MANIFEST).c_str()), + NNFW_STATUS_ERROR); ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_INVALID_STATE); } diff --git a/tests/nnfw_api/src/ValidationTestSingleSession.cc b/tests/nnfw_api/src/ValidationTestSingleSession.cc index b134629..852d5cd 100644 --- a/tests/nnfw_api/src/ValidationTestSingleSession.cc +++ b/tests/nnfw_api/src/ValidationTestSingleSession.cc @@ -89,7 +89,7 @@ TEST_F(ValidationTestSingleSession, neg_load_model) { // Invalid state ASSERT_EQ(nnfw_load_model_from_file( - nullptr, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()), + nullptr, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()), NNFW_STATUS_UNEXPECTED_NULL); } diff --git a/tests/nnfw_api/src/fixtures.h b/tests/nnfw_api/src/fixtures.h index 21be22f..15f51eb 100644 --- a/tests/nnfw_api/src/fixtures.h +++ b/tests/nnfw_api/src/fixtures.h @@ -23,6 +23,7 @@ #include #include "NNPackages.h" +#include "CircleGen.h" #define NNFW_ENSURE_SUCCESS(EXPR) ASSERT_EQ((EXPR), NNFW_STATUS_NO_ERROR) @@ -68,6 +69,7 @@ protected: { ValidationTestSingleSession::SetUp(); ASSERT_EQ(nnfw_create_session(&_session), NNFW_STATUS_NO_ERROR); + ASSERT_NE(_session, nullptr); } void TearDown() override @@ -77,16 +79,36 @@ protected: } }; +inline CircleBuffer genAddModel() +{ + CircleGen cgen; + std::vector rhs_data{2}; + uint32_t rhs_buf = cgen.addBuffer(rhs_data); + int lhs = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, 0, "X_input"}); + int rhs = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, rhs_buf, "y_var"}); + int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, 0, "ADD_TOP"}); + cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({lhs}, {out}); + return cgen.finish(); +} + template class ValidationTestModelLoaded : public ValidationTestSessionCreated { protected: void SetUp() override { ValidationTestSessionCreated::SetUp(); - ASSERT_EQ(nnfw_load_model_from_file(_session, - NNPackages::get().getModelAbsolutePath(PackageNo).c_str()), - NNFW_STATUS_NO_ERROR); - ASSERT_NE(_session, nullptr); + if (PackageNo == NNPackages::ADD) + { + auto cbuf = genAddModel(); + NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(_session, cbuf.buffer(), cbuf.size())); + } + else + { + // TODO Eventually, downloaded model tests are removed. 
+ NNFW_ENSURE_SUCCESS(nnfw_load_model_from_file( + _session, NNPackages::get().getModelAbsolutePath(PackageNo).c_str())); + } } void TearDown() override { ValidationTestSessionCreated::TearDown(); } @@ -114,8 +136,8 @@ protected: EXPECT_EQ(input_elements, 1); _input.resize(input_elements); ASSERT_EQ( - nnfw_set_input(_session, 0, ti_input.dtype, _input.data(), sizeof(float) * input_elements), - NNFW_STATUS_NO_ERROR); + nnfw_set_input(_session, 0, ti_input.dtype, _input.data(), sizeof(float) * input_elements), + NNFW_STATUS_NO_ERROR); nnfw_tensorinfo ti_output; ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &ti_output), NNFW_STATUS_NO_ERROR); @@ -133,13 +155,13 @@ protected: uint64_t input_elements = num_elems(ti_input); _input.resize(input_elements); ASSERT_EQ( - nnfw_set_input(_session, 0, ti_input->dtype, _input.data(), sizeof(float) * input_elements), - NNFW_STATUS_NO_ERROR); + nnfw_set_input(_session, 0, ti_input->dtype, _input.data(), sizeof(float) * input_elements), + NNFW_STATUS_NO_ERROR); _output.resize(40000); // Give sufficient size for the output - ASSERT_EQ(nnfw_set_output(_session, 0, ti_input->dtype, _output.data(), - sizeof(float) * _output.size()), - NNFW_STATUS_NO_ERROR); + ASSERT_EQ( + nnfw_set_output(_session, 0, ti_input->dtype, _output.data(), sizeof(float) * _output.size()), + NNFW_STATUS_NO_ERROR); } protected: @@ -156,11 +178,12 @@ protected: { ValidationTest::SetUp(); - auto model_path = NNPackages::get().getModelAbsolutePath(NNPackages::ADD); for (auto &obj : _objects) { ASSERT_EQ(nnfw_create_session(&obj.session), NNFW_STATUS_NO_ERROR); - ASSERT_EQ(nnfw_load_model_from_file(obj.session, model_path.c_str()), NNFW_STATUS_NO_ERROR); + + auto cbuf = genAddModel(); + NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(obj.session, cbuf.buffer(), cbuf.size())); ASSERT_EQ(nnfw_prepare(obj.session), NNFW_STATUS_NO_ERROR); uint32_t num_inputs; @@ -206,4 +229,31 @@ protected: std::array _objects; }; +class ValidationTestTwoSessions : public ValidationTest +{ +protected: + nnfw_session *_session1 = nullptr; + nnfw_session *_session2 = nullptr; +}; + +class ValidationTestTwoSessionsCreated : public ValidationTestTwoSessions +{ +protected: + void SetUp() override + { + ValidationTestTwoSessions::SetUp(); + ASSERT_EQ(nnfw_create_session(&_session1), NNFW_STATUS_NO_ERROR); + ASSERT_EQ(nnfw_create_session(&_session2), NNFW_STATUS_NO_ERROR); + ASSERT_NE(_session1, nullptr); + ASSERT_NE(_session2, nullptr); + } + + void TearDown() override + { + ASSERT_EQ(nnfw_close_session(_session1), NNFW_STATUS_NO_ERROR); + ASSERT_EQ(nnfw_close_session(_session2), NNFW_STATUS_NO_ERROR); + ValidationTestTwoSessions::TearDown(); + } +}; + #endif // __NNFW_API_TEST_FIXTURES_H__ diff --git a/tests/nnfw_api/src/main.cc b/tests/nnfw_api/src/main.cc index 741c0fb..ff04eb3 100644 --- a/tests/nnfw_api/src/main.cc +++ b/tests/nnfw_api/src/main.cc @@ -31,8 +31,8 @@ int main(int argc, char **argv) } catch (std::runtime_error &e) { + std::cerr << "[WARNING] Test models are not loaded, so some tests will fail" << std::endl; std::cerr << e.what() << std::endl; - return -1; } return RUN_ALL_TESTS(); diff --git a/tests/nnfw_api/src/one_op_tests/AddN.cc b/tests/nnfw_api/src/one_op_tests/AddN.cc index cdb5295..73fa821 100644 --- a/tests/nnfw_api/src/one_op_tests/AddN.cc +++ b/tests/nnfw_api/src/one_op_tests/AddN.cc @@ -51,7 +51,24 @@ TEST_F(GenModelTest, neg_OneOp_AddN_InvalidType) cgen.setInputsAndOutputs({in1, in2, in3}, {out}); _context = std::make_unique(cgen.finish()); - _context->setBackends({"cpu"}); + 
_context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_AddN_TypeDiff) +{ + CircleGen cgen; + + int in1 = cgen.addTensor({{8}, circle::TensorType::TensorType_FLOAT32}); + int in2 = cgen.addTensor({{8}, circle::TensorType::TensorType_FLOAT32}); + int in3 = cgen.addTensor({{8}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{8}, circle::TensorType::TensorType_INT32}); + + cgen.addOperatorAddN({{in1, in2, in3}, {out}}); + cgen.setInputsAndOutputs({in1, in2, in3}, {out}); + + _context = std::make_unique(cgen.finish()); _context->expectFailModelLoad(); SUCCEED(); diff --git a/tests/nnfw_api/src/one_op_tests/ArgMax.cc b/tests/nnfw_api/src/one_op_tests/ArgMax.cc deleted file mode 100644 index 67b02cd..0000000 --- a/tests/nnfw_api/src/one_op_tests/ArgMax.cc +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "GenModelTest.h" - -#include - -TEST_F(GenModelTest, OneOp_ArgMax_AxisToConst) -{ - CircleGen cgen; - const auto output_type = circle::TensorType::TensorType_INT32; - std::vector axis_data{1}; - uint32_t axis_buf = cgen.addBuffer(axis_data); - int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 2, 1}, output_type}); - cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({1, 0})); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_ArgMax_Int64_AxisToConst) -{ - CircleGen cgen; - const auto output_type = circle::TensorType::TensorType_INT64; - std::vector axis_data{1}; - uint32_t axis_buf = cgen.addBuffer(axis_data); - int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 2, 1}, output_type}); - cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({1, 0})); - _context->setBackends({"acl_cl"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, OneOp_ArgMax_AxisToVar) -{ - CircleGen cgen; - const auto output_type = circle::TensorType::TensorType_INT32; - int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32}); - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 2, 1}, output_type}); - cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); - cgen.setInputsAndOutputs({in, axis}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->addTestCase(TestCaseData{} - .addInput({1, 
4, 2, 3}) - .addInput({-3}) - .addOutput({1, 0})); - _context->setBackends({"cpu"}); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_ArgMax_InvalidAxis0) -{ - CircleGen cgen; - const auto output_type = circle::TensorType::TensorType_INT32; - std::vector axis_data{4}; - uint32_t axis_buf = cgen.addBuffer(axis_data); - int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 2, 1}, output_type}); - cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - _context->expectFailCompile(); - - SUCCEED(); -} - -TEST_F(GenModelTest, neg_OneOp_ArgMax_InvalidAxis1) -{ - CircleGen cgen; - const auto output_type = circle::TensorType::TensorType_INT32; - std::vector axis_data{-3}; - uint32_t axis_buf = cgen.addBuffer(axis_data); - int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); - int in = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{2}, output_type}); - cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - _context->expectFailCompile(); - - SUCCEED(); -} diff --git a/tests/nnfw_api/src/one_op_tests/ArgMinMax.cc b/tests/nnfw_api/src/one_op_tests/ArgMinMax.cc new file mode 100644 index 0000000..3df7e74 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/ArgMinMax.cc @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GenModelTest.h" + +#include + +struct ArgMinMaxVariationParam +{ + TestCaseData tcd; + bool is_argmax = true; + circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32; + float scale = 0.0f; + int64_t zero_point = 0; +}; + +class ArgMinMaxVariation : public GenModelTest, + public ::testing::WithParamInterface +{ +}; + +// Input shape: {1, 2, 2, 1} +// Reduce axis: 1 +// Output shape: {1, 2, 1} +// Output type: Int32 +TEST_P(ArgMinMaxVariation, Test) +{ + auto ¶m = GetParam(); + + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_INT32; + std::vector axis_data{1}; + uint32_t axis_buf = cgen.addBuffer(axis_data); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); + int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point); + int out = cgen.addTensor({{1, 2, 1}, output_type}); + param.is_argmax ? 
cgen.addOperatorArgMax({{in, axis}, {out}}, output_type) + : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(param.tcd); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +// Test with different input type and value +INSTANTIATE_TEST_CASE_P( + GenModelTest, ArgMinMaxVariation, + ::testing::Values( + // ArgMax, float input + ArgMinMaxVariationParam{TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({1, 0}), + true}, + // ArgMax, int32 input + ArgMinMaxVariationParam{ + TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({1, 0}), true, + circle::TensorType::TensorType_INT32}, + // ArgMax, uint8 input + ArgMinMaxVariationParam{ + TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({1, 0}), true, + circle::TensorType::TensorType_UINT8, 1.0, 1}, + // ArgMax, int8 input + ArgMinMaxVariationParam{ + TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({1, 0}), true, + circle::TensorType::TensorType_INT8, 1.0, 1}, + // ArgMin, float input + ArgMinMaxVariationParam{TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({0, 1}), + false}, + // ArgMin, int32 input + ArgMinMaxVariationParam{ + TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({0, 1}), false, + circle::TensorType::TensorType_INT32}, + // ArgMin, uint8 input + ArgMinMaxVariationParam{ + TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({0, 1}), false, + circle::TensorType::TensorType_UINT8, 1.0, 1}, + // ArgMin, int8 input + ArgMinMaxVariationParam{ + TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({0, 1}), false, + circle::TensorType::TensorType_INT8, 1.0, 1})); + +TEST_F(GenModelTest, OneOp_ArgMax_Int64_AxisToConst) +{ + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_INT64; + std::vector axis_data{1}; + uint32_t axis_buf = cgen.addBuffer(axis_data); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); + int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 1}, output_type}); + cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(TestCaseData{}.addInput({1, 4, 2, 3}).addOutput({1, 0})); + _context->setBackends({"acl_cl", "cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_ArgMax_AxisToVar) +{ + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_INT32; + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32}); + int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 1}, output_type}); + cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); + cgen.setInputsAndOutputs({in, axis}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(TestCaseData{} + .addInput({1, 4, 2, 3}) + .addInput({-3}) + .addOutput({1, 0})); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_ArgMax_InvalidAxis0) +{ + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_INT32; + std::vector axis_data{4}; + uint32_t axis_buf = cgen.addBuffer(axis_data); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); + int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 1}, output_type}); + cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); 
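// The constant axis value 4 is outside the valid range [-4, 3] for the rank-4
// input, so this negative test expects compilation (not model load) to fail.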
+ cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailCompile(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_ArgMax_InvalidAxis1) +{ + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_INT32; + std::vector axis_data{-3}; + uint32_t axis_buf = cgen.addBuffer(axis_data); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); + int in = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{2}, output_type}); + cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->expectFailCompile(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_ArgMax_InType) +{ + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_INT32; + std::vector axis_data{4}; + uint32_t axis_buf = cgen.addBuffer(axis_data); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); + int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL}); + int out = cgen.addTensor({{1, 2, 1}, output_type}); + cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_ArgMax_AxisType) +{ + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_FLOAT32; + std::vector axis_data{4}; + uint32_t axis_buf = cgen.addBuffer(axis_data); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, axis_buf}); + int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 1}, output_type}); + cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_ArgMax_OutType) +{ + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_FLOAT32; + std::vector axis_data{4}; + uint32_t axis_buf = cgen.addBuffer(axis_data); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); + int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 1}, output_type}); + cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_ArgMax_paramType) +{ + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_INT32; + std::vector axis_data{4}; + uint32_t axis_buf = cgen.addBuffer(axis_data); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); + int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 1}, output_type}); + cgen.addOperatorArgMax({{in, axis}, {out}}, circle::TensorType::TensorType_INT64); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/Cast.cc b/tests/nnfw_api/src/one_op_tests/Cast.cc index 
5cbe09d..928df2d 100644 --- a/tests/nnfw_api/src/one_op_tests/Cast.cc +++ b/tests/nnfw_api/src/one_op_tests/Cast.cc @@ -34,7 +34,7 @@ TEST_F(GenModelTest, OneOp_Cast_Int32ToFloat32) _context = std::make_unique(cgen.finish()); _context->addTestCase( - TestCaseData{}.addInput({1, 2, 3, 4}).addOutput({1, 2, 3, 4})); + TestCaseData{}.addInput({1, 2, 3, 4}).addOutput({1, 2, 3, 4})); _context->setBackends({"acl_cl", "acl_neon", "cpu"}); SUCCEED(); @@ -46,7 +46,7 @@ TEST_F(GenModelTest, OneOp_Cast_Float32ToInt32) _context = std::make_unique(cgen.finish()); _context->addTestCase( - TestCaseData{}.addInput({1, 2, 3, 4}).addOutput({1, 2, 3, 4})); + TestCaseData{}.addInput({1, 2, 3, 4}).addOutput({1, 2, 3, 4})); _context->setBackends({"acl_cl", "acl_neon", "cpu"}); SUCCEED(); @@ -58,7 +58,7 @@ TEST_F(GenModelTest, OneOp_Cast_BoolToFloat32) _context = std::make_unique(cgen.finish()); _context->addTestCase( - TestCaseData{}.addInput({true, false, true, true}).addOutput({1, 0, 1, 1})); + TestCaseData{}.addInput({true, false, true, true}).addOutput({1, 0, 1, 1})); _context->setBackends({"acl_cl", "acl_neon", "cpu"}); SUCCEED(); @@ -70,8 +70,8 @@ TEST_F(GenModelTest, OneOp_Cast_BoolToUInt8) _context = std::make_unique(cgen.finish()); _context->addTestCase(TestCaseData{} - .addInput({true, false, true, true}) - .addOutput(std::vector{1, 0, 1, 1})); + .addInput({true, false, true, true}) + .addOutput(std::vector{1, 0, 1, 1})); _context->setBackends({"acl_cl", "acl_neon", "cpu"}); SUCCEED(); @@ -83,7 +83,7 @@ TEST_F(GenModelTest, OneOp_Cast_BoolToInt32) _context = std::make_unique(cgen.finish()); _context->addTestCase( - TestCaseData{}.addInput({true, false, true, true}).addOutput({1, 0, 1, 1})); + TestCaseData{}.addInput({true, false, true, true}).addOutput({1, 0, 1, 1})); _context->setBackends({"acl_cl", "acl_neon", "cpu"}); SUCCEED(); diff --git a/tests/nnfw_api/src/one_op_tests/Concat.cc b/tests/nnfw_api/src/one_op_tests/Concat.cc index 0f40338..6e24359 100644 --- a/tests/nnfw_api/src/one_op_tests/Concat.cc +++ b/tests/nnfw_api/src/one_op_tests/Concat.cc @@ -37,33 +37,76 @@ TEST_F(GenModelTest, OneOp_Concat_ShareSubTensor) _context = std::make_unique(cgen.finish()); _context->addTestCase(uniformTCD( - {{1, 3, 2, 4}, {5, 4, 7, 4}}, - {{0, 0, 0, 0, 0, 6, 7, 0, 0, 9, 8, 0, 0, 0, 0, 0}, {5, 6, 4, 7, 7, 9, 4, 8}})); + {{1, 3, 2, 4}, {5, 4, 7, 4}}, + {{0, 0, 0, 0, 0, 6, 7, 0, 0, 9, 8, 0, 0, 0, 0, 0}, {5, 6, 4, 7, 7, 9, 4, 8}})); _context->setBackends({"acl_cl", "acl_neon", "cpu"}); SUCCEED(); } -TEST_F(GenModelTest, OneOp_Concat) +struct ConcatVariationParam { - CircleGen cgen; + TestCaseData tcd; + circle::TensorType type = circle::TensorType::TensorType_FLOAT32; + float scale = 0.0f; + int64_t zero_point = 0; +}; + +class ConcatVariation : public GenModelTest, + public ::testing::WithParamInterface +{ +}; - int input1 = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32}); - int input2 = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32}); - int output = cgen.addTensor({{4, 3}, circle::TensorType::TensorType_FLOAT32}); +// Input shape: {2, 3} / {2, 3} +// Output shape: {4, 3} +TEST_P(ConcatVariation, Test) +{ + auto ¶m = GetParam(); + CircleGen cgen; + int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); + int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); + int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point); cgen.addOperatorConcatenation({{input1, input2}, {output}}, 0, 
circle::ActivationFunctionType_NONE); cgen.setInputsAndOutputs({input1, input2}, {output}); _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}}, - {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}})); + _context->addTestCase(param.tcd); _context->setBackends({"acl_cl", "acl_neon", "cpu"}); SUCCEED(); } +INSTANTIATE_TEST_CASE_P( + GenModelTest, ConcatVariation, + ::testing::Values( + // Float + ConcatVariationParam{uniformTCD({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}}, + {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}})}, + // Uint8 + ConcatVariationParam{uniformTCD({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}}, + {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}), + circle::TensorType::TensorType_UINT8, 1.0f, -2}, + // Int8 + ConcatVariationParam{uniformTCD({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}}, + {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}), + circle::TensorType::TensorType_INT8, 1.0f, -2}, + // Int16 + // TODO Enable when nnfw api support int16 type + // ConcatVariationParam{ + // uniformTCD({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}}, + // {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}), + // circle::TensorType::TensorType_INT16, 1.0f, 0}, + // Int32 + ConcatVariationParam{uniformTCD({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}}, + {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}), + circle::TensorType::TensorType_INT32}, + // Int64 + ConcatVariationParam{uniformTCD({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}}, + {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}), + circle::TensorType::TensorType_INT64})); + TEST_F(GenModelTest, OneOp_Concat_Subtensor_4D) { CircleGen cgen; @@ -112,26 +155,26 @@ TEST_F(GenModelTest, OneOp_Concat_Subtensor_4D) _context = std::make_unique(cgen.finish()); _context->addTestCase(uniformTCD( - { - // inputs - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, // in1 - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0} // in2 - }, - { - // outputs - {1, 2, 3, 4, 5}, // s_out1 - {6, 7, 8, 9, 10}, // s_out2 - {11, 12, 13, 14, 15}, // s_out3 - {16, 17, 18, 19, 20}, // s_out4 - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // c_out1 - {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // c_out2 - {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // c_out3 - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // a_out1 - {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // a_out2 - {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // a_out3 - {1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, - 4, 5, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20} // final_out - })); + { + // inputs + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, // in1 + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0} // in2 + }, + { + // outputs + {1, 2, 3, 4, 5}, // s_out1 + {6, 7, 8, 9, 10}, // s_out2 + {11, 12, 13, 14, 15}, // s_out3 + {16, 17, 18, 19, 20}, // s_out4 + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // c_out1 + {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // c_out2 + {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // c_out3 + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // a_out1 + {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // a_out2 + {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // a_out3 + {1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, + 4, 5, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20} // final_out + })); _context->setBackends({"acl_cl", "acl_neon", "cpu"}); SUCCEED(); diff --git a/tests/nnfw_api/src/one_op_tests/Conv2D.cc b/tests/nnfw_api/src/one_op_tests/Conv2D.cc new file mode 100644 index 0000000..3822263 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/Conv2D.cc @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GenModelTest.h" + +TEST_F(GenModelTest, OneOp_Conv2D) +{ + CircleGen cgen; + std::vector weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{2, 3}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32}); + int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, + circle::ActivationFunctionType_NONE, 1, 1); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD( + {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}}, + {{47, -4, -25, 9, 10, 10, -13, 11, -14, -26, -12, 26, 20, 40, 1, 3, 11, 4}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "ruy", "xnnpack"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Conv2D_Stride) +{ + CircleGen cgen; + std::vector weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{2, 3}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32}); + int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 2, 2, + circle::ActivationFunctionType_NONE, 1, 1); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD( + {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}}, + {{22, 27, -10, -2, 5, -8, 7, 3, -14, -26, -10, 18, 4, -13, -28, 9, 14, 1}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "ruy", "xnnpack"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_Conv2D_Dilation) +{ + CircleGen cgen; + std::vector weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{2, 3}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32}); + int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 1, 1, 2}, 
circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, + circle::ActivationFunctionType_NONE, 2, 2); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(uniformTCD( + {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}}, + {{-52, 7}})); + _context->setBackends({"cpu", "ruy", "xnnpack"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Conv2D_Type) +{ + CircleGen cgen; + std::vector weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{2, 3}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32}); + int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT16}); + cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, + circle::ActivationFunctionType_NONE, 1, 1); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Conv2D_Stride) +{ + CircleGen cgen; + std::vector weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{2, 3}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32}); + int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 0, 0, + circle::ActivationFunctionType_NONE, 1, 1); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Conv2D_Dilation) +{ + CircleGen cgen; + std::vector weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0}; + uint32_t weight_buf = cgen.addBuffer(weight_data); + std::vector bias_data{2, 3}; + uint32_t bias_buf = cgen.addBuffer(bias_data); + int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32}); + int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf}); + int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf}); + int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, + circle::ActivationFunctionType_NONE, 0, 0); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/DepthToSpace.cc b/tests/nnfw_api/src/one_op_tests/DepthToSpace.cc new file mode 100644 index 0000000..9f56340 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/DepthToSpace.cc @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GenModelTest.h" + +struct DepthToSpaceVariationParam +{ + TestCaseData tcd; + circle::TensorType type = circle::TensorType::TensorType_FLOAT32; + float scale = 0.0f; + int64_t zero_point = 0; +}; + +class DepthToSpaceVariation : public GenModelTest, + public ::testing::WithParamInterface +{ +}; + +INSTANTIATE_TEST_CASE_P( + GenModelTest, DepthToSpaceVariation, + ::testing::Values( + // Float + DepthToSpaceVariationParam{ + uniformTCD({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}})}, + // Int32 + DepthToSpaceVariationParam{ + uniformTCD({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}), + circle::TensorType::TensorType_INT32}, + // Int64 + DepthToSpaceVariationParam{ + uniformTCD({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}), + circle::TensorType::TensorType_INT64}, + // Uint8 + DepthToSpaceVariationParam{ + uniformTCD({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}), + circle::TensorType::TensorType_UINT8, 1.0f, -2}, + // Int8 + DepthToSpaceVariationParam{ + uniformTCD({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}), + circle::TensorType::TensorType_INT8, 1.0f, -2})); + +// Input shape: {1, 1, 2, 4} +// Block size: 2 +// Output shape: {1, 2, 4, 1} +TEST_P(DepthToSpaceVariation, Test) +{ + auto ¶m = GetParam(); + + CircleGen cgen; + int in = cgen.addTensor({{1, 1, 2, 4}, param.type}, param.scale, param.zero_point); + int out = cgen.addTensor({{1, 2, 4, 1}, param.type}, param.scale, param.zero_point); + cgen.addOperatorDepthToSpace({{in}, {out}}, 2); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(param.tcd); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_DepthToSpace_Blocksize) +{ + CircleGen cgen; + circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32; + int in = cgen.addTensor({{1, 1, 2, 4}, data_type}); + int out = cgen.addTensor({{1, 2, 4, 1}, data_type}); + cgen.addOperatorDepthToSpace({{in}, {out}}, -2); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc b/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc index 56ae296..87c67f1 100644 --- a/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc +++ b/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc @@ -34,7 +34,7 @@ TEST_F(GenModelTest, OneOp_DepthwiseConv2D) _context = std::make_unique(cgen.finish()); _context->addTestCase(uniformTCD({{1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}}, {{71, -34, 99, -20, 91, -26, 127, -4}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"}); SUCCEED(); } @@ -56,11 +56,11 @@ TEST_F(GenModelTest, OneOp_DepthwiseConv2D_Dilation) _context = std::make_unique(cgen.finish()); _context->addTestCase(uniformTCD({{ - 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, - 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, + 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }}, {{13, 14, 0, 0, 0, 0, 11, 12, 5, 6, 0, 0, 0, 0, 3, 4}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"}); SUCCEED(); } @@ -84,7 +84,7 @@ TEST_F(GenModelTest, OneOp_DepthwiseConv2D_Dilation_N_Stride) _context->addTestCase(uniformTCD({{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, {{4, 0, 3, 0, 0, 0, 2, 0, 1}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"}); SUCCEED(); } @@ -182,11 +182,11 @@ CircleBuffer genSimpleDepthwiseConv2DQuantizedModel(int stride, int input_depth, CircleGen cgen; uint32_t ker_buf = cgen.addBuffer(std::vector{ - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, - 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, - 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}); + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, + 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, + 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}); uint32_t bias_buf = cgen.addBuffer(std::vector(output_depth, 0)); int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_UINT8}, 0.5, 0); int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_UINT8, ker_buf}, 0.5, 0); @@ -214,14 +214,13 @@ class DepthwiseConv2DVariation : public GenModelTest, TEST_P(DepthwiseConv2DVariation, Test) { // Same input is used for all tests but output differs - static const std::vector input64{0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, - 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, - 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, - 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2}; + static const std::vector input64{ + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, + 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2}; auto ¶m = GetParam(); _context = std::make_unique(genSimpleDepthwiseConv2DQuantizedModel( - param.stride, param.input_depth, param.depth_multiplier)); + param.stride, param.input_depth, param.depth_multiplier)); std::vector ref_input(input64.begin(), input64.begin() + param.input_depth * 4); _context->addTestCase(uniformTCD({ref_input}, {param.ref_output})); _context->setBackends({"acl_cl", "acl_neon", "cpu"}); @@ -232,45 +231,45 @@ TEST_P(DepthwiseConv2DVariation, Test) // Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU // kernels. 
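// DepthwiseConv2DVariation (defined above and instantiated just below), like the
// other *Variation suites in this patch (ArgMinMaxVariation, ConcatVariation,
// DepthToSpaceVariation, FillVariation), follows googletest's value-parameterized
// test pattern: a plain parameter struct, a fixture deriving from
// ::testing::WithParamInterface, one TEST_P body, and a single
// INSTANTIATE_TEST_CASE_P listing the cases. Below is a minimal self-contained
// sketch of that pattern; the names are illustrative only, are not part of this
// patch, and gtest is assumed to be reachable through GenModelTest.h.

struct SquareVariationParam
{
  int input = 0;
  int expected = 0;
};

class SquareVariation : public ::testing::TestWithParam<SquareVariationParam>
{
};

TEST_P(SquareVariation, Test)
{
  // Each instantiated case runs this body once with its own parameter values.
  const auto &param = GetParam();
  EXPECT_EQ(param.input * param.input, param.expected);
}

INSTANTIATE_TEST_CASE_P(Sketch, SquareVariation,
                        ::testing::Values(SquareVariationParam{2, 4},
                                          SquareVariationParam{3, 9}));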
INSTANTIATE_TEST_CASE_P( - GenModelTest, DepthwiseConv2DVariation, - ::testing::Values( - // Stride == 1 - DepthwiseConv2DVariationParam{1, 8, 1, std::vector{0, 3, 5, 8, 0, 3, 5, 8}}, - DepthwiseConv2DVariationParam{1, 4, 2, std::vector{0, 0, 2, 3, 0, 2, 6, 9}}, - DepthwiseConv2DVariationParam{ - 1, 2, 8, std::vector{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}}, - DepthwiseConv2DVariationParam{1, 2, 2, std::vector{0, 1, 4, 6}}, - DepthwiseConv2DVariationParam{1, 2, 1, std::vector{2, 5}}, - DepthwiseConv2DVariationParam{1, 1, 2, std::vector{2, 4}}, - DepthwiseConv2DVariationParam{1, 1, 4, std::vector{0, 2, 3, 5}}, - DepthwiseConv2DVariationParam{1, 4, 1, std::vector{0, 1, 4, 9}}, - DepthwiseConv2DVariationParam{ - 1, 4, 4, std::vector{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}}, - DepthwiseConv2DVariationParam{1, 12, 1, - std::vector{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}}, - // Stride == 2 - DepthwiseConv2DVariationParam{2, 4, 1, std::vector{0, 1, 4, 9}}, - DepthwiseConv2DVariationParam{2, 2, 1, std::vector{2, 5}}, - DepthwiseConv2DVariationParam{2, 1, 8, std::vector{0, 2, 3, 5, 0, 2, 3, 5}}, - DepthwiseConv2DVariationParam{ - 2, 1, 32, std::vector{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, - 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}}, - DepthwiseConv2DVariationParam{2, 1, 20, std::vector{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, - 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}}, - DepthwiseConv2DVariationParam{ - 2, 1, 16, std::vector{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}}, - DepthwiseConv2DVariationParam{2, 8, 1, std::vector{0, 3, 5, 8, 0, 3, 5, 8}}, - DepthwiseConv2DVariationParam{ - 2, 8, 2, std::vector{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}}, - DepthwiseConv2DVariationParam{ - 2, 16, 1, std::vector{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}})); + GenModelTest, DepthwiseConv2DVariation, + ::testing::Values( + // Stride == 1 + DepthwiseConv2DVariationParam{1, 8, 1, std::vector{0, 3, 5, 8, 0, 3, 5, 8}}, + DepthwiseConv2DVariationParam{1, 4, 2, std::vector{0, 0, 2, 3, 0, 2, 6, 9}}, + DepthwiseConv2DVariationParam{ + 1, 2, 8, std::vector{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}}, + DepthwiseConv2DVariationParam{1, 2, 2, std::vector{0, 1, 4, 6}}, + DepthwiseConv2DVariationParam{1, 2, 1, std::vector{2, 5}}, + DepthwiseConv2DVariationParam{1, 1, 2, std::vector{2, 4}}, + DepthwiseConv2DVariationParam{1, 1, 4, std::vector{0, 2, 3, 5}}, + DepthwiseConv2DVariationParam{1, 4, 1, std::vector{0, 1, 4, 9}}, + DepthwiseConv2DVariationParam{ + 1, 4, 4, std::vector{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}}, + DepthwiseConv2DVariationParam{1, 12, 1, + std::vector{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}}, + // Stride == 2 + DepthwiseConv2DVariationParam{2, 4, 1, std::vector{0, 1, 4, 9}}, + DepthwiseConv2DVariationParam{2, 2, 1, std::vector{2, 5}}, + DepthwiseConv2DVariationParam{2, 1, 8, std::vector{0, 2, 3, 5, 0, 2, 3, 5}}, + DepthwiseConv2DVariationParam{2, 1, 32, std::vector{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, + 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, + 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}}, + DepthwiseConv2DVariationParam{ + 2, 1, 20, std::vector{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}}, + DepthwiseConv2DVariationParam{ + 2, 1, 16, std::vector{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}}, + DepthwiseConv2DVariationParam{2, 8, 1, std::vector{0, 3, 5, 8, 0, 3, 5, 8}}, + DepthwiseConv2DVariationParam{ + 2, 8, 2, std::vector{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}}, + DepthwiseConv2DVariationParam{ + 2, 16, 1, std::vector{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 
13, 0, 4, 7, 12}})); TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_InvalidPaddingType) { _context = std::make_unique(genNegTestDepthwiseConv2DModel( - static_cast(99), 1, 1, 1, circle::ActivationFunctionType_NONE)); + static_cast(99), 1, 1, 1, circle::ActivationFunctionType_NONE)); _context->expectFailModelLoad(); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"}); SUCCEED(); } diff --git a/tests/nnfw_api/src/one_op_tests/Elu.cc b/tests/nnfw_api/src/one_op_tests/Elu.cc new file mode 100644 index 0000000..a037070 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/Elu.cc @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GenModelTest.h" + +TEST_F(GenModelTest, OneOp_Elu) +{ + CircleGen cgen; + int in = cgen.addTensor({{1, 2, 4, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 2, 4, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorElu({{in}, {out}}); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase( + uniformTCD({{0, -6, 2, -4, 3, -2, 10, -0.1}}, + {{0.0, -0.997521, 2.0, -0.981684, 3.0, -0.864665, 10.0, -0.0951626}})); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Elu_Type) +{ + CircleGen cgen; + int in = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_UINT8}, 1.0f, 0); + int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorElu({{in}, {out}}); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->expectFailModelLoad(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/Equal.cc b/tests/nnfw_api/src/one_op_tests/Equal.cc index 26e52fd..9f79575 100644 --- a/tests/nnfw_api/src/one_op_tests/Equal.cc +++ b/tests/nnfw_api/src/one_op_tests/Equal.cc @@ -27,9 +27,9 @@ TEST_F(GenModelTest, OneOp_Equal) _context = std::make_unique(cgen.finish()); _context->addTestCase(TestCaseData{} - .addInput({0.1, 0.3, 0.5, 0.7}) - .addInput({0.1, 0.2, 0.3, 0.4}) - .addOutput({true, false, false, false})); + .addInput({0.1, 0.3, 0.5, 0.7}) + .addInput({0.1, 0.2, 0.3, 0.4}) + .addOutput({true, false, false, false})); _context->setBackends({"acl_cl", "acl_neon", "cpu"}); SUCCEED(); diff --git a/tests/nnfw_api/src/one_op_tests/ExpandDims.cc b/tests/nnfw_api/src/one_op_tests/ExpandDims.cc new file mode 100644 index 0000000..280cf73 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/ExpandDims.cc @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GenModelTest.h" + +TEST_F(GenModelTest, OneOp_ExpandDims) +{ + CircleGen cgen; + + std::vector axis_data{1}; + uint32_t axis_buf = cgen.addBuffer(axis_data); + int in = cgen.addTensor({{1, 4, 1}, circle::TensorType::TensorType_FLOAT32}); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); + int out = cgen.addTensor({{1, 1, 4, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorExpandDims({{in, axis}, {out}}); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase( + TestCaseData{}.addInput({0.1, 0.3, 0.5, 0.7}).addOutput({0.1, 0.3, 0.5, 0.7})); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_ExpandDims_Int64AxisNeg) +{ + CircleGen cgen; + + std::vector axis_data{-1}; + uint32_t axis_buf = cgen.addBuffer(axis_data); + int in = cgen.addTensor({{1, 4, 1}, circle::TensorType::TensorType_FLOAT32}); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT64, axis_buf}); + int out = cgen.addTensor({{1, 4, 1, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorExpandDims({{in, axis}, {out}}); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase( + TestCaseData{}.addInput({0.1, 0.3, 0.5, 0.7}).addOutput({0.1, 0.3, 0.5, 0.7})); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_neg_ExpandDims_Axis) +{ + CircleGen cgen; + + std::vector axis_data{4}; + uint32_t axis_buf = cgen.addBuffer(axis_data); + int in = cgen.addTensor({{1, 4, 1}, circle::TensorType::TensorType_FLOAT32}); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); + int out = cgen.addTensor({{1, 1, 4, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorExpandDims({{in, axis}, {out}}); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->setBackends({"cpu"}); + _context->expectFailCompile(); + + SUCCEED(); +} + +TEST_F(GenModelTest, OneOp_neg_ExpandDims_AxisNegInput) +{ + CircleGen cgen; + + int in = cgen.addTensor({{1, 4, 1}, circle::TensorType::TensorType_FLOAT32}); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32}); + int out = cgen.addTensor({{1, 1, 4, 1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorExpandDims({{in, axis}, {out}}); + cgen.setInputsAndOutputs({in, axis}, {out}); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(TestCaseData{} + .addInput({0.1, 0.3, 0.5, 0.7}) + .addInput({-5}) + .addOutput({0.1, 0.3, 0.5, 0.7}) + .expectFailRun()); + _context->setBackends({"cpu"}); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/Fill.cc b/tests/nnfw_api/src/one_op_tests/Fill.cc index cf8948d..4d5e4d8 100644 --- a/tests/nnfw_api/src/one_op_tests/Fill.cc +++ b/tests/nnfw_api/src/one_op_tests/Fill.cc @@ -16,61 +16,78 @@ #include "GenModelTest.h" -TEST_F(GenModelTest, OneOp_Fill_Int32) +struct FillVariationParam { - CircleGen cgen; - std::vector value_data{13}; - uint32_t value_buf = 
cgen.addBuffer(value_data); - - int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32}); - int value = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, value_buf}); - int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_INT32}); - cgen.addOperatorFill({{in, value}, {out}}); - cgen.setInputsAndOutputs({in}, {out}); + TestCaseData tcd; + const uint8_t *value_data = nullptr; + circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32; +}; - _context = std::make_unique(cgen.finish()); - _context->addTestCase( - TestCaseData{}.addInput({2, 3}).addOutput({13, 13, 13, 13, 13, 13})); - _context->setBackends({"cpu"}); - - SUCCEED(); -} +class FillVariation : public GenModelTest, public ::testing::WithParamInterface +{ +}; -TEST_F(GenModelTest, OneOp_Fill_Int64) +// value is constant +TEST_P(FillVariation, Test) { + auto ¶m = GetParam(); + CircleGen cgen; - std::vector value_data{13}; - uint32_t value_buf = cgen.addBuffer(value_data); - int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32}); - int value = cgen.addTensor({{1}, circle::TensorType::TensorType_INT64, value_buf}); - int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_INT64}); - cgen.addOperatorFill({{in, value}, {out}}); - cgen.setInputsAndOutputs({in}, {out}); + size_t value_size = + (param.data_type == circle::TensorType::TensorType_INT64) ? sizeof(int64_t) : sizeof(int32_t); + uint32_t value_buf = cgen.addBuffer(param.value_data, value_size); + + int dims = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32}); + int value = cgen.addTensor({{1}, param.data_type, value_buf}); + int out = cgen.addTensor({{2, 3}, param.data_type}); + cgen.addOperatorFill({{dims, value}, {out}}); + cgen.setInputsAndOutputs({dims}, {out}); _context = std::make_unique(cgen.finish()); - _context->addTestCase( - TestCaseData{}.addInput({2, 3}).addOutput({13, 13, 13, 13, 13, 13})); + _context->addTestCase(param.tcd); _context->setBackends({"cpu"}); SUCCEED(); } -TEST_F(GenModelTest, OneOp_Fill_Float32) +const int32_t test_int32 = 13; +const int64_t test_int64 = 1052; +const float test_float = 5.2; + +// Test with different value type +INSTANTIATE_TEST_CASE_P( + GenModelTest, FillVariation, + ::testing::Values( + // float value + FillVariationParam{ + TestCaseData{}.addInput({2, 3}).addOutput({5.2, 5.2, 5.2, 5.2, 5.2, 5.2}), + reinterpret_cast(&test_float)}, + // int32 value + FillVariationParam{ + TestCaseData{}.addInput({2, 3}).addOutput({13, 13, 13, 13, 13, 13}), + reinterpret_cast(&test_int32), circle::TensorType::TensorType_INT32}, + // uint8 value + FillVariationParam{ + TestCaseData{}.addInput({2, 3}).addOutput({1052, 1052, 1052, 1052, 1052, + 1052}), + reinterpret_cast(&test_int64), circle::TensorType::TensorType_INT64})); + +TEST_F(GenModelTest, OneOp_Fill_Int64_Shape) { CircleGen cgen; std::vector value_data{1.3}; uint32_t value_buf = cgen.addBuffer(value_data); - int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32}); + int dims = cgen.addTensor({{2}, circle::TensorType::TensorType_INT64}); int value = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, value_buf}); int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorFill({{in, value}, {out}}); - cgen.setInputsAndOutputs({in}, {out}); + cgen.addOperatorFill({{dims, value}, {out}}); + cgen.setInputsAndOutputs({dims}, {out}); _context = std::make_unique(cgen.finish()); _context->addTestCase( - TestCaseData{}.addInput({2, 3}).addOutput({1.3, 1.3, 1.3, 1.3, 1.3, 
1.3})); + TestCaseData{}.addInput({2, 3}).addOutput({1.3, 1.3, 1.3, 1.3, 1.3, 1.3})); _context->setBackends({"cpu"}); SUCCEED(); @@ -87,7 +104,7 @@ TEST_F(GenModelTest, neg_OneOp_Fill_Int32_oneoperand) _context = std::make_unique(cgen.finish()); _context->addTestCase( - TestCaseData{}.addInput({2, 3}).addOutput({13, 13, 13, 13, 13, 13})); + TestCaseData{}.addInput({2, 3}).addOutput({13, 13, 13, 13, 13, 13})); _context->setBackends({"cpu"}); _context->expectFailModelLoad(); @@ -105,7 +122,7 @@ TEST_F(GenModelTest, neg_OneOp_Fill_Int64_oneoperand) _context = std::make_unique(cgen.finish()); _context->addTestCase( - TestCaseData{}.addInput({2, 3}).addOutput({13, 13, 13, 13, 13, 13})); + TestCaseData{}.addInput({2, 3}).addOutput({13, 13, 13, 13, 13, 13})); _context->setBackends({"cpu"}); _context->expectFailModelLoad(); @@ -123,7 +140,7 @@ TEST_F(GenModelTest, neg_OneOp_Fill_Float32_oneoperand) _context = std::make_unique(cgen.finish()); _context->addTestCase( - TestCaseData{}.addInput({2, 3}).addOutput({1.3, 1.3, 1.3, 1.3, 1.3, 1.3})); + TestCaseData{}.addInput({2, 3}).addOutput({1.3, 1.3, 1.3, 1.3, 1.3, 1.3})); _context->setBackends({"cpu"}); _context->expectFailModelLoad(); diff --git a/tests/nnfw_api/src/one_op_tests/FullyConnected.cc b/tests/nnfw_api/src/one_op_tests/FullyConnected.cc index a7c01e1..791787f 100644 --- a/tests/nnfw_api/src/one_op_tests/FullyConnected.cc +++ b/tests/nnfw_api/src/one_op_tests/FullyConnected.cc @@ -51,8 +51,8 @@ TEST_F(GenModelTest, OneOp_FullyConnected) _context = std::make_unique(cgen.finish()); _context->addTestCase( - uniformTCD({{1, 3, 2, 1}}, {{2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 6}})); - _context->setBackends({"cpu", "acl_neon"}); + uniformTCD({{1, 3, 2, 1}}, {{2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 6}})); + _context->setBackends({"cpu", "acl_neon", "xnnpack", "ruy"}); SUCCEED(); } @@ -80,7 +80,7 @@ TEST_F(GenModelTest, OneOp_FullyConnectedShuffled16x1Float32) _context = std::make_unique(cgen.finish()); _context->addTestCase( - uniformTCD({{1, 3, 2, 1}}, {{2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 6}})); + uniformTCD({{1, 3, 2, 1}}, {{2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 6}})); _context->setBackends({"cpu"}); SUCCEED(); @@ -129,12 +129,12 @@ TEST_F(GenModelTest, OneOp_FullyConnected16x1Sparse) uint32_t bias_buf = cgen.addBuffer(bias_data); int input = cgen.addTensor({{1, 4}, circle::TensorType::TensorType_FLOAT32}); CircleGen::SparsityParams sp{ - {0, 1, 2, 3}, - {0, 1}, - {{CircleGen::SparseDimensionType::DimensionType_DENSE, 1}, - {CircleGen::SparseDimensionType::DimensionType_SPARSE_CSR, {0, 2}, {0, 3}}, - {CircleGen::SparseDimensionType::DimensionType_DENSE, 16}, - {CircleGen::SparseDimensionType::DimensionType_DENSE, 1}}}; + {0, 1, 2, 3}, + {0, 1}, + {{CircleGen::SparseDimensionType::DimensionType_DENSE, 1}, + {CircleGen::SparseDimensionType::DimensionType_SPARSE_CSR, {0, 2}, {0, 3}}, + {CircleGen::SparseDimensionType::DimensionType_DENSE, 16}, + {CircleGen::SparseDimensionType::DimensionType_DENSE, 1}}}; int weight = cgen.addTensor({{16, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf}, sp); int bias = cgen.addTensor({{16}, circle::TensorType::TensorType_FLOAT32, bias_buf}); int output = cgen.addTensor({{1, 16}, circle::TensorType::TensorType_FLOAT32}); @@ -143,7 +143,7 @@ TEST_F(GenModelTest, OneOp_FullyConnected16x1Sparse) _context = std::make_unique(cgen.finish()); _context->addTestCase( - uniformTCD({{1, 3, 2, 1}}, {{2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 6}})); + uniformTCD({{1, 3, 2, 1}}, {{2, 1, 5, 5, 
2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 6}})); _context->setBackends({"cpu"}); SUCCEED(); @@ -171,9 +171,9 @@ TEST_F(GenModelTest, OneOp_FullyConnected_OptionalBias) _context = std::make_unique(cgen.finish()); _context->addTestCase( - uniformTCD({{3, -1, -1, 1, -2, 0, -2, 1}}, - {{-4, -2, 9, -6, 8, 13, 5, 18, 5, -3, -7, -2, -16, -5, -1, -1}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + uniformTCD({{3, -1, -1, 1, -2, 0, -2, 1}}, + {{-4, -2, 9, -6, 8, 13, 5, 18, 5, -3, -7, -2, -16, -5, -1, -1}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack", "ruy"}); SUCCEED(); } @@ -200,9 +200,9 @@ TEST_F(GenModelTest, neg_OneOp_FullyConnected_NoBias) _context = std::make_unique(cgen.finish()); _context->addTestCase( - uniformTCD({{3, -1, -1, 1, -2, 0, -2, 1}}, - {{-4, -2, 9, -6, 8, 13, 5, 18, 5, -3, -7, -2, -16, -5, -1, -1}})); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + uniformTCD({{3, -1, -1, 1, -2, 0, -2, 1}}, + {{-4, -2, 9, -6, 8, 13, 5, 18, 5, -3, -7, -2, -16, -5, -1, -1}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack", "ruy"}); _context->expectFailCompile(); SUCCEED(); diff --git a/tests/nnfw_api/src/one_op_tests/L2Normalization.cc b/tests/nnfw_api/src/one_op_tests/L2Normalization.cc index 8e0ae6d..f825fec 100644 --- a/tests/nnfw_api/src/one_op_tests/L2Normalization.cc +++ b/tests/nnfw_api/src/one_op_tests/L2Normalization.cc @@ -27,9 +27,9 @@ TEST_F(GenModelTest, OneOp_L2Normalization) _context = std::make_unique(cgen.finish()); _context->addTestCase( - uniformTCD({{0, 3, 4, 0, 5, 12, 0, 8, 15, 0, 7, 24}}, - {{0, 0.6, 0.8, 0, 0.38461539149284363, 0.92307698726654053, 0, - 0.47058823704719543, 0.88235294818878174, 0, 0.28, 0.96}})); + uniformTCD({{0, 3, 4, 0, 5, 12, 0, 8, 15, 0, 7, 24}}, + {{0, 0.6, 0.8, 0, 0.38461539149284363, 0.92307698726654053, 0, + 0.47058823704719543, 0.88235294818878174, 0, 0.28, 0.96}})); _context->setBackends({"acl_cl", "acl_neon", "cpu"}); SUCCEED(); diff --git a/tests/nnfw_api/src/one_op_tests/LeakyRelu.cc b/tests/nnfw_api/src/one_op_tests/LeakyRelu.cc index db1a375..cb3af4e 100644 --- a/tests/nnfw_api/src/one_op_tests/LeakyRelu.cc +++ b/tests/nnfw_api/src/one_op_tests/LeakyRelu.cc @@ -26,8 +26,8 @@ TEST_F(GenModelTest, OneOp_LeakyRelu) _context = std::make_unique(cgen.finish()); _context->addTestCase( - uniformTCD({{0, 1.0, 3.0, 1.0, -1.0, -2.0f}}, {{0, 1.0, 3.0, 1.0, -0.5, -1.0}})); - _context->setBackends({"acl_cl", "acl_neon"}); + uniformTCD({{0, 1.0, 3.0, 1.0, -1.0, -2.0f}}, {{0, 1.0, 3.0, 1.0, -0.5, -1.0}})); + _context->setBackends({"cpu", "acl_cl", "acl_neon"}); SUCCEED(); } @@ -41,7 +41,7 @@ TEST_F(GenModelTest, neg_OneOp_LeakyRelu_InvalidType) cgen.setInputsAndOutputs({in}, {out}); _context = std::make_unique(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon"}); + _context->setBackends({"cpu", "acl_cl", "acl_neon"}); _context->expectFailModelLoad(); SUCCEED(); diff --git a/tests/nnfw_api/src/one_op_tests/LogSoftmax.cc b/tests/nnfw_api/src/one_op_tests/LogSoftmax.cc index b34b2e8..5834fa5 100644 --- a/tests/nnfw_api/src/one_op_tests/LogSoftmax.cc +++ b/tests/nnfw_api/src/one_op_tests/LogSoftmax.cc @@ -30,8 +30,8 @@ TEST_F(GenModelTest, OneOp_LogSoftmax) _context = std::make_unique(cgen.finish()); _context->setBackends({"cpu"}); _context->addTestCase(uniformTCD( - {{0, -6, 2, 4, 3, -2, 10, 1}}, - {{-.00247565, -6.00247, -2.12692, -.126928, -.00671534, -5.00671, -.000123374, -9.00012}})); + {{0, -6, 2, 4, 3, -2, 10, 1}}, + {{-.00247565, -6.00247, -2.12692, -.126928, -.00671534, -5.00671, 
-.000123374, -9.00012}})); SUCCEED(); } diff --git a/tests/nnfw_api/src/one_op_tests/Mean.cc b/tests/nnfw_api/src/one_op_tests/Mean.cc new file mode 100644 index 0000000..6293d38 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/Mean.cc @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GenModelTest.h" + +#include + +CircleBuffer genSimpleMeanModel() +{ + CircleGen cgen; + uint32_t axis_buf = cgen.addBuffer(std::vector{1, 2}); + int in = cgen.addTensor({{1, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32}); + int axis = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32, axis_buf}); + int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32}); + cgen.addOperatorMean({{in, axis}, {out}}, true); + cgen.setInputsAndOutputs({in}, {out}); + return cgen.finish(); +} + +TEST_F(GenModelTest, OneOp_Mean) +{ + auto model = genSimpleMeanModel(); + _context = std::make_unique(std::move(model)); + _context->addTestCase(uniformTCD({{1, 2, 3, 4, 5, 6, 7, 8, 9}}, {{5}})); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + +CircleBuffer genWrongMeanModel() +{ + CircleGen cgen; + uint32_t axis_buf = cgen.addBuffer(std::vector{1, 2}); + int in = cgen.addTensor({{1, 3, 3, 1}, circle::TensorType::TensorType_BOOL}); + int axis = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32, axis_buf}); + int out = cgen.addTensor({{1}, circle::TensorType::TensorType_BOOL}); + cgen.addOperatorMean({{in, axis}, {out}}, true); + cgen.setInputsAndOutputs({in}, {out}); + return cgen.finish(); +} + +TEST_F(GenModelTest, neg_OneOp_Mean) +{ + auto model = genWrongMeanModel(); + _context = std::make_unique(std::move(model)); + _context->addTestCase(uniformTCD({{1, 2, 3, 4, 5, 6, 7, 8, 9}}, {{5}})); + _context->setBackends({"cpu"}); + _context->expectFailCompile(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/OneHot.cc b/tests/nnfw_api/src/one_op_tests/OneHot.cc index 11df5bc..78ad35b 100644 --- a/tests/nnfw_api/src/one_op_tests/OneHot.cc +++ b/tests/nnfw_api/src/one_op_tests/OneHot.cc @@ -36,9 +36,9 @@ TEST_F(GenModelTest, OneOp_OneHot_OffValueToConst) _context = std::make_unique(cgen.finish()); _context->addTestCase(TestCaseData{} - .addInput({1, 2, 0, 2}) - .addInput({1}) - .addOutput({0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1})); + .addInput({1, 2, 0, 2}) + .addInput({1}) + .addOutput({0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1})); _context->setBackends({"acl_cl", "acl_neon", "cpu"}); SUCCEED(); @@ -60,10 +60,10 @@ TEST_F(GenModelTest, OneOp_OneHot_OffValueToNotZero) _context = std::make_unique(cgen.finish()); _context->addTestCase(TestCaseData{} - .addInput({1, 2, 0, 2}) - .addInput({1}) - .addInput({-1}) - .addOutput({-1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1})); + .addInput({1, 2, 0, 2}) + .addInput({1}) + .addInput({-1}) + .addOutput({-1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1})); _context->setBackends({"acl_cl", "acl_neon", "cpu"}); SUCCEED(); @@ 
-87,9 +87,9 @@ TEST_F(GenModelTest, OneOp_OneHot_IndicesValueToNeg_OffValueToConst) _context = std::make_unique(cgen.finish()); _context->addTestCase(TestCaseData{} - .addInput({1, 2, 0, -1}) - .addInput({1}) - .addOutput({0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0})); + .addInput({1, 2, 0, -1}) + .addInput({1}) + .addOutput({0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0})); _context->setBackends({"acl_cl", "acl_neon", "cpu"}); SUCCEED(); @@ -111,10 +111,10 @@ TEST_F(GenModelTest, OneOp_OneHot_IndicesValueToNeg_OffValueToVar) _context = std::make_unique(cgen.finish()); _context->addTestCase(TestCaseData{} - .addInput({1, 2, 0, -1}) - .addInput({1}) - .addInput({0}) - .addOutput({0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0})); + .addInput({1, 2, 0, -1}) + .addInput({1}) + .addInput({0}) + .addOutput({0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0})); _context->setBackends({"acl_cl", "acl_neon", "cpu"}); SUCCEED(); diff --git a/tests/nnfw_api/src/one_op_tests/Pad.cc b/tests/nnfw_api/src/one_op_tests/Pad.cc index 63d02ab..380c1a3 100644 --- a/tests/nnfw_api/src/one_op_tests/Pad.cc +++ b/tests/nnfw_api/src/one_op_tests/Pad.cc @@ -29,7 +29,7 @@ TEST_F(GenModelTest, OneOp_Pad) cgen.setInputsAndOutputs({in}, {out}); _context = std::make_unique(cgen.finish()); _context->addTestCase( - uniformTCD({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}})); + uniformTCD({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}})); _context->setBackends({"acl_cl", "acl_neon", "cpu"}); SUCCEED(); diff --git a/tests/nnfw_api/src/one_op_tests/PadV2.cc b/tests/nnfw_api/src/one_op_tests/PadV2.cc index e613fe2..f9fe5f6 100644 --- a/tests/nnfw_api/src/one_op_tests/PadV2.cc +++ b/tests/nnfw_api/src/one_op_tests/PadV2.cc @@ -26,7 +26,7 @@ TEST_F(GenModelTest, OneOp_PadV2) std::vector padding_value_data{3.0}; uint32_t padding_value_buf = cgen.addBuffer(padding_value_data); int padding_value = - cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf}); + cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf}); int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32}); @@ -35,7 +35,7 @@ TEST_F(GenModelTest, OneOp_PadV2) _context = std::make_unique(cgen.finish()); _context->addTestCase( - uniformTCD({{1, 2, 3, 4}}, {{3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 4, 3, 3, 3, 3, 3}})); + uniformTCD({{1, 2, 3, 4}}, {{3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 4, 3, 3, 3, 3, 3}})); _context->setBackends({"cpu"}); SUCCEED(); @@ -51,7 +51,7 @@ TEST_F(GenModelTest, neg_OneOp_PadV2_InvalidPadRank) std::vector padding_value_data{3.0}; uint32_t padding_value_buf = cgen.addBuffer(padding_value_data); int padding_value = - cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf}); + cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf}); int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32}); @@ -75,7 +75,7 @@ TEST_F(GenModelTest, neg_OneOp_PadV2_InvalidPadDim0) std::vector padding_value_data{3.0}; uint32_t padding_value_buf = cgen.addBuffer(padding_value_data); int padding_value = - cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf}); + cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf}); int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32}); @@ -99,7 +99,7 @@ TEST_F(GenModelTest, neg_OneOp_PadV2_InvalidPadDim1) std::vector padding_value_data{3.0}; uint32_t padding_value_buf = cgen.addBuffer(padding_value_data); int padding_value = - 
cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf}); + cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf}); int out = cgen.addTensor({{2, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}); diff --git a/tests/nnfw_api/src/one_op_tests/Rank.cc b/tests/nnfw_api/src/one_op_tests/Rank.cc index 02e76ba..60ec193 100644 --- a/tests/nnfw_api/src/one_op_tests/Rank.cc +++ b/tests/nnfw_api/src/one_op_tests/Rank.cc @@ -27,9 +27,9 @@ TEST_F(GenModelTest, OneOp_Rank) cgen.setInputsAndOutputs({in}, {out}); _context = std::make_unique(cgen.finish()); _context->addTestCase( - TestCaseData{} - .addInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}) - .addOutput({4})); + TestCaseData{} + .addInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}) + .addOutput({4})); _context->setBackends({"cpu"}); SUCCEED(); @@ -45,8 +45,8 @@ TEST_F(GenModelTest, OneOp_Rank_Int32) cgen.addOperatorRank({{in}, {out}}); cgen.setInputsAndOutputs({in}, {out}); _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD( - {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}}, {{4}})); + _context->addTestCase( + uniformTCD({{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}}, {{4}})); _context->setBackends({"cpu"}); SUCCEED(); diff --git a/tests/nnfw_api/src/one_op_tests/ResizeBilinear.cc b/tests/nnfw_api/src/one_op_tests/ResizeBilinear.cc index 437cfd1..20320a0 100644 --- a/tests/nnfw_api/src/one_op_tests/ResizeBilinear.cc +++ b/tests/nnfw_api/src/one_op_tests/ResizeBilinear.cc @@ -30,8 +30,8 @@ TEST_F(GenModelTest, OneOp_ResizeBilinear_SizeToConst) cgen.setInputsAndOutputs({in}, {out}); _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD( - {{1, 1, 2, 2}}, {{1, 1, 1, 1.666666667, 1.666666667, 1.666666667, 2, 2, 2}})); + _context->addTestCase( + uniformTCD({{1, 1, 2, 2}}, {{1, 1, 1, 1.666666667, 1.666666667, 1.666666667, 2, 2, 2}})); _context->setBackends({"acl_cl", "acl_neon", "cpu"}); SUCCEED(); diff --git a/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc b/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc index d1617c3..1dd6584 100644 --- a/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc +++ b/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc @@ -31,8 +31,8 @@ TEST_F(GenModelTest, OneOp_ResizeNearestNeighbor) _context = std::make_unique(cgen.finish()); _context->addTestCase( - uniformTCD({{3, 4, 6, 10, 9, 10, 12, 16}}, - {{3, 4, 3, 4, 6, 10, 3, 4, 3, 4, 6, 10, 9, 10, 9, 10, 12, 16}})); + uniformTCD({{3, 4, 6, 10, 9, 10, 12, 16}}, + {{3, 4, 3, 4, 6, 10, 3, 4, 3, 4, 6, 10, 9, 10, 9, 10, 12, 16}})); _context->setBackends({"acl_cl"}); SUCCEED(); diff --git a/tests/nnfw_api/src/one_op_tests/Reverse.cc b/tests/nnfw_api/src/one_op_tests/Reverse.cc index ef0c5fe..4168b21 100644 --- a/tests/nnfw_api/src/one_op_tests/Reverse.cc +++ b/tests/nnfw_api/src/one_op_tests/Reverse.cc @@ -32,8 +32,8 @@ TEST_F(GenModelTest, OneOp_ReverseV2_3D) _context = std::make_unique(cgen.finish()); _context->setBackends({"acl_cl", "cpu"}); _context->addTestCase(uniformTCD( - {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}}, - {{5, 6, 3, 4, 1, 2, 11, 12, 9, 10, 7, 8, 17, 18, 15, 16, 13, 14, 23, 24, 21, 22, 19, 20}})); + {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}}, + {{5, 6, 3, 4, 1, 2, 11, 12, 9, 10, 7, 8, 17, 18, 15, 16, 13, 14, 23, 24, 21, 22, 19, 20}})); SUCCEED(); } diff --git 
a/tests/nnfw_api/src/one_op_tests/Shape.cc b/tests/nnfw_api/src/one_op_tests/Shape.cc index 9a48aa7..2a73db9 100644 --- a/tests/nnfw_api/src/one_op_tests/Shape.cc +++ b/tests/nnfw_api/src/one_op_tests/Shape.cc @@ -27,9 +27,9 @@ TEST_F(GenModelTest, OneOp_Shape) cgen.setInputsAndOutputs({in}, {out}); _context = std::make_unique(cgen.finish()); _context->addTestCase( - TestCaseData{} - .addInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}) - .addOutput({1, 3, 3, 2})); + TestCaseData{} + .addInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}) + .addOutput({1, 3, 3, 2})); _context->setBackends({"cpu"}); SUCCEED(); @@ -46,9 +46,9 @@ TEST_F(GenModelTest, OneOp_Shape_Int64) cgen.setInputsAndOutputs({in}, {out}); _context = std::make_unique(cgen.finish()); _context->addTestCase( - TestCaseData{} - .addInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}) - .addOutput({1, 3, 3, 2})); + TestCaseData{} + .addInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}) + .addOutput({1, 3, 3, 2})); _context->setBackends({"cpu"}); SUCCEED(); diff --git a/tests/nnfw_api/src/one_op_tests/Split.cc b/tests/nnfw_api/src/one_op_tests/Split.cc index 2120164..32be9a7 100644 --- a/tests/nnfw_api/src/one_op_tests/Split.cc +++ b/tests/nnfw_api/src/one_op_tests/Split.cc @@ -32,7 +32,7 @@ TEST_F(GenModelTest, OneOp_Split) _context = std::make_unique(cgen.finish()); _context->addTestCase( - uniformTCD({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6}, {3, 4, 7, 8}})); + uniformTCD({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6}, {3, 4, 7, 8}})); _context->setBackends({"cpu", "acl_cl", "acl_neon"}); SUCCEED(); @@ -52,10 +52,10 @@ TEST_F(GenModelTest, OneOp_SplitNonConstAxis) _context = std::make_unique(cgen.finish()); _context->addTestCase(TestCaseData{} - .addInput({1}) - .addInput({1, 2, 3, 4, 5, 6, 7, 8}) - .addOutput({1, 2, 5, 6}) - .addOutput({3, 4, 7, 8})); + .addInput({1}) + .addInput({1, 2, 3, 4, 5, 6, 7, 8}) + .addOutput({1, 2, 5, 6}) + .addOutput({3, 4, 7, 8})); _context->setBackends({"cpu"}); SUCCEED(); diff --git a/tests/nnfw_api/src/one_op_tests/Sqrt.cc b/tests/nnfw_api/src/one_op_tests/Sqrt.cc new file mode 100644 index 0000000..01f3133 --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/Sqrt.cc @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "GenModelTest.h" + +#include + +CircleGen genSimpleSqrtModel(circle::TensorType type) +{ + CircleGen cgen; + int in = cgen.addTensor({{1, 2, 2, 1}, type}); + int out = cgen.addTensor({{1, 2, 2, 1}, type}); + cgen.addOperatorSqrt({{in}, {out}}); + cgen.setInputsAndOutputs({in}, {out}); + return cgen; +} + +TEST_F(GenModelTest, OneOp_Sqrt_f32) +{ + CircleGen cgen = genSimpleSqrtModel(circle::TensorType::TensorType_FLOAT32); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase( + TestCaseData{}.addInput({1, 4, 9, 16}).addOutput({1, 2, 3, 4})); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Sqrt_i32) +{ + CircleGen cgen = genSimpleSqrtModel(circle::TensorType::TensorType_INT32); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(TestCaseData{}.addInput({1, 4, 9, 16}).addOutput({1, 2, 3, 4})); + _context->setBackends({"cpu"}); + _context->expectFailCompile(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/Square.cc b/tests/nnfw_api/src/one_op_tests/Square.cc new file mode 100644 index 0000000..2ec9bad --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/Square.cc @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "GenModelTest.h" + +#include + +CircleGen genSimpleSquareModel(circle::TensorType type) +{ + CircleGen cgen; + int in = cgen.addTensor({{1, 2, 2, 1}, type}); + int out = cgen.addTensor({{1, 2, 2, 1}, type}); + cgen.addOperatorSquare({{in}, {out}}); + cgen.setInputsAndOutputs({in}, {out}); + return cgen; +} + +TEST_F(GenModelTest, OneOp_Square_f32) +{ + CircleGen cgen = genSimpleSquareModel(circle::TensorType::TensorType_FLOAT32); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase( + TestCaseData{}.addInput({1, 2, 3, 4}).addOutput({1, 4, 9, 16})); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_Square_i32) +{ + CircleGen cgen = genSimpleSquareModel(circle::TensorType::TensorType_INT32); + + _context = std::make_unique(cgen.finish()); + _context->addTestCase(TestCaseData{}.addInput({1, 2, 3, 4}).addOutput({1, 4, 9, 16})); + _context->setBackends({"cpu"}); + _context->expectFailCompile(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/Tile.cc b/tests/nnfw_api/src/one_op_tests/Tile.cc index aa36ba2..3f193d5 100644 --- a/tests/nnfw_api/src/one_op_tests/Tile.cc +++ b/tests/nnfw_api/src/one_op_tests/Tile.cc @@ -29,7 +29,7 @@ TEST_F(GenModelTest, OneOp_Tile_ConstMul) _context = std::make_unique(cgen.finish()); _context->addTestCase( - uniformTCD({{1, 2, 3, 4, 5, 6}}, {{1, 2, 3, 1, 2, 3, 4, 5, 6, 4, 5, 6}})); + uniformTCD({{1, 2, 3, 4, 5, 6}}, {{1, 2, 3, 1, 2, 3, 4, 5, 6, 4, 5, 6}})); _context->setBackends({"cpu"}); SUCCEED(); @@ -47,10 +47,10 @@ TEST_F(GenModelTest, OneOp_Tile_MulToConst) cgen.setInputsAndOutputs({in}, {out}); _context = std::make_unique(cgen.finish()); - _context->addTestCase(uniformTCD( - {{11, 12, 13, 21, 22, 23}}, - {{11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, - 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23}})); + _context->addTestCase( + uniformTCD({{11, 12, 13, 21, 22, 23}}, + {{11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, + 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23}})); _context->setBackends({"cpu"}); SUCCEED(); @@ -66,12 +66,12 @@ TEST_F(GenModelTest, OneOp_Tile_MulToVar) cgen.setInputsAndOutputs({in, multiplies}, {out}); _context = std::make_unique(cgen.finish()); - _context->addTestCase(TestCaseData{} - .addInput({11, 12, 13, 21, 22, 23}) - .addInput({2, 3, 1}) - .addOutput({11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, - 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, - 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23})); + _context->addTestCase( + TestCaseData{} + .addInput({11, 12, 13, 21, 22, 23}) + .addInput({2, 3, 1}) + .addOutput({11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, + 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23})); _context->setBackends({"cpu"}); SUCCEED(); @@ -88,9 +88,9 @@ TEST_F(GenModelTest, OneOp_Tile_VarMul) _context = std::make_unique(cgen.finish()); _context->addTestCase(TestCaseData{} - .addInput({1, 2, 3, 4, 5, 6}) - .addInput({1, 2}) - .addOutput({1, 2, 3, 1, 2, 3, 4, 5, 6, 4, 5, 6})); + .addInput({1, 2, 3, 4, 5, 6}) + .addInput({1, 2}) + .addOutput({1, 2, 3, 1, 2, 3, 4, 5, 6, 4, 5, 6})); _context->setBackends({"cpu"}); SUCCEED(); diff --git a/tests/nnfw_api/src/one_op_tests/Transpose.cc b/tests/nnfw_api/src/one_op_tests/Transpose.cc index ecfb159..5a92c73 100644 --- a/tests/nnfw_api/src/one_op_tests/Transpose.cc +++ b/tests/nnfw_api/src/one_op_tests/Transpose.cc @@ -31,20 +31,19 @@ 
TEST_F(GenModelTest, OneOp_Transpose_PermsToConst) _context = std::make_unique(cgen.finish()); _context->addTestCase(uniformTCD( - {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, - 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, - 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, - 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, - 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, - 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119}}, - {{0, 1, 2, 3, 4, 20, 21, 22, 23, 24, 40, 41, 42, 43, 44, 60, 61, 62, - 63, 64, 80, 81, 82, 83, 84, 100, 101, 102, 103, 104, 5, 6, 7, 8, 9, 25, - 26, 27, 28, 29, 45, 46, 47, 48, 49, 65, 66, 67, 68, 69, 85, 86, 87, 88, - 89, 105, 106, 107, 108, 109, 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51, - 52, 53, 54, 70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114, - 15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55, 56, 57, 58, 59, 75, 76, 77, - 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119}})); + {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119}}, + {{0, 1, 2, 3, 4, 20, 21, 22, 23, 24, 40, 41, 42, 43, 44, 60, 61, 62, 63, 64, + 80, 81, 82, 83, 84, 100, 101, 102, 103, 104, 5, 6, 7, 8, 9, 25, 26, 27, 28, 29, + 45, 46, 47, 48, 49, 65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 106, 107, 108, 109, + 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51, 52, 53, 54, 70, 71, 72, 73, 74, + 90, 91, 92, 93, 94, 110, 111, 112, 113, 114, 15, 16, 17, 18, 19, 35, 36, 37, 38, 39, + 55, 56, 57, 58, 59, 75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119}})); _context->setBackends({"acl_cl", "acl_neon", "cpu"}); SUCCEED(); @@ -61,9 +60,9 @@ TEST_F(GenModelTest, OneOp_Transpose_PermsToVar) _context = std::make_unique(cgen.finish()); _context->addTestCase(TestCaseData{} - .addInput({1, 2, 3, 4, 5, 6}) - .addInput({0, 2, 1, 3}) - .addOutput({1, 4, 2, 5, 3, 6})); + .addInput({1, 2, 3, 4, 5, 6}) + .addInput({0, 2, 1, 3}) + .addOutput({1, 4, 2, 5, 3, 6})); _context->setBackends({"cpu"}); SUCCEED(); @@ -80,9 +79,9 @@ TEST_F(GenModelTest, OneOp_Transpose_RegularTranspose) _context = std::make_unique(cgen.finish()); _context->addTestCase(TestCaseData{} - .addInput({1, 2, 3, 4, 5, 6}) - .addInput({}) - .addOutput({1, 4, 2, 5, 3, 6})); + .addInput({1, 2, 3, 4, 5, 6}) + .addInput({}) + .addOutput({1, 4, 2, 5, 3, 6})); _context->setBackends({"acl_cl", "acl_neon", "cpu"}); SUCCEED(); diff --git a/tests/scripts/benchmark_nnapi.sh b/tests/scripts/benchmark_nnapi.sh index af79728..6799923 100755 --- a/tests/scripts/benchmark_nnapi.sh +++ b/tests/scripts/benchmark_nnapi.sh @@ -104,7 +104,7 @@ function profile_for_he_shed() $RUN_TEST_SH --driverbin=$BENCHMARK_DRIVER_BIN $MODEL > $LOG_FILE 2>&1 RET=$? if [[ $RET -ne 0 ]]; then - echo "Profiling $MODEL aborted in run#$j... exit code: $RET"xX + echo "Profiling $MODEL aborted in run#$j... 
exit code: $RET" exit $RET fi echo "finished" diff --git a/tests/scripts/test_scheduler_with_profiling.sh b/tests/scripts/test_scheduler_with_profiling.sh index c34e836..639cf3f 100755 --- a/tests/scripts/test_scheduler_with_profiling.sh +++ b/tests/scripts/test_scheduler_with_profiling.sh @@ -82,7 +82,7 @@ function run_benchmark_test() $RUN_TEST_SH --driverbin=$BENCHMARK_DRIVER_BIN $MODEL > $LOG_FILE 2>&1 RET=$? if [[ $RET -ne 0 ]]; then - echo "Profiling $MODEL aborted in run#$j... exit code: $RET"xX + echo "Profiling $MODEL aborted in run#$j... exit code: $RET" exit $RET fi echo "finished" diff --git a/tests/scripts/test_scheduler_with_profiling_android.sh b/tests/scripts/test_scheduler_with_profiling_android.sh index 48576a9..8c12423 100644 --- a/tests/scripts/test_scheduler_with_profiling_android.sh +++ b/tests/scripts/test_scheduler_with_profiling_android.sh @@ -128,7 +128,7 @@ function run_benchmark_test() $SHELL_CMD $RUN_TEST_SH --driverbin=$BENCHMARK_DRIVER_BIN $MODEL > $LOG_FILE 2>&1 RET=$? if [[ $RET -ne 0 ]]; then - echo "Profiling $MODEL aborted in run#$j... exit code: $RET"xX + echo "Profiling $MODEL aborted in run#$j... exit code: $RET" exit $RET fi echo "finished" diff --git a/tests/tools/nnpackage_run/src/allocation.h b/tests/tools/nnpackage_run/src/allocation.h index ea4672f..e7f1a9c 100644 --- a/tests/tools/nnpackage_run/src/allocation.h +++ b/tests/tools/nnpackage_run/src/allocation.h @@ -29,6 +29,7 @@ public: ~Allocation() { free(data_); } void *data() const { return data_; } void *alloc(uint64_t sz) { return data_ = malloc(sz); } + private: void *data_; }; diff --git a/tests/tools/nnpackage_run/src/h5formatter.cc b/tests/tools/nnpackage_run/src/h5formatter.cc index 3929c8d..eeedcb7 100644 --- a/tests/tools/nnpackage_run/src/h5formatter.cc +++ b/tests/tools/nnpackage_run/src/h5formatter.cc @@ -135,7 +135,7 @@ void H5Formatter::loadInputs(const std::string &filename, std::vectordtype] * num_elems(ti); diff --git a/tests/tools/nnpackage_run/src/nnpackage_run.cc b/tests/tools/nnpackage_run/src/nnpackage_run.cc index 64623a8..5bde74f 100644 --- a/tests/tools/nnpackage_run/src/nnpackage_run.cc +++ b/tests/tools/nnpackage_run/src/nnpackage_run.cc @@ -70,7 +70,7 @@ int main(const int argc, char **argv) // TODO Apply verbose level to phases const int verbose = args.getVerboseLevel(); benchmark::Phases phases( - benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()}); + benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()}); nnfw_session *session = nullptr; NNPR_ENSURE_STATUS(nnfw_create_session(&session)); @@ -223,7 +223,7 @@ int main(const int argc, char **argv) } outputs[i].alloc(output_size_in_bytes); NNPR_ENSURE_STATUS( - nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes)); + nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes)); NNPR_ENSURE_STATUS(nnfw_set_output_layout(session, i, NNFW_LAYOUT_CHANNELS_LAST)); } @@ -231,31 +231,35 @@ int main(const int argc, char **argv) // only warmup. 
if (verbose == 0) { - phases.run("WARMUP", - [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); }, - args.getWarmupRuns()); - phases.run("EXECUTE", - [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); }, - args.getNumRuns(), true); + phases.run( + "WARMUP", + [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); }, + args.getWarmupRuns()); + phases.run( + "EXECUTE", + [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); }, + args.getNumRuns(), true); } else { - phases.run("WARMUP", - [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); }, - [&](const benchmark::Phase &phase, uint32_t nth) { - std::cout << "... " - << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" - << std::endl; - }, - args.getWarmupRuns()); - phases.run("EXECUTE", - [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); }, - [&](const benchmark::Phase &phase, uint32_t nth) { - std::cout << "... " - << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" - << std::endl; - }, - args.getNumRuns(), true); + phases.run( + "WARMUP", + [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); }, + [&](const benchmark::Phase &phase, uint32_t nth) { + std::cout << "... " + << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" + << std::endl; + }, + args.getWarmupRuns()); + phases.run( + "EXECUTE", + [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); }, + [&](const benchmark::Phase &phase, uint32_t nth) { + std::cout << "... " + << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" + << std::endl; + }, + args.getNumRuns(), true); } #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1 diff --git a/tests/tools/nnpackage_run/src/randomgen.cc b/tests/tools/nnpackage_run/src/randomgen.cc index 3432420..a1fcf82 100644 --- a/tests/tools/nnpackage_run/src/randomgen.cc +++ b/tests/tools/nnpackage_run/src/randomgen.cc @@ -66,7 +66,7 @@ void RandomGenerator::generate(std::vector &inputs) std::exit(-1); } NNPR_ENSURE_STATUS( - nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), input_size_in_bytes)); + nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), input_size_in_bytes)); NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST)); } }; diff --git a/tests/tools/tflite_loader/CMakeLists.txt b/tests/tools/tflite_loader/CMakeLists.txt index 0fe1c69..6be3158 100644 --- a/tests/tools/tflite_loader/CMakeLists.txt +++ b/tests/tools/tflite_loader/CMakeLists.txt @@ -16,7 +16,7 @@ nnfw_find_package(Boost REQUIRED program_options system filesystem) add_executable(tflite_loader_test_tool ${SOURCES}) target_include_directories(tflite_loader_test_tool PRIVATE ${Boost_INCLUDE_DIRS}) -target_link_libraries(tflite_loader_test_tool onert_core onert tflite_loader) +target_link_libraries(tflite_loader_test_tool nnfw-dev) target_link_libraries(tflite_loader_test_tool nnfw_lib_tflite nnfw_lib_misc) target_link_libraries(tflite_loader_test_tool ${Boost_PROGRAM_OPTIONS_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY}) diff --git a/tests/tools/tflite_loader/src/tflite_loader.cc b/tests/tools/tflite_loader/src/tflite_loader.cc index ce09921..f77570c 100644 --- a/tests/tools/tflite_loader/src/tflite_loader.cc +++ b/tests/tools/tflite_loader/src/tflite_loader.cc @@ -14,23 +14,20 @@ * limitations under the License. 
*/ -#include "tflite/ext/kernels/register.h" - #include "args.h" -#include "tflite/InterpreterSession.h" -#include "tflite/Assert.h" -#include "tflite/Diff.h" -#include "misc/tensor/IndexIterator.h" -#include -#include +#include +#include -#include "compiler/Compiler.h" -#include "exec/Execution.h" -#include "ir/Graph.h" +#include +#include -#include "tflite_loader.h" +#include +#include +#include +#include +#include #include const int RUN_FAILED = 1; @@ -41,8 +38,15 @@ using namespace nnfw::tflite; const int FILE_ERROR = 2; const float DIFFERENCE_THRESHOLD = 10e-5; +#define NNFW_ASSERT_FAIL(expr, msg) \ + if ((expr) != NNFW_STATUS_NO_ERROR) \ + { \ + std::cerr << msg << std::endl; \ + exit(-1); \ + } + // Read vector of floats from selected file -std::vector readData(const string &path) +void readData(const string &path, std::vector &dest) { std::ifstream in(path); if (!in.good()) @@ -53,100 +57,104 @@ std::vector readData(const string &path) in.seekg(0, std::ifstream::end); size_t len = in.tellg(); in.seekg(0, std::ifstream::beg); - assert(len % sizeof(float) == 0); - size_t size = len / sizeof(float); - std::vector vec(size); - for (size_t i = 0; i < size; ++i) + + assert(dest.size() == len); + in.read(reinterpret_cast(dest.data()), len); +} + +template +void randomData(nnfw::misc::RandomGenerator &randgen, std::vector &dest) +{ + size_t elements = dest.size() / sizeof(T); + assert(dest.size() % sizeof(T) == 0); + + std::vector vec(elements); + for (uint64_t i = 0; i < elements; i++) { - in.read(reinterpret_cast(&vec[i]), sizeof(float)); + vec[i] = randgen.generate(); } - return vec; + memcpy(dest.data(), vec.data(), elements * sizeof(T)); } -std::vector randomData(nnfw::misc::RandomGenerator &randgen, const uint64_t size) +void randomBoolData(nnfw::misc::RandomGenerator &randgen, std::vector &dest) { - std::vector vec(size); - for (uint64_t i = 0; i < size; i++) + size_t elements = dest.size(); + std::vector vec(elements); + for (uint64_t i = 0; i < elements; i++) { - vec[i] = randgen.generate(); + bool value = randgen.generate(); + dest[i] = value ? 1 : 0; } - return vec; } -void executeGraph(const std::shared_ptr &g, - const std::vector> &inputs, - std::vector> &outputs) +inline uint64_t num_elems(const nnfw_tensorinfo *ti) { - auto subgs = std::make_shared(); - subgs->push(onert::ir::SubgraphIndex{0}, g); - auto compiler = new onert::compiler::Compiler(subgs); - std::shared_ptr executors; - // Compilation - try + uint64_t n = 1; + for (uint32_t i = 0; i < ti->rank; ++i) { - executors = compiler->compile(); + n *= ti->dims[i]; } - catch (const std::exception &e) + return n; +} + +inline size_t sizeOfNnfwType(NNFW_TYPE type) +{ + switch (type) { - std::cerr << "[Execution] Can't compile model" << std::endl; - std::cerr << e.what() << std::endl; - exit(-1); + case NNFW_TYPE_TENSOR_BOOL: + case NNFW_TYPE_TENSOR_UINT8: + case NNFW_TYPE_TENSOR_QUANT8_ASYMM: + case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED: + return 1; + case NNFW_TYPE_TENSOR_FLOAT32: + case NNFW_TYPE_TENSOR_INT32: + return 4; + case NNFW_TYPE_TENSOR_INT64: + return 8; + default: + throw std::runtime_error{"Invalid tensor type"}; } +} - std::cout << "[Execution] Graph compiled!" 
<< std::endl; - - auto execution = std::make_shared(executors); - - // Setting IO - try +template +bool compareBuffersExact(const T *ref_buf, const std::vector &act_buf, uint32_t index) +{ + bool match = true; + for (uint32_t e = 0; e < act_buf.size() / sizeof(T); e++) { - // Verify input shapes - auto num_inputs = inputs.size(); - for (size_t i = 0; i < num_inputs; i++) - { - auto input_operand_idx = g->getInputs().at(i); - auto input_shape = g->operands().at(input_operand_idx).shape(); - assert(inputs[i].size() == input_shape.num_elements()); - } + T ref = ref_buf[e]; + T act = reinterpret_cast(act_buf.data())[e]; - // Set output shapes - auto num_outputs = g->getOutputs().size(); - outputs.resize(num_outputs); - for (uint32_t i = 0; i < num_outputs; i++) + if (ref != act) { - auto output_operand_idx = g->getOutputs().at(i); - auto output_shape = g->operands().at(output_operand_idx).shape(); - outputs[i].resize(output_shape.num_elements()); + std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref + << ", act: " << act << std::endl; + match = false; } - - for (size_t i = 0; i < num_inputs; i++) - execution->setInput(onert::ir::IOIndex(i), inputs[i].data(), - inputs[i].size() * sizeof(float)); - for (uint32_t i = 0; i < num_outputs; i++) - execution->setOutput(onert::ir::IOIndex(i), outputs[i].data(), - outputs[i].size() * sizeof(float)); - } - catch (const std::exception &e) - { - std::cerr << "[Execution] Can't set model IO" << std::endl; - std::cerr << e.what() << '\n'; - exit(-1); } - try - { - execution->execute(); - } - catch (const std::exception &e) + return match; +} + +bool compareBuffersExactBool(const uint8_t *ref_buf, const std::vector &act_buf, + uint32_t index) +{ + bool match = true; + for (uint32_t e = 0; e < act_buf.size() / sizeof(uint8_t); e++) { - std::cerr << "[Execution] Can't execute" << std::endl; - std::cerr << e.what() << '\n'; - exit(-1); + uint8_t ref_raw = ref_buf[e]; + bool ref = (ref_raw != 0 ? true : false); + uint8_t act_raw = reinterpret_cast(act_buf.data())[e]; + bool act = (act_raw != 0 ? true : false); + if (ref != act) + { + std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref + << ", act: " << act << std::endl; + match = false; + } } - std::cout << "[Execution] Done!" << std::endl; - - delete compiler; + return match; } int main(const int argc, char **argv) @@ -163,44 +171,38 @@ int main(const int argc, char **argv) } std::cout << "[Execution] Stage start!" 
<< std::endl; - std::shared_ptr test_graph; // Loading - try + nnfw_session *onert_session = nullptr; + NNFW_ASSERT_FAIL(nnfw_create_session(&onert_session), "[ ERROR ] Failure during model load"); + if (onert_session == nullptr) { - test_graph = - onert::tflite_loader::loadModel(tflite_file.c_str())->at(onert::ir::SubgraphIndex{0}); - } - catch (std::exception &e) - { - std::cerr << "[ ERROR ] " - << "Failure during model load" << std::endl; - std::cerr << e.what() << std::endl; + std::cerr << "[ ERROR ] Failure to open session" << std::endl; exit(-1); } - // TODO: Support another input/output types - for (const auto &input_idx : test_graph->getInputs()) - { - const auto input_type = test_graph->operands().at(input_idx).typeInfo().type(); - assert(input_type == onert::ir::DataType::FLOAT32 && "Only FLOAT32 inputs are supported"); - } - for (const auto &output_idx : test_graph->getOutputs()) - { - const auto output_type = test_graph->operands().at(output_idx).typeInfo().type(); - assert(output_type == onert::ir::DataType::FLOAT32 && "Only FLOAT32 outputs are supported"); - } + NNFW_ASSERT_FAIL(nnfw_load_model_from_modelfile(onert_session, tflite_file.c_str()), + "[ ERROR ] Failure during model load"); + + uint32_t num_inputs; + uint32_t num_outputs; + NNFW_ASSERT_FAIL(nnfw_input_size(onert_session, &num_inputs), + "[ ERROR ] Failure during get model inputs"); + NNFW_ASSERT_FAIL(nnfw_output_size(onert_session, &num_outputs), + "[ ERROR ] Failure during get model outputs"); std::cout << "[Execution] Model is deserialized!" << std::endl; - auto num_inputs = test_graph->getInputs().size(); - std::vector> inputs(num_inputs); + + // Compile + nnfw_prepare(onert_session); + + std::cout << "[Execution] Model compiled!" << std::endl; + + // Prepare input/output data + std::vector> inputs(num_inputs); + std::vector> outputs(num_outputs); + bool generate_data = data_files.empty(); bool read_data = data_files.size() == num_inputs; - if (num_inputs == 0) - { - std::cerr << "[ ERROR ] " - << "No inputs in model => execution is not possible" << std::endl; - exit(1); - } if (!generate_data && !read_data) { std::cerr << "[ ERROR ] " @@ -210,32 +212,75 @@ int main(const int argc, char **argv) const int seed = 1; /* TODO Add an option for seed value */ nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f}; - try + + for (uint32_t i = 0; i < num_inputs; i++) { - for (uint32_t i = 0; i < num_inputs; i++) + nnfw_tensorinfo ti_input; + NNFW_ASSERT_FAIL(nnfw_input_tensorinfo(onert_session, i, &ti_input), + "[ ERROR ] Failure during get input data info"); + size_t input_size = num_elems(&ti_input) * sizeOfNnfwType(ti_input.dtype); + + inputs[i].resize(input_size); + + if (generate_data) { - if (generate_data) + switch (ti_input.dtype) { - uint64_t sz = - test_graph->operands().at(test_graph->getInputs().at(i)).shape().num_elements(); - inputs[i] = randomData(randgen, sz); + case NNFW_TYPE_TENSOR_BOOL: + randomBoolData(randgen, inputs[i]); + break; + case NNFW_TYPE_TENSOR_UINT8: + case NNFW_TYPE_TENSOR_QUANT8_ASYMM: + randomData(randgen, inputs[i]); + break; + case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED: + randomData(randgen, inputs[i]); + break; + case NNFW_TYPE_TENSOR_FLOAT32: + randomData(randgen, inputs[i]); + break; + case NNFW_TYPE_TENSOR_INT32: + randomData(randgen, inputs[i]); + break; + case NNFW_TYPE_TENSOR_INT64: + randomData(randgen, inputs[i]); + break; + default: + std::cerr << "[ ERROR ] " + << "Unspported input data type" << std::endl; + exit(-1); + break; } - else /* read_data */ - inputs[i] = 
readData(data_files[i]); } + else /* read_data */ + readData(data_files[i], inputs[i]); + + NNFW_ASSERT_FAIL(nnfw_set_input(onert_session, i, ti_input.dtype, inputs[i].data(), input_size), + "[ ERROR ] Failure to set input tensor buffer"); } - catch (std::exception &e) + + std::cout << "[Execution] Input data is defined!" << std::endl; + + for (uint32_t i = 0; i < num_outputs; i++) { - std::cerr << "[ ERROR ] " - << "Failure during input data generation" << std::endl; - std::cerr << e.what() << std::endl; - exit(-1); + nnfw_tensorinfo ti_output; + NNFW_ASSERT_FAIL(nnfw_output_tensorinfo(onert_session, i, &ti_output), + "[ ERROR ] Failure during get output tensor info"); + + uint64_t output_elements = num_elems(&ti_output); + size_t output_size = output_elements * sizeOfNnfwType(ti_output.dtype); + outputs[i].resize(output_size); + + NNFW_ASSERT_FAIL( + nnfw_set_output(onert_session, i, ti_output.dtype, outputs[i].data(), output_size), + "[ ERROR ] Failure to set output tensor buffer"); } - std::cout << "[Execution] Input data is defined!" << std::endl; - std::vector> outputs; - // Run graph - executeGraph(test_graph, inputs, outputs); + // Execute + NNFW_ASSERT_FAIL(nnfw_run(onert_session), "[Execution] Can't execute"); + + std::cout << "[Execution] Done!" << std::endl; + // Compare with tflite std::cout << "[Comparison] Stage start!" << std::endl; // Read tflite model @@ -255,7 +300,7 @@ int main(const int argc, char **argv) std::cerr << e.what() << std::endl; exit(FILE_ERROR); } - interpreter->SetNumThreads(2); + interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(-1)); auto sess = std::make_shared(interpreter.get()); sess->prepare(); @@ -263,7 +308,7 @@ int main(const int argc, char **argv) for (uint32_t i = 0; i < num_inputs; i++) { auto input_tensor = interpreter->tensor(interpreter->inputs().at(i)); - memcpy(input_tensor->data.f, inputs[i].data(), inputs[i].size() * sizeof(float)); + memcpy(input_tensor->data.uint8, inputs[i].data(), inputs[i].size()); } if (!sess->run()) { @@ -273,32 +318,69 @@ int main(const int argc, char **argv) std::cout << "[Comparison] TFLite run done!" << std::endl; // Calculate max difference over all outputs - float max_difference = 0.0f; - auto num_outputs = test_graph->getOutputs().size(); + float max_float_difference = 0.0f; + bool find_unmatched_output = false; + for (uint32_t out_idx = 0; out_idx < num_outputs; out_idx++) { - const auto &tflite_output_tensor = interpreter->tensor(interpreter->outputs().at(out_idx)); - const auto &nnfw_output_tensor = outputs[out_idx]; - - if (nnfw_output_tensor.size() != tflite_output_tensor->bytes / sizeof(float)) - std::cout << "[Comparison] Different size of outputs!" 
<< std::endl; - // Check max difference - float *tflite_out_ptr = tflite_output_tensor->data.f; - for (const auto &nnfw_out : nnfw_output_tensor) - { - if (std::abs(nnfw_out - *tflite_out_ptr) > max_difference) - max_difference = std::abs(nnfw_out - *tflite_out_ptr); + nnfw_tensorinfo ti; + nnfw_output_tensorinfo(onert_session, out_idx, &ti); + + bool matched = true; + // Check output tensor values + + const auto &ref_output = interpreter->tensor(interpreter->outputs().at(out_idx))->data; + const auto &output = outputs[out_idx]; - tflite_out_ptr++; + switch (ti.dtype) + { + case NNFW_TYPE_TENSOR_BOOL: + matched = compareBuffersExactBool(ref_output.uint8, output, out_idx); + break; + case NNFW_TYPE_TENSOR_UINT8: + case NNFW_TYPE_TENSOR_QUANT8_ASYMM: + matched = compareBuffersExact(ref_output.uint8, output, out_idx); + break; + case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED: + matched = compareBuffersExact(ref_output.int8, output, out_idx); + break; + case NNFW_TYPE_TENSOR_INT32: + matched = compareBuffersExact(ref_output.i32, output, out_idx); + break; + case NNFW_TYPE_TENSOR_FLOAT32: + // TODO better way for handling FP error? + for (uint32_t e = 0; e < num_elems(&ti); e++) + { + float refval = ref_output.f[e]; + float val = reinterpret_cast(output.data())[e]; + if (std::abs(refval - val) > max_float_difference) + max_float_difference = std::abs(refval - val); + + if (max_float_difference > DIFFERENCE_THRESHOLD) + matched = false; + } + break; + case NNFW_TYPE_TENSOR_INT64: + matched = compareBuffersExact(ref_output.i64, output, out_idx); + break; + default: + throw std::runtime_error{"Invalid tensor type"}; } + + if (!matched) + find_unmatched_output = true; } // Print results - std::cout << "[Comparison] Max difference: " << max_difference << std::endl; + std::cout << "[Comparison] Max float difference: " << max_float_difference << std::endl; int ret = 0; - if (max_difference > DIFFERENCE_THRESHOLD) + if (find_unmatched_output) { - std::cout << "[Comparison] Outputs is not equal!" << std::endl; + std::cout << "[Comparison] outputs is not equal!" << std::endl; + if (max_float_difference > DIFFERENCE_THRESHOLD) + { + std::cout << "[Comparison] Float outputs is not equal!" << std::endl; + } ret = 1; } else @@ -307,5 +389,7 @@ int main(const int argc, char **argv) } std::cout << "[Comparison] Done!" 
<< std::endl; + nnfw_close_session(onert_session); + return ret; } diff --git a/tests/tools/tflite_run/src/bin_image.cc b/tests/tools/tflite_run/src/bin_image.cc index 16d4c94..fadece0 100644 --- a/tests/tools/tflite_run/src/bin_image.cc +++ b/tests/tools/tflite_run/src/bin_image.cc @@ -20,7 +20,7 @@ #include "bin_image.h" BinImage::BinImage(unsigned int width, unsigned int height, unsigned int channels) - : _width(width), _height(height), _channels(channels) + : _width(width), _height(height), _channels(channels) { } diff --git a/tests/tools/tflite_run/src/tensor_loader.cc b/tests/tools/tflite_run/src/tensor_loader.cc index 93d9e2f..a1a9433 100644 --- a/tests/tools/tflite_run/src/tensor_loader.cc +++ b/tests/tools/tflite_run/src/tensor_loader.cc @@ -26,7 +26,7 @@ namespace TFLiteRun { TensorLoader::TensorLoader(tflite::Interpreter &interpreter) - : _interpreter(interpreter), _raw_data(nullptr) + : _interpreter(interpreter), _raw_data(nullptr) { } diff --git a/tests/tools/tflite_run/src/tflite_run.cc b/tests/tools/tflite_run/src/tflite_run.cc index e72966d..d42f992 100644 --- a/tests/tools/tflite_run/src/tflite_run.cc +++ b/tests/tools/tflite_run/src/tflite_run.cc @@ -86,7 +86,7 @@ int main(const int argc, char **argv) // TODO Apply verbose level to phases const int verbose = args.getVerboseLevel(); benchmark::Phases phases( - benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()}); + benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()}); std::unique_ptr model; std::unique_ptr interpreter; @@ -156,7 +156,7 @@ int main(const int argc, char **argv) for (uint32_t axis = 0; axis < tensor->dims->size; axis++, offset++) { new_dim[axis] = - ((offset < dim_values) ? args.getInputShapes()[offset] : tensor->dims->data[axis]); + ((offset < dim_values) ? args.getInputShapes()[offset] : tensor->dims->data[axis]); } interpreter->ResizeInputTensor(id, new_dim); @@ -208,12 +208,12 @@ int main(const int argc, char **argv) int32_t value = 0; nnfw::misc::tensor::iterate(tensor_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - // TODO Generate random values - // Gather operation: index should be within input coverage. - tensor_view.at(ind) = value; - value++; - }; + << [&](const nnfw::misc::tensor::Index &ind) { + // TODO Generate random values + // Gather operation: index should be within input coverage. 
+ tensor_view.at(ind) = value; + value++; + }; } else if (tensor->type == kTfLiteUInt8) { @@ -221,16 +221,16 @@ int main(const int argc, char **argv) auto tensor_view = nnfw::tflite::TensorView::make(*interpreter, o); auto fp = static_cast( - &nnfw::misc::RandomGenerator::generate); + const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>( + &nnfw::misc::RandomGenerator::generate); const nnfw::misc::tensor::Object data(tensor_view.shape(), std::bind(fp, randgen, _1, _2)); nnfw::misc::tensor::iterate(tensor_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - const auto value = data.at(ind); - tensor_view.at(ind) = value; - }; + << [&](const nnfw::misc::tensor::Index &ind) { + const auto value = data.at(ind); + tensor_view.at(ind) = value; + }; } else if (tensor->type == kTfLiteBool) { @@ -238,16 +238,16 @@ int main(const int argc, char **argv) auto tensor_view = nnfw::tflite::TensorView::make(*interpreter, o); auto fp = static_cast( - &nnfw::misc::RandomGenerator::generate); + const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>( + &nnfw::misc::RandomGenerator::generate); const nnfw::misc::tensor::Object data(tensor_view.shape(), std::bind(fp, randgen, _1, _2)); nnfw::misc::tensor::iterate(tensor_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - const auto value = data.at(ind); - tensor_view.at(ind) = value; - }; + << [&](const nnfw::misc::tensor::Index &ind) { + const auto value = data.at(ind); + tensor_view.at(ind) = value; + }; } else { @@ -277,27 +277,28 @@ int main(const int argc, char **argv) // only warmup. if (verbose == 0) { - phases.run("WARMUP", [&](const benchmark::Phase &, uint32_t) { sess->run(); }, - args.getWarmupRuns()); - phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { sess->run(); }, - args.getNumRuns(), true); + phases.run( + "WARMUP", [&](const benchmark::Phase &, uint32_t) { sess->run(); }, args.getWarmupRuns()); + phases.run( + "EXECUTE", [&](const benchmark::Phase &, uint32_t) { sess->run(); }, args.getNumRuns(), true); } else { - phases.run("WARMUP", [&](const benchmark::Phase &, uint32_t) { sess->run(); }, - [&](const benchmark::Phase &phase, uint32_t nth) { - std::cout << "... " - << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" - << std::endl; - }, - args.getWarmupRuns()); - phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { sess->run(); }, - [&](const benchmark::Phase &phase, uint32_t nth) { - std::cout << "... " - << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" - << std::endl; - }, - args.getNumRuns(), true); + phases.run( + "WARMUP", [&](const benchmark::Phase &, uint32_t) { sess->run(); }, + [&](const benchmark::Phase &phase, uint32_t nth) { + std::cout << "... " + << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" + << std::endl; + }, + args.getWarmupRuns()); + phases.run( + "EXECUTE", [&](const benchmark::Phase &, uint32_t) { sess->run(); }, + [&](const benchmark::Phase &phase, uint32_t nth) { + std::cout << "... 
" + << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" << std::endl; + }, + args.getNumRuns(), true); } sess->teardown(); diff --git a/tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc b/tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc index d44ea60..e9fb04c 100644 --- a/tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc +++ b/tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc @@ -86,7 +86,7 @@ int main(const int argc, char **argv) // TODO Apply verbose level to phases const int verbose = args.getVerboseLevel(); benchmark::Phases phases( - benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()}); + benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()}); std::unique_ptr model; std::unique_ptr interpreter; @@ -102,8 +102,8 @@ int main(const int argc, char **argv) } else { - model = tflite::FlatBufferModel::BuildFromFile(args.getTFLiteFilename().c_str(), - &error_reporter); + model = + tflite::FlatBufferModel::BuildFromFile(args.getTFLiteFilename().c_str(), &error_reporter); } if (model == nullptr) { @@ -153,12 +153,12 @@ int main(const int argc, char **argv) int32_t value = 0; nnfw::misc::tensor::iterate(tensor_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - // TODO Generate random values - // Gather operation: index should be within input coverage. - tensor_view.at(ind) = value; - value++; - }; + << [&](const nnfw::misc::tensor::Index &ind) { + // TODO Generate random values + // Gather operation: index should be within input coverage. + tensor_view.at(ind) = value; + value++; + }; } else if (tensor->type == kTfLiteUInt8) { @@ -168,11 +168,11 @@ int main(const int argc, char **argv) uint8_t value = 0; nnfw::misc::tensor::iterate(tensor_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - // TODO Generate random values - tensor_view.at(ind) = value; - value = (value + 1) & 0xFF; - }; + << [&](const nnfw::misc::tensor::Index &ind) { + // TODO Generate random values + tensor_view.at(ind) = value; + value = (value + 1) & 0xFF; + }; } else if (tensor->type == kTfLiteBool) { @@ -180,16 +180,16 @@ int main(const int argc, char **argv) auto tensor_view = TFLiteVanillaRun::TensorView::make(*interpreter, o); auto fp = static_cast( - &nnfw::misc::RandomGenerator::generate); + const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>( + &nnfw::misc::RandomGenerator::generate); const nnfw::misc::tensor::Object data(tensor_view.shape(), std::bind(fp, randgen, _1, _2)); nnfw::misc::tensor::iterate(tensor_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - const auto value = data.at(ind); - tensor_view.at(ind) = value; - }; + << [&](const nnfw::misc::tensor::Index &ind) { + const auto value = data.at(ind); + tensor_view.at(ind) = value; + }; } else { @@ -214,27 +214,30 @@ int main(const int argc, char **argv) // only warmup. 
if (verbose == 0) { - phases.run("WARMUP", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); }, - args.getWarmupRuns()); - phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); }, - args.getNumRuns(), true); + phases.run( + "WARMUP", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); }, + args.getWarmupRuns()); + phases.run( + "EXECUTE", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); }, + args.getNumRuns(), true); } else { - phases.run("WARMUP", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); }, - [&](const benchmark::Phase &phase, uint32_t nth) { - std::cout << "... " - << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" - << std::endl; - }, - args.getWarmupRuns()); - phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); }, - [&](const benchmark::Phase &phase, uint32_t nth) { - std::cout << "... " - << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" - << std::endl; - }, - args.getNumRuns(), true); + phases.run( + "WARMUP", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); }, + [&](const benchmark::Phase &phase, uint32_t nth) { + std::cout << "... " + << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" + << std::endl; + }, + args.getWarmupRuns()); + phases.run( + "EXECUTE", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); }, + [&](const benchmark::Phase &phase, uint32_t nth) { + std::cout << "... " + << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" << std::endl; + }, + args.getNumRuns(), true); } std::cout << "output tensor indices = ["; diff --git a/tools/.clang-format b/tools/.clang-format new file mode 120000 index 0000000..0ff66f3 --- /dev/null +++ b/tools/.clang-format @@ -0,0 +1 @@ +../.clang-format.8 \ No newline at end of file diff --git a/tools/cross/install_rootfs.sh b/tools/cross/install_rootfs.sh index 223d675..5a65dac 100755 --- a/tools/cross/install_rootfs.sh +++ b/tools/cross/install_rootfs.sh @@ -29,7 +29,7 @@ __UbuntuPackages="build-essential" # other development supports __UbuntuPackages+=" ocl-icd-opencl-dev" __UbuntuPackages+=" libhdf5-dev" -__UbuntuBoostPackages=" llibboost-all-dev" +__UbuntuBoostPackages=" libboost-all-dev" # symlinks fixer __UbuntuPackages+=" symlinks" diff --git a/tools/kbenchmark/kernels/acl_cl/Convolution.cpp b/tools/kbenchmark/kernels/acl_cl/Convolution.cpp index 37d179a..31cda05 100644 --- a/tools/kbenchmark/kernels/acl_cl/Convolution.cpp +++ b/tools/kbenchmark/kernels/acl_cl/Convolution.cpp @@ -230,12 +230,11 @@ inline nonius::benchmark_registry &local_benchmark_registry() } // namespace -#define NONIUS_LOCAL_BENCHMARK(name, ...) \ - namespace \ - { \ - static ::nonius::benchmark_registrar \ - NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, \ - __VA_ARGS__); \ +#define NONIUS_LOCAL_BENCHMARK(name, ...) 
\ + namespace \ + { \ + static ::nonius::benchmark_registrar \ + NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, __VA_ARGS__); \ } NONIUS_LOCAL_BENCHMARK("CLDirectConvolutionLayer_NCHW", [](nonius::chronometer meter) { diff --git a/tools/kbenchmark/kernels/acl_cl/TransposeConv.cpp b/tools/kbenchmark/kernels/acl_cl/TransposeConv.cpp index 8278a61..c2ac305 100644 --- a/tools/kbenchmark/kernels/acl_cl/TransposeConv.cpp +++ b/tools/kbenchmark/kernels/acl_cl/TransposeConv.cpp @@ -207,12 +207,11 @@ inline nonius::benchmark_registry &local_benchmark_registry() } // namespace -#define NONIUS_LOCAL_BENCHMARK(name, ...) \ - namespace \ - { \ - static ::nonius::benchmark_registrar \ - NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, \ - __VA_ARGS__); \ +#define NONIUS_LOCAL_BENCHMARK(name, ...) \ + namespace \ + { \ + static ::nonius::benchmark_registrar \ + NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, __VA_ARGS__); \ } NONIUS_LOCAL_BENCHMARK("CLDeconvolutionLayer_NCHW", [](nonius::chronometer meter) { diff --git a/tools/kbenchmark/kernels/acl_neon/Convolution.cpp b/tools/kbenchmark/kernels/acl_neon/Convolution.cpp index 2d19cb2..1656186 100644 --- a/tools/kbenchmark/kernels/acl_neon/Convolution.cpp +++ b/tools/kbenchmark/kernels/acl_neon/Convolution.cpp @@ -223,12 +223,11 @@ inline nonius::benchmark_registry &local_benchmark_registry() } // namespace -#define NONIUS_LOCAL_BENCHMARK(name, ...) \ - namespace \ - { \ - static ::nonius::benchmark_registrar \ - NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, \ - __VA_ARGS__); \ +#define NONIUS_LOCAL_BENCHMARK(name, ...) \ + namespace \ + { \ + static ::nonius::benchmark_registrar \ + NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, __VA_ARGS__); \ } NONIUS_LOCAL_BENCHMARK("NEDirectConvolutionLayer_NCHW", [](nonius::chronometer meter) { diff --git a/tools/kbenchmark/kernels/acl_neon/TransposeConv.cpp b/tools/kbenchmark/kernels/acl_neon/TransposeConv.cpp index 0878499..892547d 100644 --- a/tools/kbenchmark/kernels/acl_neon/TransposeConv.cpp +++ b/tools/kbenchmark/kernels/acl_neon/TransposeConv.cpp @@ -199,12 +199,11 @@ inline nonius::benchmark_registry &local_benchmark_registry() } // namespace -#define NONIUS_LOCAL_BENCHMARK(name, ...) \ - namespace \ - { \ - static ::nonius::benchmark_registrar \ - NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, \ - __VA_ARGS__); \ +#define NONIUS_LOCAL_BENCHMARK(name, ...) \ + namespace \ + { \ + static ::nonius::benchmark_registrar \ + NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, __VA_ARGS__); \ } NONIUS_LOCAL_BENCHMARK("NEDeconvolutionLayer_NCHW", [](nonius::chronometer meter) { diff --git a/tools/kernel_report/kernel_report.py b/tools/kernel_report/kernel_report.py index b8a601e..8940e88 100755 --- a/tools/kernel_report/kernel_report.py +++ b/tools/kernel_report/kernel_report.py @@ -14,8 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import os import argparse +from os.path import dirname, realpath, join class Backend: @@ -28,11 +28,8 @@ class Backend: class KernelReporter(object): def __init__(self, args): - # TODO: Remove os defendency - '/' - if args.base[0] != '/': - self.onertBase = os.getcwd() + '/' + args.base - else: - self.onertBase = args.base + root_path = dirname(dirname(dirname(realpath(__file__)))) + self.onertBase = join(root_path, "runtime", "onert") if args.md5: self.printMD5 = True else: @@ -181,7 +178,6 @@ if __name__ == '__main__': default='cpu,acl_cl,acl_neon', help="backend list to report (use comma)") arg_parser.add_argument("--md5", action='store_true', help="Print for md5") - arg_parser.add_argument("base", type=str, help="onert base directory") args = arg_parser.parse_args() report = KernelReporter(args) diff --git a/tools/nnapi_quickcheck/CMakeLists.txt b/tools/nnapi_quickcheck/CMakeLists.txt deleted file mode 100644 index c88155a..0000000 --- a/tools/nnapi_quickcheck/CMakeLists.txt +++ /dev/null @@ -1,82 +0,0 @@ -if(NOT BUILD_NNAPI_QUICKCHECK) - return() -endif(NOT BUILD_NNAPI_QUICKCHECK) - -file(GLOB_RECURSE NNAPI_QUICKCHECK_LIB_SOURCES "lib/*.cpp") -file(GLOB_RECURSE NNAPI_QUICKCHECK_LIB_TESTS "lib/*.test.cpp") -list(REMOVE_ITEM NNAPI_QUICKCHECK_LIB_SOURCES ${NNAPI_QUICKCHECK_LIB_TESTS}) - -add_library(nnapi_quickcheck_common ${NNAPI_QUICKCHECK_LIB_SOURCES}) -target_include_directories(nnapi_quickcheck_common PUBLIC "inc") -target_link_libraries(nnapi_quickcheck_common nnfw_lib_misc) -target_link_libraries(nnapi_quickcheck_common nnfw_lib_tflite) - -add_executable(nnapi_quickcheck_lib_env_test "lib/env.test.cpp") -target_link_libraries(nnapi_quickcheck_lib_env_test nnapi_quickcheck_common) - -function(add_nnapi_quickcheck NAME) - add_executable(nnapi_quickcheck_${NAME} "tests/${NAME}.cpp") - nnfw_find_package(GTest) - target_link_libraries(nnapi_quickcheck_${NAME} gtest gtest_main pthread) - target_link_libraries(nnapi_quickcheck_${NAME} nnapi_quickcheck_common) -endfunction(add_nnapi_quickcheck) - -add_nnapi_quickcheck(add_1) -add_nnapi_quickcheck(add_2) -add_nnapi_quickcheck(add_3) -add_nnapi_quickcheck(add_4) -add_nnapi_quickcheck(add_5) -add_nnapi_quickcheck(add_6) -add_nnapi_quickcheck(add_7) -add_nnapi_quickcheck(add_8) -add_nnapi_quickcheck(add_9) -add_nnapi_quickcheck(add_quan_1) -add_nnapi_quickcheck(div_1) -add_nnapi_quickcheck(div_2) -add_nnapi_quickcheck(sub_1) -add_nnapi_quickcheck(sub_2) -add_nnapi_quickcheck(sub_3) -add_nnapi_quickcheck(sub_4) -add_nnapi_quickcheck(sub_5) -add_nnapi_quickcheck(sub_6) -add_nnapi_quickcheck(mul_1) -add_nnapi_quickcheck(mul_2) -add_nnapi_quickcheck(mul_quan_1) -add_nnapi_quickcheck(relu_1) -add_nnapi_quickcheck(relu_quan_1) -add_nnapi_quickcheck(relu_2) -add_nnapi_quickcheck(relu_3) -add_nnapi_quickcheck(relu6_1) -add_nnapi_quickcheck(relu6_quan_1) -add_nnapi_quickcheck(relu1_1) -add_nnapi_quickcheck(conv_1) -add_nnapi_quickcheck(conv_quan_1) -add_nnapi_quickcheck(dconv_1) -add_nnapi_quickcheck(dconv_quan_1) -add_nnapi_quickcheck(max_pool_1) -add_nnapi_quickcheck(max_pool_quan_1) -add_nnapi_quickcheck(avg_pool_1) -add_nnapi_quickcheck(avg_pool_quan_1) -add_nnapi_quickcheck(concat_1) -add_nnapi_quickcheck(concat_quan_1) -add_nnapi_quickcheck(reshape_1) -add_nnapi_quickcheck(reshape_quan_1) -add_nnapi_quickcheck(fully_connected_1) -add_nnapi_quickcheck(fully_connected_quan_1) -add_nnapi_quickcheck(softmax_1) -add_nnapi_quickcheck(softmax_2) -add_nnapi_quickcheck(softmax_quan_1) -add_nnapi_quickcheck(resize_bilinear_1) 
-add_nnapi_quickcheck(topk_v2_1) -add_nnapi_quickcheck(cast_1) -add_nnapi_quickcheck(cast_q_to_f_1) -add_nnapi_quickcheck(cast_2) -add_nnapi_quickcheck(gather_1) -add_nnapi_quickcheck(gather_2) -add_nnapi_quickcheck(dequantize_1) -add_nnapi_quickcheck(tanh_1) -add_nnapi_quickcheck(logistic_quan_1) -add_nnapi_quickcheck(split_1) -add_nnapi_quickcheck(split_2) -add_nnapi_quickcheck(split_3) -add_nnapi_quickcheck(split_4) diff --git a/tools/nnapi_quickcheck/inc/env.h b/tools/nnapi_quickcheck/inc/env.h deleted file mode 100644 index c2efceb..0000000 --- a/tools/nnapi_quickcheck/inc/env.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ENV_UTILS_H__ -#define __ENV_UTILS_H__ - -#include - -#include - -class IntVar -{ -public: - IntVar(const std::string &name, int32_t value); - -public: - int32_t operator()(void) const { return _value; } - -private: - int32_t _value; -}; - -class FloatVar -{ -public: - FloatVar(const std::string &name, float value); - -public: - float operator()(void) const { return _value; } - -private: - float _value; -}; - -class StrVar -{ -public: - StrVar(const std::string &name, const std::string &value); - -public: - const std::string &operator()(void) const { return _value; } - -private: - std::string _value; -}; - -#endif // __ENV_UTILS_H__ diff --git a/tools/nnapi_quickcheck/lib/env.cpp b/tools/nnapi_quickcheck/lib/env.cpp deleted file mode 100644 index 005e876..0000000 --- a/tools/nnapi_quickcheck/lib/env.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "env.h" - -#include "misc/environment.h" - -// -// Integer variable -// -IntVar::IntVar(const std::string &name, int32_t value) : _value{value} -{ - nnfw::misc::env::IntAccessor{name}.access(_value); -} - -// -// Float variable -// -FloatVar::FloatVar(const std::string &name, float value) : _value{value} -{ - nnfw::misc::env::FloatAccessor{name}.access(_value); -} - -// -// String variable -// -#include - -StrVar::StrVar(const std::string &name, const std::string &value) : _value{value} -{ - auto env = std::getenv(name.c_str()); - - if (env) - { - _value = std::string{env}; - } -} diff --git a/tools/nnapi_quickcheck/tests/add_1.cpp b/tools/nnapi_quickcheck/tests/add_1.cpp deleted file mode 100644 index f5363f9..0000000 --- a/tools/nnapi_quickcheck/tests/add_1.cpp +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_add_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "add_1.lst" -#undef INT_VALUE - - const int32_t LEFT_N = LEFT_N_Value(); - const int32_t LEFT_C = LEFT_C_Value(); - const int32_t LEFT_H = LEFT_H_Value(); - const int32_t LEFT_W = LEFT_W_Value(); - - const int32_t RIGHT_N = RIGHT_N_Value(); - const int32_t RIGHT_C = RIGHT_C_Value(); - const int32_t RIGHT_H = RIGHT_H_Value(); - const int32_t RIGHT_W = RIGHT_W_Value(); - - const int32_t OFM_N = std::max(LEFT_N, RIGHT_N); - const int32_t OFM_C = std::max(LEFT_C, RIGHT_C); - const int32_t OFM_H = std::max(LEFT_H, RIGHT_H); - const int32_t OFM_W = std::max(LEFT_W, RIGHT_W); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LEFT_N); - PRINT_VALUE(LEFT_C); - PRINT_VALUE(LEFT_H); - PRINT_VALUE(LEFT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(RIGHT_N); - PRINT_VALUE(RIGHT_C); - PRINT_VALUE(RIGHT_H); - PRINT_VALUE(RIGHT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_N); - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); 
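// --- Illustrative sketch, not part of the original add_1.cpp ----------------
// The *_Value() objects printed here come from the INT_VALUE entries in
// add_1.lst: each IntVar (see the deleted env.h/env.cpp above) reads an
// environment variable of the same name, so a tensor shape can be overridden
// at run time, e.g. LEFT_H=32 ./nnapi_quickcheck_add_1, without rebuilding.
// A minimal, self-contained sketch of that lookup pattern (generic, not the
// nnfw::misc::env API itself):
#include <cstdint>
#include <cstdlib>
#include <string>

static int32_t env_int(const std::string &name, int32_t fallback)
{
  const char *s = std::getenv(name.c_str()); // nullptr when the variable is unset
  return s ? static_cast<int32_t>(std::strtol(s, nullptr, 10)) : fallback;
}
// e.g. const int32_t LEFT_H = env_int("LEFT_H", 16);
// -----------------------------------------------------------------------------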
- PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? - TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure input(s) - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */, - {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization); - - interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */, - {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */, - quantization); - - // Add Convolution Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run Add and store the result into Tensor #0 - // - Read Left from Tensor #1 - // - Read Left from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1)); - - interp.SetInputs({1, 2}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/add_1.lst b/tools/nnapi_quickcheck/tests/add_1.lst deleted file mode 100644 index fa17cae..0000000 --- a/tools/nnapi_quickcheck/tests/add_1.lst +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(LEFT_N, 1) -INT_VALUE(LEFT_C, 3) -INT_VALUE(LEFT_H, 16) -INT_VALUE(LEFT_W, 16) - -INT_VALUE(RIGHT_N, 1) -INT_VALUE(RIGHT_C, 3) -INT_VALUE(RIGHT_H, 16) -INT_VALUE(RIGHT_W, 16) diff --git a/tools/nnapi_quickcheck/tests/add_2.cpp b/tools/nnapi_quickcheck/tests/add_2.cpp deleted file mode 100644 index fe4d12f..0000000 --- a/tools/nnapi_quickcheck/tests/add_2.cpp +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_add_2, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "add_2.lst" -#undef INT_VALUE - - const int32_t LEFT_N = LEFT_N_Value(); - const int32_t LEFT_C = LEFT_C_Value(); - const int32_t LEFT_H = LEFT_H_Value(); - const int32_t LEFT_W = LEFT_W_Value(); - - const int32_t RIGHT_N = RIGHT_N_Value(); - const int32_t RIGHT_C = RIGHT_C_Value(); - const int32_t RIGHT_H = RIGHT_H_Value(); - const int32_t RIGHT_W = RIGHT_W_Value(); - - const int32_t OFM_N = std::max(LEFT_N, RIGHT_N); - const int32_t OFM_C = std::max(LEFT_C, RIGHT_C); - const int32_t OFM_H = std::max(LEFT_H, RIGHT_H); - const int32_t OFM_W = std::max(LEFT_W, RIGHT_W); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LEFT_N); - PRINT_VALUE(LEFT_C); - PRINT_VALUE(LEFT_H); - PRINT_VALUE(LEFT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(RIGHT_N); - PRINT_VALUE(RIGHT_C); - PRINT_VALUE(RIGHT_H); - PRINT_VALUE(RIGHT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_N); - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - // Configure left data - const uint32_t left_size = LEFT_N * LEFT_C * LEFT_H * LEFT_W; - float left_data[left_size] = { - 0.0f, - }; - - // Fill left data with random data - { - std::normal_distribution left_dist(-1.0f, +1.0f); - - for (uint32_t off = 0; off < left_size; ++off) - { - left_data[off++] = left_dist(random); - } - } - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
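// Note (illustrative, not part of the original file): for kTfLiteFloat32
// tensors the scale/zero_point fields are not consulted by the float kernels;
// they only matter for quantized tensor types such as kTfLiteUInt8, so a
// default-initialized TfLiteQuantizationParams is sufficient here. See
// add_quan_1.cpp further below for a case where the parameters take effect.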
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure input(s) - interp.SetTensorParametersReadOnly(1, kTfLiteFloat32 /* type */, "left" /* name */, - {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization, - reinterpret_cast(left_data), - left_size * sizeof(float)); - - interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */, - {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */, - quantization); - - // Add Convolution Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run Add and store the result into Tensor #0 - // - Read LHS from Tensor #1 - // - Read RHS from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1)); - - interp.SetInputs({2}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/add_2.lst b/tools/nnapi_quickcheck/tests/add_2.lst deleted file mode 100644 index fa17cae..0000000 --- a/tools/nnapi_quickcheck/tests/add_2.lst +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(LEFT_N, 1) -INT_VALUE(LEFT_C, 3) -INT_VALUE(LEFT_H, 16) -INT_VALUE(LEFT_W, 16) - -INT_VALUE(RIGHT_N, 1) -INT_VALUE(RIGHT_C, 3) -INT_VALUE(RIGHT_H, 16) -INT_VALUE(RIGHT_W, 16) diff --git a/tools/nnapi_quickcheck/tests/add_3.cpp b/tools/nnapi_quickcheck/tests/add_3.cpp deleted file mode 100644 index ce409cc..0000000 --- a/tools/nnapi_quickcheck/tests/add_3.cpp +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" - -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/TensorShapeUtils.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_add_3, simple_test) -{ - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Initialize random number generator - std::minstd_rand random(SEED); - -#define STR_VALUE(NAME, VALUE) StrVar NAME##_Value(#NAME, VALUE); -#include "add_3.lst" -#undef STR_VALUE - - const auto LHS_SHAPE = nnfw::misc::tensor::Shape::from(LHS_SHAPE_Value()); - const auto RHS_SHAPE = nnfw::misc::tensor::Shape::from(RHS_SHAPE_Value()); - const auto OUT_SHAPE = nnfw::tflite::broadcast(LHS_SHAPE, RHS_SHAPE); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LHS_SHAPE); - PRINT_VALUE(RHS_SHAPE); - PRINT_VALUE(OUT_SHAPE); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - using nnfw::tflite::as_dims; - - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? - TfLiteQuantizationParams quantization; - - quantization.scale = 1; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - as_dims(OUT_SHAPE), quantization); - - // Configure input(s) - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */, - as_dims(LHS_SHAPE), quantization); - - interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */, - as_dims(RHS_SHAPE), quantization); - - // Add Convolution Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run Add and store the result into Tensor #0 - // - Read Left from Tensor #1 - // - Read Left from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1)); - - interp.SetInputs({1, 2}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = 0; - param.tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(param.verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(param.tolerance); - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/add_3.lst b/tools/nnapi_quickcheck/tests/add_3.lst deleted file mode 100644 index 1981db4..0000000 --- a/tools/nnapi_quickcheck/tests/add_3.lst +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef STR_VALUE -#error "STR_VALUE should be 
defined" -#endif // STR_VALUE - -STR_VALUE(LHS_SHAPE, "1,3,16,16") -STR_VALUE(RHS_SHAPE, "1,3,16,16") diff --git a/tools/nnapi_quickcheck/tests/add_4.cpp b/tools/nnapi_quickcheck/tests/add_4.cpp deleted file mode 100644 index b1231dd..0000000 --- a/tools/nnapi_quickcheck/tests/add_4.cpp +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_add_4, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "add_4.lst" -#undef INT_VALUE - - const int32_t LEFT_N = LEFT_N_Value(); - const int32_t LEFT_C = LEFT_C_Value(); - const int32_t LEFT_H = LEFT_H_Value(); - const int32_t LEFT_W = LEFT_W_Value(); - - const int32_t RIGHT_N = RIGHT_N_Value(); - const int32_t RIGHT_C = RIGHT_C_Value(); - const int32_t RIGHT_H = RIGHT_H_Value(); - const int32_t RIGHT_W = RIGHT_W_Value(); - - const int32_t OFM_N = std::max(LEFT_N, RIGHT_N); - const int32_t OFM_C = std::max(LEFT_C, RIGHT_C); - const int32_t OFM_H = std::max(LEFT_H, RIGHT_H); - const int32_t OFM_W = std::max(LEFT_W, RIGHT_W); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LEFT_N); - PRINT_VALUE(LEFT_C); - PRINT_VALUE(LEFT_H); - PRINT_VALUE(LEFT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(RIGHT_N); - PRINT_VALUE(RIGHT_C); - PRINT_VALUE(RIGHT_H); - PRINT_VALUE(RIGHT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_N); - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
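// --- Illustrative sketch, not part of the original add_4.cpp ----------------
// add_4 exercises broadcasting: RIGHT_H is 1 in add_4.lst, and the expected
// output shape is computed above as the element-wise std::max of the two input
// shapes. That rule is only valid when, for every axis, the two extents are
// equal or one of them is 1; a hedged sketch of the per-axis check:
#include <algorithm>
#include <cassert>
#include <cstdint>

static int32_t broadcast_dim(int32_t lhs, int32_t rhs)
{
  assert(lhs == rhs || lhs == 1 || rhs == 1); // otherwise the ADD is ill-formed
  return std::max(lhs, rhs);
}
// e.g. OFM_H = broadcast_dim(LEFT_H /* 16 */, RIGHT_H /* 1 */) yields 16
// -----------------------------------------------------------------------------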
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure input(s) - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */, - {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization); - - interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */, - {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */, - quantization); - - // Add Convolution Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run Add and store the result into Tensor #0 - // - Read Left from Tensor #1 - // - Read Left from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1)); - - interp.SetInputs({1, 2}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/add_4.lst b/tools/nnapi_quickcheck/tests/add_4.lst deleted file mode 100644 index 6b28900..0000000 --- a/tools/nnapi_quickcheck/tests/add_4.lst +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(LEFT_N, 1) -INT_VALUE(LEFT_C, 2) -INT_VALUE(LEFT_H, 16) -INT_VALUE(LEFT_W, 8) - -INT_VALUE(RIGHT_N, 1) -INT_VALUE(RIGHT_C, 2) -INT_VALUE(RIGHT_H, 1) -INT_VALUE(RIGHT_W, 8) diff --git a/tools/nnapi_quickcheck/tests/add_5.cpp b/tools/nnapi_quickcheck/tests/add_5.cpp deleted file mode 100644 index f900153c..0000000 --- a/tools/nnapi_quickcheck/tests/add_5.cpp +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_add_5, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "add_5.lst" -#undef INT_VALUE - - const int32_t LEFT_N = LEFT_N_Value(); - const int32_t LEFT_C = LEFT_C_Value(); - const int32_t LEFT_H = LEFT_H_Value(); - const int32_t LEFT_W = LEFT_W_Value(); - - const int32_t RIGHT = RIGHT_Value(); - - const int32_t OFM_N = LEFT_N; - const int32_t OFM_C = LEFT_C; - const int32_t OFM_H = LEFT_H; - const int32_t OFM_W = LEFT_W; - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LEFT_N); - PRINT_VALUE(LEFT_C); - PRINT_VALUE(LEFT_H); - PRINT_VALUE(LEFT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(RIGHT); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_N); - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure input(s) - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */, - {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization); - - interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */, - {RIGHT} /* dims */, quantization); - - // Add Convolution Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run Add and store the result into Tensor #0 - // - Read Left from Tensor #1 - // - Read Left from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1)); - - interp.SetInputs({1, 2}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/add_5.lst b/tools/nnapi_quickcheck/tests/add_5.lst deleted file mode 100644 index eb316b6..0000000 --- a/tools/nnapi_quickcheck/tests/add_5.lst +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(LEFT_N, 1) -INT_VALUE(LEFT_C, 3) -INT_VALUE(LEFT_H, 8) -INT_VALUE(LEFT_W, 16) - -INT_VALUE(RIGHT, 1) diff --git a/tools/nnapi_quickcheck/tests/add_6.cpp b/tools/nnapi_quickcheck/tests/add_6.cpp deleted file mode 100644 index 83b87ef..0000000 --- a/tools/nnapi_quickcheck/tests/add_6.cpp +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_add_6, simple_test) -{ - int verbose = 1; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "add_6.lst" -#undef INT_VALUE - - const int32_t LEFT_H = LEFT_H_Value(); - const int32_t LEFT_W = LEFT_W_Value(); - - const int32_t RIGHT = RIGHT_Value(); - - const int32_t OFM_H = LEFT_H; - const int32_t OFM_W = LEFT_W; - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LEFT_H); - PRINT_VALUE(LEFT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(RIGHT); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_H, OFM_W} /* dims */, quantization); - - // Configure input(s) - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */, - {LEFT_H, LEFT_W} /* dims */, quantization); - - interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */, - {RIGHT} /* dims */, quantization); - - // Add Convolution Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run Add and store the result into Tensor #0 - // - Read Left from Tensor #1 - // - Read Left from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1)); - - interp.SetInputs({1, 2}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/add_6.lst b/tools/nnapi_quickcheck/tests/add_6.lst deleted file mode 100644 index 75db4c8..0000000 --- a/tools/nnapi_quickcheck/tests/add_6.lst +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(LEFT_H, 8) -INT_VALUE(LEFT_W, 2) - -INT_VALUE(RIGHT, 1) diff --git a/tools/nnapi_quickcheck/tests/add_7.cpp b/tools/nnapi_quickcheck/tests/add_7.cpp deleted file mode 100644 index 732320f..0000000 --- a/tools/nnapi_quickcheck/tests/add_7.cpp +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_add_7, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "add_7.lst" -#undef INT_VALUE - - const int32_t LEFT_C = LEFT_C_Value(); - const int32_t LEFT_H = LEFT_H_Value(); - const int32_t LEFT_W = LEFT_W_Value(); - - const int32_t RIGHT_C = RIGHT_C_Value(); - const int32_t RIGHT_H = RIGHT_H_Value(); - const int32_t RIGHT_W = RIGHT_W_Value(); - - const int32_t OFM_C = LEFT_C; - const int32_t OFM_H = LEFT_H; - const int32_t OFM_W = LEFT_W; - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LEFT_C); - PRINT_VALUE(LEFT_H); - PRINT_VALUE(LEFT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(RIGHT_C); - PRINT_VALUE(RIGHT_H); - PRINT_VALUE(RIGHT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure input(s) - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */, - {LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization); - - interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */, - {RIGHT_H, RIGHT_W, RIGHT_C} /* dims */, quantization); - - // Add Convolution Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run Add and store the result into Tensor #0 - // - Read Left from Tensor #1 - // - Read Left from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1)); - - interp.SetInputs({1, 2}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/add_7.lst b/tools/nnapi_quickcheck/tests/add_7.lst deleted file mode 100644 index 1dc8b61..0000000 --- a/tools/nnapi_quickcheck/tests/add_7.lst +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(LEFT_C, 3) -INT_VALUE(LEFT_H, 8) -INT_VALUE(LEFT_W, 16) - -INT_VALUE(RIGHT_C, 3) -INT_VALUE(RIGHT_H, 8) -INT_VALUE(RIGHT_W, 1) diff --git a/tools/nnapi_quickcheck/tests/add_8.cpp b/tools/nnapi_quickcheck/tests/add_8.cpp deleted file mode 100644 index d89e977..0000000 --- a/tools/nnapi_quickcheck/tests/add_8.cpp +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_add_8, simple_test) -{ - int verbose = 1; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "add_8.lst" -#undef INT_VALUE - - const int32_t LEFT_N = LEFT_N_Value(); - const int32_t LEFT_C = LEFT_C_Value(); - const int32_t LEFT_H = LEFT_H_Value(); - const int32_t LEFT_W = LEFT_W_Value(); - - const int32_t RIGHT_N = RIGHT_N_Value(); - const int32_t RIGHT_C = RIGHT_C_Value(); - const int32_t RIGHT_H = RIGHT_H_Value(); - const int32_t RIGHT_W = RIGHT_W_Value(); - - const int32_t OFM_N = std::max(LEFT_N, RIGHT_N); - const int32_t OFM_C = std::max(LEFT_C, RIGHT_C); - const int32_t OFM_H = std::max(LEFT_H, RIGHT_H); - const int32_t OFM_W = std::max(LEFT_W, RIGHT_W); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LEFT_N); - PRINT_VALUE(LEFT_C); - PRINT_VALUE(LEFT_H); - PRINT_VALUE(LEFT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(RIGHT_N); - PRINT_VALUE(RIGHT_C); - PRINT_VALUE(RIGHT_H); - PRINT_VALUE(RIGHT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_N); - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - // Configure left data - const uint32_t left_size = LEFT_N * LEFT_C * LEFT_H * LEFT_W; - const uint32_t right_size = RIGHT_N * RIGHT_C * RIGHT_H * RIGHT_W; - float left_data[left_size] = { - 0.0f, - }; - float right_data[right_size] = { - 0.0f, - }; - - // Fill left data with random data - { - std::normal_distribution left_dist(-1.0f, +1.0f); - int value = 10; - for (uint32_t off = 0; off < left_size; ++off) - { - left_data[off] = value; - std::cout << left_data[off] << std::endl; - } - value = 1; - for (uint32_t off = 0; off < right_size; ++off) - { - right_data[off] = value++; - std::cout << right_data[off] << std::endl; - } - } - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure input(s) - interp.SetTensorParametersReadOnly(1, kTfLiteFloat32 /* type */, "left" /* name */, - {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization, - reinterpret_cast(left_data), - left_size * sizeof(float)); - - // Configure input(s) - interp.SetTensorParametersReadOnly( - 2, kTfLiteFloat32 /* type */, "right" /* name */, {RIGHT_C} /* dims */, quantization, - //{RIGHT_W, RIGHT_C} /* dims */, quantization, - reinterpret_cast(right_data), right_size * sizeof(float)); - - // Add Convolution Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run Add and store the result into Tensor #0 - // - Read LHS from Tensor #1 - // - Read RHS from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1)); - - interp.SetInputs({}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/add_8.lst b/tools/nnapi_quickcheck/tests/add_8.lst deleted file mode 100644 index 3119c7f..0000000 --- a/tools/nnapi_quickcheck/tests/add_8.lst +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(LEFT_N, 1) -INT_VALUE(LEFT_H, 3) -INT_VALUE(LEFT_W, 2) -INT_VALUE(LEFT_C, 4) - -INT_VALUE(RIGHT_N, 1) -INT_VALUE(RIGHT_H, 1) -INT_VALUE(RIGHT_W, 1) -INT_VALUE(RIGHT_C, 4) diff --git a/tools/nnapi_quickcheck/tests/add_9.cpp b/tools/nnapi_quickcheck/tests/add_9.cpp deleted file mode 100644 index fd4e1f9..0000000 --- a/tools/nnapi_quickcheck/tests/add_9.cpp +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_add_9, simple_test) -{ - int verbose = 1; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "add_9.lst" -#undef INT_VALUE - - const int32_t LEFT_N = LEFT_N_Value(); - const int32_t LEFT_C = LEFT_C_Value(); - const int32_t LEFT_H = LEFT_H_Value(); - const int32_t LEFT_W = LEFT_W_Value(); - - const int32_t RIGHT_N = RIGHT_N_Value(); - const int32_t RIGHT_C = RIGHT_C_Value(); - const int32_t RIGHT_H = RIGHT_H_Value(); - const int32_t RIGHT_W = RIGHT_W_Value(); - - const int32_t OFM_N = std::max(LEFT_N, RIGHT_N); - const int32_t OFM_C = std::max(LEFT_C, RIGHT_C); - const int32_t OFM_H = std::max(LEFT_H, RIGHT_H); - const int32_t OFM_W = std::max(LEFT_W, RIGHT_W); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LEFT_N); - PRINT_VALUE(LEFT_H); - PRINT_VALUE(LEFT_W); - PRINT_VALUE(LEFT_C); - PRINT_NEWLINE(); - - PRINT_VALUE(RIGHT_N); - PRINT_VALUE(RIGHT_H); - PRINT_VALUE(RIGHT_W); - PRINT_VALUE(RIGHT_C); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_N); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); - PRINT_VALUE(OFM_C); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - // Configure left data - const uint32_t left_size = LEFT_N * LEFT_C * LEFT_H * LEFT_W; - const uint32_t right_size = RIGHT_N * RIGHT_C * RIGHT_H * RIGHT_W; - float left_data[left_size] = { - 0.0f, - }; - float right_data[right_size] = { - 0.0f, - }; - - // Fill left data with random data - { - std::normal_distribution left_dist(-1.0f, +1.0f); - float value = 10.0f; - for (uint32_t off = 0; off < left_size; ++off) - { - left_data[off] = value; - } - value = 1.0f; - for (uint32_t off = 0; off < right_size; ++off) - { - right_data[off] = value++; - } - } - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure input(s) - interp.SetTensorParametersReadOnly( - 1, kTfLiteFloat32 /* type */, "left" /* name */, {LEFT_W, LEFT_C} /* dims */, quantization, - reinterpret_cast(left_data), left_size * sizeof(float)); - - // Configure input(s) - interp.SetTensorParametersReadOnly(2, kTfLiteFloat32 /* type */, "right" /* name */, - {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */, - quantization, reinterpret_cast(right_data), - right_size * sizeof(float)); - - // Add Convolution Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run Add and store the result into Tensor #0 - // - Read LHS from Tensor #1 - // - Read RHS from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1)); - - interp.SetInputs({}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/add_9.lst b/tools/nnapi_quickcheck/tests/add_9.lst deleted file mode 100644 index 52a1f1a..0000000 --- a/tools/nnapi_quickcheck/tests/add_9.lst +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(LEFT_N, 1) -INT_VALUE(LEFT_H, 1) -INT_VALUE(LEFT_W, 3) -INT_VALUE(LEFT_C, 4) - -INT_VALUE(RIGHT_N, 1) -INT_VALUE(RIGHT_H, 2) -INT_VALUE(RIGHT_W, 3) -INT_VALUE(RIGHT_C, 4) diff --git a/tools/nnapi_quickcheck/tests/add_quan_1.cpp b/tools/nnapi_quickcheck/tests/add_quan_1.cpp deleted file mode 100644 index e3d8512..0000000 --- a/tools/nnapi_quickcheck/tests/add_quan_1.cpp +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_add_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "add_quan_1.lst" -#undef INT_VALUE - - const int32_t LEFT_N = LEFT_N_Value(); - const int32_t LEFT_C = LEFT_C_Value(); - const int32_t LEFT_H = LEFT_H_Value(); - const int32_t LEFT_W = LEFT_W_Value(); - - const int32_t RIGHT_N = RIGHT_N_Value(); - const int32_t RIGHT_C = RIGHT_C_Value(); - const int32_t RIGHT_H = RIGHT_H_Value(); - const int32_t RIGHT_W = RIGHT_W_Value(); - - const int32_t OFM_N = std::max(LEFT_N, RIGHT_N); - const int32_t OFM_C = std::max(LEFT_C, RIGHT_C); - const int32_t OFM_H = std::max(LEFT_H, RIGHT_H); - const int32_t OFM_W = std::max(LEFT_W, RIGHT_W); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LEFT_N); - PRINT_VALUE(LEFT_C); - PRINT_VALUE(LEFT_H); - PRINT_VALUE(LEFT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(RIGHT_N); - PRINT_VALUE(RIGHT_C); - PRINT_VALUE(RIGHT_H); - PRINT_VALUE(RIGHT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_N); - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
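// --- Illustrative sketch, not part of the original add_quan_1.cpp -----------
// Here the parameters do matter: the tensors are kTfLiteUInt8, and the comment
// above gives the asymmetric scheme  real = scale * (q - zero_point).  With the
// input scale 1.0 and output scale 2.0 chosen below, two uint8 inputs whose
// real-valued sum can reach 2 * 255 still requantize into the uint8 output
// range. Minimal helper sketch of that round trip:
#include <algorithm>
#include <cmath>
#include <cstdint>

static float dequantize(uint8_t q, float scale, int32_t zero_point)
{
  return scale * (static_cast<int32_t>(q) - zero_point);
}

static uint8_t quantize(float real, float scale, int32_t zero_point)
{
  const float q = std::round(real / scale) + static_cast<float>(zero_point);
  return static_cast<uint8_t>(std::min(255.0f, std::max(0.0f, q)));
}
// e.g. quantize(dequantize(200, 1.0f, 0) + dequantize(150, 1.0f, 0), 2.0f, 0) == 175
// -----------------------------------------------------------------------------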
- TfLiteQuantizationParams quantization; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - quantization.scale = 2.0f; - interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure input(s) - quantization.scale = 1.0f; - interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "left" /* name */, - {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization); - - interp.SetTensorParametersReadWrite(2, kTfLiteUInt8 /* type */, "right" /* name */, - {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */, - quantization); - - // Add Convolution Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run Add and store the result into Tensor #0 - // - Read Left from Tensor #1 - // - Read Left from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1)); - - interp.SetInputs({1, 2}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/add_quan_1.lst b/tools/nnapi_quickcheck/tests/add_quan_1.lst deleted file mode 100644 index fa17cae..0000000 --- a/tools/nnapi_quickcheck/tests/add_quan_1.lst +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(LEFT_N, 1) -INT_VALUE(LEFT_C, 3) -INT_VALUE(LEFT_H, 16) -INT_VALUE(LEFT_W, 16) - -INT_VALUE(RIGHT_N, 1) -INT_VALUE(RIGHT_C, 3) -INT_VALUE(RIGHT_H, 16) -INT_VALUE(RIGHT_W, 16) diff --git a/tools/nnapi_quickcheck/tests/avg_pool_1.cpp b/tools/nnapi_quickcheck/tests/avg_pool_1.cpp deleted file mode 100644 index 052c689..0000000 --- a/tools/nnapi_quickcheck/tests/avg_pool_1.cpp +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_avg_pool_1, simple_test) -{ - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Set random test parameters - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "avg_pool_1.lst" -#undef INT_VALUE - - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - const int32_t KER_H = KER_H_Value(); - const int32_t KER_W = KER_W_Value(); - - const int32_t OFM_C = IFM_C; - const int32_t OFM_H = (IFM_H - KER_H) + 1; - const int32_t OFM_W = (IFM_W - KER_W) + 1; - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_NEWLINE(); - - PRINT_VALUE(KER_H); - PRINT_VALUE(KER_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
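// --- Illustrative sketch, not part of the original avg_pool_1.cpp -----------
// The OFM_H/OFM_W values computed above are the kTfLitePaddingValid output
// size for the stride-1 pooling configured below; the general VALID-padding
// rule is out = (in - filter) / stride + 1 (integer division):
#include <cstdint>

static int32_t valid_output_size(int32_t in, int32_t filter, int32_t stride)
{
  return (in - filter) / stride + 1;
}
// e.g. with IFM_H = 3, KER_H = 3, stride 1: valid_output_size(3, 3, 1) == 1
// -----------------------------------------------------------------------------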
- TfLiteQuantizationParams quantization = make_default_quantization(); - - quantization.scale = 1; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(2); - - // Configure OFM - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure IFM - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // Add Max Pooling Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->padding = kTfLitePaddingValid; - param->stride_width = 1; - param->stride_height = 1; - param->filter_width = KER_W; - param->filter_height = KER_H; - param->activation = kTfLiteActNone; - - // Run Convolution and store its result into Tensor #0 - // - Read IFM from Tensor #1 - interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_AVERAGE_POOL_2D, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/avg_pool_1.lst b/tools/nnapi_quickcheck/tests/avg_pool_1.lst deleted file mode 100644 index 02d86d4..0000000 --- a/tools/nnapi_quickcheck/tests/avg_pool_1.lst +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_C, 2) -INT_VALUE(IFM_H, 3) -INT_VALUE(IFM_W, 4) - -INT_VALUE(KER_H, 3) -INT_VALUE(KER_W, 4) diff --git a/tools/nnapi_quickcheck/tests/avg_pool_quan_1.cpp b/tools/nnapi_quickcheck/tests/avg_pool_quan_1.cpp deleted file mode 100644 index 86f35f7..0000000 --- a/tools/nnapi_quickcheck/tests/avg_pool_quan_1.cpp +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_avg_pool_1, simple_test) -{ - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Set random test parameters - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "avg_pool_quan_1.lst" -#undef INT_VALUE - - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - const int32_t KER_H = KER_H_Value(); - const int32_t KER_W = KER_W_Value(); - - const int32_t OFM_C = IFM_C; - const int32_t OFM_H = (IFM_H - KER_H) + 1; - const int32_t OFM_W = (IFM_W - KER_W) + 1; - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_NEWLINE(); - - PRINT_VALUE(KER_H); - PRINT_VALUE(KER_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization; - quantization.scale = 1.0f; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(2); - - // Configure OFM - interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */, - {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure IFM - interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */, - {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // Add Max Pooling Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->padding = kTfLitePaddingValid; - param->stride_width = 1; - param->stride_height = 1; - param->filter_width = KER_W; - param->filter_height = KER_H; - param->activation = kTfLiteActNone; - - // Run Convolution and store its result into Tensor #0 - // - Read IFM from Tensor #1 - interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_AVERAGE_POOL_2D, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/avg_pool_quan_1.lst b/tools/nnapi_quickcheck/tests/avg_pool_quan_1.lst deleted file mode 100644 index 02d86d4..0000000 --- a/tools/nnapi_quickcheck/tests/avg_pool_quan_1.lst +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_C, 2) -INT_VALUE(IFM_H, 3) -INT_VALUE(IFM_W, 4) - -INT_VALUE(KER_H, 3) -INT_VALUE(KER_W, 4) diff --git a/tools/nnapi_quickcheck/tests/cast_1.cpp b/tools/nnapi_quickcheck/tests/cast_1.cpp deleted file mode 100644 index 788cd57..0000000 --- a/tools/nnapi_quickcheck/tests/cast_1.cpp +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_cast_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "cast_1.lst" -#undef INT_VALUE - - const int32_t IFM_N = IFM_N_Value(); - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - const int32_t OFM_N = IFM_N; - const int32_t OFM_C = IFM_C; - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_N); - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_N); - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. 
Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - TfLiteQuantizationParams quantization; - - quantization.scale = 1; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(2); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure input - interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */, - {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // Add Cast Node - // Run CAST and store the result into Tensor #0 - // - Read input from Tensor #1 - interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr, - BuiltinOpResolver().FindOp(BuiltinOperator_CAST, 1)); - - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/cast_1.lst b/tools/nnapi_quickcheck/tests/cast_1.lst deleted file mode 100644 index a0077cb..0000000 --- a/tools/nnapi_quickcheck/tests/cast_1.lst +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_N, 1) -INT_VALUE(IFM_C, 3) -INT_VALUE(IFM_H, 320) -INT_VALUE(IFM_W, 320) diff --git a/tools/nnapi_quickcheck/tests/cast_2.cpp b/tools/nnapi_quickcheck/tests/cast_2.cpp deleted file mode 100644 index a9e99ee..0000000 --- a/tools/nnapi_quickcheck/tests/cast_2.cpp +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_cast_2, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "cast_2.lst" -#undef INT_VALUE - - const int32_t IFM_N = IFM_N_Value(); - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - const int32_t OFM_N = IFM_N; - const int32_t OFM_C = IFM_C; - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_N); - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_N); - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. 
Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(2); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure input - interp.SetTensorParametersReadWrite(1, kTfLiteInt32 /* type */, "input" /* name */, - {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // Add Cast Node - // Run CAST and store the result into Tensor #0 - // - Read input from Tensor #1 - interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr, - BuiltinOpResolver().FindOp(BuiltinOperator_CAST, 1)); - - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/cast_2.lst b/tools/nnapi_quickcheck/tests/cast_2.lst deleted file mode 100644 index a0077cb..0000000 --- a/tools/nnapi_quickcheck/tests/cast_2.lst +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_N, 1) -INT_VALUE(IFM_C, 3) -INT_VALUE(IFM_H, 320) -INT_VALUE(IFM_W, 320) diff --git a/tools/nnapi_quickcheck/tests/cast_q_to_f_1.cpp b/tools/nnapi_quickcheck/tests/cast_q_to_f_1.cpp deleted file mode 100644 index 4af6c77..0000000 --- a/tools/nnapi_quickcheck/tests/cast_q_to_f_1.cpp +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_cast_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "cast_q_to_f_1.lst" -#undef INT_VALUE - - const int32_t IFM_N = IFM_N_Value(); - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - const int32_t OFM_N = IFM_N; - const int32_t OFM_C = IFM_C; - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_N); - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_N); - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. 
Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - TfLiteQuantizationParams quantization; - - quantization.scale = 1; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(2); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure input - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // Add Cast Node - // Run CAST and store the result into Tensor #0 - // - Read input from Tensor #1 - interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr, - BuiltinOpResolver().FindOp(BuiltinOperator_CAST, 1)); - - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/cast_q_to_f_1.lst b/tools/nnapi_quickcheck/tests/cast_q_to_f_1.lst deleted file mode 100644 index a0077cb..0000000 --- a/tools/nnapi_quickcheck/tests/cast_q_to_f_1.lst +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_N, 1) -INT_VALUE(IFM_C, 3) -INT_VALUE(IFM_H, 320) -INT_VALUE(IFM_W, 320) diff --git a/tools/nnapi_quickcheck/tests/concat_1.cpp b/tools/nnapi_quickcheck/tests/concat_1.cpp deleted file mode 100644 index d2cb1aa..0000000 --- a/tools/nnapi_quickcheck/tests/concat_1.cpp +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_concat_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "concat_1.lst" -#undef INT_VALUE - - // TODO Allow users to set concat axis! 
- const int32_t CONCAT_COUNT = CONCAT_COUNT_Value(); - - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - int32_t OFM_C = 0; - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(CONCAT_COUNT); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - // Randomize IFM depth - std::default_random_engine generator(SEED); - std::uniform_int_distribution distribution(1, 8); - - std::vector depths; - - for (int32_t n = 0; n < CONCAT_COUNT; ++n) - { - const auto depth = distribution(generator); - - OFM_C += depth; - depths.emplace_back(depth); - } - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? - TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(depths.size() + 1); - - // Configure OFM - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure IFM(s) - std::vector ifm_indexes; - - for (uint32_t n = 0; n < depths.size(); ++n) - { - const auto ifm_index = 1 + n; - const auto IFM_C = depths.at(n); - - interp.SetTensorParametersReadWrite(ifm_index, kTfLiteFloat32 /* type */, "input" /* name */, - {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - ifm_indexes.emplace_back(ifm_index); - } - - // Add Concat Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - param->axis = 3; - - // Run Convolution and store its result into Tensor #0 - // - Read IFM from Tensor #1 - interp.AddNodeWithParameters(ifm_indexes, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_CONCATENATION, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs(ifm_indexes); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/concat_1.lst b/tools/nnapi_quickcheck/tests/concat_1.lst deleted file mode 100644 index db70d4c..0000000 --- a/tools/nnapi_quickcheck/tests/concat_1.lst +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(CONCAT_COUNT, 3) - -INT_VALUE(IFM_H, 3) -INT_VALUE(IFM_W, 4) diff --git a/tools/nnapi_quickcheck/tests/concat_quan_1.cpp b/tools/nnapi_quickcheck/tests/concat_quan_1.cpp deleted file mode 100644 index f861ac8..0000000 --- a/tools/nnapi_quickcheck/tests/concat_quan_1.cpp +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_concat_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "concat_quan_1.lst" -#undef INT_VALUE - - // TODO Allow users to set concat axis! - const int32_t CONCAT_COUNT = CONCAT_COUNT_Value(); - - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - int32_t OFM_C = 0; - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(CONCAT_COUNT); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - // Randomize IFM depth - std::default_random_engine generator(SEED); - std::uniform_int_distribution distribution(1, 8); - - std::vector depths; - - for (int32_t n = 0; n < CONCAT_COUNT; ++n) - { - const auto depth = distribution(generator); - - OFM_C += depth; - depths.emplace_back(depth); - } - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
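// Sketch of how the output depth follows from the depths randomized above when the
// inputs are concatenated along axis 3 (channels). The depth values below are
// invented for illustration; the test itself draws CONCAT_COUNT depths uniformly
// from [1, 8] at runtime.
std::vector<int32_t> example_depths{2, 5, 3}; // e.g. CONCAT_COUNT == 3
int32_t example_ofm_c = 0;
for (auto d : example_depths)
  example_ofm_c += d; // OFM_C == 2 + 5 + 3 == 10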
- TfLiteQuantizationParams quantization; - quantization.scale = 1.0f; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(depths.size() + 1); - - // Configure OFM - interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */, - {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure IFM(s) - std::vector ifm_indexes; - - for (uint32_t n = 0; n < depths.size(); ++n) - { - const auto ifm_index = 1 + n; - const auto IFM_C = depths.at(n); - - interp.SetTensorParametersReadWrite(ifm_index, kTfLiteUInt8 /* type */, "input" /* name */, - {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - ifm_indexes.emplace_back(ifm_index); - } - - // Add Concat Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - param->axis = 3; - - // Run Convolution and store its result into Tensor #0 - // - Read IFM from Tensor #1 - interp.AddNodeWithParameters(ifm_indexes, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_CONCATENATION, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs(ifm_indexes); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/concat_quan_1.lst b/tools/nnapi_quickcheck/tests/concat_quan_1.lst deleted file mode 100644 index db70d4c..0000000 --- a/tools/nnapi_quickcheck/tests/concat_quan_1.lst +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(CONCAT_COUNT, 3) - -INT_VALUE(IFM_H, 3) -INT_VALUE(IFM_W, 4) diff --git a/tools/nnapi_quickcheck/tests/conv_1.cpp b/tools/nnapi_quickcheck/tests/conv_1.cpp deleted file mode 100644 index b5b145c..0000000 --- a/tools/nnapi_quickcheck/tests/conv_1.cpp +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_conv_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "conv_1.lst" -#undef INT_VALUE - - const int32_t STRIDE_H = STRIDE_H_Value(); - const int32_t STRIDE_W = STRIDE_W_Value(); - - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - const int32_t KER_N = KER_N_Value(); - const int32_t KER_C = IFM_C_Value(); - const int32_t KER_H = KER_H_Value(); - const int32_t KER_W = KER_W_Value(); - - const int32_t OFM_C = KER_N; - const int32_t OFM_H = (IFM_H - KER_H) / STRIDE_H + 1; - const int32_t OFM_W = (IFM_W - KER_W) / STRIDE_W + 1; - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(STRIDE_H); - PRINT_VALUE(STRIDE_W); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_NEWLINE(); - - PRINT_VALUE(KER_N); - PRINT_VALUE(KER_C); - PRINT_VALUE(KER_H); - PRINT_VALUE(KER_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - // Configure Kernel Data - const uint32_t kernel_size = KER_N * KER_C * KER_H * KER_W; - float kernel_data[kernel_size] = { - 0.0f, - }; - - // Fill kernel data with random data - { - std::normal_distribution kernel_dist(-1.0f, +1.0f); - - for (uint32_t off = 0; off < kernel_size; ++off) - { - kernel_data[off++] = kernel_dist(random); - } - } - - // Configure Bias Data - const auto bias_size = KER_N; - float bias_data[bias_size] = { - 0.0f, - }; - - // Fill bias data with random data - { - std::normal_distribution bias_dist(-1.0f, +1.0f); - - for (uint32_t off = 0; off < bias_size; ++off) - { - bias_data[off] = bias_dist(random); - } - } - - // Assumption on this example - assert(IFM_C == KER_C); - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
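// The output size computed earlier in this test follows the VALID-padding formula
//   OFM = (IFM - KER) / STRIDE + 1
// For the default conv_1.lst values (IFM_H = 3, IFM_W = 4, KER_H = 3, KER_W = 4,
// STRIDE_H = STRIDE_W = 1) this gives a 1x1 output plane:
static_assert((3 - 3) / 1 + 1 == 1, "OFM_H for the default conv_1.lst parameters");
static_assert((4 - 4) / 1 + 1 == 1, "OFM_W for the default conv_1.lst parameters");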
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(5); - - // Configure OFM - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure IFM - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // NOTE kernel_data & bias_data should live longer than interpreter! - interp.SetTensorParametersReadOnly( - 2, kTfLiteFloat32 /* type */, "filter" /* name */, {KER_N, KER_H, KER_W, KER_C} /* dims */, - quantization, reinterpret_cast(kernel_data), kernel_size * sizeof(float)); - - interp.SetTensorParametersReadOnly( - 3, kTfLiteFloat32 /* type */, "bias" /* name */, {bias_size} /* dims */, quantization, - reinterpret_cast(bias_data), bias_size * sizeof(float)); - - // Add Convolution Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->padding = kTfLitePaddingValid; - param->stride_width = STRIDE_W; - param->stride_height = STRIDE_H; - param->activation = kTfLiteActRelu; - - // Run Convolution and store its result into Tensor #0 - // - Read IFM from Tensor #1 - // - Read Filter from Tensor #2, - // - Read Bias from Tensor #3 - interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_CONV_2D, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/conv_1.lst b/tools/nnapi_quickcheck/tests/conv_1.lst deleted file mode 100644 index c01fc90..0000000 --- a/tools/nnapi_quickcheck/tests/conv_1.lst +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_C, 2) -INT_VALUE(IFM_H, 3) -INT_VALUE(IFM_W, 4) - -INT_VALUE(KER_N, 1) -INT_VALUE(KER_H, 3) -INT_VALUE(KER_W, 4) - -INT_VALUE(STRIDE_H, 1) -INT_VALUE(STRIDE_W, 1) diff --git a/tools/nnapi_quickcheck/tests/conv_quan_1.cpp b/tools/nnapi_quickcheck/tests/conv_quan_1.cpp deleted file mode 100644 index 2824547..0000000 --- a/tools/nnapi_quickcheck/tests/conv_quan_1.cpp +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_conv_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "conv_quan_1.lst" -#undef INT_VALUE - - const int32_t STRIDE_H = STRIDE_H_Value(); - const int32_t STRIDE_W = STRIDE_W_Value(); - - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - const int32_t KER_N = KER_N_Value(); - const int32_t KER_C = IFM_C_Value(); - const int32_t KER_H = KER_H_Value(); - const int32_t KER_W = KER_W_Value(); - - const int32_t OFM_C = KER_N; - const int32_t OFM_H = (IFM_H - KER_H) / STRIDE_H + 1; - const int32_t OFM_W = (IFM_W - KER_W) / STRIDE_W + 1; - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(STRIDE_H); - PRINT_VALUE(STRIDE_W); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_NEWLINE(); - - PRINT_VALUE(KER_N); - PRINT_VALUE(KER_C); - PRINT_VALUE(KER_H); - PRINT_VALUE(KER_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - // Configure Kernel Data - const uint32_t kernel_size = KER_N * KER_C * KER_H * KER_W; - float kernel_data[kernel_size] = { - 0.0f, - }; - - // Fill kernel data with random data - { - std::normal_distribution kernel_dist(-1.0f, +1.0f); - - for (uint32_t off = 0; off < kernel_size; ++off) - { - kernel_data[off++] = kernel_dist(random); - } - } - - // Configure Bias Data - const auto bias_size = KER_N; - int32_t bias_data[bias_size] = { - 0, - }; - - // Fill bias data with random data - { - std::normal_distribution bias_dist(-1.0f, +1.0f); - - for (uint32_t off = 0; off < bias_size; ++off) - { - bias_data[off] = static_cast(bias_dist(random)); - } - } - - // Assumption on this example - assert(IFM_C == KER_C); - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. 
Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - TfLiteQuantizationParams quantization; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(5); - - // Configure OFM - float max_scale = (KER_N, KER_C * KER_H * KER_W) * - std::numeric_limits::max(); // * IFM_scale(1.0f) * kernel_scale(1.0f) - quantization.scale = max_scale; - interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */, - {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure IFM - quantization.scale = 1.0f; - interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */, - {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // NOTE kernel_data & bias_data should live longer than interpreter! - interp.SetTensorParametersReadOnly( - 2, kTfLiteUInt8 /* type */, "filter" /* name */, {KER_N, KER_H, KER_W, KER_C} /* dims */, - quantization, reinterpret_cast(kernel_data), kernel_size * sizeof(uint8_t)); - - quantization.scale *= quantization.scale; - interp.SetTensorParametersReadOnly( - 3, kTfLiteInt32 /* type */, "bias" /* name */, {bias_size} /* dims */, quantization, - reinterpret_cast(bias_data), bias_size * sizeof(int32_t)); - - // Add Convolution Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->padding = kTfLitePaddingValid; - param->stride_width = STRIDE_W; - param->stride_height = STRIDE_H; - param->activation = kTfLiteActRelu; - - // Run Convolution and store its result into Tensor #0 - // - Read IFM from Tensor #1 - // - Read Filter from Tensor #2, - // - Read Bias from Tensor #3 - interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_CONV_2D, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/conv_quan_1.lst b/tools/nnapi_quickcheck/tests/conv_quan_1.lst deleted file mode 100644 index c01fc90..0000000 --- a/tools/nnapi_quickcheck/tests/conv_quan_1.lst +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_C, 2) -INT_VALUE(IFM_H, 3) -INT_VALUE(IFM_W, 4) - -INT_VALUE(KER_N, 1) -INT_VALUE(KER_H, 3) -INT_VALUE(KER_W, 4) - -INT_VALUE(STRIDE_H, 1) -INT_VALUE(STRIDE_W, 1) diff --git a/tools/nnapi_quickcheck/tests/dconv_1.cpp b/tools/nnapi_quickcheck/tests/dconv_1.cpp deleted file mode 100644 index 36ec7a9..0000000 --- a/tools/nnapi_quickcheck/tests/dconv_1.cpp +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_dconv_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "dconv_1.lst" -#undef INT_VALUE - - const int32_t STRIDE_H = STRIDE_H_Value(); - const int32_t STRIDE_W = STRIDE_W_Value(); - - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - const int32_t KER_C = KER_C_Value(); - const int32_t KER_H = KER_H_Value(); - const int32_t KER_W = KER_W_Value(); - - const int32_t OFM_C = KER_C; - const int32_t OFM_H = (IFM_H - KER_H) / STRIDE_H + 1; - const int32_t OFM_W = (IFM_W - KER_W) / STRIDE_W + 1; - - const int32_t MULTIPLIER = MULTIPLIER_Value(); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_NEWLINE(); - - PRINT_VALUE(KER_C); - PRINT_VALUE(KER_H); - PRINT_VALUE(KER_W); - PRINT_NEWLINE(); - - PRINT_VALUE(STRIDE_H); - PRINT_VALUE(STRIDE_W); - PRINT_NEWLINE(); - - PRINT_VALUE(MULTIPLIER); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - assert(MULTIPLIER * IFM_C == KER_C); - - // Configure Kernel Data - const uint32_t kernel_size = KER_C * KER_H * KER_W; - float kernel_data[kernel_size] = { - 0.0f, - }; - - // Fill kernel data with random data - { - std::normal_distribution kernel_dist(-1.0f, +1.0f); - - for (uint32_t off = 0; off < kernel_size; ++off) - { - kernel_data[off] = kernel_dist(random); - } - } - - // Configure Bias Data - const auto bias_size = KER_C; - float bias_data[bias_size] = { - 0.0f, - }; - - // Fill bias data with random data - { - std::normal_distribution bias_dist(-1.0f, +1.0f); - - for (uint32_t off = 0; off < bias_size; ++off) - { - bias_data[off] = bias_dist(random); - } - } - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
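// The node-parameter struct allocated later in this setup must come from malloc,
// because AddNodeWithParameters takes ownership and releases it with free (see the
// NOTE below). A minimal helper with that behaviour could look like the sketch
// here; the actual make_alloc shipped in this tool's "memory.h" may differ in detail.
//
//   #include <cstdlib>
//   template <typename T> T *make_alloc(void) { return reinterpret_cast<T *>(std::malloc(sizeof(T))); }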
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(4); - - // Configure OFM - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure IFM - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // NOTE kernel_data & bias_data should live longer than interpreter! - interp.SetTensorParametersReadOnly( - 2, kTfLiteFloat32 /* type */, "filter" /* name */, {1, KER_H, KER_W, KER_C} /* dims */, - quantization, reinterpret_cast(kernel_data), kernel_size * sizeof(float)); - - interp.SetTensorParametersReadOnly( - 3, kTfLiteFloat32 /* type */, "bias" /* name */, {bias_size} /* dims */, quantization, - reinterpret_cast(bias_data), bias_size * sizeof(float)); - - // Add Convolution Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->padding = kTfLitePaddingValid; - param->stride_width = STRIDE_W; - param->stride_height = STRIDE_H; - param->depth_multiplier = MULTIPLIER; - param->activation = kTfLiteActRelu; - - // Run Convolution and store its result into Tensor #0 - // - Read IFM from Tensor #1 - // - Read Filter from Tensor #2, - // - Read Bias from Tensor #3 - interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_DEPTHWISE_CONV_2D, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/dconv_1.lst b/tools/nnapi_quickcheck/tests/dconv_1.lst deleted file mode 100644 index da851ae..0000000 --- a/tools/nnapi_quickcheck/tests/dconv_1.lst +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_C, 2) -INT_VALUE(IFM_H, 3) -INT_VALUE(IFM_W, 4) - -INT_VALUE(KER_C, 2) -INT_VALUE(KER_H, 3) -INT_VALUE(KER_W, 4) - -INT_VALUE(MULTIPLIER, 1) - -INT_VALUE(STRIDE_H, 1) -INT_VALUE(STRIDE_W, 1) diff --git a/tools/nnapi_quickcheck/tests/dconv_quan_1.cpp b/tools/nnapi_quickcheck/tests/dconv_quan_1.cpp deleted file mode 100644 index 8305ad1..0000000 --- a/tools/nnapi_quickcheck/tests/dconv_quan_1.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_dconv_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "dconv_quan_1.lst" -#undef INT_VALUE - - const int32_t STRIDE_H = STRIDE_H_Value(); - const int32_t STRIDE_W = STRIDE_W_Value(); - - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - const int32_t KER_C = KER_C_Value(); - const int32_t KER_H = KER_H_Value(); - const int32_t KER_W = KER_W_Value(); - - const int32_t OFM_C = KER_C; - const int32_t OFM_H = (IFM_H - KER_H) / STRIDE_H + 1; - const int32_t OFM_W = (IFM_W - KER_W) / STRIDE_W + 1; - - const int32_t MULTIPLIER = MULTIPLIER_Value(); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_NEWLINE(); - - PRINT_VALUE(KER_C); - PRINT_VALUE(KER_H); - PRINT_VALUE(KER_W); - PRINT_NEWLINE(); - - PRINT_VALUE(STRIDE_H); - PRINT_VALUE(STRIDE_W); - PRINT_NEWLINE(); - - PRINT_VALUE(MULTIPLIER); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - assert(MULTIPLIER * IFM_C == KER_C); - - // Configure Kernel Data - const uint32_t kernel_size = KER_C * KER_H * KER_W; - float kernel_data[kernel_size] = { - 0.0f, - }; - - // Fill kernel data with random data - { - std::normal_distribution kernel_dist(-1.0f, +1.0f); - - for (uint32_t off = 0; off < kernel_size; ++off) - { - kernel_data[off] = kernel_dist(random); - } - } - - // Configure Bias Data - const auto bias_size = KER_C; - int32_t bias_data[bias_size] = { - 0, - }; - - // Fill bias data with random data - { - std::normal_distribution bias_dist(-1.0f, +1.0f); - - for (uint32_t off = 0; off < bias_size; ++off) - { - bias_data[off] = static_cast(bias_dist(random)); - } - } - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. 
Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - TfLiteQuantizationParams quantization; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(4); - - // Configure OFM - float max_scale = (1 * KER_C * KER_H * KER_W) * - std::numeric_limits::max(); // * IFM_scale(1.0f) * kernel_scale(1.0f) - quantization.scale = max_scale; - interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */, - {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure IFM - quantization.scale = 1.0f; - interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */, - {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // NOTE kernel_data & bias_data should live longer than interpreter! - interp.SetTensorParametersReadOnly( - 2, kTfLiteUInt8 /* type */, "filter" /* name */, {1, KER_H, KER_W, KER_C} /* dims */, - quantization, reinterpret_cast(kernel_data), kernel_size * sizeof(uint8_t)); - - quantization.scale *= quantization.scale; - interp.SetTensorParametersReadOnly( - 3, kTfLiteInt32 /* type */, "bias" /* name */, {bias_size} /* dims */, quantization, - reinterpret_cast(bias_data), bias_size * sizeof(int32_t)); - - // Add Convolution Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->padding = kTfLitePaddingValid; - param->stride_width = STRIDE_W; - param->stride_height = STRIDE_H; - param->depth_multiplier = MULTIPLIER; - param->activation = kTfLiteActRelu; - - // Run Convolution and store its result into Tensor #0 - // - Read IFM from Tensor #1 - // - Read Filter from Tensor #2, - // - Read Bias from Tensor #3 - interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_DEPTHWISE_CONV_2D, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/dconv_quan_1.lst b/tools/nnapi_quickcheck/tests/dconv_quan_1.lst deleted file mode 100644 index da851ae..0000000 --- a/tools/nnapi_quickcheck/tests/dconv_quan_1.lst +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_C, 2) -INT_VALUE(IFM_H, 3) -INT_VALUE(IFM_W, 4) - -INT_VALUE(KER_C, 2) -INT_VALUE(KER_H, 3) -INT_VALUE(KER_W, 4) - -INT_VALUE(MULTIPLIER, 1) - -INT_VALUE(STRIDE_H, 1) -INT_VALUE(STRIDE_W, 1) diff --git a/tools/nnapi_quickcheck/tests/dequantize_1.cpp b/tools/nnapi_quickcheck/tests/dequantize_1.cpp deleted file mode 100644 index e725fa2..0000000 --- a/tools/nnapi_quickcheck/tests/dequantize_1.cpp +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_dequantize_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "dequantize_1.lst" -#undef INT_VALUE - - const int32_t IFM_N = IFM_N_Value(); - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - const int32_t OFM_N = IFM_N; - const int32_t OFM_C = IFM_C; - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_N); - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_N); - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. 
Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - TfLiteQuantizationParams quantization; - - quantization.scale = 1; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(2); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure input - interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */, - {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // Add DEQUANTIZE Node - // Run DEQUANTIZE and store the result into Tensor #0 - // - Read input from Tensor #1 - interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr, - BuiltinOpResolver().FindOp(BuiltinOperator_DEQUANTIZE, 1)); - - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/dequantize_1.lst b/tools/nnapi_quickcheck/tests/dequantize_1.lst deleted file mode 100644 index a0077cb..0000000 --- a/tools/nnapi_quickcheck/tests/dequantize_1.lst +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_N, 1) -INT_VALUE(IFM_C, 3) -INT_VALUE(IFM_H, 320) -INT_VALUE(IFM_W, 320) diff --git a/tools/nnapi_quickcheck/tests/div_1.cpp b/tools/nnapi_quickcheck/tests/div_1.cpp deleted file mode 100644 index 26dfbbe..0000000 --- a/tools/nnapi_quickcheck/tests/div_1.cpp +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_div_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "div_1.lst" -#undef INT_VALUE - - const int32_t LEFT_N = LEFT_N_Value(); - const int32_t LEFT_C = LEFT_C_Value(); - const int32_t LEFT_H = LEFT_H_Value(); - const int32_t LEFT_W = LEFT_W_Value(); - - const int32_t RIGHT_N = RIGHT_N_Value(); - const int32_t RIGHT_C = RIGHT_C_Value(); - const int32_t RIGHT_H = RIGHT_H_Value(); - const int32_t RIGHT_W = RIGHT_W_Value(); - - const int32_t OFM_N = std::max(LEFT_N, RIGHT_N); - const int32_t OFM_C = std::max(LEFT_C, RIGHT_C); - const int32_t OFM_H = std::max(LEFT_H, RIGHT_H); - const int32_t OFM_W = std::max(LEFT_W, RIGHT_W); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LEFT_N); - PRINT_VALUE(LEFT_C); - PRINT_VALUE(LEFT_H); - PRINT_VALUE(LEFT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(RIGHT_N); - PRINT_VALUE(RIGHT_C); - PRINT_VALUE(RIGHT_H); - PRINT_VALUE(RIGHT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_N); - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
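// [Editor's illustration, not part of the original test] The 'context.h' comment
// above gives the asymmetric-quantization mapping. With assumed values scale = 0.5f
// and zero_point = 3, a stored quantized value of 7 decodes to:
//
//   float real_value = 0.5f * (7 - 3);   // == 2.0f, per real_value = scale * (quantized_value - zero_point)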
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure input(s) - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */, - {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization); - - interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */, - {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */, - quantization); - - // Add Division Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run Div and store the result into Tensor #0 - // - Read Left from Tensor #1 - // - Read Right from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_DIV, 1)); - - interp.SetInputs({1, 2}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/div_1.lst b/tools/nnapi_quickcheck/tests/div_1.lst deleted file mode 100644 index fa17cae..0000000 --- a/tools/nnapi_quickcheck/tests/div_1.lst +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(LEFT_N, 1) -INT_VALUE(LEFT_C, 3) -INT_VALUE(LEFT_H, 16) -INT_VALUE(LEFT_W, 16) - -INT_VALUE(RIGHT_N, 1) -INT_VALUE(RIGHT_C, 3) -INT_VALUE(RIGHT_H, 16) -INT_VALUE(RIGHT_W, 16) diff --git a/tools/nnapi_quickcheck/tests/div_2.cpp b/tools/nnapi_quickcheck/tests/div_2.cpp deleted file mode 100644 index df4efa4..0000000 --- a/tools/nnapi_quickcheck/tests/div_2.cpp +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_div_2, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "div_2.lst" -#undef INT_VALUE - - const int32_t LEFT_N = LEFT_N_Value(); - const int32_t LEFT_C = LEFT_C_Value(); - const int32_t LEFT_H = LEFT_H_Value(); - const int32_t LEFT_W = LEFT_W_Value(); - - const int32_t RIGHT = RIGHT_Value(); - - const int32_t OFM_N = LEFT_N; - const int32_t OFM_C = LEFT_C; - const int32_t OFM_H = LEFT_H; - const int32_t OFM_W = LEFT_W; - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LEFT_N); - PRINT_VALUE(LEFT_C); - PRINT_VALUE(LEFT_H); - PRINT_VALUE(LEFT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(RIGHT); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_N); - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure input(s) - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */, - {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization); - - interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */, - {RIGHT} /* dims */, quantization); - - // Add Division Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run Div and store the result into Tensor #0 - // - Read Left from Tensor #1 - // - Read Right from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_DIV, 1)); - - interp.SetInputs({1, 2}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/div_2.lst b/tools/nnapi_quickcheck/tests/div_2.lst deleted file mode 100644 index cd36ac1..0000000 --- a/tools/nnapi_quickcheck/tests/div_2.lst +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(LEFT_N, 1) -INT_VALUE(LEFT_C, 3) -INT_VALUE(LEFT_H, 16) -INT_VALUE(LEFT_W, 16) - -INT_VALUE(RIGHT, 1) diff --git a/tools/nnapi_quickcheck/tests/fully_connected_1.cpp b/tools/nnapi_quickcheck/tests/fully_connected_1.cpp deleted file mode 100644 index 43cd0a4..0000000 --- a/tools/nnapi_quickcheck/tests/fully_connected_1.cpp +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -template T *make_malloc(void) { return reinterpret_cast(malloc(sizeof(T))); } - -TEST(NNAPI_Quickcheck_fully_connected_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "conv_1.lst" -#undef INT_VALUE - - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - const int32_t KER_H = KER_N_Value(); - const int32_t KER_W = IFM_C_Value() * IFM_H_Value() * IFM_W_Value(); - - const int32_t OUT_LEN = KER_H; - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_NEWLINE(); - - PRINT_VALUE(KER_H); - PRINT_VALUE(KER_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OUT_LEN); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - // Configure Kernel Data - const uint32_t kernel_size = KER_H * KER_W; - float kernel_data[kernel_size] = { - 0.0f, - }; - - // Fill kernel data with random data - { - std::normal_distribution kernel_dist(-1.0f, +1.0f); - - for (uint32_t off = 0; off < kernel_size; ++off) - { - kernel_data[off++] = kernel_dist(random); - } - } - - // Configure Bias Data - const auto bias_size = KER_H; - float bias_data[bias_size] = { - 0.0f, - }; - - // Fill bias data with random data - { - std::normal_distribution bias_dist(-1.0f, +1.0f); - - for (uint32_t off = 0; off < bias_size; ++off) - { - bias_data[off] = bias_dist(random); - } - } - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? - TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(4); - - // Configure OFM - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {1 /*N*/, KER_H} /* dims */, quantization); - - // Configure IFM - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // NOTE kernel_data & bias_data should live longer than interpreter! 
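// [Editor's sketch, assumptions noted] The NOTE above applies because read-only
// tensors reference the caller's buffer rather than copying it, so kernel_data and
// bias_data must stay alive for the interpreter's whole lifetime. One way to make
// that explicit (the std::vector below is an assumption, not the original code):
//
//   std::vector<float> weights(kernel_size, 0.0f);   // declared in the enclosing scope,
//                                                    // so it outlives 'interp'
//   interp.SetTensorParametersReadOnly(
//       2, kTfLiteFloat32, "filter", {KER_H, KER_W}, quantization,
//       reinterpret_cast<const char *>(weights.data()), weights.size() * sizeof(float));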
- interp.SetTensorParametersReadOnly( - 2, kTfLiteFloat32 /* type */, "filter" /* name */, {KER_H, KER_W} /* dims */, quantization, - reinterpret_cast(kernel_data), kernel_size * sizeof(float)); - - interp.SetTensorParametersReadOnly( - 3, kTfLiteFloat32 /* type */, "bias" /* name */, {bias_size} /* dims */, quantization, - reinterpret_cast(bias_data), bias_size * sizeof(float)); - - // Add Fully Connected Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_malloc(); - - param->activation = kTfLiteActRelu; - - // Run Convolution and store its result into Tensor #0 - // - Read IFM from Tensor #1 - // - Read Filter from Tensor #2, - // - Read Bias from Tensor #3 - interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_FULLY_CONNECTED, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/fully_connected_1.lst b/tools/nnapi_quickcheck/tests/fully_connected_1.lst deleted file mode 100644 index 22acb9f..0000000 --- a/tools/nnapi_quickcheck/tests/fully_connected_1.lst +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_C, 2) -INT_VALUE(IFM_H, 3) -INT_VALUE(IFM_W, 4) - -INT_VALUE(KER_H, 1) diff --git a/tools/nnapi_quickcheck/tests/fully_connected_quan_1.cpp b/tools/nnapi_quickcheck/tests/fully_connected_quan_1.cpp deleted file mode 100644 index 2c68835..0000000 --- a/tools/nnapi_quickcheck/tests/fully_connected_quan_1.cpp +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -template T *make_malloc(void) { return reinterpret_cast(malloc(sizeof(T))); } - -TEST(NNAPI_Quickcheck_fully_connected_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "fully_connected_quan_1.lst" -#undef INT_VALUE - - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - const int32_t KER_H = KER_H_Value(); - const int32_t KER_W = IFM_C_Value() * IFM_H_Value() * IFM_W_Value(); - - const int32_t OUT_LEN = KER_H; - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_NEWLINE(); - - PRINT_VALUE(KER_H); - PRINT_VALUE(KER_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OUT_LEN); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - // Configure Kernel Data - const uint32_t kernel_size = KER_H * KER_W; - float kernel_data[kernel_size] = { - 0.0f, - }; - - // Fill kernel data with random data - { - std::normal_distribution kernel_dist(-1.0f, +1.0f); - - for (uint32_t off = 0; off < kernel_size; ++off) - { - kernel_data[off++] = kernel_dist(random); - } - } - - // Configure Bias Data - const auto bias_size = KER_H; - int32_t bias_data[bias_size] = { - 0, - }; - - // Fill bias data with random data - { - std::normal_distribution bias_dist(-1.0f, +1.0f); - - for (uint32_t off = 0; off < bias_size; ++off) - { - bias_data[off] = static_cast(bias_dist(random)); - } - } - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? - TfLiteQuantizationParams quantization = make_default_quantization(); - quantization.scale = FLOAT_NEAREST_TO_1; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(4); - - // Configure OFM - interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */, - {1 /*N*/, KER_H} /* dims */, quantization); - - // Configure IFM - interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */, - {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // NOTE kernel_data & bias_data should live longer than interpreter! 
- interp.SetTensorParametersReadOnly( - 2, kTfLiteUInt8 /* type */, "filter" /* name */, {KER_H, KER_W} /* dims */, quantization, - reinterpret_cast(kernel_data), kernel_size * sizeof(uint8_t)); - - interp.SetTensorParametersReadOnly( - 3, kTfLiteInt32 /* type */, "bias" /* name */, {bias_size} /* dims */, quantization, - reinterpret_cast(bias_data), bias_size * sizeof(int32_t)); - - // Add Fully Connected Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_malloc(); - - param->activation = kTfLiteActRelu; - - // Run Convolution and store its result into Tensor #0 - // - Read IFM from Tensor #1 - // - Read Filter from Tensor #2, - // - Read Bias from Tensor #3 - interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_FULLY_CONNECTED, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/fully_connected_quan_1.lst b/tools/nnapi_quickcheck/tests/fully_connected_quan_1.lst deleted file mode 100644 index 22acb9f..0000000 --- a/tools/nnapi_quickcheck/tests/fully_connected_quan_1.lst +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_C, 2) -INT_VALUE(IFM_H, 3) -INT_VALUE(IFM_W, 4) - -INT_VALUE(KER_H, 1) diff --git a/tools/nnapi_quickcheck/tests/gather_1.cpp b/tools/nnapi_quickcheck/tests/gather_1.cpp deleted file mode 100644 index 4ab164e..0000000 --- a/tools/nnapi_quickcheck/tests/gather_1.cpp +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_gather_1, simple_test) -{ - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Set random test parameters - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "gather_1.lst" -#undef INT_VALUE - - const int32_t INPUT_DATA = INPUT_DATA_Value(); - const int32_t INDEX_DATA = INDEX_DATA_Value(); - - const int32_t OUTPUT_DATA = INDEX_DATA; - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(INPUT_DATA); - PRINT_VALUE(INDEX_DATA); - PRINT_NEWLINE(); - - PRINT_VALUE(OUTPUT_DATA); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? - TfLiteQuantizationParams quantization; - - quantization.scale = 1; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure INPUT_DATA - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "input" /* name */, - {INPUT_DATA} /* dims */, quantization); - - // Configure INDEX_DATA - interp.SetTensorParametersReadWrite(1, kTfLiteInt32 /* type */, "index" /* name */, - {INDEX_DATA} /* dims */, quantization); - - // Configure OUTPUT_VALUES - interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "output_data" /* name */, - {OUTPUT_DATA} /* dims */, quantization); - - auto *param = reinterpret_cast(malloc(sizeof(TfLiteGatherParams))); - - param->axis = 0; - - // Add GATHER Node - // Run GATHER and store its result into Tensor #2 - // - Read input data and index_data from Tensor #0 and #1, respectively - interp.AddNodeWithParameters({0, 1}, {2}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_GATHER, 1)); - - // Set Tensor #0 and #1 as Input, and Tensor #2 as Output - interp.SetInputs({0, 1}); - interp.SetOutputs({2}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/gather_1.lst b/tools/nnapi_quickcheck/tests/gather_1.lst deleted file mode 100644 index 923a056..0000000 --- a/tools/nnapi_quickcheck/tests/gather_1.lst +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(INPUT_DATA, 8192) -INT_VALUE(INDEX_DATA, 300) diff --git 
a/tools/nnapi_quickcheck/tests/gather_2.cpp b/tools/nnapi_quickcheck/tests/gather_2.cpp deleted file mode 100644 index ac9ec8b..0000000 --- a/tools/nnapi_quickcheck/tests/gather_2.cpp +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_gather_2, simple_test) -{ - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Set random test parameters - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "gather_2.lst" -#undef INT_VALUE - - const int32_t INPUT_DATA_H = INPUT_DATA_H_Value(); - const int32_t INPUT_DATA_W = INPUT_DATA_W_Value(); - const int32_t INDEX_DATA = INDEX_DATA_Value(); - - const int32_t OUTPUT_DATA_H = INPUT_DATA_H; - const int32_t OUTPUT_DATA_W = INDEX_DATA; - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(INPUT_DATA_H); - PRINT_VALUE(INPUT_DATA_W); - PRINT_VALUE(INDEX_DATA); - PRINT_NEWLINE(); - - PRINT_VALUE(OUTPUT_DATA_H); - PRINT_VALUE(OUTPUT_DATA_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization; - - quantization.scale = 1; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure INPUT_DATA - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "input" /* name */, - {INPUT_DATA_H, INPUT_DATA_W} /* dims */, quantization); - - // Configure INDEX_DATA - interp.SetTensorParametersReadWrite(1, kTfLiteInt32 /* type */, "index" /* name */, - {INDEX_DATA} /* dims */, quantization); - - // Configure OUTPUT_VALUES - interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "output_data" /* name */, - {OUTPUT_DATA_H, OUTPUT_DATA_W} /* dims */, quantization); - - auto *param = reinterpret_cast(malloc(sizeof(TfLiteGatherParams))); - - param->axis = 0; - - // Add GATHER Node - // Run GATHER and store its result into Tensor #2 - // - Read input data and index_data from Tensor #0 and #1, respectively - interp.AddNodeWithParameters({0, 1}, {2}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_GATHER, 1)); - - // Set Tensor #0 and #1 as Input, and Tensor #2 as Output - interp.SetInputs({0, 1}); - interp.SetOutputs({2}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/gather_2.lst b/tools/nnapi_quickcheck/tests/gather_2.lst deleted file mode 100644 index 5bf6bd3..0000000 --- a/tools/nnapi_quickcheck/tests/gather_2.lst +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(INPUT_DATA_H, 128192) -INT_VALUE(INPUT_DATA_W, 4) -INT_VALUE(INDEX_DATA, 300) diff --git a/tools/nnapi_quickcheck/tests/logistic_quan_1.cpp b/tools/nnapi_quickcheck/tests/logistic_quan_1.cpp deleted file mode 100644 index 0b0a690..0000000 --- a/tools/nnapi_quickcheck/tests/logistic_quan_1.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_logistic_quan_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "logistic_quan_1.lst" -#undef INT_VALUE - - const int32_t IFM_N = IFM_N_Value(); - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - const int32_t OFM_N = IFM_N; - const int32_t OFM_C = IFM_C; - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_N); - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_N); - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. 
Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - TfLiteQuantizationParams in_quantization; - in_quantization.scale = 0.5f; - in_quantization.zero_point = 0; - - TfLiteQuantizationParams out_quantization; - out_quantization.scale = 1.f / 256; - out_quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(2); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, out_quantization); - - // Configure input - interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */, - {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, in_quantization); - - // Add Logistic Node - // Run Logistic and store the result into Tensor #0 - // - Read input from Tensor #1 - interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr, - BuiltinOpResolver().FindOp(BuiltinOperator_LOGISTIC, 1)); - - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/logistic_quan_1.lst b/tools/nnapi_quickcheck/tests/logistic_quan_1.lst deleted file mode 100644 index 9b3d8eb..0000000 --- a/tools/nnapi_quickcheck/tests/logistic_quan_1.lst +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_N, 1) -INT_VALUE(IFM_C, 1) -INT_VALUE(IFM_H, 2) -INT_VALUE(IFM_W, 2) diff --git a/tools/nnapi_quickcheck/tests/max_pool_1.cpp b/tools/nnapi_quickcheck/tests/max_pool_1.cpp deleted file mode 100644 index 62f985d..0000000 --- a/tools/nnapi_quickcheck/tests/max_pool_1.cpp +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_max_pool_1, simple_test) -{ - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Set random test parameters - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "max_pool_1.lst" -#undef INT_VALUE - - const TfLitePadding PADDING_TYPE = static_cast(PADDING_TYPE_Value()); - - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - const int32_t KER_H = KER_H_Value(); - const int32_t KER_W = KER_W_Value(); - - const int32_t OFM_C = IFM_C; - const int32_t OFM_H = OFM_H_Value(); - const int32_t OFM_W = OFM_W_Value(); - - assert((OFM_H >= (IFM_H - KER_H))); - assert((OFM_W >= (IFM_W - KER_W))); - assert((kTfLitePaddingSame == PADDING_TYPE) || (kTfLitePaddingValid == PADDING_TYPE)); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(PADDING_TYPE); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_NEWLINE(); - - PRINT_VALUE(KER_H); - PRINT_VALUE(KER_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
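// [Editor's sketch; the helper shown is assumed, not verified against memory.h]
// The pooling parameters built below are allocated with malloc because
// AddNodeWithParameters takes ownership of the block and releases it with free().
// A minimal malloc-based helper in that spirit:
//
//   template <typename T> T *make_alloc() { return reinterpret_cast<T *>(malloc(sizeof(T))); }
//
//   auto *pool = make_alloc<TfLitePoolParams>();   // later freed by the interpreter, not by the test
//   pool->padding = kTfLitePaddingValid;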
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(2); - - // Configure OFM - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure IFM - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // Add Max Pooling Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->padding = PADDING_TYPE; - param->stride_width = 1; - param->stride_height = 1; - param->filter_width = KER_W; - param->filter_height = KER_H; - param->activation = kTfLiteActNone; - - // Run Convolution and store its result into Tensor #0 - // - Read IFM from Tensor #1 - interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_MAX_POOL_2D, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/max_pool_1.lst b/tools/nnapi_quickcheck/tests/max_pool_1.lst deleted file mode 100644 index 4b5c130..0000000 --- a/tools/nnapi_quickcheck/tests/max_pool_1.lst +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_C, 2) -INT_VALUE(IFM_H, 3) -INT_VALUE(IFM_W, 4) - -INT_VALUE(KER_N, 1) -INT_VALUE(KER_H, 3) -INT_VALUE(KER_W, 4) - -INT_VALUE(OFM_H, 1) -INT_VALUE(OFM_W, 1) - -// Default is kTfLitePaddingValid (= 2) -INT_VALUE(PADDING_TYPE, 2) diff --git a/tools/nnapi_quickcheck/tests/max_pool_quan_1.cpp b/tools/nnapi_quickcheck/tests/max_pool_quan_1.cpp deleted file mode 100644 index 2c05a7d..0000000 --- a/tools/nnapi_quickcheck/tests/max_pool_quan_1.cpp +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_max_pool_1, simple_test) -{ - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Set random test parameters - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "max_pool_quan_1.lst" -#undef INT_VALUE - - const TfLitePadding PADDING_TYPE = static_cast(PADDING_TYPE_Value()); - - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - const int32_t KER_H = KER_H_Value(); - const int32_t KER_W = KER_W_Value(); - - const int32_t OFM_C = IFM_C; - const int32_t OFM_H = OFM_H_Value(); - const int32_t OFM_W = OFM_W_Value(); - - assert((OFM_H >= (IFM_H - KER_H))); - assert((OFM_W >= (IFM_W - KER_W))); - assert((kTfLitePaddingSame == PADDING_TYPE) || (kTfLitePaddingValid == PADDING_TYPE)); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(PADDING_TYPE); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_NEWLINE(); - - PRINT_VALUE(KER_H); - PRINT_VALUE(KER_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization; - quantization.scale = 1.0f; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(2); - - // Configure OFM - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure IFM - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // Add Max Pooling Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->padding = PADDING_TYPE; - param->stride_width = 1; - param->stride_height = 1; - param->filter_width = KER_W; - param->filter_height = KER_H; - param->activation = kTfLiteActNone; - - // Run Convolution and store its result into Tensor #0 - // - Read IFM from Tensor #1 - interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_MAX_POOL_2D, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/max_pool_quan_1.lst b/tools/nnapi_quickcheck/tests/max_pool_quan_1.lst deleted file mode 100644 index 4b5c130..0000000 --- a/tools/nnapi_quickcheck/tests/max_pool_quan_1.lst +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_C, 2) -INT_VALUE(IFM_H, 3) -INT_VALUE(IFM_W, 4) - -INT_VALUE(KER_N, 1) -INT_VALUE(KER_H, 3) -INT_VALUE(KER_W, 4) - -INT_VALUE(OFM_H, 1) -INT_VALUE(OFM_W, 1) - -// Default is kTfLitePaddingValid (= 2) -INT_VALUE(PADDING_TYPE, 2) diff --git a/tools/nnapi_quickcheck/tests/mul_1.cpp b/tools/nnapi_quickcheck/tests/mul_1.cpp deleted file mode 100644 index 57ab713..0000000 --- a/tools/nnapi_quickcheck/tests/mul_1.cpp +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_mul_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "mul_1.lst" -#undef INT_VALUE - - const int32_t LEFT_1D = LEFT_1D_Value(); - const int32_t LEFT_2D = LEFT_2D_Value(); - const int32_t LEFT_3D = LEFT_3D_Value(); - - const int32_t RIGHT_W = RIGHT_W_Value(); - - const int32_t OFM_1D = LEFT_1D_Value(); - const int32_t OFM_2D = LEFT_2D_Value(); - const int32_t OFM_3D = LEFT_3D_Value(); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LEFT_1D); - PRINT_VALUE(LEFT_2D); - PRINT_VALUE(LEFT_3D); - PRINT_NEWLINE(); - - PRINT_VALUE(RIGHT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_1D); - PRINT_VALUE(OFM_2D); - PRINT_VALUE(OFM_3D); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
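// [Editor's illustration only] With the shapes from mul_1.lst (left {3, 1, 4},
// right {4}), MUL broadcasts the right-hand vector across the leading dimensions;
// conceptually the reference computation is:
//
//   for (int i = 0; i < 3; ++i)
//     for (int j = 0; j < 1; ++j)
//       for (int k = 0; k < 4; ++k)
//         out[i][j][k] = left[i][j][k] * right[k];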
- TfLiteQuantizationParams quantization; - - quantization.scale = 1; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_1D, OFM_2D, OFM_3D} /* dims */, quantization); - - // Configure input(s) - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */, - {LEFT_1D, LEFT_2D, LEFT_3D} /* dims */, quantization); - - interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */, - {RIGHT_W} /* dims */, quantization); - - // Add MUL Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run MUL and store the result into Tensor #0 - // - Read Left from Tensor #1 - // - Read Right from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_MUL, 1)); - - interp.SetInputs({1, 2}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - param.tensor_logging = 1; - param.log_path = "report/tensor_mul_1.log"; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/mul_1.lst b/tools/nnapi_quickcheck/tests/mul_1.lst deleted file mode 100644 index 1d42159..0000000 --- a/tools/nnapi_quickcheck/tests/mul_1.lst +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -// (3, 1, 4) -INT_VALUE(LEFT_1D, 3) -INT_VALUE(LEFT_2D, 1) -INT_VALUE(LEFT_3D, 4) - -INT_VALUE(RIGHT_W, 4) diff --git a/tools/nnapi_quickcheck/tests/mul_2.cpp b/tools/nnapi_quickcheck/tests/mul_2.cpp deleted file mode 100644 index a692616..0000000 --- a/tools/nnapi_quickcheck/tests/mul_2.cpp +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_mul_2, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "mul_2.lst" -#undef INT_VALUE - - const int32_t LEFT_D1 = LEFT_D1_Value(); - const int32_t LEFT_D2 = LEFT_D2_Value(); - const int32_t LEFT_D3 = LEFT_D3_Value(); - - const int32_t RIGHT_D1 = RIGHT_D1_Value(); - - const int32_t OFM_D1 = LEFT_D1; - const int32_t OFM_D2 = LEFT_D2; - const int32_t OFM_D3 = LEFT_D3; - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LEFT_D1); - PRINT_VALUE(LEFT_D2); - PRINT_VALUE(LEFT_D3); - PRINT_NEWLINE(); - - PRINT_VALUE(RIGHT_D1); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_D1); - PRINT_VALUE(OFM_D2); - PRINT_VALUE(OFM_D3); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization; - - quantization.scale = 1; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_D1, OFM_D2, OFM_D3} /* dims */, quantization); - - // Configure input(s) - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */, - {LEFT_D1, LEFT_D2, LEFT_D3} /* dims */, quantization); - - interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */, - {RIGHT_D1} /* dims */, quantization); - - // Add Convolution Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run Add and store the result into Tensor #0 - // - Read Left from Tensor #1 - // - Read Left from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_MUL, 1)); - - interp.SetInputs({1, 2}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/mul_2.lst b/tools/nnapi_quickcheck/tests/mul_2.lst deleted file mode 100644 index da53e7e..0000000 --- a/tools/nnapi_quickcheck/tests/mul_2.lst +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(LEFT_D1, 5) -INT_VALUE(LEFT_D2, 3) -INT_VALUE(LEFT_D3, 12) - -INT_VALUE(RIGHT_D1, 12) diff --git a/tools/nnapi_quickcheck/tests/mul_quan_1.cpp b/tools/nnapi_quickcheck/tests/mul_quan_1.cpp deleted file mode 100644 index 5f0061e..0000000 --- a/tools/nnapi_quickcheck/tests/mul_quan_1.cpp +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_mul_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "mul_1.lst" -#undef INT_VALUE - - const int32_t LEFT_1D = LEFT_1D_Value(); - const int32_t LEFT_2D = LEFT_2D_Value(); - const int32_t LEFT_3D = LEFT_3D_Value(); - - const int32_t RIGHT_W = RIGHT_W_Value(); - - const int32_t OFM_1D = LEFT_1D_Value(); - const int32_t OFM_2D = LEFT_2D_Value(); - const int32_t OFM_3D = LEFT_3D_Value(); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LEFT_1D); - PRINT_VALUE(LEFT_2D); - PRINT_VALUE(LEFT_3D); - PRINT_NEWLINE(); - - PRINT_VALUE(RIGHT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_1D); - PRINT_VALUE(OFM_2D); - PRINT_VALUE(OFM_3D); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - float max_scale = - std::numeric_limits::max(); // * input1_scale(1.0f) * input2_scale(1.0f) - quantization.scale = max_scale; - interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */, - {OFM_1D, OFM_2D, OFM_3D} /* dims */, quantization); - - // Configure input(s) - quantization.scale = 1.0f; - interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "left" /* name */, - {LEFT_1D, LEFT_2D, LEFT_3D} /* dims */, quantization); - - interp.SetTensorParametersReadWrite(2, kTfLiteUInt8 /* type */, "right" /* name */, - {RIGHT_W} /* dims */, quantization); - - // Add MUL Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run MUL and store the result into Tensor #0 - // - Read Left from Tensor #1 - // - Read Right from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_MUL, 1)); - - interp.SetInputs({1, 2}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/mul_quan_1.lst b/tools/nnapi_quickcheck/tests/mul_quan_1.lst deleted file mode 100644 index d850f37..0000000 --- a/tools/nnapi_quickcheck/tests/mul_quan_1.lst +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -// (300, 1, 4) -INT_VALUE(LEFT_1D, 300) -INT_VALUE(LEFT_2D, 1) -INT_VALUE(LEFT_3D, 4) - -INT_VALUE(RIGHT_W, 4) diff --git a/tools/nnapi_quickcheck/tests/relu1_1.cpp b/tools/nnapi_quickcheck/tests/relu1_1.cpp deleted file mode 100644 index 25e71dc..0000000 --- a/tools/nnapi_quickcheck/tests/relu1_1.cpp +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" -#include "misc/feature/Shape.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -int main(int argc, char **argv) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "relu1_1.lst" -#undef INT_VALUE - - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? - TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(2); - - // Configure Output Tensor - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_H, OFM_W} /* dims */, quantization); - - // Configure Input Tensor - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {IFM_H, IFM_W} /* dims */, quantization); - - // Add ReLU Node - // Run ReLU and store its result into Tensor #0 - // - Read IFM from Tensor #1 - interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr, - BuiltinOpResolver().FindOp(BuiltinOperator_RELU_N1_TO_1, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - return RandomTestRunner{SEED, param}.run(builder); -} diff --git a/tools/nnapi_quickcheck/tests/relu1_1.lst b/tools/nnapi_quickcheck/tests/relu1_1.lst deleted file mode 100644 index 4f61845..0000000 --- a/tools/nnapi_quickcheck/tests/relu1_1.lst +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_H, 16) -INT_VALUE(IFM_W, 16) diff --git a/tools/nnapi_quickcheck/tests/relu6_1.cpp b/tools/nnapi_quickcheck/tests/relu6_1.cpp deleted file mode 100644 index 43e8383..0000000 --- a/tools/nnapi_quickcheck/tests/relu6_1.cpp +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" -#include "misc/feature/Shape.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_relu6_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "relu6_1.lst" -#undef INT_VALUE - - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(2); - - // Configure Output Tensor - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_H, OFM_W} /* dims */, quantization); - - // Configure Input Tensor - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {IFM_H, IFM_W} /* dims */, quantization); - - // Add ReLU Node - // Run ReLU and store its result into Tensor #0 - // - Read IFM from Tensor #1 - interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr, - BuiltinOpResolver().FindOp(BuiltinOperator_RELU6, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/relu6_1.lst b/tools/nnapi_quickcheck/tests/relu6_1.lst deleted file mode 100644 index 4f61845..0000000 --- a/tools/nnapi_quickcheck/tests/relu6_1.lst +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_H, 16) -INT_VALUE(IFM_W, 16) diff --git a/tools/nnapi_quickcheck/tests/relu6_quan_1.cpp b/tools/nnapi_quickcheck/tests/relu6_quan_1.cpp deleted file mode 100644 index 8356442..0000000 --- a/tools/nnapi_quickcheck/tests/relu6_quan_1.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" -#include "misc/feature/Shape.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -int main(int argc, char **argv) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "relu6_quan_1.lst" -#undef INT_VALUE - - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? - TfLiteQuantizationParams quantization; - quantization.scale = 1.0f; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(2); - - // Configure Output Tensor - interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */, - {OFM_H, OFM_W} /* dims */, quantization); - - // Configure Input Tensor - interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */, - {IFM_H, IFM_W} /* dims */, quantization); - - // Add ReLU Node - // Run ReLU and store its result into Tensor #0 - // - Read IFM from Tensor #1 - interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr, - BuiltinOpResolver().FindOp(BuiltinOperator_RELU6, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - return RandomTestRunner{SEED, param}.run(builder); -} diff --git a/tools/nnapi_quickcheck/tests/relu6_quan_1.lst b/tools/nnapi_quickcheck/tests/relu6_quan_1.lst deleted file mode 100644 index 4f61845..0000000 --- a/tools/nnapi_quickcheck/tests/relu6_quan_1.lst +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_H, 16) -INT_VALUE(IFM_W, 16) diff --git a/tools/nnapi_quickcheck/tests/relu_1.cpp b/tools/nnapi_quickcheck/tests/relu_1.cpp deleted file mode 100644 index decd0dd..0000000 --- a/tools/nnapi_quickcheck/tests/relu_1.cpp +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" -#include "misc/feature/Shape.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_relu_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "relu_1.lst" -#undef INT_VALUE - - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(2); - - // Configure Output Tensor - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_H, OFM_W} /* dims */, quantization); - - // Configure Input Tensor - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {IFM_H, IFM_W} /* dims */, quantization); - - // Add ReLU Node - // Run ReLU and store its result into Tensor #0 - // - Read IFM from Tensor #1 - interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr, - BuiltinOpResolver().FindOp(BuiltinOperator_RELU, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/relu_1.lst b/tools/nnapi_quickcheck/tests/relu_1.lst deleted file mode 100644 index 4f61845..0000000 --- a/tools/nnapi_quickcheck/tests/relu_1.lst +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_H, 16) -INT_VALUE(IFM_W, 16) diff --git a/tools/nnapi_quickcheck/tests/relu_2.cpp b/tools/nnapi_quickcheck/tests/relu_2.cpp deleted file mode 100644 index ccb9f06..0000000 --- a/tools/nnapi_quickcheck/tests/relu_2.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" -#include "misc/feature/Shape.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_relu_2, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "relu_2.lst" -#undef INT_VALUE - - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - const int32_t OFM_C = IFM_C; - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(2); - - // Configure Output Tensor - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure Input Tensor - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // Add ReLU Node - // Run ReLU and store its result into Tensor #0 - // - Read IFM from Tensor #1 - interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr, - BuiltinOpResolver().FindOp(BuiltinOperator_RELU, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/relu_2.lst b/tools/nnapi_quickcheck/tests/relu_2.lst deleted file mode 100644 index 343bff8..0000000 --- a/tools/nnapi_quickcheck/tests/relu_2.lst +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_H, 16) -INT_VALUE(IFM_W, 16) -INT_VALUE(IFM_C, 3) diff --git a/tools/nnapi_quickcheck/tests/relu_3.cpp b/tools/nnapi_quickcheck/tests/relu_3.cpp deleted file mode 100644 index 59a8560..0000000 --- a/tools/nnapi_quickcheck/tests/relu_3.cpp +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" -#include "misc/feature/Shape.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_relu_3, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "relu_3.lst" -#undef INT_VALUE - - const int32_t IFM_N = IFM_N_Value(); - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_N); - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - const int32_t OFM_N = IFM_N; - const int32_t OFM_C = IFM_C; - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(2); - - // Configure Output Tensor - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure Input Tensor - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // Add ReLU Node - // Run ReLU and store its result into Tensor #0 - // - Read IFM from Tensor #1 - interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr, - BuiltinOpResolver().FindOp(BuiltinOperator_RELU, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/relu_3.lst b/tools/nnapi_quickcheck/tests/relu_3.lst deleted file mode 100644 index a3a405c..0000000 --- a/tools/nnapi_quickcheck/tests/relu_3.lst +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_H, 16) -INT_VALUE(IFM_W, 16) -INT_VALUE(IFM_C, 3) -INT_VALUE(IFM_N, 1) diff --git a/tools/nnapi_quickcheck/tests/relu_quan_1.cpp b/tools/nnapi_quickcheck/tests/relu_quan_1.cpp deleted file mode 100644 index 303080e..0000000 --- a/tools/nnapi_quickcheck/tests/relu_quan_1.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" -#include "misc/feature/Shape.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -int main(int argc, char **argv) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "relu_quan_1.lst" -#undef INT_VALUE - - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? - TfLiteQuantizationParams quantization; - quantization.scale = 1.0f; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(2); - - // Configure Output Tensor - interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */, - {OFM_H, OFM_W} /* dims */, quantization); - - // Configure Input Tensor - interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */, - {IFM_H, IFM_W} /* dims */, quantization); - - // Add ReLU Node - // Run ReLU and store its result into Tensor #0 - // - Read IFM from Tensor #1 - interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr, - BuiltinOpResolver().FindOp(BuiltinOperator_RELU, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - return RandomTestRunner{SEED, param}.run(builder); -} diff --git a/tools/nnapi_quickcheck/tests/relu_quan_1.lst b/tools/nnapi_quickcheck/tests/relu_quan_1.lst deleted file mode 100644 index 4f61845..0000000 --- a/tools/nnapi_quickcheck/tests/relu_quan_1.lst +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_H, 16) -INT_VALUE(IFM_W, 16) diff --git a/tools/nnapi_quickcheck/tests/reshape_1.cpp b/tools/nnapi_quickcheck/tests/reshape_1.cpp deleted file mode 100644 index 54cfce2..0000000 --- a/tools/nnapi_quickcheck/tests/reshape_1.cpp +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_reshape_1, simple_test) -{ - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Set random test parameters - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "max_pool_1.lst" -#undef INT_VALUE - - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - const int32_t OUT_L = IFM_C * IFM_H * IFM_W; - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OUT_L); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - const int32_t dims[2] = {1, OUT_L}; - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? - // A: This may be necessary, because quantization values(scale, zero_point) of TENSOR_INT32 and - // TENSOR_QUANT8_ASYMM are passed on to the runtime. 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure OFM - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {1 /*N*/, OUT_L} /* dims */, quantization); - - // Configure IFM - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // Configure Shape - interp.SetTensorParametersReadOnly(2, kTfLiteInt32 /* type */, "shape" /* name */, - {2} /* dims */, quantization, - reinterpret_cast(dims), 2 * sizeof(int32_t)); - - // Add Reshape Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->num_dimensions = 2; - param->shape[0] = 1; - param->shape[1] = OUT_L; - - // Run Reshapeand store its result into Tensor #0 - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_RESHAPE, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/reshape_1.lst b/tools/nnapi_quickcheck/tests/reshape_1.lst deleted file mode 100644 index fcaaff0..0000000 --- a/tools/nnapi_quickcheck/tests/reshape_1.lst +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_C, 2) -INT_VALUE(IFM_H, 4) -INT_VALUE(IFM_W, 8) diff --git a/tools/nnapi_quickcheck/tests/reshape_quan_1.cpp b/tools/nnapi_quickcheck/tests/reshape_quan_1.cpp deleted file mode 100644 index 8eb0bf3..0000000 --- a/tools/nnapi_quickcheck/tests/reshape_quan_1.cpp +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_reshape_1, simple_test) -{ - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Set random test parameters - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "reshape_quan_1.lst" -#undef INT_VALUE - - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - const int32_t OUT_L = IFM_C * IFM_H * IFM_W; - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OUT_L); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - const int32_t dims[2] = {1, OUT_L}; - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? - // A: This may be necessary, because quantization values(scale, zero_point) of TENSOR_INT32 and - // TENSOR_QUANT8_ASYMM are passed on to the runtime. 
- TfLiteQuantizationParams quantization; - quantization.scale = 1.0f; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure OFM - interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */, - {1 /*N*/, OUT_L} /* dims */, quantization); - - // Configure IFM - interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */, - {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // Configure Shape - interp.SetTensorParametersReadOnly(2, kTfLiteInt32 /* type */, "shape" /* name */, - {2} /* dims */, quantization, - reinterpret_cast(dims), 2 * sizeof(int32_t)); - - // Add Reshape Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->num_dimensions = 2; - param->shape[0] = 1; - param->shape[1] = OUT_L; - - // Run Reshapeand store its result into Tensor #0 - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_RESHAPE, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/reshape_quan_1.lst b/tools/nnapi_quickcheck/tests/reshape_quan_1.lst deleted file mode 100644 index fcaaff0..0000000 --- a/tools/nnapi_quickcheck/tests/reshape_quan_1.lst +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_C, 2) -INT_VALUE(IFM_H, 4) -INT_VALUE(IFM_W, 8) diff --git a/tools/nnapi_quickcheck/tests/resize_bilinear_1.cpp b/tools/nnapi_quickcheck/tests/resize_bilinear_1.cpp deleted file mode 100644 index 5b2d7b6..0000000 --- a/tools/nnapi_quickcheck/tests/resize_bilinear_1.cpp +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_resize_bilinear_1, simple_test) -{ - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Set random test parameters - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "resize_bilinear_1.lst" -#undef INT_VALUE - - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - const int32_t OFM_C = IFM_C; - const int32_t OFM_H = OFM_H_Value(); - const int32_t OFM_W = OFM_W_Value(); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - int32_t size_data[2] = {OFM_H, OFM_W}; - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? - // A: This may be necessary, because quantization values(scale, zero_point) of TENSOR_INT32 and - // TENSOR_QUANT8_ASYMM are passed on to the runtime. - TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure OFM - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure IFM - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // Configure Size - interp.SetTensorParametersReadOnly( - 2, kTfLiteInt32 /* type */, "size" /* name */, {2} /* dims */, quantization, - reinterpret_cast(size_data), 2 * sizeof(int32_t)); - - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - // NOTE What is this? 
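// (Not in the original source -- roughly, align_corners selects the coordinate
// mapping used by bilinear resize: when true, the corner pixels of the input
// and output grids are aligned exactly; when false, coordinates are scaled by
// the plain output/input size ratio. The test below simply sets it to false.)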
- param->align_corners = false; - - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_RESIZE_BILINEAR, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/resize_bilinear_1.lst b/tools/nnapi_quickcheck/tests/resize_bilinear_1.lst deleted file mode 100644 index cc3dbd5..0000000 --- a/tools/nnapi_quickcheck/tests/resize_bilinear_1.lst +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_C, 2) -INT_VALUE(IFM_H, 3) -INT_VALUE(IFM_W, 4) - -INT_VALUE(OFM_H, 30) -INT_VALUE(OFM_W, 40) diff --git a/tools/nnapi_quickcheck/tests/softmax_1.cpp b/tools/nnapi_quickcheck/tests/softmax_1.cpp deleted file mode 100644 index 7142475..0000000 --- a/tools/nnapi_quickcheck/tests/softmax_1.cpp +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" -#include "misc/feature/Shape.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_softmax_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "softmax_1.lst" -#undef INT_VALUE - - const int32_t IFM_C = 1; - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Initialize random number generator - std::minstd_rand random(SEED); - - const nnfw::misc::feature::Shape ifm_shape{IFM_C, IFM_H, IFM_W}; - - const int32_t OFM_C = IFM_C; - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(2); - - // Configure Output Tensor - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {1, IFM_H * IFM_W} /* dims */, quantization); - - // Configure Input Tensor - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {1, IFM_H * IFM_W} /* batch_size, input_size */, - quantization); - - // Add Softmax Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->beta = 1.0; - - // Run Softmax and store its result into Tensor #0 - // - Read IFM from Tensor #1 - interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_SOFTMAX, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/softmax_1.lst b/tools/nnapi_quickcheck/tests/softmax_1.lst deleted file mode 100644 index 1ef9da0..0000000 --- a/tools/nnapi_quickcheck/tests/softmax_1.lst +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_H, 2) -INT_VALUE(IFM_W, 2) diff --git a/tools/nnapi_quickcheck/tests/softmax_2.cpp b/tools/nnapi_quickcheck/tests/softmax_2.cpp deleted file mode 100644 index df1ff27..0000000 --- a/tools/nnapi_quickcheck/tests/softmax_2.cpp +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" -#include "misc/feature/Shape.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_softmax_2, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define FLOAT_VALUE(NAME, VALUE) FloatVar NAME##_Value(#NAME, VALUE); -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "softmax_2.lst" -#undef INT_VALUE -#undef FLOAT_VALUE - - const int32_t IFM_C = 1; - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - const float BETA = BETA_Value(); - -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_VALUE(BETA); - PRINT_NEWLINE(); - -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Initialize random number generator - std::minstd_rand random(SEED); - - const nnfw::misc::feature::Shape ifm_shape{IFM_C, IFM_H, IFM_W}; - - const int32_t OFM_C = IFM_C; - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(2); - - // Configure Output Tensor - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {1, IFM_H * IFM_W} /* dims */, quantization); - - // Configure Input Tensor - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {1, IFM_H * IFM_W} /* batch_size, input_size */, - quantization); - - // Add Softmax Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->beta = BETA; - - // Run Softmax and store its result into Tensor #0 - // - Read IFM from Tensor #1 - interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_SOFTMAX, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/softmax_2.lst b/tools/nnapi_quickcheck/tests/softmax_2.lst deleted file mode 100644 index 1c381bf..0000000 --- a/tools/nnapi_quickcheck/tests/softmax_2.lst +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -#ifndef FLOAT_VALUE -#error "FLOAT_VALUE should be defined" -#endif // FLOAT_VALUE - -INT_VALUE(IFM_H, 2) -INT_VALUE(IFM_W, 2) -FLOAT_VALUE(BETA, 0.1) diff --git a/tools/nnapi_quickcheck/tests/softmax_quan_1.cpp b/tools/nnapi_quickcheck/tests/softmax_quan_1.cpp deleted file mode 100644 index 5d38f77..0000000 --- a/tools/nnapi_quickcheck/tests/softmax_quan_1.cpp +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" -#include "misc/feature/Shape.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_softmax_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "softmax_quan_1.lst" -#undef INT_VALUE - - const int32_t IFM_C = 1; - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Initialize random number generator - std::minstd_rand random(SEED); - - const nnfw::misc::feature::Shape ifm_shape{IFM_C, IFM_H, IFM_W}; - - const int32_t OFM_C = IFM_C; - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? - TfLiteQuantizationParams quantization; - quantization.scale = 1.0f / 256; - quantization.zero_point = 0; - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(2); - - // Configure Output Tensor - interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */, - {1, IFM_H * IFM_W} /* dims */, quantization); - - // Configure Input Tensor - interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */, - {1, IFM_H * IFM_W} /* batch_size, input_size */, - quantization); - - // Add Softmax Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->beta = 1.0; - - // Run Softmax and store its result into Tensor #0 - // - Read IFM from Tensor #1 - interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_SOFTMAX, 1)); - - // Set Tensor #1 as Input #0, and Tensor #0 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/softmax_quan_1.lst b/tools/nnapi_quickcheck/tests/softmax_quan_1.lst deleted file mode 100644 index 1ef9da0..0000000 --- a/tools/nnapi_quickcheck/tests/softmax_quan_1.lst +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_H, 2) -INT_VALUE(IFM_W, 2) diff --git a/tools/nnapi_quickcheck/tests/split_1.cpp b/tools/nnapi_quickcheck/tests/split_1.cpp deleted file mode 100644 index 95a7aa8..0000000 --- a/tools/nnapi_quickcheck/tests/split_1.cpp +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) 2018 
Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" -#include "misc/feature/Shape.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_split_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "split_1.lst" -#undef INT_VALUE - - const int32_t IFM_N = IFM_N_Value(); - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - const int32_t NUM_SPLIT = NUM_SPLIT_Value(); - const int32_t AXIS = AXIS_Value(); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_N); - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_VALUE(NUM_SPLIT); - PRINT_VALUE(AXIS); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - const int32_t OFM_N = IFM_N; - const int32_t OFM_C = IFM_C; - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - const int32_t axis[1] = {AXIS}; - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(NUM_SPLIT + 2); - - // Configure Input Tensor(s) - interp.SetTensorParametersReadOnly(0, kTfLiteInt32 /* type */, "axis" /* name */, - {1} /* dims */, quantization, - reinterpret_cast(axis), 1 * sizeof(int32_t)); - - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // Configure Output Tensor - std::vector ofm_indexes; - - for (uint32_t n = 0; n < NUM_SPLIT; ++n) - { - const auto ofm_index = 2 + n; - - interp.SetTensorParametersReadWrite(ofm_index, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - ofm_indexes.emplace_back(ofm_index); - } - - auto *param = reinterpret_cast(malloc(sizeof(TfLiteSplitParams))); - - param->num_splits = NUM_SPLIT; - - // Add SPLIT Node - // Run SPLIT and store its result into Tensor #0 - // - Read axis and IFM from Tensor #0 and #1, respectively - interp.AddNodeWithParameters({0, 1}, ofm_indexes, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_SPLIT, 1)); - - // Set Tensor #1 as Input #0, and Tensor #2 ~ #NUM_SPLIT+1 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs(ofm_indexes); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/split_1.lst b/tools/nnapi_quickcheck/tests/split_1.lst deleted file mode 100644 index 823bf24..0000000 --- a/tools/nnapi_quickcheck/tests/split_1.lst +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_N, 1) -INT_VALUE(IFM_C, 1) -INT_VALUE(IFM_H, 5) -INT_VALUE(IFM_W, 30) -INT_VALUE(NUM_SPLIT, 5) -INT_VALUE(AXIS, 1) diff --git a/tools/nnapi_quickcheck/tests/split_2.cpp b/tools/nnapi_quickcheck/tests/split_2.cpp deleted file mode 100644 index eb06ea0..0000000 --- a/tools/nnapi_quickcheck/tests/split_2.cpp +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" -#include "misc/feature/Shape.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_split_2, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "split_2.lst" -#undef INT_VALUE - - const int32_t IFM_N = IFM_N_Value(); - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - const int32_t NUM_SPLIT = NUM_SPLIT_Value(); - const int32_t AXIS = AXIS_Value(); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_N); - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_VALUE(NUM_SPLIT); - PRINT_VALUE(AXIS); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - const int32_t OFM_N = IFM_N; - const int32_t OFM_C = IFM_C; - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - const int32_t axis[1] = {AXIS}; - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(NUM_SPLIT + 2); - - // Configure Input Tensor(s) - interp.SetTensorParametersReadOnly(0, kTfLiteInt32 /* type */, "axis" /* name */, - {1} /* dims */, quantization, - reinterpret_cast(axis), 1 * sizeof(int32_t)); - - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // Configure Output Tensor - std::vector ofm_indexes; - - for (uint32_t n = 0; n < NUM_SPLIT; ++n) - { - const auto ofm_index = 2 + n; - - interp.SetTensorParametersReadWrite(ofm_index, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - ofm_indexes.emplace_back(ofm_index); - } - - auto *param = reinterpret_cast(malloc(sizeof(TfLiteSplitParams))); - - param->num_splits = NUM_SPLIT; - - // Add SPLIT Node - // Run SPLIT and store its result into Tensor #0 - // - Read axis and IFM from Tensor #0 and #1, respectively - interp.AddNodeWithParameters({0, 1}, ofm_indexes, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_SPLIT, 1)); - - // Set Tensor #1 as Input #0, and Tensor #2 ~ #NUM_SPLIT+1 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs(ofm_indexes); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/split_2.lst b/tools/nnapi_quickcheck/tests/split_2.lst deleted file mode 100644 index ebfbab2..0000000 --- a/tools/nnapi_quickcheck/tests/split_2.lst +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_N, 1) -INT_VALUE(IFM_C, 1) -INT_VALUE(IFM_H, 5) -INT_VALUE(IFM_W, 30) -INT_VALUE(NUM_SPLIT, 3) -INT_VALUE(AXIS, 2) diff --git a/tools/nnapi_quickcheck/tests/split_3.cpp b/tools/nnapi_quickcheck/tests/split_3.cpp deleted file mode 100644 index e3beb5b..0000000 --- a/tools/nnapi_quickcheck/tests/split_3.cpp +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" -#include "misc/feature/Shape.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_split_3, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "split_3.lst" -#undef INT_VALUE - - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - const int32_t NUM_SPLIT = NUM_SPLIT_Value(); - const int32_t AXIS = AXIS_Value(); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_VALUE(NUM_SPLIT); - PRINT_VALUE(AXIS); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - const int32_t axis[1] = {AXIS}; - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(NUM_SPLIT + 2); - - // Configure Input Tensor(s) - interp.SetTensorParametersReadOnly(0, kTfLiteInt32 /* type */, "axis" /* name */, - {1} /* dims */, quantization, - reinterpret_cast(axis), 1 * sizeof(int32_t)); - - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {IFM_H, IFM_W} /* dims */, quantization); - - // Configure Output Tensor - std::vector ofm_indexes; - - for (uint32_t n = 0; n < NUM_SPLIT; ++n) - { - const auto ofm_index = 2 + n; - - interp.SetTensorParametersReadWrite(ofm_index, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_H, OFM_W} /* dims */, quantization); - - ofm_indexes.emplace_back(ofm_index); - } - - auto *param = reinterpret_cast(malloc(sizeof(TfLiteSplitParams))); - - param->num_splits = NUM_SPLIT; - - // Add SPLIT Node - // Run SPLIT and store its result into Tensor #0 - // - Read axis and IFM from Tensor #0 and #1, respectively - interp.AddNodeWithParameters({0, 1}, ofm_indexes, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_SPLIT, 1)); - - // Set Tensor #1 as Input #0, and Tensor #2 ~ #NUM_SPLIT+1 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs(ofm_indexes); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/split_3.lst b/tools/nnapi_quickcheck/tests/split_3.lst deleted file mode 100644 index 300bb02..0000000 --- a/tools/nnapi_quickcheck/tests/split_3.lst +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_H, 5) -INT_VALUE(IFM_W, 30) -INT_VALUE(NUM_SPLIT, 3) -INT_VALUE(AXIS, 1) diff --git a/tools/nnapi_quickcheck/tests/split_4.cpp b/tools/nnapi_quickcheck/tests/split_4.cpp deleted file mode 100644 index e098973..0000000 --- a/tools/nnapi_quickcheck/tests/split_4.cpp +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" -#include "misc/feature/Shape.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_split_4, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "split_4.lst" -#undef INT_VALUE - - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - const int32_t NUM_SPLIT = NUM_SPLIT_Value(); - const int32_t AXIS = AXIS_Value(); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_VALUE(NUM_SPLIT); - PRINT_VALUE(AXIS); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - const int32_t axis[1] = {AXIS}; - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(NUM_SPLIT + 2); - - // Configure Input Tensor(s) - interp.SetTensorParametersReadOnly(0, kTfLiteInt32 /* type */, "axis" /* name */, - {1} /* dims */, quantization, - reinterpret_cast(axis), 1 * sizeof(int32_t)); - - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {IFM_H, IFM_W} /* dims */, quantization); - - // Configure Output Tensor - std::vector ofm_indexes; - - for (uint32_t n = 0; n < NUM_SPLIT; ++n) - { - const auto ofm_index = 2 + n; - - interp.SetTensorParametersReadWrite(ofm_index, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_H, OFM_W} /* dims */, quantization); - - ofm_indexes.emplace_back(ofm_index); - } - - auto *param = reinterpret_cast(malloc(sizeof(TfLiteSplitParams))); - - param->num_splits = NUM_SPLIT; - - // Add SPLIT Node - // Run SPLIT and store its result into Tensor #0 - // - Read axis and IFM from Tensor #0 and #1, respectively - interp.AddNodeWithParameters({0, 1}, ofm_indexes, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_SPLIT, 1)); - - // Set Tensor #1 as Input #0, and Tensor #2 ~ #NUM_SPLIT+1 as Output #0 - interp.SetInputs({1}); - interp.SetOutputs(ofm_indexes); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/split_4.lst b/tools/nnapi_quickcheck/tests/split_4.lst deleted file mode 100644 index 5b28828..0000000 --- a/tools/nnapi_quickcheck/tests/split_4.lst +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_H, 5) -INT_VALUE(IFM_W, 30) -INT_VALUE(NUM_SPLIT, 5) -INT_VALUE(AXIS, 0) diff --git a/tools/nnapi_quickcheck/tests/sub_1.cpp b/tools/nnapi_quickcheck/tests/sub_1.cpp deleted file mode 100644 index 8bc4208..0000000 --- a/tools/nnapi_quickcheck/tests/sub_1.cpp +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_sub_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "sub_1.lst" -#undef INT_VALUE - - const int32_t LEFT_N = LEFT_N_Value(); - const int32_t LEFT_C = LEFT_C_Value(); - const int32_t LEFT_H = LEFT_H_Value(); - const int32_t LEFT_W = LEFT_W_Value(); - - const int32_t RIGHT_N = RIGHT_N_Value(); - const int32_t RIGHT_C = RIGHT_C_Value(); - const int32_t RIGHT_H = RIGHT_H_Value(); - const int32_t RIGHT_W = RIGHT_W_Value(); - - const int32_t OFM_N = std::max(LEFT_N, RIGHT_N); - const int32_t OFM_C = std::max(LEFT_C, RIGHT_C); - const int32_t OFM_H = std::max(LEFT_H, RIGHT_H); - const int32_t OFM_W = std::max(LEFT_W, RIGHT_W); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LEFT_N); - PRINT_VALUE(LEFT_C); - PRINT_VALUE(LEFT_H); - PRINT_VALUE(LEFT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(RIGHT_N); - PRINT_VALUE(RIGHT_C); - PRINT_VALUE(RIGHT_H); - PRINT_VALUE(RIGHT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_N); - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure input(s) - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */, - {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization); - - interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */, - {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */, - quantization); - - // Add Subtraction Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run Sub and store the result into Tensor #0 - // - Read Left from Tensor #1 - // - Read Right from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_SUB, 1)); - - interp.SetInputs({1, 2}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/sub_1.lst b/tools/nnapi_quickcheck/tests/sub_1.lst deleted file mode 100644 index fa17cae..0000000 --- a/tools/nnapi_quickcheck/tests/sub_1.lst +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(LEFT_N, 1) -INT_VALUE(LEFT_C, 3) -INT_VALUE(LEFT_H, 16) -INT_VALUE(LEFT_W, 16) - -INT_VALUE(RIGHT_N, 1) -INT_VALUE(RIGHT_C, 3) -INT_VALUE(RIGHT_H, 16) -INT_VALUE(RIGHT_W, 16) diff --git a/tools/nnapi_quickcheck/tests/sub_2.cpp b/tools/nnapi_quickcheck/tests/sub_2.cpp deleted file mode 100644 index 423e105..0000000 --- a/tools/nnapi_quickcheck/tests/sub_2.cpp +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_sub_2, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "sub_2.lst" -#undef INT_VALUE - - const int32_t LEFT_N = LEFT_N_Value(); - const int32_t LEFT_C = LEFT_C_Value(); - const int32_t LEFT_H = LEFT_H_Value(); - const int32_t LEFT_W = LEFT_W_Value(); - - const int32_t RIGHT = RIGHT_Value(); - - const int32_t OFM_N = LEFT_N; - const int32_t OFM_C = LEFT_C; - const int32_t OFM_H = LEFT_H; - const int32_t OFM_W = LEFT_W; - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LEFT_N); - PRINT_VALUE(LEFT_C); - PRINT_VALUE(LEFT_H); - PRINT_VALUE(LEFT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(RIGHT); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_N); - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure input(s) - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */, - {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization); - - interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */, - {RIGHT} /* dims */, quantization); - - // Add Subtraction Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run Sub and store the result into Tensor #0 - // - Read Left from Tensor #1 - // - Read Right from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_SUB, 1)); - - interp.SetInputs({1, 2}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/sub_2.lst b/tools/nnapi_quickcheck/tests/sub_2.lst deleted file mode 100644 index cd36ac1..0000000 --- a/tools/nnapi_quickcheck/tests/sub_2.lst +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(LEFT_N, 1) -INT_VALUE(LEFT_C, 3) -INT_VALUE(LEFT_H, 16) -INT_VALUE(LEFT_W, 16) - -INT_VALUE(RIGHT, 1) diff --git a/tools/nnapi_quickcheck/tests/sub_3.cpp b/tools/nnapi_quickcheck/tests/sub_3.cpp deleted file mode 100644 index 7bb6ab4..0000000 --- a/tools/nnapi_quickcheck/tests/sub_3.cpp +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_sub_3, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "sub_3.lst" -#undef INT_VALUE - - const int32_t LEFT_H = LEFT_H_Value(); - const int32_t LEFT_W = LEFT_W_Value(); - - const int32_t RIGHT = RIGHT_Value(); - - const int32_t OFM_H = LEFT_H; - const int32_t OFM_W = LEFT_W; - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LEFT_H); - PRINT_VALUE(LEFT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(RIGHT); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_H, OFM_W} /* dims */, quantization); - - // Configure input(s) - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */, - {LEFT_H, LEFT_W} /* dims */, quantization); - - interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */, - {RIGHT, LEFT_W} /* dims */, quantization); - - // Add Subtraction Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run Sub and store the result into Tensor #0 - // - Read Left from Tensor #1 - // - Read Right from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_SUB, 1)); - - interp.SetInputs({1, 2}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/sub_3.lst b/tools/nnapi_quickcheck/tests/sub_3.lst deleted file mode 100644 index c568750..0000000 --- a/tools/nnapi_quickcheck/tests/sub_3.lst +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(LEFT_H, 8) -INT_VALUE(LEFT_W, 16) - -INT_VALUE(RIGHT, 1) diff --git a/tools/nnapi_quickcheck/tests/sub_4.cpp b/tools/nnapi_quickcheck/tests/sub_4.cpp deleted file mode 100644 index 7fc8577..0000000 --- a/tools/nnapi_quickcheck/tests/sub_4.cpp +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_sub_4, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "sub_1.lst" -#undef INT_VALUE - - const int32_t LEFT_C = LEFT_C_Value(); - const int32_t LEFT_H = LEFT_H_Value(); - const int32_t LEFT_W = LEFT_W_Value(); - - const int32_t RIGHT_C = RIGHT_C_Value(); - const int32_t RIGHT_H = RIGHT_H_Value(); - const int32_t RIGHT_W = RIGHT_W_Value(); - - const int32_t OFM_C = std::max(LEFT_C, RIGHT_C); - const int32_t OFM_H = std::max(LEFT_H, RIGHT_H); - const int32_t OFM_W = std::max(LEFT_W, RIGHT_W); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LEFT_C); - PRINT_VALUE(LEFT_H); - PRINT_VALUE(LEFT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(RIGHT_C); - PRINT_VALUE(RIGHT_H); - PRINT_VALUE(RIGHT_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure input(s) - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */, - {LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization); - - interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */, - {RIGHT_H, RIGHT_W, RIGHT_C} /* dims */, quantization); - - // Add Subtraction Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run Sub and store the result into Tensor #0 - // - Read Left from Tensor #1 - // - Read Right from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_SUB, 1)); - - interp.SetInputs({1, 2}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/sub_4.lst b/tools/nnapi_quickcheck/tests/sub_4.lst deleted file mode 100644 index ce6128f..0000000 --- a/tools/nnapi_quickcheck/tests/sub_4.lst +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(LEFT_C, 3) -INT_VALUE(LEFT_H, 8) -INT_VALUE(LEFT_W, 16) - -INT_VALUE(RIGHT_C, 3) -INT_VALUE(RIGHT_H, 1) -INT_VALUE(RIGHT_W, 16) diff --git a/tools/nnapi_quickcheck/tests/sub_5.cpp b/tools/nnapi_quickcheck/tests/sub_5.cpp deleted file mode 100644 index 19f95b6..0000000 --- a/tools/nnapi_quickcheck/tests/sub_5.cpp +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_sub_5, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "sub_5.lst" -#undef INT_VALUE - - const int32_t LEFT_N = LEFT_N_Value(); - const int32_t LEFT_H = LEFT_H_Value(); - const int32_t LEFT_W = LEFT_W_Value(); - const int32_t LEFT_C = LEFT_C_Value(); - - const int32_t RIGHT_N = RIGHT_N_Value(); - const int32_t RIGHT_H = RIGHT_H_Value(); - const int32_t RIGHT_W = RIGHT_W_Value(); - const int32_t RIGHT_C = RIGHT_C_Value(); - - const int32_t OFM_N = std::max(LEFT_N, RIGHT_N); - const int32_t OFM_H = std::max(LEFT_H, RIGHT_H); - const int32_t OFM_W = std::max(LEFT_W, RIGHT_W); - const int32_t OFM_C = std::max(LEFT_C, RIGHT_C); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LEFT_N); - PRINT_VALUE(LEFT_H); - PRINT_VALUE(LEFT_W); - PRINT_VALUE(LEFT_C); - PRINT_NEWLINE(); - - PRINT_VALUE(RIGHT_N); - PRINT_VALUE(RIGHT_H); - PRINT_VALUE(RIGHT_W); - PRINT_VALUE(RIGHT_C); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_N); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); - PRINT_VALUE(OFM_C); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - // Configure left data - const uint32_t left_size = LEFT_N * LEFT_C * LEFT_H * LEFT_W; - const uint32_t right_size = RIGHT_N * RIGHT_C * RIGHT_H * RIGHT_W; - float left_data[left_size] = { - 0.0f, - }; - float right_data[right_size] = { - 0.0f, - }; - - // Fill left data with random data - { - std::normal_distribution left_dist(-1.0f, +1.0f); - float value = 10.0f; - for (uint32_t off = 0; off < left_size; ++off) - { - left_data[off] = value; - } - value = 1.0f; - for (uint32_t off = 0; off < right_size; ++off) - { - right_data[off] = value++; - } - } - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure input(s) - interp.SetTensorParametersReadOnly(1, kTfLiteFloat32 /* type */, "left" /* name */, - {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization, - reinterpret_cast(left_data), - left_size * sizeof(float)); - - // Configure input(s) - interp.SetTensorParametersReadOnly(2, kTfLiteFloat32 /* type */, "right" /* name */, - {RIGHT_W, RIGHT_C} /* dims: test with other shapes */, - quantization, reinterpret_cast(right_data), - right_size * sizeof(float)); - - // Add Subtraction Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run Sub and store the result into Tensor #0 - // - Read Left from Tensor #1 - // - Read Right from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_SUB, 1)); - - interp.SetInputs({}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/sub_5.lst b/tools/nnapi_quickcheck/tests/sub_5.lst deleted file mode 100644 index 0327e6b..0000000 --- a/tools/nnapi_quickcheck/tests/sub_5.lst +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(LEFT_N, 1) -INT_VALUE(LEFT_H, 2) -INT_VALUE(LEFT_W, 3) -INT_VALUE(LEFT_C, 4) - -INT_VALUE(RIGHT_N, 1) -INT_VALUE(RIGHT_H, 1) -INT_VALUE(RIGHT_W, 3) -INT_VALUE(RIGHT_C, 4) diff --git a/tools/nnapi_quickcheck/tests/sub_6.cpp b/tools/nnapi_quickcheck/tests/sub_6.cpp deleted file mode 100644 index 66b167e..0000000 --- a/tools/nnapi_quickcheck/tests/sub_6.cpp +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_sub_6, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "sub_6.lst" -#undef INT_VALUE - - const int32_t LEFT_N = LEFT_N_Value(); - const int32_t LEFT_H = LEFT_H_Value(); - const int32_t LEFT_W = LEFT_W_Value(); - const int32_t LEFT_C = LEFT_C_Value(); - - const int32_t RIGHT_N = RIGHT_N_Value(); - const int32_t RIGHT_H = RIGHT_H_Value(); - const int32_t RIGHT_W = RIGHT_W_Value(); - const int32_t RIGHT_C = RIGHT_C_Value(); - - const int32_t OFM_N = std::max(LEFT_N, RIGHT_N); - const int32_t OFM_H = std::max(LEFT_H, RIGHT_H); - const int32_t OFM_W = std::max(LEFT_W, RIGHT_W); - const int32_t OFM_C = std::max(LEFT_C, RIGHT_C); - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(LEFT_N); - PRINT_VALUE(LEFT_H); - PRINT_VALUE(LEFT_W); - PRINT_VALUE(LEFT_C); - PRINT_NEWLINE(); - - PRINT_VALUE(RIGHT_N); - PRINT_VALUE(RIGHT_H); - PRINT_VALUE(RIGHT_W); - PRINT_VALUE(RIGHT_C); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_N); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); - PRINT_VALUE(OFM_C); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - // Configure left data - const uint32_t left_size = LEFT_N * LEFT_C * LEFT_H * LEFT_W; - const uint32_t right_size = RIGHT_N * RIGHT_C * RIGHT_H * RIGHT_W; - float left_data[left_size] = { - 0.0f, - }; - float right_data[right_size] = { - 0.0f, - }; - - // Fill left data with random data - { - std::normal_distribution left_dist(-1.0f, +1.0f); - float value = 10.0f; - for (uint32_t off = 0; off < left_size; ++off) - { - left_data[off] = value; - } - value = 1.0f; - for (uint32_t off = 0; off < right_size; ++off) - { - right_data[off] = value++; - } - } - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? 
- TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(3); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure input(s) - interp.SetTensorParametersReadOnly(1, kTfLiteFloat32 /* type */, "left" /* name */, - {LEFT_W, LEFT_C} /* dims: test with other shapes */, - quantization, reinterpret_cast(left_data), - left_size * sizeof(float)); - - // Configure input(s) - interp.SetTensorParametersReadOnly(2, kTfLiteFloat32 /* type */, "right" /* name */, - {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */, - quantization, reinterpret_cast(right_data), - right_size * sizeof(float)); - - // Add Subtraction Node - // - // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free - // So, param should be allocated with malloc - auto param = make_alloc(); - - param->activation = kTfLiteActNone; - - // Run Sub and store the result into Tensor #0 - // - Read Left from Tensor #1 - // - Read Right from Tensor #2, - interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast(param), - BuiltinOpResolver().FindOp(BuiltinOperator_SUB, 1)); - - interp.SetInputs({}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/sub_6.lst b/tools/nnapi_quickcheck/tests/sub_6.lst deleted file mode 100644 index 52a1f1a..0000000 --- a/tools/nnapi_quickcheck/tests/sub_6.lst +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(LEFT_N, 1) -INT_VALUE(LEFT_H, 1) -INT_VALUE(LEFT_W, 3) -INT_VALUE(LEFT_C, 4) - -INT_VALUE(RIGHT_N, 1) -INT_VALUE(RIGHT_H, 2) -INT_VALUE(RIGHT_W, 3) -INT_VALUE(RIGHT_C, 4) diff --git a/tools/nnapi_quickcheck/tests/tanh_1.cpp b/tools/nnapi_quickcheck/tests/tanh_1.cpp deleted file mode 100644 index 7dd9261..0000000 --- a/tools/nnapi_quickcheck/tests/tanh_1.cpp +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_tanh_1, simple_test) -{ - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "tanh_1.lst" -#undef INT_VALUE - - const int32_t IFM_N = IFM_N_Value(); - const int32_t IFM_C = IFM_C_Value(); - const int32_t IFM_H = IFM_H_Value(); - const int32_t IFM_W = IFM_W_Value(); - - const int32_t OFM_N = IFM_N; - const int32_t OFM_C = IFM_C; - const int32_t OFM_H = IFM_H; - const int32_t OFM_W = IFM_W; - - // Initialize random number generator - std::minstd_rand random(SEED); - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(IFM_N); - PRINT_VALUE(IFM_C); - PRINT_VALUE(IFM_H); - PRINT_VALUE(IFM_W); - PRINT_NEWLINE(); - - PRINT_VALUE(OFM_N); - PRINT_VALUE(OFM_C); - PRINT_VALUE(OFM_H); - PRINT_VALUE(OFM_W); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. 
Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - TfLiteQuantizationParams quantization = make_default_quantization(); - - // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N) - interp.AddTensors(2); - - // Configure output - interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */, - {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization); - - // Configure input - interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */, - {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization); - - // Add Tanh Node - // Run Tanh and store the result into Tensor #0 - // - Read input from Tensor #1 - interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr, - BuiltinOpResolver().FindOp(BuiltinOperator_TANH, 1)); - - interp.SetInputs({1}); - interp.SetOutputs({0}); - }; - - const nnfw::tflite::FunctionBuilder builder(setup); - - RandomTestParam param; - - param.verbose = verbose; - param.tolerance = tolerance; - - int res = RandomTestRunner{SEED, param}.run(builder); - - EXPECT_EQ(res, 0); -} diff --git a/tools/nnapi_quickcheck/tests/tanh_1.lst b/tools/nnapi_quickcheck/tests/tanh_1.lst deleted file mode 100644 index a0077cb..0000000 --- a/tools/nnapi_quickcheck/tests/tanh_1.lst +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef INT_VALUE -#error "INT_VALUE should be defined" -#endif // INT_VALUE - -INT_VALUE(IFM_N, 1) -INT_VALUE(IFM_C, 3) -INT_VALUE(IFM_H, 320) -INT_VALUE(IFM_W, 320) diff --git a/tools/nnapi_quickcheck/tests/topk_v2_1.cpp b/tools/nnapi_quickcheck/tests/topk_v2_1.cpp deleted file mode 100644 index c47af57..0000000 --- a/tools/nnapi_quickcheck/tests/topk_v2_1.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "gtest/gtest.h" - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/builtin_op_data.h" - -#include "env.h" -#include "memory.h" -#include "misc/environment.h" - -#include "tflite/Diff.h" -#include "tflite/Quantization.h" -#include "tflite/interp/FunctionBuilder.h" - -#include -#include - -using namespace tflite; -using namespace nnfw::tflite; - -TEST(NNAPI_Quickcheck_topk_v2_1, simple_test) -{ - // Set random seed - int SEED = std::chrono::system_clock::now().time_since_epoch().count(); - - nnfw::misc::env::IntAccessor("SEED").access(SEED); - - // Set random test parameters - int verbose = 0; - int tolerance = 1; - - nnfw::misc::env::IntAccessor("VERBOSE").access(verbose); - nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance); - -#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE); -#include "topk_v2_1.lst" -#undef INT_VALUE - - const int32_t INPUT_DATA = INPUT_DATA_Value(); - const int32_t K = K_Value(); - - const int32_t OUTPUT_VALUES = K; - const int32_t OUTPUT_INDICES = K; - - std::cout << "Configurations:" << std::endl; -#define PRINT_NEWLINE() \ - { \ - std::cout << std::endl; \ - } -#define PRINT_VALUE(value) \ - { \ - std::cout << " " << #value << ": " << (value) << std::endl; \ - } - PRINT_VALUE(SEED); - PRINT_NEWLINE(); - - PRINT_VALUE(INPUT_DATA); - PRINT_VALUE(K); - PRINT_NEWLINE(); - - PRINT_VALUE(OUTPUT_VALUES); - PRINT_VALUE(OUTPUT_INDICES); -#undef PRINT_VALUE -#undef PRINT_NEWLINE - - // Fill the K data - int32_t k_data[1] = {K}; - - auto setup = [&](Interpreter &interp) { - // Comment from 'context.h' - // - // Parameters for asymmetric quantization. Quantized values can be converted - // back to float using: - // real_value = scale * (quantized_value - zero_point); - // - // Q: Is this necessary? - // A: This may be necessary, because quantization values(scale, zero_point) of TENSOR_INT32 and - // TENSOR_QUANT8_ASYMM are passed on to the runtime. 
-    TfLiteQuantizationParams quantization = make_default_quantization();
-
-    // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
-    interp.AddTensors(4);
-
-    // Configure INPUT_DATA
-    interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "input" /* name */,
-                                        {INPUT_DATA} /* dims */, quantization);
-
-    // Configure K
-    interp.SetTensorParametersReadOnly(1, kTfLiteInt32 /* type */, "k" /* name */, {1} /* dims */,
-                                       quantization, reinterpret_cast(k_data),
-                                       sizeof(k_data));
-
-    // Configure OUTPUT_VALUES
-    interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "output_values" /* name */,
-                                        {OUTPUT_VALUES} /* dims */, quantization);
-
-    // Configure OUTPUT_INDICES
-    interp.SetTensorParametersReadWrite(3, kTfLiteInt32 /* type */, "output_indices" /* name */,
-                                        {OUTPUT_INDICES} /* dims */, quantization);
-
-    // Add TopK_V2 Node
-    // Run TopK_V2 and store its result into Tensor #2 and #3
-    //  - Read input data and K from Tensor #0 and #1, respectively
-    interp.AddNodeWithParameters({0, 1}, {2, 3}, nullptr, 0, nullptr,
-                                 BuiltinOpResolver().FindOp(BuiltinOperator_TOPK_V2, 1));
-
-    // Set Tensor #0 as Input, and Tensor #2 and #3 as Output
-    interp.SetInputs({0});
-    interp.SetOutputs({2, 3});
-  };
-
-  const nnfw::tflite::FunctionBuilder builder(setup);
-
-  RandomTestParam param;
-
-  param.verbose = verbose;
-  param.tolerance = tolerance;
-
-  int res = RandomTestRunner{SEED, param}.run(builder);
-
-  EXPECT_EQ(res, 0);
-}
diff --git a/tools/nnapi_quickcheck/tests/topk_v2_1.lst b/tools/nnapi_quickcheck/tests/topk_v2_1.lst
deleted file mode 100644
index a40ee3c..0000000
--- a/tools/nnapi_quickcheck/tests/topk_v2_1.lst
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(INPUT_DATA, 8192)
-INT_VALUE(K, 16)
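The deleted .lst files are consumed through an X-macro: each quickcheck test defines INT_VALUE(NAME, VALUE) to declare an IntVar named NAME_Value and then #includes the .lst file, so a single list drives both the declarations and the PRINT_VALUE output. Below is a rough, self-contained sketch of that pattern with the two topk_v2_1.lst entries expanded by hand; the simplified IntVar class is an assumption (the real helper in nnapi_quickcheck also supports overriding the default, e.g. from the environment).

    #include <cstdint>
    #include <iostream>
    #include <string>

    // Simplified stand-in for the nnapi_quickcheck IntVar helper (assumed shape).
    class IntVar
    {
    public:
      IntVar(const std::string &name, int32_t value) : _name(name), _value(value) {}
      int32_t operator()(void) const { return _value; }

    private:
      std::string _name;
      int32_t _value;
    };

    // In the tests this block is '#include "topk_v2_1.lst"'; the two entries of
    // that deleted file are expanded by hand here.
    #define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
    INT_VALUE(INPUT_DATA, 8192)
    INT_VALUE(K, 16)
    #undef INT_VALUE

    int main()
    {
      std::cout << "INPUT_DATA: " << INPUT_DATA_Value() << std::endl; // 8192
      std::cout << "K: " << K_Value() << std::endl;                   // 16
      return 0;
    }
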
diff --git a/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh b/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
index 26f6c70..f4e223a 100755
--- a/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
+++ b/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
@@ -5,6 +5,8 @@ set -eu
 progname=$(basename "${BASH_SOURCE[0]}")
 outdir="."
 name=""
+config=""
+config_src=""
 
 usage() {
   echo "Usage: $progname [options] modelfile"
@@ -14,11 +16,13 @@ usage() {
   echo "    -h   show this help"
   echo "    -o   set nnpackage output directory (default=$outdir)"
   echo "    -p   set nnpackage output name (default=[modelfile name])"
+  echo "    -c   provide configuration file"
   echo ""
   echo "Examples:"
   echo "    $progname add.tflite                  => create nnpackage 'add' in $outdir/"
   echo "    $progname -o out add.tflite           => create nnpackage 'add' in out/"
   echo "    $progname -o out -p addpkg add.tflite => create nnpackage 'addpkg' in out/"
+  echo "    $progname -c add.cfg add.tflite       => create nnpackage 'add' with add.cfg"
   exit 1
 }
 
@@ -27,11 +31,12 @@ if [ $# -eq 0 ]; then
   exit 1
 fi
 
-while getopts "ho:p:" OPTION; do
+while getopts "ho:p:c:" OPTION; do
   case "${OPTION}" in
     h) usage;;
     o) outdir=$OPTARG;;
     p) name=$OPTARG;;
+    c) config_src=$OPTARG;;
     ?) exit 1;;
   esac
 done
@@ -64,11 +69,18 @@ extension=${modelfile##*.}
 
 echo "Generating nnpackage "$name" in "$outdir""
 mkdir -p "$outdir"/"$name"/metadata
+
+if [ -s "$config_src" ]; then
+  config=$(basename "$config_src")
+  cp "$config_src" "$outdir/$name/metadata/$config"
+fi
+
 cat > "$outdir"/"$name"/metadata/MANIFEST <<-EOF
 {
   "major-version" : "1",
-  "minor-version" : "0",
+  "minor-version" : "1",
   "patch-version" : "0",
+  "configs" : [ "$config" ],
   "models" : [ "$modelfile" ],
   "model-types" : [ "$extension" ]
 }
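With the new -c option, model2nnpkg.sh copies the given configuration file into the nnpackage's metadata directory and lists it under "configs" in the MANIFEST, whose minor-version is bumped to 1. As a rough illustration only (not captured output; the exact "models" entry depends on how the script derives $modelfile), a call such as

    $ model2nnpkg.sh -c add.cfg add.tflite

would be expected to leave add/metadata/add.cfg in place and generate a MANIFEST along these lines:

    {
      "major-version" : "1",
      "minor-version" : "1",
      "patch-version" : "0",
      "configs" : [ "add.cfg" ],
      "models" : [ "add.tflite" ],
      "model-types" : [ "tflite" ]
    }
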
diff --git a/tools/tflite_accuracy/src/tflite_accuracy.cc b/tools/tflite_accuracy/src/tflite_accuracy.cc
index a532890..66c19a8 100644
--- a/tools/tflite_accuracy/src/tflite_accuracy.cc
+++ b/tools/tflite_accuracy/src/tflite_accuracy.cc
@@ -60,7 +60,7 @@ template void Print(const char *fmt, Args... args)
 template struct BaseLabelData
 {
   explicit BaseLabelData(int label = -1, DataType confidence = 0)
-      : label(label), confidence(confidence)
+    : label(label), confidence(confidence)
   {
   }
@@ -116,8 +116,8 @@ public:
   Runner(std::unique_ptr interpreter, std::unique_ptr model,
          std::unique_ptr<::nnfw::tflite::NNAPIDelegate> delegate, unsigned img_size)
-      : interpreter(std::move(interpreter)), model(std::move(model)), delegate(std::move(delegate)),
-        interrupted(false), kInputSize(1 * img_size * img_size * 3 * sizeof(DataType))
+    : interpreter(std::move(interpreter)), model(std::move(model)), delegate(std::move(delegate)),
+      interrupted(false), kInputSize(1 * img_size * img_size * 3 * sizeof(DataType))
   {
     inference_times.reserve(500);
     top1.reserve(500);
@@ -308,7 +308,7 @@ public:
   FloatRunner(std::unique_ptr interpreter, std::unique_ptr model,
               std::unique_ptr<::nnfw::tflite::NNAPIDelegate> delegate, unsigned img_size)
-      : Runner(std::move(interpreter), std::move(model), std::move(delegate), img_size)
+    : Runner(std::move(interpreter), std::move(model), std::move(delegate), img_size)
   {
   }
@@ -333,7 +333,7 @@ public:
   QuantizedRunner(std::unique_ptr interpreter, std::unique_ptr model,
                   std::unique_ptr<::nnfw::tflite::NNAPIDelegate> delegate, unsigned img_size)
-      : Runner(std::move(interpreter), std::move(model), std::move(delegate), img_size)
+    : Runner(std::move(interpreter), std::move(model), std::move(delegate), img_size)
   {
   }
@@ -411,12 +411,12 @@ std::unique_ptr MakeRunner(const std::string &model_path, unsigned i
   if (interpreter->tensor(input_index)->type == kTfLiteFloat32)
   {
     return std::unique_ptr(
-        new FloatRunner(std::move(interpreter), std::move(model), std::move(delegate), img_size));
+      new FloatRunner(std::move(interpreter), std::move(model), std::move(delegate), img_size));
   }
   else if (interpreter->tensor(input_index)->type == kTfLiteUInt8)
   {
-    return std::unique_ptr(new QuantizedRunner(
-        std::move(interpreter), std::move(model), std::move(delegate), img_size));
+    return std::unique_ptr(
+      new QuantizedRunner(std::move(interpreter), std::move(model), std::move(delegate), img_size));
   }
   throw std::invalid_argument("data type of model's input tensor is not supported.");
 }
@@ -424,13 +424,13 @@ std::unique_ptr MakeRunner(const std::string &model_path, unsigned i
 Target GetTarget(const std::string &str)
 {
   static const std::map target_names{
-      {"tflite-cpu", Target::TfLiteCpu},
-      {"tflite-delegate", Target::TfLiteDelegate},
-      {"nnfw-delegate", Target::NnfwDelegate}};
+    {"tflite-cpu", Target::TfLiteCpu},
+    {"tflite-delegate", Target::TfLiteDelegate},
+    {"nnfw-delegate", Target::NnfwDelegate}};
   if (target_names.find(str) == target_names.end())
   {
     throw std::invalid_argument(
-        str + ": invalid target. Run with --help for a list of available targets.");
+      str + ": invalid target. Run with --help for a list of available targets.");
   }
   return target_names.at(str);
 }
@@ -451,19 +451,22 @@ void HandleSigInt(int)
   }
 }
 
-int main(int argc, char *argv[]) try
+int main(int argc, char *argv[])
+try
 {
   namespace po = boost::program_options;
   po::options_description desc("Run a model on multiple binary images and print"
                                " statistics");
-  desc.add_options()("help", "print this message and quit")(
-      "model", po::value()->default_value(kDefaultModelFile), "tflite file")(
-      "input", po::value()->default_value(kDefaultImagesDir),
-      "directory with input images")("offset", po::value()->default_value(1), "labels offset")(
-      "target", po::value()->default_value("nnfw-delegate"),
-      "how the model will be run (available targets: tflite-cpu, "
-      "tflite-delegate, nnfw-delegate)")("imgsize", po::value()->default_value(224),
-                                         "the width and height of the image");
+  // clang-format off
+  desc.add_options()
+    ("help", "print this message and quit")
+    ("model", po::value()->default_value(kDefaultModelFile), "tflite file")
+    ("input", po::value()->default_value(kDefaultImagesDir), "directory with input images")
+    ("offset", po::value()->default_value(1), "labels offset")
+    ("target", po::value()->default_value("nnfw-delegate"),
+     "how the model will be run (available targets: tflite-cpu, tflite-delegate, nnfw-delegate)")
+    ("imgsize", po::value()->default_value(224), "the width and height of the image");
+  // clang-format on
   po::variables_map vm;
   po::store(po::parse_command_line(argc, argv, desc), vm);
   if (vm.count("help"))
diff --git a/tools/tflitefile_tool/model_parser.py b/tools/tflitefile_tool/model_parser.py
index cd66bf5..ed534c1 100755
--- a/tools/tflitefile_tool/model_parser.py
+++ b/tools/tflitefile_tool/model_parser.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 #