BraceWrapping:
AfterClass: true
AfterControlStatement: true
- AfterEnum: false
+ AfterEnum: true
AfterFunction: true
- AfterNamespace: false
+ AfterNamespace: true
AfterObjCDeclaration: false
AfterStruct: true
- AfterUnion: true
+ AfterUnion: false
BeforeCatch: true
BeforeElse: true
IndentBraces: false
-BreakBeforeBraces: Allman
+BreakBeforeBraces: Custom
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakAfterJavaFieldAnnotations: false
--- /dev/null
+Language: Cpp
+BasedOnStyle: Google
+AccessModifierOffset: -2
+AlignAfterOpenBracket: Align
+AlignEscapedNewlinesLeft: true
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignOperands: true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: false
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+ AfterClass: true
+ AfterControlStatement: true
+ AfterEnum: true
+ AfterFunction: true
+ AfterNamespace: true
+ AfterObjCDeclaration: false
+ AfterStruct: true
+ AfterUnion: false
+ AfterExternBlock: false
+ BeforeCatch: true
+ BeforeElse: true
+ IndentBraces: false
+BreakBeforeBraces: Custom
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: true
+ColumnLimit: 100
+CommentPragmas: '^ IWYU pragma:'
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 2
+ContinuationIndentWidth: 2
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: false
+IncludeCategories:
+ - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
+ Priority: 2
+ - Regex: '^(<|"(gtest|isl|json)/)'
+ Priority: 3
+ - Regex: '.*'
+ Priority: 1
+IndentCaseLabels: true
+IndentWidth: 2
+IndentWrappedFunctionNames: false
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: true
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 60
+PointerAlignment: Right
+ReflowComments: true
+SortIncludes: false
+SortUsingDeclarations: false
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp11
+TabWidth: 2
+UseTab: Never
if infoname == "bcqinfo_dequant_weight":
has_dequant_weight = True
- # Ideal situation is that the user nodes of BCQ applicable constant nodes
- # are BCQ applicable operations such as MatMul, GatherV2, etc.
- # However, operations which do not change original values such as
- # Ideneity or Transpose can exist between them. In view of TensorFlow Lite,
- # real user nodes of BCQ applicable constant nodes must be found first.
- # This work is done by BFS search with queue.
-
- prefix_node_dict = {} # key : prefix / value : list of candidates
- matmul_node_prefix_dict = {} # key : Name of MatMul node / value : prefix
-
- queue_prefix = list(prefix_set)
- queue_nodename = [queue_prefix[idx] + ":0" for idx in range(len(queue_prefix))]
-
- while len(queue_prefix) > 0:
- prefix = queue_prefix.pop(0)
- nodename = queue_nodename.pop(0)
- if prefix not in prefix_node_dict.keys():
- prefix_node_dict[prefix] = []
-
- # Usually, output name of op is like "outputname:0"
- # -2 is for removing ":0"
- for op in ops:
- if op.type == "MatMul" and (op.inputs[0].name == nodename
- or op.inputs[1].name == nodename):
- prefix_node_dict[prefix].append(op.outputs[0].name[:-2])
- matmul_node_prefix_dict[op.outputs[0].name[:-2]] = prefix
- elif op.type == "Einsum" and (op.inputs[0].name == nodename
- or op.inputs[1].name == nodename):
- prefix_node_dict[prefix].append(op.outputs[0].name[:-2])
- elif op.type == "GatherV2" and op.inputs[0].name == nodename:
- prefix_node_dict[prefix].append(op.outputs[0].name[:-2])
- elif len(op.outputs) == 1:
- for i in range(len(op.inputs)):
- if op.inputs[i].name == nodename:
- queue_prefix.append(prefix)
- queue_nodename.append(op.outputs[0].name)
- break
-
- # When TensorFlow model is converted to TensorFlow Lite model,
- # more than one operation can be fused as one.
- # For example, MatMul + BiasAdd + ReLU in TensorFlow can be fused as
- # one FullyConnected in TensorFlow Lite.
- # It means that even real user nodes of BCQ applicable constant nodes
- # in TensorFlow are found, they may be real user nodes in TensorFlow Lite.
- # Therefore additional candidates of real user nodes should be found either.
- # Finding additional candidates is done by BFS search with queue.
-
- fuseop_prefix_dict = {} # key : Candidate operation / Value : prefix
-
- # These ops can be candidate. However other candidates may exists after these ops.
- mark_type = ["Add", "AddV2", "BiasAdd", "Reshape", "Transpose"]
-
- # These ops can be candidate. And no more candidates will be found after these ops.
- mark_and_stop_type = ["Relu", "Relu6", "Tanh"]
-
- # These ops cannot be candidates but other candidates may exists after these ops.
- # NOTE : Some of following ops may be removed from the list but not sure for now.
- pass_type = [
- "BatchToSpaceND", "Cast", "DepthToSpace", "ExpandDims", "ResizeBilinear",
- "ResizeNearestNeighbor", "ScatterNd", "SpaceToBatchND", "SpaceToDepth", "Squeeze",
- "Identity", "Pack", "Unpack", "Stack"
- ]
-
- queue_prefix = list(matmul_node_prefix_dict.values())
- queue_nodename = [matmul + ":0" for matmul in matmul_node_prefix_dict.keys()]
-
- visited_nodes = set(queue_nodename)
- while len(queue_prefix) > 0:
- prefix = queue_prefix.pop(0)
- nodename = queue_nodename.pop(0)
-
- # Usually, output name of op is like "outputname:0"
- # -2 is for removing ":0"
- for op in ops:
- for i in range(len(op.inputs)):
- if nodename == op.inputs[i].name:
- if op.type in mark_type:
- if op.outputs[0].name[:-2] not in fuseop_prefix_dict.keys():
- fuseop_prefix_dict[op.outputs[0].name[:-2]] = set()
- fuseop_prefix_dict[op.outputs[0].name[:-2]].add(prefix)
- if op.outputs[0].name not in visited_nodes:
- queue_prefix.append(prefix)
- queue_nodename.append(op.outputs[0].name)
- visited_nodes.add(op.outputs[0].name)
- elif op.type in mark_and_stop_type:
- if op.outputs[0].name[:-2] not in fuseop_prefix_dict.keys():
- fuseop_prefix_dict[op.outputs[0].name[:-2]] = set()
- fuseop_prefix_dict[op.outputs[0].name[:-2]].add(prefix)
- elif op.type in pass_type and op.outputs[0].name not in visited_nodes:
- queue_prefix.append(prefix)
- queue_nodename.append(op.outputs[0].name)
- visited_nodes.add(op.outputs[0].name)
-
# Write the name of metadata node
with open(flags.metadata_path, 'w') as f_metadata:
f_metadata.write("one_compiler/bcqinfo_one_metadata,")
- # Write all pairs of candidate operations and related BCQ information nodes.
+ # Write all pairs of a constant node and related BCQ information nodes.
with open(flags.output_arrays_path, 'w') as f_arrays:
for prefix in prefix_set:
- for fusable_op in prefix_node_dict[prefix]:
- f_arrays.write("," + prefix + "/bcqinfo_do_w_x")
- f_arrays.write("," + prefix + "/bcqinfo_alpha")
- f_arrays.write("," + prefix + "/bcqinfo_packed_binary_code")
- f_arrays.write("," + prefix + "/bcqinfo_number_of_clusters")
- f_arrays.write("," + prefix + "/bcqinfo_size_of_clusters")
- f_arrays.write("," + prefix + "/bcqinfo_qbits_of_clusters")
- f_arrays.write("," + fusable_op)
- if has_dequant_weight:
- f_arrays.write("," + prefix + "/bcqinfo_dequant_weight")
- for fuseop in fuseop_prefix_dict.keys():
- if len(fuseop_prefix_dict[fuseop]) == 1:
- prefix = fuseop_prefix_dict[fuseop].pop()
- f_arrays.write("," + prefix + "/bcqinfo_do_w_x")
- f_arrays.write("," + prefix + "/bcqinfo_alpha")
- f_arrays.write("," + prefix + "/bcqinfo_packed_binary_code")
- f_arrays.write("," + prefix + "/bcqinfo_number_of_clusters")
- f_arrays.write("," + prefix + "/bcqinfo_size_of_clusters")
- f_arrays.write("," + prefix + "/bcqinfo_qbits_of_clusters")
- f_arrays.write("," + fuseop)
- if has_dequant_weight:
- f_arrays.write("," + prefix + "/bcqinfo_dequant_weight")
+ f_arrays.write("," + prefix + "/bcqinfo_do_w_x")
+ f_arrays.write("," + prefix + "/bcqinfo_alpha")
+ f_arrays.write("," + prefix + "/bcqinfo_packed_binary_code")
+ f_arrays.write("," + prefix + "/bcqinfo_number_of_clusters")
+ f_arrays.write("," + prefix + "/bcqinfo_size_of_clusters")
+ f_arrays.write("," + prefix + "/bcqinfo_qbits_of_clusters")
+ f_arrays.write("," + prefix)
+ if has_dequant_weight:
+ f_arrays.write("," + prefix + "/bcqinfo_dequant_weight")
def print_bcq_output_arrays(flags):
if infoname == "bcqinfo_dequant_weight":
has_dequant_weight = True
- # Ideal situation is that the user nodes of BCQ applicable constant nodes
- # are BCQ applicable operations such as MatMul, GatherV2, etc.
- # However, operations which do not change original values such as
- # Ideneity or Transpose can exist between them. In view of TensorFlow Lite,
- # real user nodes of BCQ applicable constant nodes must be found first.
- # This work is done by BFS search with queue.
-
- prefix_node_dict = {} # key : prefix / value : list of candidates
- matmul_node_prefix_dict = {} # key : Name of MatMul node / value : prefix
-
- queue_prefix = list(prefix_set)
- queue_nodename = [queue_prefix[idx] + ":0" for idx in range(len(queue_prefix))]
-
- while len(queue_prefix) > 0:
- prefix = queue_prefix.pop(0)
- nodename = queue_nodename.pop(0)
- if prefix not in prefix_node_dict.keys():
- prefix_node_dict[prefix] = []
-
- # Usually, output name of op is like "outputname:0"
- # -2 is for removing ":0"
- for op in ops:
- if op.type == "MatMul" and (op.inputs[0].name == nodename
- or op.inputs[1].name == nodename):
- prefix_node_dict[prefix].append(op.outputs[0].name[:-2])
- matmul_node_prefix_dict[op.outputs[0].name[:-2]] = prefix
- elif op.type == "Einsum" and (op.inputs[0].name == nodename
- or op.inputs[1].name == nodename):
- prefix_node_dict[prefix].append(op.outputs[0].name[:-2])
- elif op.type == "GatherV2" and op.inputs[0].name == nodename:
- prefix_node_dict[prefix].append(op.outputs[0].name[:-2])
- elif len(op.outputs) == 1:
- for i in range(len(op.inputs)):
- if op.inputs[i].name == nodename:
- queue_prefix.append(prefix)
- queue_nodename.append(op.outputs[0].name)
- break
-
- # When TensorFlow model is converted to TensorFlow Lite model,
- # more than one operation can be fused as one.
- # For example, MatMul + BiasAdd + ReLU in TensorFlow can be fused as
- # one FullyConnected in TensorFlow Lite.
- # It means that even real user nodes of BCQ applicable constant nodes
- # in TensorFlow are found, they may be real user nodes in TensorFlow Lite.
- # Therefore additional candidates of real user nodes should be found either.
- # Finding additional candidates is done by BFS search with queue.
-
- fuseop_prefix_dict = {} # key : Candidate operation / Value : prefix
-
- # These ops can be candidate. However other candidates may exists after these ops.
- mark_type = ["Add", "AddV2", "BiasAdd", "Reshape", "Transpose"]
-
- # These ops can be candidate. And no more candidates will be found after these ops.
- mark_and_stop_type = ["Relu", "Relu6", "Tanh"]
-
- # These ops cannot be candidates but other candidates may exists after these ops.
- # NOTE : Some of following ops may be removed from the list but not sure for now.
- pass_type = [
- "BatchToSpaceND", "Cast", "DepthToSpace", "ExpandDims", "ResizeBilinear",
- "ResizeNearestNeighbor", "ScatterNd", "SpaceToBatchND", "SpaceToDepth", "Squeeze",
- "Identity", "Pack", "Unpack", "Stack"
- ]
-
- queue_prefix = list(matmul_node_prefix_dict.values())
- queue_nodename = [matmul + ":0" for matmul in matmul_node_prefix_dict.keys()]
-
- visited_nodes = set(queue_nodename)
- while len(queue_prefix) > 0:
- prefix = queue_prefix.pop(0)
- nodename = queue_nodename.pop(0)
-
- # Usually, output name of op is like "outputname:0"
- # -2 is for removing ":0"
- for op in ops:
- for i in range(len(op.inputs)):
- if nodename == op.inputs[i].name:
- if op.type in mark_type:
- if op.outputs[0].name[:-2] not in fuseop_prefix_dict.keys():
- fuseop_prefix_dict[op.outputs[0].name[:-2]] = set()
- fuseop_prefix_dict[op.outputs[0].name[:-2]].add(prefix)
- if op.outputs[0].name not in visited_nodes:
- queue_prefix.append(prefix)
- queue_nodename.append(op.outputs[0].name)
- visited_nodes.add(op.outputs[0].name)
- elif op.type in mark_and_stop_type:
- if op.outputs[0].name[:-2] not in fuseop_prefix_dict.keys():
- fuseop_prefix_dict[op.outputs[0].name[:-2]] = set()
- fuseop_prefix_dict[op.outputs[0].name[:-2]].add(prefix)
- elif op.type in pass_type and op.outputs[0].name not in visited_nodes:
- queue_prefix.append(prefix)
- queue_nodename.append(op.outputs[0].name)
- visited_nodes.add(op.outputs[0].name)
-
# the name of metadata node
ret_output_arrays = ['one_compiler/bcqinfo_one_metadata']
# given node from user
- ret_output_arrays.append(output_arrays)
+ ret_output_arrays += output_arrays.split(',')
- # all pairs of candidate operations and related BCQ information nodes
+ # all pairs of a constant node and related BCQ information nodes.
for prefix in prefix_set:
- for fusable_op in prefix_node_dict[prefix]:
- ret_output_arrays.append(prefix + '/bcqinfo_do_w_x')
- ret_output_arrays.append(prefix + '/bcqinfo_alpha')
- ret_output_arrays.append(prefix + '/bcqinfo_packed_binary_code')
- ret_output_arrays.append(prefix + '/bcqinfo_number_of_clusters')
- ret_output_arrays.append(prefix + '/bcqinfo_size_of_clusters')
- ret_output_arrays.append(prefix + '/bcqinfo_qbits_of_clusters')
- ret_output_arrays.append(fusable_op)
- if has_dequant_weight:
- ret_output_arrays.append(prefix + '/bcqinfo_dequant_weight')
- for fuseop in fuseop_prefix_dict.keys():
- if len(fuseop_prefix_dict[fuseop]) == 1:
- prefix = fuseop_prefix_dict[fuseop].pop()
- ret_output_arrays.append(prefix + '/bcqinfo_do_w_x')
- ret_output_arrays.append(prefix + '/bcqinfo_alpha')
- ret_output_arrays.append(prefix + '/bcqinfo_packed_binary_code')
- ret_output_arrays.append(prefix + '/bcqinfo_number_of_clusters')
- ret_output_arrays.append(prefix + '/bcqinfo_size_of_clusters')
- ret_output_arrays.append(prefix + '/bcqinfo_qbits_of_clusters')
- ret_output_arrays.append(fuseop)
- if has_dequant_weight:
- ret_output_arrays.append(prefix + '/bcqinfo_dequant_weight')
+ ret_output_arrays.append(prefix + '/bcqinfo_do_w_x')
+ ret_output_arrays.append(prefix + '/bcqinfo_alpha')
+ ret_output_arrays.append(prefix + '/bcqinfo_packed_binary_code')
+ ret_output_arrays.append(prefix + '/bcqinfo_number_of_clusters')
+ ret_output_arrays.append(prefix + '/bcqinfo_size_of_clusters')
+ ret_output_arrays.append(prefix + '/bcqinfo_qbits_of_clusters')
+ ret_output_arrays.append(prefix)
+ if has_dequant_weight:
+ ret_output_arrays.append(prefix + '/bcqinfo_dequant_weight')
return ret_output_arrays
if model_version == 1:
return get_bcqinfo_output_arrays_v1(input_path, output_arrays)
elif model_version == -1:
- return None
+ return output_arrays.split(',')
else:
err_msg = "BCQ version of the model(v{}) ".format(model_version)
err_msg += "is higher than "
.default_value(false)
.help("This will fuse BatchNorm operators of pre-activations to Convolution operator");
+ arser.add_argument("--remove_redundant_transpose")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse or remove subsequent Transpose operators");
+
+ arser.add_argument("--replace_cw_mul_add_with_depthwise_conv")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will replace channel-wise mul/add with DepthwiseConv2D operator");
+
arser.add_argument("--resolve_customop_add")
.nargs(0)
.required(false)
.default_value(false)
.help("This will convert Custom(Matmul) to Matmul operator");
+ arser.add_argument("--shuffle_weight_to_16x1float32")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32. Note that "
+ "it only converts weights whose row is a multiple of 16");
+
+ arser.add_argument("--substitute_pack_to_reshape")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will convert single input Pack to Reshape");
+
arser.add_argument("--mute_warnings")
.nargs(0)
.required(false)
options->enable(Algorithms::ResolveCustomOpAdd);
options->enable(Algorithms::ResolveCustomOpBatchMatMul);
options->enable(Algorithms::ResolveCustomOpMatMul);
+ options->enable(Algorithms::RemoveRedundantTranspose);
+ options->enable(Algorithms::SubstitutePackToReshape);
}
if (arser.get<bool>("--fold_dequantize"))
options->enable(Algorithms::FoldDequantize);
options->enable(Algorithms::MakeBatchNormGammaPositive);
if (arser.get<bool>("--fuse_preactivation_batchnorm"))
options->enable(Algorithms::FusePreActivationBatchNorm);
+ if (arser.get<bool>("--remove_redundant_transpose"))
+ options->enable(Algorithms::RemoveRedundantTranspose);
+ if (arser.get<bool>("--replace_cw_mul_add_with_depthwise_conv"))
+ options->enable(Algorithms::ReplaceMulAddWithDepthwiseConv);
if (arser.get<bool>("--resolve_customop_add"))
options->enable(Algorithms::ResolveCustomOpAdd);
if (arser.get<bool>("--resolve_customop_batchmatmul"))
options->enable(Algorithms::ResolveCustomOpBatchMatMul);
if (arser.get<bool>("--resolve_customop_matmul"))
options->enable(Algorithms::ResolveCustomOpMatMul);
+ if (arser.get<bool>("--shuffle_weight_to_16x1float32"))
+ options->enable(Algorithms::ShuffleWeightTo16x1Float32);
+ if (arser.get<bool>("--substitute_pack_to_reshape"))
+ options->enable(Algorithms::SubstitutePackToReshape);
if (arser.get<bool>("--mute_warnings"))
settings->set(luci::UserSettings::Key::MuteWarnings, true);
luci::Importer importer;
auto module = importer.importModule(circle_model);
+ // call luci optimizations for module
+ optimizer.optimize(module.get());
+
for (size_t idx = 0; idx < module->size(); ++idx)
{
auto graph = module->graph(idx);
- // call luci optimizations
+ // call luci optimizations for graph
optimizer.optimize(graph);
optimizer.sparsify(graph);
list(APPEND TESTFILES ${RECIPE_OUTPUT_FILE})
endforeach(RECIPE)
+# Add local files
+file(GLOB RECIPES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*/test.recipe")
+
+foreach(RECIPE IN ITEMS ${RECIPES})
+ get_filename_component(RECIPE_PREFIX ${RECIPE} DIRECTORY)
+
+ set(RECIPE_SOURCE_FILE "${RECIPE_PREFIX}.recipe")
+ set(RECIPE_OUTPUT_FILE "${RECIPE_PREFIX}.circle")
+
+ # Copy .recipe
+ add_custom_command(OUTPUT ${RECIPE_SOURCE_FILE}
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different
+ "${CMAKE_CURRENT_SOURCE_DIR}/${RECIPE}" ${RECIPE_SOURCE_FILE}
+ DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RECIPE}"
+ COMMENT "Generating ${RECIPE_SOURCE_FILE}")
+
+ # Generate .circle
+ add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE}
+ COMMAND circlechef-file ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
+ DEPENDS circlechef-file ${RECIPE_SOURCE_FILE}
+ COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
+
+ list(APPEND TESTS ${RECIPE_PREFIX})
+ list(APPEND TESTFILES ${RECIPE_OUTPUT_FILE})
+endforeach(RECIPE)
+
#Test circlechef-reverse
file(GLOB GEN_CIRCLEFILES RELATIVE ${CIRCLERECIPES_DIR} "${CIRCLERECIPES_DIR}/*/test.reverse")
# Note: While in development, circlechef-reverse may not handle the operator.
list(APPEND TESTFILES ${RECIPE_GEN_OUTPUT_FILE2})
endforeach(CIRCLEFILE)
+# Test local circlechef-reverse
+file(GLOB GEN_CIRCLEFILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*/test.reverse")
+
+foreach(CIRCLEFILE IN ITEMS ${GEN_CIRCLEFILES})
+ get_filename_component(CIRCLE_PREFIX ${CIRCLEFILE} DIRECTORY)
+
+ set(RECIPE_OUTPUT_FILE "${CIRCLE_PREFIX}.circle")
+ set(RECIPE_GEN_OUTPUT_FILE "${CIRCLE_PREFIX}.gen.recipe")
+ set(RECIPE_GEN_OUTPUT_FILE2 "${CIRCLE_PREFIX}.gen.circle")
+
+ # Generate .gen.recipe from generated .circle
+ add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE}
+ COMMAND circlechef-reverse ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
+ DEPENDS circlechef-reverse ${RECIPE_OUTPUT_FILE}
+ COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}")
+
+ add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2}
+ COMMAND circlechef-file ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
+ DEPENDS circlechef-file ${RECIPE_GEN_OUTPUT_FILE}
+ COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}")
+
+ list(APPEND TESTS ${CIRCLE_PREFIX}.gen)
+ list(APPEND TESTFILES ${RECIPE_GEN_OUTPUT_FILE2})
+endforeach(CIRCLEFILE)
+
# Add a dummy target to create a target-level dependency.
# TODO Find a way to create a dependency between circlechef_test and generated testfiles.
add_custom_target(circlechef_testfiles ALL DEPENDS ${TESTFILES})
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 6 dim: 12 }
+ shape_signature { dim: -1 dim: 8 dim: 6 dim: 12 }
+}
+operand {
+ name: "gamma"
+ type: FLOAT32
+ shape { dim: 12 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "beta"
+ type: FLOAT32
+ shape { dim: 12 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 6 dim: 12 }
+ shape_signature { dim: -1 dim: 8 dim: 6 dim: 12 }
+}
+operation {
+ type: "InstanceNorm"
+ input: "ifm"
+ input: "gamma"
+ input: "beta"
+ output: "ofm"
+ instance_norm_options {
+ epsilon: 0.00001
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
tcgenerate(Add_001) # runtime doesn't support
tcgenerate(Add_U8_000)
tcgenerate(All_000)
-tcgenerate(ArgMax_U8_000)
-tcgenerate(ArgMax_U8_001)
-tcgenerate(ArgMax_U8_002)
-tcgenerate(ArgMax_U8_003)
tcgenerate(ArgMin_000)
tcgenerate(ArgMin_001)
tcgenerate(ArgMin_002)
tcgenerate(Cast_000)
tcgenerate(Cast_001)
tcgenerate(Ceil_000)
-tcgenerate(Concatenation_U8_000)
tcgenerate(Conv2D_003) # runtime doesn't support dilation
-tcgenerate(Conv2D_U8_000)
-tcgenerate(Conv2D_U8_001)
tcgenerate(Cos_000)
tcgenerate(DepthwiseConv2D_001) # runtime doesn't support dilation
tcgenerate(DepthwiseConv2D_003) # runtime doesn't support dilation
-tcgenerate(DepthwiseConv2D_U8_000)
tcgenerate(DepthwiseConv2D_U8_001) # luci-interpreter doesn't support channel-wise quantization yet
tcgenerate(Dequantize_000) # runtime and luci-interpreter doesn't support Dequantize op yet
-tcgenerate(Div_000)
-tcgenerate(Equal_000)
-tcgenerate(Exp_000)
tcgenerate(ExpandDims_000)
tcgenerate(ExpandDims_001)
tcgenerate(ExpandDims_002)
tcgenerate(ExpandDims_003)
tcgenerate(Fill_000)
tcgenerate(Fill_001)
-tcgenerate(Floor_000)
-tcgenerate(FloorDiv_000)
-tcgenerate(FloorDiv_001)
tcgenerate(FloorMod_000)
tcgenerate(FloorMod_001)
-tcgenerate(FullyConnected_002)
tcgenerate(FullyConnected_U8_000)
tcgenerate(Gather_000)
tcgenerate(GatherNd_000)
tcgenerate(GatherNd_001)
-tcgenerate(Greater_000)
-tcgenerate(GreaterEqual_000)
tcgenerate(If_000)
tcgenerate(If_001)
tcgenerate(L2Pool2D_U8_000)
-tcgenerate(Less_000)
-tcgenerate(LessEqual_000)
tcgenerate(Log_000)
-tcgenerate(LogicalAnd_000)
-tcgenerate(LogicalNot_000)
-tcgenerate(LogicalOr_000)
-tcgenerate(LogSoftmax_000)
tcgenerate(MatMul_000)
tcgenerate(MatrixBandPart_000)
tcgenerate(MatrixDiag_000)
tcgenerate(MatrixSetDiag_000)
-tcgenerate(Maximum_000)
-tcgenerate(MaxPool2D_U8_000)
tcgenerate(MaxPoolWithArgMax_000)
tcgenerate(MaxPoolWithArgMax_001)
tcgenerate(MaxPoolWithArgMax_002)
-tcgenerate(Mean_U8_000)
-tcgenerate(Minimum_000)
tcgenerate(NonMaxSuppressionV4_000)
tcgenerate(NonMaxSuppressionV4_001)
tcgenerate(NonMaxSuppressionV5_000)
tcgenerate(Net_InstanceNorm_002)
tcgenerate(Net_InstanceNorm_003)
tcgenerate(Net_ZeroDim_001) # luci-interpreter doesn't support zero dim
-tcgenerate(NotEqual_000)
tcgenerate(OneHot_000)
tcgenerate(OneHot_001)
tcgenerate(OneHot_002)
tcgenerate(OneHot_003)
tcgenerate(Pack_000)
tcgenerate(Pack_U8_000)
-tcgenerate(Pad_U8_000)
tcgenerate(PadV2_000)
-tcgenerate(Pow_000)
tcgenerate(Range_000)
tcgenerate(Rank_000)
tcgenerate(ReduceAny_000)
tcgenerate(ReduceAny_001)
tcgenerate(ReduceAny_002)
tcgenerate(ReduceAny_003)
+tcgenerate(ReduceAny_dynamic_000)
+tcgenerate(ReduceAny_dynamic_001)
+tcgenerate(ReduceAny_dynamic_002)
+tcgenerate(ReduceAny_dynamic_003)
tcgenerate(ReduceMax_000)
+tcgenerate(ReduceMax_dynamic_000)
tcgenerate(ReduceMin_000)
+tcgenerate(ReduceMin_dynamic_000)
tcgenerate(ReduceProd_000)
tcgenerate(ReduceProd_001)
tcgenerate(ReduceProd_002)
tcgenerate(ReduceProd_003)
-tcgenerate(ReLU_000)
-tcgenerate(ReLU6_000)
+tcgenerate(ReduceProd_dynamic_000)
+tcgenerate(ReduceProd_dynamic_001)
+tcgenerate(ReduceProd_dynamic_002)
+tcgenerate(ReduceProd_dynamic_003)
tcgenerate(ReLUN1To1_000)
+tcgenerate(ReLUN1To1_dynamic_000)
tcgenerate(Reshape_003) # luci-interpreter doesn't support reshape without built-in option
-tcgenerate(Reshape_U8_000)
-tcgenerate(ResizeBilinear_000)
-tcgenerate(ResizeBilinear_U8_000) # luci-interpreter
-tcgenerate(ResizeNearestNeighbor_000)
tcgenerate(ReverseSequence_000)
tcgenerate(ReverseV2_000)
tcgenerate(Round_000)
tcgenerate(SelectV2_002)
tcgenerate(Shape_000)
tcgenerate(Sin_000)
-tcgenerate(Softmax_U8_000)
tcgenerate(SpaceToBatchND_000)
tcgenerate(SpaceToBatchND_001)
tcgenerate(SpaceToBatchND_002)
tcgenerate(SplitV_000)
tcgenerate(Square_000)
tcgenerate(SquaredDifference_000)
-tcgenerate(Sub_000)
-tcgenerate(Sub_001)
-tcgenerate(Sub_U8_000)
tcgenerate(Sum_000)
tcgenerate(Sum_001)
+tcgenerate(Sum_dynamic_000)
+tcgenerate(Sum_dynamic_001)
tcgenerate(Tile_000)
tcgenerate(Tile_U8_000)
tcgenerate(TopKV2_000)
tcgenerate(BCQGather_000)
tcgenerate(CircleBatchMatMul_000)
tcgenerate(InstanceNorm_000)
+tcgenerate(InstanceNorm_001)
{
/**
- * @breif Record the information of T/F Lite SubGraph and its mapping to loco
+ * @brief Record the information of T/F Lite SubGraph and its mapping to loco
*/
struct SubGraphContext
{
};
/**
- * @breif Expand shape x and y to same rank by align right and filling with 1
+ * @brief Expand shape x and y to same rank by align right and filling with 1
*/
void expand_rank(loco::TensorShape &x, loco::TensorShape &y)
{
}
/**
- * @breif Returns shape of expanded dimension of input x and y having same rank
+ * @brief Returns shape of expanded dimension of input x and y having same rank
*/
loco::TensorShape expand_dimension(const loco::TensorShape &x, const loco::TensorShape &y)
{
{
/**
- * @breif Record the information of T/F Lite SubGraph and its mapping to loco
+ * @brief Record the information of T/F Lite SubGraph and its mapping to loco
*/
struct SubGraphContext
{
public:
/// @brief The number of lines
uint32_t lines(void) const { return _lines.size(); }
- /// @breif The content of a specific line
+ /// @brief The content of a specific line
const std::string &line(uint32_t n) const { return _lines.at(n); }
private:
}
throw std::runtime_error("Unsupported type.");
case DataType::U8:
- evalQuantized();
+ if (filter()->scales().size() == 1)
+ {
+ evalQuantized();
+ }
+ else if (filter()->scales().size() > 1)
+ {
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+ static_cast<size_t>(filter()->shape().dim(0)));
+ evalQuantizedPerChannel();
+ }
break;
case DataType::S16:
evalQuantizedS16();
getTensorData<uint8_t>(_im2col.get()), gemmlowp_context.get());
}
+void Conv2D::evalQuantizedPerChannel() const
+{
+ const auto *input_data = getTensorData<uint8_t>(input());
+ const auto *filter_data = getTensorData<uint8_t>(filter());
+ const auto *bias_data = getTensorData<int32_t>(bias());
+ auto *output_data = getTensorData<uint8_t>(output());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t output_depth = filter_shape.dim(0);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+ const int32_t dilation_height_factor = _params.dilation_height_factor;
+ const int32_t dilation_width_factor = _params.dilation_width_factor;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ const std::vector<double> effective_output_scale =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ const std::vector<ChannelQuantMultipliers> multipliers_raw =
+ quantizeMultipliers(effective_output_scale);
+ BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(multipliers_raw);
+
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ const int32_t in_y_origin = out_y * stride_height - _padding_height;
+ const int32_t in_x_origin = out_x * stride_width - _padding_width;
+ int32_t acc = 0;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+ const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+ if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ const uint8_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const uint8_t filter_val =
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ acc += static_cast<int32_t>(input_val - input()->zero_point()) *
+ static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
+ }
+ }
+ }
+ }
+ if (bias_data)
+ {
+ acc += bias_data[out_c];
+ }
+
+ int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+ acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift);
+
+ scaled_acc += output()->zero_point();
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+ output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+ }
+ }
+ }
+ }
+}
+
void Conv2D::evalQuantizedS16() const
{
const auto *input_data = getTensorData<int16_t>(input());
private:
void evalFloat() const;
void evalQuantized() const;
+ void evalQuantizedPerChannel() const;
void evalQuantizedS16() const;
private:
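
The per-channel path added above rescales each output channel with its own fixed-point multiplier. The kernel obtains these from getQuantizedConvolutionMultiplers and quantizeMultipliers in its utility headers; the following self-contained sketch (hypothetical helper names, not the kernel's actual utilities) shows how such multipliers can be derived from the tensor scales:

#include <cmath>
#include <cstdint>
#include <vector>

struct QuantMultiplier
{
  int32_t multiplier; // fixed-point value in Q31
  int shift;          // power-of-two exponent (negative means right shift)
};

// Convert one effective scale into a Q31 multiplier plus shift.
static QuantMultiplier toFixedPoint(double effective_scale)
{
  int shift = 0;
  const double q = std::frexp(effective_scale, &shift); // q in [0.5, 1)
  auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
  if (q_fixed == (1ll << 31)) // handle rounding up to 2^31
  {
    q_fixed /= 2;
    ++shift;
  }
  return QuantMultiplier{static_cast<int32_t>(q_fixed), shift};
}

// effective_scale[c] = input_scale * filter_scale[c] / output_scale
std::vector<QuantMultiplier> perChannelMultipliers(float input_scale,
                                                   const std::vector<float> &filter_scales,
                                                   float output_scale)
{
  std::vector<QuantMultiplier> out;
  out.reserve(filter_scales.size());
  for (float fs : filter_scales)
    out.push_back(toFixedPoint(static_cast<double>(input_scale) * fs / output_scale));
  return out;
}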
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
+TEST(Conv2DTest, Uint8_CWQ)
+{
+ const int output_channels = 3;
+ std::vector<float> input_data{
+ // First batch
+ 1, 1, 1, 1, // row = 1
+ 2, 2, 2, 2, // row = 2
+ // Second batch
+ 1, 2, 3, 4, // row = 1
+ 1, 2, 3, 4, // row = 2
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, // first 2x2 filter
+ -1, 1, -1, 1, // second 2x2 filter
+ -1, -1, 1, 1, // third 2x2 filter
+ };
+ std::vector<float> bias_data{1, 2, 3};
+ Shape filter_shape{output_channels, 2, 2, 1};
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 4);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(quantizationParams<uint8_t>(0, 4));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first,
+ input_quant_param.second, input_data);
+ Tensor filter_tensor =
+ makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, 0, filter_data);
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, bias_data);
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 18, 2, 5, // first batch, left
+ 18, 2, 5, // first batch, right
+ 17, 4, 3, // second batch, left
+ 37, 4, 3, // second batch, right
+ };
+ std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
TEST(Conv2DTest, SInt16)
{
Shape input_shape{1, 4, 3, 2};
}
throw std::runtime_error("Unsupported type.");
case DataType::U8:
- evalQuantized();
+ if (filter()->scales().size() == 1)
+ {
+ evalQuantized();
+ }
+ else if (filter()->scales().size() > 1)
+ {
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+ static_cast<size_t>(filter()->shape().dim(3)));
+ evalQuantizedPerChannel();
+ }
break;
case DataType::S16:
evalQuantizedS16();
getTensorShape(output()), getTensorData<float>(output()));
}
+void DepthwiseConv2D::evalQuantizedPerChannel() const
+{
+ const auto *input_data = getTensorData<uint8_t>(input());
+ const auto *filter_data = getTensorData<uint8_t>(filter());
+ const auto *bias_data = getTensorData<int32_t>(bias());
+ auto *output_data = getTensorData<uint8_t>(output());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+ const int32_t dilation_height_factor = _params.dilation_height_factor;
+ const int32_t dilation_width_factor = _params.dilation_width_factor;
+ const int32_t depth_multiplier = _params.depth_multiplier;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ const std::vector<double> effective_output_scales =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
+ quantizeMultipliers(effective_output_scales);
+ BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
+
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+ {
+ for (int m = 0; m < depth_multiplier; ++m)
+ {
+ const int output_channel = m + in_channel * depth_multiplier;
+ const int in_x_origin = (out_x * stride_width) - _padding_width;
+ const int in_y_origin = (out_y * stride_height) - _padding_height;
+            int32_t acc = 0;
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ // Zero padding by omitting the areas outside the image.
+ const bool is_point_inside_image =
+ (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
+ if (is_point_inside_image)
+ {
+                int32_t input_val =
+                  input_data[calcOffset(input_shape, batch, in_y, in_x, in_channel)];
+                int32_t filter_val =
+                  filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)];
+ acc += (filter_val - filter()->zero_points()[output_channel]) *
+ (input_val - input()->zero_point());
+ }
+ }
+ }
+ if (bias_data)
+ {
+ acc += bias_data[output_channel];
+ }
+ int32_t output_multiplier = quant_multipliers[output_channel].multiplier;
+ int output_shift = quant_multipliers[output_channel].shift;
+ int32_t scaled_acc =
+ tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ scaled_acc += output()->zero_point();
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+ output_data[calcOffset(output_shape, batch, out_y, out_x, output_channel)] =
+ static_cast<uint8_t>(scaled_acc);
+ }
+ }
+ }
+ }
+ }
+}
+
void DepthwiseConv2D::evalQuantized() const
{
const auto input_scale = static_cast<double>(input()->scale());
private:
void evalFloat() const;
void evalQuantized() const;
+ void evalQuantizedPerChannel() const;
void evalQuantizedS16() const;
private:
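
The channel-wise tests that follow build per-channel quantization parameters from (min, max) ranges via quantizationParams. As a rough sketch of that mapping for uint8 (assumed behavior, written from scratch here; the real helper may additionally nudge the range so the zero point is exact):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <utility>

// Sketch: affine (scale, zero_point) for uint8 covering [min, max].
// The range is widened to include 0 so that real 0.0 is exactly representable.
std::pair<float, int32_t> affineParamsU8(float min, float max)
{
  min = std::min(min, 0.0f);
  max = std::max(max, 0.0f);
  if (min == max)
    return {1.0f, 0}; // degenerate range: every value quantizes to the zero point

  const float scale = (max - min) / 255.0f;
  const auto zero_point = static_cast<int32_t>(std::round(-min / scale));
  return {scale, zero_point};
}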
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
+TEST(DepthwiseConv2DTest, Uint8_CWQ_weights)
+{
+ const int output_channels = 4;
+ Shape input_shape{1, 3, 2, 2};
+ Shape filter_shape{1, 2, 2, output_channels};
+ Shape bias_shape{4};
+ std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};
+
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ std::vector<float> ref_output_data{
+ 71, -34, 99, -20, //
+ 91, -26, 127, -4, //
+ };
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 16);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-9, 13));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-14, 10));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-11, 15));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-16, 12));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
+ input_quant_param.second, input_data);
+ Tensor filter_tensor =
+ makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, 3, filter_data);
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data);
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, output_quant_param.first));
+}
+
TEST(DepthwiseConv2DTest, InvalidBiasType_NEG)
{
Shape input_shape{1, 4, 2, 2};
evalFloat();
break;
case DataType::U8:
- evalQuantized();
+ if (filter()->scales().size() == 1)
+ {
+ evalQuantized();
+ }
+ else if (filter()->scales().size() > 1)
+ {
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+ static_cast<size_t>(filter()->shape().dim(0)));
+ evalQuantizedPerChannel();
+ }
break;
case DataType::S16:
evalQuantizedS16();
getTensorData<int32_t>(_scratch_tensor.get()));
}
+void TransposeConv::evalQuantizedPerChannel() const
+{
+ const auto *input_data = getTensorData<uint8_t>(input());
+ const auto *filter_data = getTensorData<uint8_t>(filter());
+ const auto *bias_data = getTensorData<int32_t>(bias());
+ auto *output_data = getTensorData<uint8_t>(output());
+ auto *scratch_data = getTensorData<int32_t>(_scratch_tensor.get());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t output_depth = filter_shape.dim(0);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
+
+ std::memset(scratch_data, 0, _scratch_tensor->shape().num_elements() * sizeof(int32_t));
+
+ BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (int32_t in_x = 0; in_x < input_width; ++in_x)
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ const int32_t out_y_origin = in_y * stride_height - _padding_height;
+ const int32_t out_x_origin = in_x * stride_width - _padding_width;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t out_x = out_x_origin + filter_x;
+ const int32_t out_y = out_y_origin + filter_y;
+ if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ const uint8_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const uint8_t filter_val =
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
+ static_cast<int32_t>(input_val - input()->zero_point()) *
+ static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ int32_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
+ if (bias_data)
+ {
+ acc += bias_data[out_c];
+ }
+
+ int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+ acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
+
+ scaled_acc += output()->zero_point();
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+
+ output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+ }
+ }
+ }
+ }
+}
+
void TransposeConv::evalQuantizedS16() const
{
const auto *input_data = getTensorData<int16_t>(input());
private:
void evalFloat() const;
void evalQuantized() const;
+ void evalQuantizedPerChannel() const;
void evalQuantizedS16() const;
private:
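
All three per-channel kernels feed the int32 accumulator through tflite::MultiplyByQuantizedMultiplier with the channel's multiplier and shift. Roughly, that is a Q31 fixed-point multiply followed by a rounding power-of-two shift; the scalar sketch below conveys the idea but omits the saturation and exact rounding behavior of the real routine:

#include <cstdint>

// Rough scalar equivalent of rescaling an int32 accumulator by a Q31
// fixed-point multiplier and a power-of-two shift (no saturation handling).
// Assumes -31 < shift < 31, which holds for realistic convolution scales.
int32_t rescale_accumulator(int32_t acc, int32_t multiplier, int shift)
{
  const int64_t prod = static_cast<int64_t>(acc) * static_cast<int64_t>(multiplier);
  const int total_right_shift = 31 - shift; // bring the Q31 product back to integer range
  const int64_t rounding = int64_t{1} << (total_right_shift - 1);
  return static_cast<int32_t>((prod + rounding) >> total_right_shift);
}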
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
+TEST(TransposeConvTest, UInt8_CWQ)
+{
+ const int32_t output_channels = 2;
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+ std::vector<int32_t> output_shape_data{1, 5, 5, 2};
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ // Choose quantization parameters carefully.
+ auto input_quant = quantizationParams<uint8_t>(-8.0, 7.9375); // s = 1 / 16, zp = 128
+ auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64
+
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(quantizationParams<uint8_t>(0, 17));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(0, 18));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 2, 1}, input_quant.first,
+ input_quant.second, input_data);
+ Tensor filter_tensor = makeInputTensor<DataType::U8>({output_channels, 3, 3, 1}, filter_scales,
+ filter_zerops, 0, filter_data);
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, bias_data);
+ Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data);
+ Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second);
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
TEST(TransposeConvTest, SInt16)
{
std::vector<float> input_data{1, 2, 3, 4};
return getNodeDataImpl<DataType::U8>(node, data_size);
case DataType::FLOAT32:
return getNodeDataImpl<DataType::FLOAT32>(node, data_size);
+ case DataType::S16:
+ return getNodeDataImpl<DataType::S16>(node, data_size);
case DataType::S32:
return getNodeDataImpl<DataType::S32>(node, data_size);
+ case DataType::S64:
+ return getNodeDataImpl<DataType::S64>(node, data_size);
default:
throw std::runtime_error("Unsupported type.");
}
#include "CircleExporterImpl.h"
#include "Optimize.h"
-#include "TypeBridge.h"
#include "CircleTensorExporter.h"
#include "CircleOperationExporter.h"
#include "CircleExporterUtils.h"
// do graph optimization
optimize(graph);
- // copy shape/dtype inference data to CircleNode
- copy_shape_dtype(graph);
-
_builder.Clear();
SerializedModelData md;
optimize(graph);
- // copy shape/dtype inference data to CircleNode
- copy_shape_dtype(graph);
-
SerializedGraphData gd;
// set Subgraph name
}
}
+circle::FullyConnectedOptionsWeightsFormat
+to_circle_weightsformat(luci::CircleFullyConnected::WeightsFormat format)
+{
+ switch (format)
+ {
+ case luci::CircleFullyConnected::WeightsFormat::DEFAULT:
+ return circle::FullyConnectedOptionsWeightsFormat_DEFAULT;
+ case luci::CircleFullyConnected::WeightsFormat::SHUFFLED4x16INT8:
+ return circle::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8;
+ case luci::CircleFullyConnected::WeightsFormat::SHUFFLED16x1FLOAT32:
+ return circle::FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32;
+ default:
+ INTERNAL_EXN_V("trying to convert unsupported luci::WeightsFormat", oops::to_uint32(format));
+ }
+}
+
circle::DimensionType to_circle_dimensiontype(luci::DimensionType type)
{
switch (type)
circle::ActivationFunctionType to_circle_actfunc(luci::FusedActFunc func);
circle::TensorType to_circle_tensortype(loco::DataType type);
circle::MirrorPadMode to_circle_mirrorpadmode(luci::MirrorPadMode mode);
+circle::FullyConnectedOptionsWeightsFormat
+to_circle_weightsformat(luci::CircleFullyConnected::WeightsFormat format);
circle::DimensionType to_circle_dimensiontype(luci::DimensionType type);
flatbuffers::Offset<void> to_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb,
const SparseIndexVector &sparse_idx_vec);
#include <luci/IR/CircleNode.h>
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
-#include <luci/Service/CircleShapeInference.h>
#include <luci/UserSettings.h>
#include <luci/Log.h>
{
export_simple(
node, circle::BuiltinOperator_FULLY_CONNECTED, circle::BuiltinOptions_FullyConnectedOptions,
- CreateFullyConnectedOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()))
+ CreateFullyConnectedOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()),
+ to_circle_weightsformat(node->weights_format()))
.Union());
}
CircleTensoInfo tensor_info;
tensor_info.name(tensor_name);
- tensor_info.dtype(to_circle_tensortype(luci::node_dtype(node)));
+ tensor_info.dtype(to_circle_tensortype(node->dtype()));
tensor_info.shape_signature(node->shape_signature());
if (node->shape_status() == ShapeStatus::VALID)
- tensor_info.shape(to_shape_description(luci::node_shape(node)));
+ tensor_info.shape(to_shape_description(node));
tensor_info.shape_status(node->shape_status());
tensor_info.content(dynamic_cast<luci::CircleConst *>(node));
flatbuffers::Offset<Vector<int32_t>> encodeShapeSignature(FlatBufferBuilder &builder,
const ShapeSignature &shape_signature)
{
+ if (shape_signature.rank() == 0)
+ return 0;
+
return builder.CreateVector(shape_signature.as_vector());
}
#include "ProgressReporter.h"
#include <luci/Pass/ShapeInferencePass.h>
+#include <luci/Pass/ShapeSignatureInferencePass.h>
#include <luci/Pass/TypeInferencePass.h>
#include <logo/Phase.h>
// prepare type and shape before optimization
phase.emplace_back(std::make_unique<TypeInferencePass>());
phase.emplace_back(std::make_unique<ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<ShapeSignatureInferencePass>());
// TODO add more optimization passes (with a knob)
}
{
/**
- * @breif Record the information of T/F Lite SubGraph and its mapping to loco
+ * @brief Record the information of T/F Lite SubGraph and its mapping to loco
*/
struct SubGraphContext
{
FusedActFunc luci_actfunc(const circle::ActivationFunctionType type);
Padding luci_padding(const circle::Padding padding);
MirrorPadMode luci_mirrorpad_mode(const circle::MirrorPadMode mode);
+luci::CircleFullyConnected::WeightsFormat
+luci_weights_format(const circle::FullyConnectedOptionsWeightsFormat weights_format);
std::unique_ptr<CircleQuantParam>
luci_quantparam(const circle::QuantizationParametersT *quantization);
return MirrorPadMode::UNDEFINED;
}
+luci::CircleFullyConnected::WeightsFormat
+luci_weights_format(const circle::FullyConnectedOptionsWeightsFormat weights_format)
+{
+ switch (weights_format)
+ {
+ case circle::FullyConnectedOptionsWeightsFormat_DEFAULT:
+ return luci::CircleFullyConnected::WeightsFormat::DEFAULT;
+ case circle::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8:
+ return luci::CircleFullyConnected::WeightsFormat::SHUFFLED4x16INT8;
+ case circle::FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32:
+ return luci::CircleFullyConnected::WeightsFormat::SHUFFLED16x1FLOAT32;
+ default:
+ throw std::runtime_error("Invalid FullyConnectedOptionsWeightsFormat");
+ }
+}
+
DimensionType luci_dim_type(const circle::DimensionType dim_type)
{
switch (dim_type)
const auto *options = op.builtin_options.AsFullyConnectedOptions();
node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
- if (options->weights_format != circle::FullyConnectedOptionsWeightsFormat_DEFAULT)
- {
- throw oops::UserExn(
- "Unsupported weights format",
- circle::EnumNameFullyConnectedOptionsWeightsFormat(options->weights_format));
- }
+ node->weights_format(luci_weights_format(options->weights_format));
return node;
}
public:
Dilation() : _w(1), _h(1) {}
- int32_t w() const { return _w; }
- void w(int32_t w) { _w = w; }
+ uint32_t w() const { return _w; }
+ void w(uint32_t w) { _w = w; }
+ void w(int32_t w);
- int32_t h() const { return _h; }
- void h(int32_t h) { _h = h; }
+ uint32_t h() const { return _h; }
+ void h(uint32_t h) { _h = h; }
+ void h(int32_t h);
private:
- int32_t _w;
- int32_t _h;
+ uint32_t _w;
+ uint32_t _h;
};
} // namespace luci
public:
Filter() : _w(1), _h(1) {}
- int32_t w() const { return _w; }
- void w(int32_t w) { _w = w; }
+ uint32_t w() const { return _w; }
+ void w(uint32_t w) { _w = w; }
+ void w(int32_t w);
- int32_t h() const { return _h; }
- void h(int32_t h) { _h = h; }
+ uint32_t h() const { return _h; }
+ void h(uint32_t h) { _h = h; }
+ void h(int32_t h);
private:
- int32_t _w;
- int32_t _h;
+ uint32_t _w;
+ uint32_t _h;
};
} // namespace luci
public:
Stride() : _w(1), _h(1) {}
- int32_t w() const { return _w; }
- void w(int32_t w) { _w = w; }
+ uint32_t w() const { return _w; }
+ void w(uint32_t w) { _w = w; }
+ void w(int32_t w);
- int32_t h() const { return _h; }
- void h(int32_t h) { _h = h; }
+ uint32_t h() const { return _h; }
+ void h(uint32_t h) { _h = h; }
+ void h(int32_t h);
private:
- int32_t _w;
- int32_t _h;
+ uint32_t _w;
+ uint32_t _h;
};
} // namespace luci
std::vector<int32_t> _shape_signature{};
};
+bool operator==(const ShapeSignature &lhs, const ShapeSignature &rhs);
+
} // namespace luci
#endif // __LUCI_IR_SHAPE_SIGNATURE_H__
public LuciNodeMixin<LuciNodeTrait::Bias>
{
public:
+ enum class WeightsFormat
+ {
+ UNDEFINED, // This is not defined by Circle. This was added to prevent programming error.
+
+ DEFAULT,
+ SHUFFLED4x16INT8,
+ SHUFFLED16x1FLOAT32,
+ };
+
+public:
loco::Node *input(void) const { return at(0)->node(); }
void input(loco::Node *node) { at(0)->node(node); }
loco::Node *bias(void) const override { return at(2)->node(); }
void bias(loco::Node *node) override { at(2)->node(node); }
+
+public:
+ WeightsFormat weights_format(void) const { return _weights_format; }
+ void weights_format(WeightsFormat weights_format) { _weights_format = weights_format; }
+
+private:
+ WeightsFormat _weights_format{WeightsFormat::DEFAULT};
};
} // namespace luci
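
As a small usage illustration (a hypothetical snippet, not part of this change), the new attribute can be set on a node before export; the exporter maps it with to_circle_weightsformat(), mirroring luci_weights_format() on import:

#include <luci/IR/CircleNodes.h>

// Sketch: a pass such as --shuffle_weight_to_16x1float32 would tag the node
// after rewriting its weight layout; the exporter then serializes the format.
void tag_shuffled_weights(luci::CircleFullyConnected *fc)
{
  fc->weights_format(luci::CircleFullyConnected::WeightsFormat::SHUFFLED16x1FLOAT32);
}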
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#ifndef __MEMORY_H__
-#define __MEMORY_H__
+#include "luci/IR/AttrDilation.h"
-#include <cstdlib>
+#include <cassert>
-template <typename T> inline T *make_alloc(void)
+namespace luci
{
- auto ptr = malloc(sizeof(T));
- if (ptr == nullptr)
- {
- throw std::bad_alloc{};
- }
+void Dilation::w(int32_t w)
+{
+ assert(w >= 0);
+ _w = static_cast<uint32_t>(w);
+}
- return reinterpret_cast<T *>(ptr);
+void Dilation::h(int32_t h)
+{
+ assert(h >= 0);
+ _h = static_cast<uint32_t>(h);
}
-#endif // __MEMORY_H__
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/AttrDilation.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleAttrDilationTest, set)
+{
+ auto d = luci::Dilation();
+
+ d.h(10u);
+ d.w(10u);
+
+ ASSERT_EQ(d.h(), 10u);
+ ASSERT_EQ(d.w(), 10u);
+
+ d.h(10); // int32_t
+ d.w(10);
+
+ ASSERT_EQ(d.h(), 10u);
+ ASSERT_EQ(d.w(), 10u);
+}
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#include "env.h"
+#include "luci/IR/AttrFilter.h"
-#include <string>
-
-#include <cstdlib>
#include <cassert>
-inline void ensure(int err) { assert(err == 0); }
-
-int main(int argc, char **argv)
+namespace luci
{
- const std::string key{"TEST"};
- const int num{3};
-
- const auto str = std::to_string(num);
-
- ensure(unsetenv(key.c_str()));
- ensure(setenv(key.c_str(), str.c_str(), 0));
-
- int value = 0;
-
- assert(value != num);
- IntVar buffer(key, value);
-
- assert(buffer() == num);
+void Filter::w(int32_t w)
+{
+ assert(w >= 0);
+ _w = static_cast<uint32_t>(w);
+}
- return 0;
+void Filter::h(int32_t h)
+{
+ assert(h >= 0);
+ _h = static_cast<uint32_t>(h);
}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/AttrFilter.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleAttrFilterTest, set)
+{
+ auto f = luci::Filter();
+
+ f.h(10u);
+ f.w(10u);
+
+ ASSERT_EQ(f.h(), 10u);
+ ASSERT_EQ(f.w(), 10u);
+
+ f.h(10); // int32_t
+ f.w(10);
+
+ ASSERT_EQ(f.h(), 10u);
+ ASSERT_EQ(f.w(), 10u);
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/AttrStride.h"
+
+#include <cassert>
+
+namespace luci
+{
+
+void Stride::w(int32_t w)
+{
+ assert(w >= 0);
+ _w = static_cast<uint32_t>(w);
+}
+
+void Stride::h(int32_t h)
+{
+ assert(h >= 0);
+ _h = static_cast<uint32_t>(h);
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/AttrStride.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleAttrStrideTest, set)
+{
+ auto s = luci::Stride();
+
+ s.h(10u);
+ s.w(10u);
+
+ ASSERT_EQ(s.h(), 10u);
+ ASSERT_EQ(s.w(), 10u);
+
+ s.h(10); // int32_t
+ s.w(10);
+
+ ASSERT_EQ(s.h(), 10u);
+ ASSERT_EQ(s.w(), 10u);
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/CircleShapeSignature.h"
+
+namespace luci
+{
+
+bool operator==(const ShapeSignature &lhs, const ShapeSignature &rhs)
+{
+ if (lhs.rank() != rhs.rank())
+ return false;
+
+ for (uint32_t i = 0; i < lhs.rank(); ++i)
+ if (lhs.dim(i) != rhs.dim(i))
+ return false;
+
+ return true;
+}
+
+} // namespace luci
#include <loco.h>
+#include <luci/IR/Module.h>
+
#include <string>
#include <vector>
FusePreActivationBatchNorm,
MakeBatchNormGammaPositive,
FuseActivationFunction,
+ ShuffleWeightTo16x1Float32,
+ RemoveRedundantTranspose,
+ ReplaceMulAddWithDepthwiseConv,
+ SubstitutePackToReshape,
};
enum AlgorithmParameters
Options *options(void);
public:
+ void optimize(luci::Module *) const;
+
void optimize(loco::Graph *) const;
void quantize(loco::Graph *) const;
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MODULE_PASS_H__
+#define __MODULE_PASS_H__
+
+#include <loco.h>
+#include <logo/Pass.h>
+
+#include <luci/IR/Module.h>
+
+namespace luci
+{
+
+class Pass : public logo::Pass
+{
+public:
+ // Run the module pass and return false if nothing was changed
+ virtual bool run(luci::Module *) = 0;
+};
+
+} // namespace luci
+
+#endif // __MODULE_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_TYPE_INFERENCE_PASS_H__
+#define __LUCI_CIRCLE_TYPE_INFERENCE_PASS_H__
+
+#include <loco.h>
+
+#include <luci/ModulePass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to infer type of circle nodes
+ */
+class CircleTypeInferencePass : public luci::Pass
+{
+public:
+ virtual const char *name(void) const { return "luci::CircleTypeInferencePass"; }
+
+public:
+ bool run(luci::Module *m);
+ bool run(loco::Graph *g);
+};
+
+} // namespace luci
+
+#endif //__LUCI_CIRCLE_TYPE_INFERENCE_PASS_H__
#ifndef __LUCI_FUSE_BCQ_PASS_H__
#define __LUCI_FUSE_BCQ_PASS_H__
-#include <logo/Pass.h>
+#include <luci/ModulePass.h>
namespace luci
{
* @brief Class to fuse certain pattern of subgraph into CircleBCQFullyConnected or CircleBCQGather
*
*/
-struct FuseBCQPass final : public logo::Pass
+struct FuseBCQPass final : public luci::Pass
{
const char *name(void) const final { return "luci::FuseBCQPass"; }
+ bool run(luci::Module *m) final;
bool run(loco::Graph *g) final;
};
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_MIGRATE_LEGACY_SHAPE_DTYPE_PASS_H__
+#define __LUCI_MIGRATE_LEGACY_SHAPE_DTYPE_PASS_H__
+
+#include <loco.h>
+
+#include <luci/ModulePass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to copy shape/dtype information from loco to circle nodes
+ *
+ * CAUTION : This pass will be removed after refactoring is finished
+ */
+class MigrateLegacyShapeDtypePass : public luci::Pass
+{
+public:
+ virtual const char *name(void) const { return "luci::MigrateLegacyShapeDtypePass"; }
+
+public:
+ bool run(luci::Module *m);
+ bool run(loco::Graph *graph);
+};
+
+} // namespace luci
+
+#endif //__LUCI_MIGRATE_LEGACY_SHAPE_DTYPE_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PROPAGATE_QUANT_PARAM_PASS_H__
+#define __LUCI_PROPAGATE_QUANT_PARAM_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to propagate quantization parameters of an operator's output to its input
+ */
+struct PropagateQuantParamPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::PropagateQuantParamPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_PROPAGATE_QUANT_PARAM_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_REDUNDANT_TRANSPOSE_H__
+#define __LUCI_REMOVE_REDUNDANT_TRANSPOSE_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Fuse or remove consecutive Transpose operators
+ */
+struct RemoveRedundantTransposePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveRedundantTransposePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_REDUNDANT_TRANSPOSE_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REPLACE_MUL_ADD_WITH_DEPTHWISE_CONV_PASS_H__
+#define __LUCI_REPLACE_MUL_ADD_WITH_DEPTHWISE_CONV_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to replace channel-wise mul/add with CircleDepthwiseConv2D
+ */
+struct ReplaceMulAddWithDepthwiseConvPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::ReplaceMulAddWithDepthwiseConvPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REPLACE_MUL_ADD_WITH_DEPTHWISE_CONV_PASS_H__
#include <loco.h>
-#include <logo/Pass.h>
+#include <luci/ModulePass.h>
namespace luci
{
/**
* @brief Pass to infer shape of nodes
*/
-class ShapeInferencePass : public logo::Pass
+class ShapeInferencePass : public luci::Pass
{
public:
virtual const char *name(void) const { return "luci::ShapeInferencePass"; }
public:
+ bool run(luci::Module *m);
bool run(loco::Graph *graph);
};
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SHAPE_SIGNATURE_INFERENCE_PASS_H__
+#define __LUCI_SHAPE_SIGNATURE_INFERENCE_PASS_H__
+
+#include <loco.h>
+
+#include <luci/ModulePass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to infer shape_signature of nodes
+ */
+class ShapeSignatureInferencePass : public luci::Pass
+{
+public:
+ virtual const char *name(void) const { return "luci::ShapeSignatureInferencePass"; }
+
+public:
+ bool run(luci::Module *m);
+ bool run(loco::Graph *graph);
+};
+
+} // namespace luci
+
+#endif //__LUCI_SHAPE_SIGNATURE_INFERENCE_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SHUFFLE_WEIGHT_TO_16X1_FLOAT32_PASS_H__
+#define __LUCI_SHUFFLE_WEIGHT_TO_16X1_FLOAT32_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to convert weight format of FullyConnected to SHUFFLED16x1FLOAT32
+ */
+struct ShuffleWeightTo16x1Float32Pass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::ShuffleWeightTo16x1Float32Pass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_SHUFFLE_WEIGHT_TO_16X1_FLOAT32_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SUBSTITUTE_PACK_TO_RESHAPE_PASS_H__
+#define __LUCI_SUBSTITUTE_PACK_TO_RESHAPE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to substitute a Pack operator that has a single input with a Reshape node.
+ */
+struct SubstitutePackToReshapePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::SubstitutePackToReshapePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_SUBSTITUTE_PACK_TO_RESHAPE_PASS_H__
#include <loco.h>
-#include <logo/Pass.h>
+#include <luci/ModulePass.h>
namespace luci
{
/**
* @brief Pass to infer type of nodes
*/
-class TypeInferencePass : public logo::Pass
+class TypeInferencePass : public luci::Pass
{
public:
virtual const char *name(void) const { return "luci::TypeInferencePass"; }
public:
+ bool run(luci::Module *m);
bool run(loco::Graph *graph);
};
#include "luci/Pass/FuseInstanceNormPass.h"
#include "luci/Pass/FusePreActivationBatchNormPass.h"
#include "luci/Pass/MakeBatchNormGammaPositivePass.h"
+#include "luci/Pass/PropagateQuantParamPass.h"
+#include "luci/Pass/RemoveRedundantTransposePass.h"
+#include "luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h"
#include "luci/Pass/ResolveCustomOpAddPass.h"
#include "luci/Pass/ResolveCustomOpBatchMatMulPass.h"
#include "luci/Pass/ResolveCustomOpMatMulPass.h"
#include "luci/Pass/QuantizeWithMinMaxPass.h"
#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
#include "luci/Pass/SparsifyTensorPass.h"
+#include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h"
+#include "luci/Pass/SubstitutePackToReshapePass.h"
// TODO add more passes
#include "luci/Pass/ShapeInferencePass.h"
+#include "luci/Pass/ShapeSignatureInferencePass.h"
#include "luci/Pass/TypeInferencePass.h"
+// Following passes will be removed after refactoring is finished
+#include "luci/Pass/MigrateLegacyShapeDtypePass.h"
+
// logo passes
#include <logo/RemoveDeadNodeWithQueryPass.h>
+#include "ModulePhase.h"
#include "ProgressReporter.h"
#include "CircleOptimizerUtils.h"
return _options.get();
}
+void CircleOptimizer::optimize(luci::Module *m) const
+{
+ luci::Phase phase;
+
+ // Following passes will be deprecated after refactoring is finished.
+ phase.emplace_back(std::make_unique<luci::MigrateLegacyShapeDtypePass>());
+
+ // The following passes are needed whenever other passes create new nodes or modify existing ones.
+ phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::ShapeSignatureInferencePass>());
+ phase.emplace_back(std::make_unique<luci::TypeInferencePass>());
+
+ if (_options->query(Options::Algorithm::FuseBCQ))
+ {
+ phase.emplace_back(std::make_unique<FuseBCQPass>());
+ }
+
+ ModuleProgressReporter prog(m, logo::PhaseStrategy::Restart);
+ PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{m};
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
+}
+
void CircleOptimizer::optimize(loco::Graph *g) const
{
logo::Phase phase;
/* TRANSFORM DECLARATION BEGIN */
+ phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
+
+ // Following passes will be deprecated after refactoring is finished.
+ phase.emplace_back(std::make_unique<luci::MigrateLegacyShapeDtypePass>());
+
+ // The following passes are needed whenever other passes create new nodes or modify existing ones.
+ phase.emplace_back(std::make_unique<luci::TypeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::ShapeSignatureInferencePass>());
+
if (_options->query(Options::Algorithm::ResolveCustomOpAdd))
{
phase.emplace_back(std::make_unique<luci::ResolveCustomOpAddPass>());
{
phase.emplace_back(std::make_unique<FuseInstanceNormPass>());
}
- if (_options->query(Options::Algorithm::FuseBCQ))
- {
- phase.emplace_back(std::make_unique<FuseBCQPass>());
- }
if (_options->query(Options::Algorithm::FuseBatchNormWithTConv))
{
phase.emplace_back(std::make_unique<FuseBatchNormWithTConvPass>());
{
phase.emplace_back(std::make_unique<luci::MakeBatchNormGammaPositivePass>());
}
+ if (_options->query(Options::Algorithm::ShuffleWeightTo16x1Float32))
+ {
+ phase.emplace_back(std::make_unique<luci::ShuffleWeightTo16x1Float32Pass>());
+ }
+ if (_options->query(Options::Algorithm::RemoveRedundantTranspose))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveRedundantTransposePass>());
+ }
+ if (_options->query(Options::Algorithm::ReplaceMulAddWithDepthwiseConv))
+ {
+ phase.emplace_back(std::make_unique<luci::ReplaceMulAddWithDepthwiseConvPass>());
+ }
+ if (_options->query(Options::Algorithm::SubstitutePackToReshape))
+ {
+ phase.emplace_back(std::make_unique<luci::SubstitutePackToReshapePass>());
+ }
- // Shape inference is needed for added nodes doing above transformations
- phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
- phase.emplace_back(std::make_unique<luci::TypeInferencePass>());
- phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
/* TRANSFORM DECLARATION END */
- ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
- logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
+ ProgressReporter prog(g, logo::PhaseStrategy::Restart);
+ logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
phase_runner.attach(&prog);
phase_runner.run(phase);
}
luci::QuantizeWithMinMaxPass quantizer(str_to_dtype(input_dtype), str_to_dtype(output_dtype),
str_to_granularity(granularity));
quantizer.run(g);
+
+ // Post-quantization optimizations
+ logo::Phase phase;
+
+ phase.emplace_back(std::make_unique<luci::PropagateQuantParamPass>());
+
+ phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::TypeInferencePass>());
+ phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
+
+ ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
+ logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
}
// Requantize
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/CircleTypeInferencePass.h"
+
+#include <luci/Service/CircleTypeInference.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleTypeInferencePass::run(luci::Module *m)
+{
+ bool changed = false;
+
+ for (size_t g = 0; g < m->size(); ++g)
+ {
+ if (run(m->graph(g)))
+ changed = true;
+ }
+
+ return changed;
+}
+
+bool CircleTypeInferencePass::run(loco::Graph *g)
+{
+ luci::tinf::Rule type_infer_rule;
+ bool changed = false;
+
+ for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
+ {
+ loco::DataType dtype;
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+
+ if (type_infer_rule.infer(circle_node, dtype) && circle_node->dtype() != dtype)
+ {
+ circle_node->dtype(dtype);
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
namespace
{
+bool is_fusable_const(luci::CircleConst *before, luci::CircleConst *after, bool do_w_x)
+{
+ if (after->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ if (after->rank() != 2)
+ return false;
+
+ if (after->size<loco::DataType::FLOAT32>() != before->size<loco::DataType::FLOAT32>())
+ return false;
+
+ auto after_dim0 = after->dim(0).value();
+ auto after_dim1 = after->dim(1).value();
+
+ if (before->rank() == 2)
+ {
+ if (do_w_x)
+ {
+ // Check for [dim0, dim1] --> [dim0, dim1]
+ if (!(after->dim(0) == before->dim(0) && after->dim(1) == before->dim(1)))
+ return false;
+
+ for (uint32_t i = 0; i < after->size<loco::DataType::FLOAT32>(); ++i)
+ if (after->at<loco::DataType::FLOAT32>(i) != before->at<loco::DataType::FLOAT32>(i))
+ return false;
+ }
+ else
+ {
+ // Check for [dim0, dim1] --> [dim1, dim0]
+ if (!(after->dim(0) == before->dim(1) && after->dim(1) == before->dim(0)))
+ return false;
+
+ for (uint32_t i = 0; i < after_dim0; ++i)
+ for (uint32_t j = 0; j < after_dim1; ++j)
+ if (after->at<loco::DataType::FLOAT32>(i * after_dim1 + j) !=
+ before->at<loco::DataType::FLOAT32>(j * after_dim0 + i))
+ return false;
+ }
+
+ return true;
+ }
+ else if (before->rank() == 3)
+ {
+ if (do_w_x)
+ {
+ // This case has not been observed yet.
+ return false;
+ }
+ else
+ {
+ // When an Einsum op is converted to FullyConnected, the original rank can be 3.
+ auto before_dim0 = before->dim(0).value();
+ auto before_dim1 = before->dim(1).value();
+ auto before_dim2 = before->dim(2).value();
+
+ // Check if [dim0, dim1, dim2] --> [dim2, dim0 * dim1] or
+ // [dim0, dim1, dim2] --> [dim1 * dim2, dim0]
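+ // For illustration: a [2, 3, 4] constant may reappear as [4, 6] or as [12, 2] after
+ // the conversion, and both layouts are accepted by the check below.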
+ if ((after_dim0 == before_dim1 * before_dim2 && after_dim1 == before_dim0) ||
+ (after_dim0 == before_dim2 && after_dim1 == before_dim0 * before_dim1))
+ {
+ for (uint32_t i = 0; i < after_dim0; ++i)
+ for (uint32_t j = 0; j < after_dim1; ++j)
+ if (after->at<loco::DataType::FLOAT32>(i * after_dim1 + j) !=
+ before->at<loco::DataType::FLOAT32>(j * after_dim0 + i))
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
+} // namespace
+
+namespace
+{
+
// V means the version of BCQ.
template <int32_t V> class BCQFuser;
}
public:
- bool fuseBCQ(loco::Graph *g)
+ void register_bcq_info(loco::Graph *g)
{
-
- const auto output_nodes = loco::output_nodes(g);
- for (auto node : output_nodes)
+ for (auto node : loco::output_nodes(g))
{
auto output_node = loco::must_cast<luci::CircleOutput *>(node);
add_BCQ_info_node(prefix, metadata_type, circle_node);
}
}
+ }
+ bool fuseBCQ(loco::Graph *g)
+ {
if (!is_bcqinfo_valid())
return false;
- for (auto f : _fusable_op)
+ for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
{
- auto prefix = f.first;
- luci::CircleNode *node = f.second;
-
- if (!is_valid_prefix(prefix))
- continue;
-
// Fuse Gather to BCQGather
if (auto gather = dynamic_cast<luci::CircleGather *>(node))
{
if (auto params = dynamic_cast<luci::CircleConst *>(gather->params()))
{
+ auto prefix = get_prefix_of_const(params);
+ if (prefix == -1 || !is_valid_prefix(prefix))
+ continue;
+
auto bcq_gather = g->nodes()->create<luci::CircleBCQGather>();
bcq_gather->op_version(1);
- bcq_gather->input_scales(_alpha[prefix]);
- bcq_gather->input_binary(_packed_binary_code[prefix]);
+ bcq_gather->input_scales(alpha(g, prefix));
+ bcq_gather->input_binary(packed_binary_code(g, prefix));
bcq_gather->indices(gather->indices());
bcq_gather->input_clusters(packed_clusters(g, prefix));
}
}
- // Einsum is unpacked to FullyConnected, Pack and Reshape
- if (auto reshape = dynamic_cast<luci::CircleReshape *>(node))
- {
- node = dynamic_cast<luci::CircleNode *>(reshape->tensor());
- }
- if (auto pack = dynamic_cast<luci::CirclePack *>(node))
- {
- if (pack->values_count() == 1 && pack->rank() == 3)
- {
- node = dynamic_cast<luci::CircleNode *>(pack->values(0));
- }
- }
-
// Fuse FullyConnected to BCQFullyConnected
if (auto fully_connected = dynamic_cast<luci::CircleFullyConnected *>(node))
{
if (auto weights = dynamic_cast<luci::CircleConst *>(fully_connected->weights()))
{
+ auto prefix = get_prefix_of_const(weights);
+ if (prefix == -1 || !is_valid_prefix(prefix))
+ continue;
+
auto bcq_fc = g->nodes()->create<luci::CircleBCQFullyConnected>();
bcq_fc->op_version(1);
- bcq_fc->weights_scales(_alpha[prefix]);
- bcq_fc->weights_binary(_packed_binary_code[prefix]);
+ bcq_fc->weights_scales(alpha(g, prefix));
+ bcq_fc->weights_binary(packed_binary_code(g, prefix));
bcq_fc->bias(fully_connected->bias());
bcq_fc->weights_clusters(packed_clusters(g, prefix));
bcq_fc->fusedActivationFunction(fully_connected->fusedActivationFunction());
}
// If x_w formation, we should insert Transpose in front and back of BCQFullyConnected
- if (_do_w_x[prefix]->at<loco::DataType::BOOL>(0))
- {
- bcq_fc->weights_hidden_size(weights->dim(0).value());
- bcq_fc->input(bcq_input);
- loco::replace(fully_connected).with(bcq_fc);
- }
- else
- {
- bcq_fc->weights_hidden_size(weights->dim(1).value());
+ bcq_fc->weights_hidden_size(weights->dim(1).value());
- auto perm = g->nodes()->create<luci::CircleConst>();
- perm->dtype(loco::DataType::S32);
- perm->size<loco::DataType::S32>(2);
- perm->rank(1);
- perm->dim(0) = 2;
- perm->at<loco::DataType::S32>(0) = 1;
- perm->at<loco::DataType::S32>(1) = 0;
- perm->shape_status(luci::ShapeStatus::VALID);
+ auto perm = g->nodes()->create<luci::CircleConst>();
+ perm->dtype(loco::DataType::S32);
+ perm->size<loco::DataType::S32>(2);
+ perm->rank(1);
+ perm->dim(0) = 2;
+ perm->at<loco::DataType::S32>(0) = 1;
+ perm->at<loco::DataType::S32>(1) = 0;
+ perm->shape_status(luci::ShapeStatus::VALID);
- auto input_transpose = g->nodes()->create<luci::CircleTranspose>();
- input_transpose->a(bcq_input);
- input_transpose->perm(perm);
+ auto input_transpose = g->nodes()->create<luci::CircleTranspose>();
+ input_transpose->a(bcq_input);
+ input_transpose->perm(perm);
- bcq_fc->input(input_transpose);
+ bcq_fc->input(input_transpose);
- auto output_transpose = g->nodes()->create<luci::CircleTranspose>();
- output_transpose->a(bcq_fc);
- output_transpose->perm(perm);
+ auto output_transpose = g->nodes()->create<luci::CircleTranspose>();
+ output_transpose->a(bcq_fc);
+ output_transpose->perm(perm);
- loco::replace(fully_connected).with(output_transpose);
- }
+ loco::replace(fully_connected).with(output_transpose);
return true;
}
- else
+ else if (auto weights_as_input =
+ dynamic_cast<luci::CircleConst *>(fully_connected->input()))
{
- // TODO Is there any case that input() is constant, instead of weights()?
+ auto prefix = get_prefix_of_const(weights_as_input);
+ if (prefix == -1 || !is_valid_prefix(prefix))
+ continue;
+
+ assert(_do_w_x[prefix]->at<loco::DataType::BOOL>(0) == true);
+
+ auto perm = g->nodes()->create<luci::CircleConst>();
+ perm->dtype(loco::DataType::S32);
+ perm->size<loco::DataType::S32>(2);
+ perm->rank(1);
+ perm->dim(0) = 2;
+ perm->at<loco::DataType::S32>(0) = 1;
+ perm->at<loco::DataType::S32>(1) = 0;
+ perm->shape_status(luci::ShapeStatus::VALID);
+
+ auto input_transpose = g->nodes()->create<luci::CircleTranspose>();
+ input_transpose->a(fully_connected->weights());
+ input_transpose->perm(perm);
+
+ auto bcq_fc = g->nodes()->create<luci::CircleBCQFullyConnected>();
+
+ assert(dynamic_cast<luci::CircleOutputExclude *>(fully_connected->bias()) != nullptr);
+
+ bcq_fc->op_version(1);
+ bcq_fc->weights_scales(alpha(g, prefix));
+ bcq_fc->weights_binary(packed_binary_code(g, prefix));
+ bcq_fc->bias(fully_connected->bias());
+ bcq_fc->weights_clusters(packed_clusters(g, prefix));
+ bcq_fc->fusedActivationFunction(fully_connected->fusedActivationFunction());
+
+ bcq_fc->weights_hidden_size(weights_as_input->dim(1).value());
+ bcq_fc->input(input_transpose);
+ loco::replace(fully_connected).with(bcq_fc);
+
+ return true;
}
}
}
_dequant_weight[prefix] = const_node;
}
+ int32_t get_prefix_of_const(luci::CircleConst *w_after)
+ {
+ for (auto n : _fusable_op)
+ {
+ auto prefix = n.first;
+ auto w_before = loco::must_cast<luci::CircleConst *>(n.second);
+ if (is_fusable_const(w_before, w_after, _do_w_x[prefix]->at<loco::DataType::BOOL>(0)))
+ return prefix;
+ }
+
+ return -1;
+ }
+
bool is_bcqinfo_valid()
{
LOGGER(l);
}
}
+ for (auto n : _fusable_op)
+ {
+ // fusable_op should be FLOAT32 type
+ if (n.second->dtype() != loco::DataType::FLOAT32)
+ {
+ WARN(l) << "FuseBCQPass : fusable_op has wrong type" << std::endl;
+ return false;
+ }
+ }
+
// As dequant_weight is not used for fusing, skip validation.
return true;
return false;
}
+ if (_fusable_op.find(prefix) == _fusable_op.end())
+ {
+ WARN(l) << "fusable_op is not found" << std::endl;
+ return false;
+ }
+
// As dequant_weight is not used for fusing, skip validation.
return true;
}
private:
+ luci::CircleConst *alpha(loco::Graph *graph, int32_t prefix)
+ {
+ auto new_alpha = graph->nodes()->create<luci::CircleConst>();
+
+ new_alpha->dtype(loco::DataType::FLOAT32);
+ new_alpha->size<loco::DataType::FLOAT32>(_alpha[prefix]->size<loco::DataType::FLOAT32>());
+ new_alpha->rank(1);
+ new_alpha->dim(0) = _alpha[prefix]->dim(0);
+ for (uint32_t i = 0; i < _alpha[prefix]->size<loco::DataType::FLOAT32>(); ++i)
+ new_alpha->at<loco::DataType::FLOAT32>(i) = _alpha[prefix]->at<loco::DataType::FLOAT32>(i);
+ new_alpha->shape_status(luci::ShapeStatus::VALID);
+
+ return new_alpha;
+ }
+
+ luci::CircleConst *packed_binary_code(loco::Graph *graph, int32_t prefix)
+ {
+ auto new_beta = graph->nodes()->create<luci::CircleConst>();
+
+ new_beta->dtype(loco::DataType::S32);
+ new_beta->size<loco::DataType::S32>(_packed_binary_code[prefix]->size<loco::DataType::S32>());
+ new_beta->rank(2);
+ new_beta->dim(0) = _packed_binary_code[prefix]->dim(0);
+ new_beta->dim(1) = _packed_binary_code[prefix]->dim(1);
+ for (uint32_t i = 0; i < _packed_binary_code[prefix]->size<loco::DataType::S32>(); ++i)
+ new_beta->at<loco::DataType::S32>(i) =
+ _packed_binary_code[prefix]->at<loco::DataType::S32>(i);
+ new_beta->shape_status(luci::ShapeStatus::VALID);
+
+ return new_beta;
+ }
+
luci::CircleConst *packed_clusters(loco::Graph *graph, int32_t prefix)
{
auto qbits_of_clusters = _qbits_of_clusters[prefix];
namespace luci
{
-bool FuseBCQPass::run(loco::Graph *g)
+bool FuseBCQPass::run(luci::Module *m)
{
bool changed = false;
const int32_t start_magicnum = -2e9 + 27;
const int32_t end_magicnum = 2e9 - 27;
+ loco::Graph *main_graph = m->graph(0);
+
luci::CircleConst *metadata_node = nullptr;
- for (auto node : loco::output_nodes(g))
+ for (auto node : loco::output_nodes(main_graph))
{
auto output_node = loco::must_cast<luci::CircleOutput *>(node);
const auto bundle_cnt = metadata_node->at<loco::DataType::S32>(3);
BCQFuser<1> fuser{original_output_cnt, bundle_cnt};
- if (fuser.fuseBCQ(g))
- changed = true;
+ fuser.register_bcq_info(main_graph);
+
+ for (size_t g = 0; g < m->size(); ++g)
+ if (fuser.fuseBCQ(m->graph(g)))
+ changed = true;
}
else
{
// Remove all of BCQ information nodes iff there is no change
if (changed == false)
{
- for (auto node : loco::output_nodes(g))
+ for (auto node : loco::output_nodes(main_graph))
{
auto output_node = loco::must_cast<luci::CircleOutput *>(node);
if (output_node->index() == 0 || (int)output_node->index() > original_output_cnt)
{
- auto noOp = g->nodes()->create<luci::CircleOutputExclude>();
+ auto noOp = main_graph->nodes()->create<luci::CircleOutputExclude>();
noOp->dtype(loco::DataType::FLOAT32); // TODO Remove this setting
output_node->from(noOp);
changed = true;
return changed;
}
+bool FuseBCQPass::run(loco::Graph *)
+{
+ // Do nothing for graph
+ return false;
+}
+
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/MigrateLegacyShapeDtypePass.h"
+
+#include <loco/Service/ShapeInference.h>
+#include <loco/Service/TypeInference.h>
+
+#include <luci/IR/CircleNodes.h>
+
+#include <loco.h>
+
+namespace
+{
+
+bool has_same_shape(luci::CircleNode *node, loco::TensorShape shape)
+{
+ if (node->rank() != shape.rank())
+ return false;
+
+ for (uint32_t i = 0; i < shape.rank(); ++i)
+ if (!(node->dim(i) == shape.dim(i)))
+ return false;
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool MigrateLegacyShapeDtypePass::run(luci::Module *m)
+{
+ bool changed = false;
+
+ for (size_t g = 0; g < m->size(); ++g)
+ {
+ if (run(m->graph(g)))
+ changed = true;
+ }
+
+ return changed;
+}
+
+bool MigrateLegacyShapeDtypePass::run(loco::Graph *g)
+{
+ bool changed = false;
+
+ for (auto node : loco::all_nodes(g))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (loco::shape_known(node))
+ {
+ auto loco_shape = loco::shape_get(node).as<loco::TensorShape>();
+
+ assert(circle_node->shape_signature().rank() == 0 ||
+ circle_node->shape_signature().rank() == loco_shape.rank());
+
+ // When the loco shape is copied to the circle node, the ShapeSignature should be applied.
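+ // e.g. with shape_signature [-1, 4] and an inferred loco shape of [3, 4], the copied
+ // shape becomes [1, 4] (an unknown (-1) dimension is stored as 1).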
+ loco::TensorShape new_shape;
+ new_shape.rank(loco_shape.rank());
+ for (uint32_t i = 0; i < loco_shape.rank(); ++i)
+ {
+ if (circle_node->shape_signature().rank() > 0 &&
+ circle_node->shape_signature().dim(i) == -1)
+ new_shape.dim(i) = 1;
+ else
+ new_shape.dim(i) = loco_shape.dim(i);
+ }
+
+ if (circle_node->shape_status() == luci::ShapeStatus::UNDEFINED ||
+ !has_same_shape(circle_node, new_shape))
+ {
+ circle_node->rank(new_shape.rank());
+ for (uint32_t i = 0; i < new_shape.rank(); ++i)
+ circle_node->dim(i) = new_shape.dim(i);
+
+ if (circle_node->shape_status() == luci::ShapeStatus::UNDEFINED)
+ circle_node->shape_status(luci::ShapeStatus::VALID);
+
+ changed = true;
+ }
+ }
+
+ if (loco::dtype_known(node))
+ {
+ if (loco::dtype_get(node) != circle_node->dtype())
+ {
+ circle_node->dtype(loco::dtype_get(node));
+ changed = true;
+ }
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModulePhase.h"
+
+namespace luci
+{
+
+void PhaseRunner<logo::PhaseStrategy::Saturate>::run(const Phase &phase) const
+{
+ notifyPhaseBegin();
+
+ for (bool changed = true; changed;)
+ {
+ changed = false;
+
+ for (auto &pass : phase)
+ {
+ notifyPassBegin(pass.get());
+
+ bool pass_changed = pass->run(_module);
+ changed = changed || pass_changed;
+
+ notifyPassEnd(pass.get(), pass_changed);
+ }
+ }
+
+ notifyPhaseEnd();
+}
+
+void PhaseRunner<logo::PhaseStrategy::Restart>::run(const Phase &phase) const
+{
+ notifyPhaseBegin();
+
+ for (bool changed = true; changed;)
+ {
+ changed = false;
+
+ for (auto &pass : phase)
+ {
+ notifyPassBegin(pass.get());
+
+ bool pass_changed = pass->run(_module);
+ changed = changed || pass_changed;
+
+ notifyPassEnd(pass.get(), pass_changed);
+
+ if (changed)
+ {
+ break;
+ }
+ }
+ }
+
+ notifyPhaseEnd();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MODULE_PHASE_H__
+#define __MODULE_PHASE_H__
+
+#include <luci/ModulePass.h>
+
+#include <logo/Phase.h>
+
+#include <vector>
+
+namespace luci
+{
+
+using Phase = std::vector<std::unique_ptr<Pass>>;
+
+template <logo::PhaseStrategy S> class PhaseRunner;
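+
+// Note on the two strategies, as implemented in ModulePhase.cpp:
+//  - Saturate: keep sweeping over all passes until a full sweep changes nothing.
+//  - Restart : restart the phase from the first pass whenever any pass reports a change.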
+
+template <>
+class PhaseRunner<logo::PhaseStrategy::Saturate> final : public logo::PhaseRunnerMixinObservable
+{
+public:
+ PhaseRunner(luci::Module *module) : _module{module}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void run(const Phase &) const;
+
+private:
+ luci::Module *_module;
+};
+
+template <>
+class PhaseRunner<logo::PhaseStrategy::Restart> final : public logo::PhaseRunnerMixinObservable
+{
+public:
+ PhaseRunner(luci::Module *module) : _module{module}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void run(const Phase &) const;
+
+private:
+ luci::Module *_module;
+};
+
+} // namespace luci
+
+#endif // __MODULE_PHASE_H__
INFO(prime) << luci::fmt(graph());
}
+void ModuleProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseBegin> *)
+{
+ LOGGER(prime);
+
+ INFO(prime) << "==============================================================";
+ INFO(prime) << "ModulePhaseRunner<" << to_str(strategy()) << ">";
+ INFO(prime) << "Initial graphs";
+ for (size_t g = 0; g < module()->size(); ++g)
+ {
+ INFO(prime) << "graphs #" << g;
+ INFO(prime) << luci::fmt(module()->graph(g));
+ }
+}
+
+void ModuleProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseEnd> *)
+{
+ LOGGER(prime);
+
+ INFO(prime) << "ModulePhaseRunner<" << to_str(strategy()) << "> - done";
+}
+
+void ModuleProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassBegin> *info)
+{
+ LOGGER(prime);
+
+ INFO(prime) << "--------------------------------------------------------------";
+ INFO(prime) << "Before " << logo::pass_name(info->pass());
+}
+
+void ModuleProgressReporter::notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassEnd> *info)
+{
+ LOGGER(prime);
+
+ INFO(prime) << "After " << logo::pass_name(info->pass())
+ << " (changed: " << to_char(info->changed()) << ")";
+ for (size_t g = 0; g < module()->size(); ++g)
+ {
+ INFO(prime) << "graphs #" << g;
+ INFO(prime) << luci::fmt(module()->graph(g));
+ }
+}
+
} // namespace luci
#include <loco.h>
+#include <luci/IR/Module.h>
+
namespace luci
{
logo::PhaseStrategy _strategy;
};
+class ModuleProgressReporter : public logo::PhaseEventListener
+{
+public:
+ ModuleProgressReporter(luci::Module *module, logo::PhaseStrategy strategy)
+ : _module{module}, _strategy{strategy}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseBegin> *) override;
+ void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PhaseEnd> *) override;
+ void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassBegin> *) override;
+ void notify(const logo::PhaseEventInfo<logo::PhaseEvent::PassEnd> *) override;
+
+public:
+ luci::Module *module(void) const { return _module; }
+ logo::PhaseStrategy strategy(void) const { return _strategy; }
+
+private:
+ luci::Module *_module;
+ logo::PhaseStrategy _strategy;
+};
+
} // namespace luci
#endif // __LUCI_PROGRESSREPORTER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/PropagateQuantParamPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Log.h>
+
+#include <iostream>
+
+namespace
+{
+
+bool copy_qparam(luci::CircleQuantParam *src, luci::CircleQuantParam *dst)
+{
+ assert(src->scale.size() == dst->scale.size());
+ assert(src->zerop.size() == dst->zerop.size());
+
+ // src and dst have the same qparam
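+ // (returning false here reports "no change", so repeatedly running the pass,
+ // e.g. while (pass.run(g)), terminates once the qparams already match)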
+ if (std::equal(src->scale.begin(), src->scale.end(), dst->scale.begin()) &&
+ std::equal(src->zerop.begin(), src->zerop.end(), dst->zerop.begin()) &&
+ src->quantized_dimension == dst->quantized_dimension)
+ return false;
+
+ dst->scale.assign(src->scale.begin(), src->scale.end());
+ dst->zerop.assign(src->zerop.begin(), src->zerop.end());
+ dst->quantized_dimension = src->quantized_dimension;
+ return true;
+}
+
+bool copy_qparam(luci::CircleNode *src, luci::CircleNode *dst)
+{
+ // Skip nodes that do not have quantparams
+ auto src_qparam = src->quantparam();
+ if (not src_qparam)
+ return false;
+
+ auto dst_qparam = dst->quantparam();
+ if (not dst_qparam)
+ return false;
+
+ return copy_qparam(src_qparam, dst_qparam);
+}
+
+// Visitor to propagate quantization parameters
+struct PropagateQuantParam final : public luci::CircleNodeMutableVisitor<bool>
+{
+ PropagateQuantParam() = default;
+
+ bool visit(luci::CircleNode *) { return false; }
+
+ bool visit(luci::CircleReshape *node)
+ {
+ auto input = node->tensor();
+ if (loco::succs(input).size() != 1)
+ return false;
+
+ auto input_node = loco::must_cast<luci::CircleNode *>(input);
+ return copy_qparam(node, input_node);
+ }
+
+ // TODO : Add more Ops (e.g., Transpose)
+};
+
+} // namespace
+
+namespace luci
+{
+
+bool PropagateQuantParamPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ LOGGER(l);
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ INFO(l) << "PropagateQuantParamPass visit node: " << circle_node->name() << std::endl;
+
+ PropagateQuantParam pqp;
+ changed = circle_node->accept(&pqp);
+ if (changed)
+ break;
+ }
+
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/PropagateQuantParamPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+void addQuantParam(luci::CircleNode *node, const std::vector<float> &scale,
+ const std::vector<int64_t> &zp)
+{
+ assert(node->quantparam() == nullptr);
+
+ auto quantparam = std::make_unique<luci::CircleQuantParam>();
+ quantparam->scale = scale;
+ quantparam->zerop = zp;
+ node->quantparam(std::move(quantparam));
+}
+
+/**
+ * Simple graph for test
+ *
+ * BEFORE
+ *
+ * [Conv] (qparam 1)
+ * |
+ * [Reshape] (qparam 2)
+ *
+ * AFTER
+ *
+ * [Conv] (qparam 2)
+ * |
+ * [Reshape] (qparam 2)
+ *
+ */
+class SimpleGraph
+{
+public:
+ SimpleGraph()
+ {
+ input = g.nodes()->create<luci::CircleInput>();
+ conv = g.nodes()->create<luci::CircleConv2D>();
+ reshape = g.nodes()->create<luci::CircleReshape>();
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ addQuantParam(conv, {0.1, 0.2, 0.3}, {0, 10, 20});
+ addQuantParam(reshape, {0.2, 0.4, 0.6}, {-10, 0, 10});
+
+ conv->input(input);
+ reshape->tensor(conv);
+ output->from(reshape);
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input;
+ luci::CircleConv2D *conv;
+ luci::CircleReshape *reshape;
+ luci::CircleOutput *output;
+};
+
+} // namespace
+
+TEST(PropagateQuantParam, simple)
+{
+ SimpleGraph g;
+
+ luci::PropagateQuantParamPass pass;
+ while (pass.run(&g.g))
+ ;
+
+ EXPECT_FLOAT_EQ(0.2, g.conv->quantparam()->scale[0]);
+ EXPECT_FLOAT_EQ(0.4, g.conv->quantparam()->scale[1]);
+ EXPECT_FLOAT_EQ(0.6, g.conv->quantparam()->scale[2]);
+ EXPECT_EQ(-10, g.conv->quantparam()->zerop[0]);
+ EXPECT_EQ(0, g.conv->quantparam()->zerop[1]);
+ EXPECT_EQ(10, g.conv->quantparam()->zerop[2]);
+}
+
+TEST(PropagateQuantParam, wrong_op_NEG)
+{
+ SimpleGraph g;
+ g.output->from(g.conv);
+ g.reshape->drop();
+
+ luci::PropagateQuantParamPass pass;
+ while (pass.run(&g.g))
+ ;
+
+ EXPECT_FLOAT_EQ(0.1, g.conv->quantparam()->scale[0]);
+ EXPECT_FLOAT_EQ(0.2, g.conv->quantparam()->scale[1]);
+ EXPECT_FLOAT_EQ(0.3, g.conv->quantparam()->scale[2]);
+ EXPECT_EQ(0, g.conv->quantparam()->zerop[0]);
+ EXPECT_EQ(10, g.conv->quantparam()->zerop[1]);
+ EXPECT_EQ(20, g.conv->quantparam()->zerop[2]);
+}
}
}
+// Quantize const per channel
+//
+// The last dimension of the const is the channel dimension,
+// and all other dimensions of the const must be 1,
+// so a single value is quantized per channel.
+//
+// Quantization spec (f: fp value, q: quantized value)
+//
+// uint8
+// Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0]
+// Negative f: f = (-f) * (q - 1) [q = 0, scale = -f, zp = 1]
+//
+// int16
+// Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0]
+// Negative f: f = (-f) * (q - 0) [q = -1, scale = -f, zp = 0]
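+//
+// A worked example of the spec above (illustration only), for f = -0.7:
+//   uint8: scale = 0.7, zp = 1, q = 0  -->  0.7 * (0 - 1) = -0.7
+//   int16: scale = 0.7, zp = 0, q = -1 -->  0.7 * (-1 - 0) = -0.7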
+void quant_const_per_channel(CircleConst *node, loco::DataType quant_type)
+{
+ assert(node->dtype() == loco::DataType::FLOAT32);
+ assert(node->rank() > 0);
+
+ for (uint32_t i = 0; i < node->rank() - 1; i++)
+ {
+ // The caller must ensure the condition below is satisfied before calling this function
+ if (node->dim(i).value() != 1)
+ throw std::runtime_error("Non-channel dimension of const node must be 1");
+ }
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ assert(size == node->dim(node->rank() - 1).value());
+
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->quantized_dimension = node->rank() - 1;
+ std::vector<int32_t> quantized_data(size);
+
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ auto data = node->at<loco::DataType::FLOAT32>(i);
+ if (quant_type == loco::DataType::U8)
+ {
+ if (data >= 0)
+ {
+ quantparam->scale.push_back(data);
+ quantparam->zerop.push_back(0);
+ quantized_data[i] = 1;
+ }
+ else
+ {
+ quantparam->scale.push_back(-data);
+ quantparam->zerop.push_back(1);
+ quantized_data[i] = 0;
+ }
+ }
+ else if (quant_type == loco::DataType::S16)
+ {
+ if (data >= 0)
+ {
+ quantparam->scale.push_back(data);
+ quantized_data[i] = 1;
+ }
+ else
+ {
+ quantparam->scale.push_back(-data);
+ quantized_data[i] = -1;
+ }
+ quantparam->zerop.push_back(0);
+ }
+ }
+ node->quantparam(std::move(quantparam));
+
+ switch (quant_type)
+ {
+ case loco::DataType::U8:
+ node->dtype(loco::DataType::U8);
+ node->size<loco::DataType::U8>(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ assert(quantized_data[i] == 0 || quantized_data[i] == 1);
+ node->at<loco::DataType::U8>(i) = quantized_data[i];
+ }
+ break;
+ case loco::DataType::S16:
+ node->dtype(loco::DataType::S16);
+ node->size<loco::DataType::S16>(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ assert(quantized_data[i] == -1 || quantized_data[i] == 1);
+ node->at<loco::DataType::S16>(i) = quantized_data[i];
+ }
+ break;
+ default:
+ throw std::runtime_error("Unsupported data type");
+ }
+}
+
void quant_const(CircleConst *node, loco::DataType quant_type)
{
assert(node->dtype() == loco::DataType::FLOAT32);
}
};
+void quant_instnorm(luci::CircleInstanceNorm *node, loco::DataType output_type,
+ QuantizationGranularity granularity)
+{
+ auto gamma = loco::must_cast<luci::CircleConst *>(node->gamma());
+ auto beta = loco::must_cast<luci::CircleConst *>(node->beta());
+ assert(gamma->dtype() == loco::DataType::FLOAT32);
+ assert(beta->dtype() == loco::DataType::FLOAT32);
+
+ if (granularity == QuantizationGranularity::LayerWise)
+ {
+ quant_const(gamma, output_type);
+ quant_const(beta, output_type);
+ }
+ else if (granularity == QuantizationGranularity::ChannelWise)
+ {
+ quant_const_per_channel(gamma, output_type);
+ quant_const_per_channel(beta, output_type);
+ }
+ else
+ throw std::runtime_error("Quantization granularity must be either 'layer' or 'channel'");
+}
+
+void quant_prelu(luci::CirclePRelu *node, loco::DataType output_type,
+ QuantizationGranularity granularity)
+{
+ auto alpha = loco::must_cast<luci::CircleConst *>(node->alpha());
+ assert(alpha->dtype() == loco::DataType::FLOAT32);
+
+ if (granularity == QuantizationGranularity::LayerWise)
+ {
+ quant_const(alpha, output_type);
+ }
+ else if (granularity == QuantizationGranularity::ChannelWise)
+ {
+ quant_const_per_channel(alpha, output_type);
+ }
+ else
+ throw std::runtime_error("Quantization granularity must be either 'layer' or 'channel'");
+}
+
/**
* @brief Quantize const input tensors using min/max of const values
*/
-void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type)
+void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type,
+ QuantizationGranularity granularity)
{
auto opcode = node->opcode();
auto arity = node->arity();
quant_const(const_node, output_type);
break;
+ case luci::CircleOpcode::INSTANCE_NORM:
+ quant_instnorm(loco::must_cast<luci::CircleInstanceNorm *>(node), output_type, granularity);
+ break;
+
+ case luci::CircleOpcode::PRELU:
+ quant_prelu(loco::must_cast<luci::CirclePRelu *>(node), output_type, granularity);
+ break;
+
case luci::CircleOpcode::ADD:
case luci::CircleOpcode::ADD_N:
case luci::CircleOpcode::DIV:
case luci::CircleOpcode::EQUAL:
case luci::CircleOpcode::GREATER:
case luci::CircleOpcode::GREATER_EQUAL:
- case luci::CircleOpcode::INSTANCE_NORM:
case luci::CircleOpcode::LESS:
case luci::CircleOpcode::LESS_EQUAL:
case luci::CircleOpcode::MAXIMUM:
case luci::CircleOpcode::MINIMUM:
case luci::CircleOpcode::MUL:
case luci::CircleOpcode::NOT_EQUAL:
- case luci::CircleOpcode::PRELU:
case luci::CircleOpcode::SUB:
// Quantize all const inputs using their values
for (uint32_t i = 0; i < arity; i++)
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- quantize_const_inputs(circle_node, _output_dtype);
+ quantize_const_inputs(circle_node, _output_dtype, _granularity);
}
// Propagate quantization parameters of concat Op
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveRedundantTransposePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+/// @brief Return true if first_perm[second_perm[i]] == i
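+/// e.g. first_perm = [0, 2, 1] and second_perm = [0, 2, 1] satisfy this, since the two
+/// permutations are inverses of each other and the pair of Transposes is an identity.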
+bool check_perm(const luci::CircleConst *first_perm, const luci::CircleConst *second_perm)
+{
+ assert(first_perm->rank() == 1);
+ assert(second_perm->rank() == 1);
+ assert(second_perm->size<loco::DataType::S32>() == first_perm->size<loco::DataType::S32>());
+ for (int32_t i = 0; i < static_cast<int32_t>(first_perm->size<loco::DataType::S32>()); i++)
+ {
+ if (first_perm->at<loco::DataType::S32>(second_perm->at<loco::DataType::S32>(i)) != i)
+ return false;
+ }
+ return true;
+}
+
+bool remove_consecutive_transpose_function(luci::CircleNode *node)
+{
+ auto target_node = dynamic_cast<luci::CircleTranspose *>(node);
+ if (target_node == nullptr)
+ return false;
+ auto pred_node = dynamic_cast<luci::CircleTranspose *>(target_node->a());
+ if (pred_node == nullptr)
+ return false;
+ if (loco::succs(pred_node).size() != 1)
+ return false;
+
+ auto pred_perm = dynamic_cast<luci::CircleConst *>(target_node->perm());
+ if (pred_perm == nullptr)
+ return false;
+
+ auto main_perm = dynamic_cast<luci::CircleConst *>(pred_node->perm());
+ if (main_perm == nullptr)
+ return false;
+
+ auto main_node = loco::must_cast<luci::CircleNode *>(pred_node->a());
+ if (check_perm(pred_perm, main_perm))
+ {
+ replace(node).with(main_node);
+ }
+ else
+ {
+ auto g = main_perm->graph();
+ auto new_const_node = g->nodes()->create<luci::CircleConst>();
+
+ new_const_node->dtype(loco::DataType::S32);
+ new_const_node->rank(1);
+ new_const_node->dim(0) = main_perm->dim(0);
+ new_const_node->size<loco::DataType::S32>(main_perm->dim(0).value());
+ new_const_node->shape_status(luci::ShapeStatus::VALID);
+ for (uint32_t i = 0; i < main_perm->size<loco::DataType::S32>(); i++)
+ {
+ new_const_node->at<loco::DataType::S32>(i) =
+ pred_perm->at<loco::DataType::S32>(main_perm->at<loco::DataType::S32>(i));
+ }
+ pred_node->perm(new_const_node);
+ replace(node).with(pred_node);
+ }
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+/**
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst]
+ * (main_node) (main_perm)
+ * \ /
+ * [CircleTranspose] [CircleConst]
+ * (pred_node) (pred_perm)
+ * \ /
+ * [CircleTranspose]
+ * (target_node)
+ * |
+ *
+ * AFTER
+ * <Optional Case>
+ *
+ * | | |
+ * [CircleNode] [CircleConst] |
+ * (main_node) (new_const_node) |
+ * \ / or [CircleNode]
+ * [CircleTranspose] (main_node)
+ * (pred_node) |
+ * | |
+ *
+ */
+bool RemoveRedundantTransposePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (remove_consecutive_transpose_function(circle_node))
+ {
+ changed = true;
+ break;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
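
For reference, the permutation check above amounts to verifying that composing the two perms yields the identity. A minimal standalone sketch over plain std::vector<int32_t> values (is_identity_composition is a hypothetical name for illustration, not part of luci):

#include <cstdint>
#include <vector>

// Two consecutive Transpose ops cancel out when composing their perms gives
// the identity permutation, i.e. first[second[i]] == i for every i.
// Mirrors check_perm() above, but over plain vectors.
bool is_identity_composition(const std::vector<int32_t> &first, const std::vector<int32_t> &second)
{
  if (first.size() != second.size())
    return false;
  for (size_t i = 0; i < first.size(); ++i)
  {
    if (first[second[i]] != static_cast<int32_t>(i))
      return false;
  }
  return true;
}

// Example: {1, 0, 2, 3} followed by {1, 0, 2, 3} composes to the identity,
// so the pass drops both Transpose nodes (the Type1 case in the test below).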
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/RemoveRedundantTransposePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <vector>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+void setValue(luci::CircleConst *node, const std::vector<int> &v)
+{
+ node->dtype(loco::DataType::S32);
+ node->size<loco::DataType::S32>(v.size());
+ node->rank(1);
+ node->dim(0).set(v.size());
+  for (uint32_t i = 0; i < v.size(); ++i)
+ {
+ node->at<loco::DataType::S32>(i) = v[i];
+ }
+}
+
+/**
+ * Type1
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleTranspose] [CircleConst]
+ * \ /
+ * [CircleTranspose]
+ * |
+ *
+ * AFTER
+ * |
+ * [CircleNode]
+ * | Remove Both
+ *
+ * --------------------------------------------
+ *
+ * Type2
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleTranspose] [CircleConst]
+ * \ /
+ * [CircleTranspose]
+ * |
+ *
+ * AFTER
+ * | |
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleTranspose]
+ * |
+ *
+ */
+void create_redundant_transpose(loco::Graph *g, const std::vector<int32_t> &perm1,
+ const std::vector<int32_t> &perm2)
+{
+ assert(g);
+
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto graph_input = g->inputs()->create();
+ input->index(graph_input->index());
+
+ // Create perm1
+ auto perm1_node = g->nodes()->create<luci::CircleConst>();
+ setValue(perm1_node, perm1);
+
+ auto transpose1 = g->nodes()->create<luci::CircleTranspose>();
+ transpose1->dtype(loco::DataType::FLOAT32);
+ transpose1->a(input);
+ transpose1->perm(perm1_node);
+
+ // Create perm2
+ auto perm2_node = g->nodes()->create<luci::CircleConst>();
+ setValue(perm2_node, perm2);
+
+ auto transpose2 = g->nodes()->create<luci::CircleTranspose>();
+ transpose2->dtype(loco::DataType::FLOAT32);
+ transpose2->a(transpose1);
+ transpose2->perm(perm2_node);
+
+ // Output
+ auto output = g->nodes()->create<luci::CircleOutput>();
+ output->from(transpose2);
+ auto graph_output = g->outputs()->create();
+ output->index(graph_output->index());
+}
+
+} // namespace
+
+TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type1)
+{
+ auto graph = loco::make_graph();
+  create_redundant_transpose(graph.get(), {1, 0, 2, 3}, {1, 0, 2, 3});
+
+ luci::RemoveRedundantTransposePass pass;
+ while (pass.run(graph.get()))
+ ;
+ luci::CircleTranspose *transpose_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ auto trans = dynamic_cast<luci::CircleTranspose *>(node);
+ if (not trans)
+ continue;
+ transpose_node = trans;
+ break;
+ }
+ // No transpose node is in graph.
+ ASSERT_EQ(nullptr, transpose_node);
+}
+
+TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type2)
+{
+ auto graph = loco::make_graph();
+  create_redundant_transpose(graph.get(), {0, 1, 3, 2}, {1, 0, 2, 3});
+
+ luci::RemoveRedundantTransposePass pass;
+ while (pass.run(graph.get()))
+ ;
+ luci::CircleTranspose *transpose_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ auto trans = dynamic_cast<luci::CircleTranspose *>(node);
+ if (not trans)
+ continue;
+ transpose_node = trans;
+ break;
+ }
+ // Just one transpose node, with updated perm constant.
+ ASSERT_NE(nullptr, transpose_node);
+ auto perm = loco::must_cast<luci::CircleConst *>(transpose_node->perm());
+ ASSERT_EQ(1, perm->at<loco::DataType::S32>(0));
+ ASSERT_EQ(0, perm->at<loco::DataType::S32>(1));
+ ASSERT_EQ(3, perm->at<loco::DataType::S32>(2));
+ ASSERT_EQ(2, perm->at<loco::DataType::S32>(3));
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+luci::CircleConst *create_weights_from_gamma(luci::CircleConst *gamma)
+{
+ assert(gamma->rank() == 1);
+ auto channel_size = gamma->dim(0).value();
+
+ // Channel-wise MUL is the same as DEPTHWISE_CONV2D with filter shape (1,1,1,channel_size)
+ auto weights = gamma->graph()->nodes()->create<luci::CircleConst>();
+ weights->dtype(loco::DataType::FLOAT32);
+ weights->rank(4);
+ weights->dim(0).set(1);
+ weights->dim(1).set(1);
+ weights->dim(2).set(1);
+ weights->dim(3).set(channel_size);
+ weights->shape_status(luci::ShapeStatus::VALID);
+ weights->size<loco::DataType::FLOAT32>(channel_size);
+ for (uint32_t i = 0; i < channel_size; i++)
+ {
+ weights->at<loco::DataType::FLOAT32>(i) = gamma->at<loco::DataType::FLOAT32>(i);
+ }
+
+ return weights;
+}
+
+luci::CircleConst *create_bias_from_beta(luci::CircleConst *beta)
+{
+ assert(beta->rank() == 1);
+ auto channel_size = beta->dim(0).value();
+
+ // Channel-wise ADD is the same as bias (shape = (channel_size)) of DEPTHWISE_CONV2D
+ auto bias = beta->graph()->nodes()->create<luci::CircleConst>();
+ bias->dtype(loco::DataType::FLOAT32);
+ bias->rank(1);
+ bias->dim(0).set(channel_size);
+ bias->size<loco::DataType::FLOAT32>(channel_size);
+ bias->shape_status(luci::ShapeStatus::VALID);
+ for (uint32_t i = 0; i < channel_size; i++)
+ {
+ bias->at<loco::DataType::FLOAT32>(i) = beta->at<loco::DataType::FLOAT32>(i);
+ }
+
+ return bias;
+}
+
+bool is_batchnorm_add(const luci::CircleAdd *add, luci::CircleMul *&mul, luci::CircleConst *&beta)
+{
+ auto x = loco::must_cast<luci::CircleNode *>(add->x());
+ auto y = loco::must_cast<luci::CircleNode *>(add->y());
+
+ luci::CircleMul *pred = nullptr;
+ luci::CircleConst *constant = nullptr;
+
+ if (x->opcode() == luci::CircleOpcode::CIRCLECONST && y->opcode() == luci::CircleOpcode::MUL)
+ {
+ pred = loco::must_cast<luci::CircleMul *>(y);
+ constant = loco::must_cast<luci::CircleConst *>(x);
+ }
+ else if (x->opcode() == luci::CircleOpcode::MUL && y->opcode() == luci::CircleOpcode::CIRCLECONST)
+ {
+ pred = loco::must_cast<luci::CircleMul *>(x);
+ constant = loco::must_cast<luci::CircleConst *>(y);
+ }
+ else
+ {
+ return false;
+ }
+
+ if (constant->rank() != 1)
+ return false;
+
+ auto channel_dim = constant->dim(0);
+ // Assumption: Layout is channel-last
+ if (!(channel_dim == add->dim(add->rank() - 1)))
+ return false;
+
+ mul = pred;
+ beta = constant;
+ return true;
+}
+
+// Check if mul is batchnorm mul
+bool is_batchnorm_mul(const luci::CircleMul *mul, luci::CircleNode *&pred_node,
+ luci::CircleConst *&gamma)
+{
+ auto x = dynamic_cast<luci::CircleConst *>(mul->x());
+ auto y = dynamic_cast<luci::CircleConst *>(mul->y());
+
+ luci::CircleNode *pred = nullptr;
+ luci::CircleConst *constant = nullptr;
+
+ if (x != nullptr && y == nullptr)
+ {
+ pred = loco::must_cast<luci::CircleNode *>(mul->y());
+ constant = x;
+ }
+ else if (x == nullptr && y != nullptr)
+ {
+ pred = loco::must_cast<luci::CircleNode *>(mul->x());
+ constant = y;
+ }
+ else
+ {
+ return false;
+ }
+
+ if (constant->rank() != 1)
+ return false;
+
+ auto channel_dim = constant->dim(0);
+ if (!(channel_dim == mul->dim(mul->rank() - 1)))
+ return false;
+
+ pred_node = pred;
+ gamma = constant;
+ return true;
+}
+
+/**
+ * Replace channel-wise Mul/Add with DepthwiseConv2D
+ *
+ * BEFORE
+ *
+ * [Node] [gamma]
+ * | /
+ * [Mul] [beta]
+ * | /
+ * [Add]
+ *
+ * AFTER
+ *
+ * [Node] [weights] [bias]
+ * \ / /
+ * [DepthwiseConv2D]
+ */
+bool replace_mul_add_with_dwconv(luci::CircleAdd *add)
+{
+ luci::CircleNode *pred_node = nullptr;
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *beta = nullptr;
+ luci::CircleConst *gamma = nullptr;
+
+ if (!is_batchnorm_add(add, mul, beta))
+ return false;
+
+ if (loco::succs(mul).size() != 1)
+ return false;
+
+ if (!is_batchnorm_mul(mul, pred_node, gamma))
+ return false;
+
+ if (pred_node->rank() != 4)
+ return false;
+
+ if (pred_node->dtype() != loco::DataType::FLOAT32 || beta->dtype() != loco::DataType::FLOAT32 ||
+ gamma->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ auto weights = create_weights_from_gamma(gamma);
+ auto bias = create_bias_from_beta(beta);
+
+ auto dwconv = add->graph()->nodes()->create<luci::CircleDepthwiseConv2D>();
+ dwconv->input(pred_node);
+ dwconv->filter(weights);
+ dwconv->bias(bias);
+ dwconv->padding(luci::Padding::SAME);
+ dwconv->stride()->w(1);
+ dwconv->stride()->h(1);
+ dwconv->depthMultiplier(1);
+ dwconv->dilation()->w(1);
+ dwconv->dilation()->h(1);
+ dwconv->fusedActivationFunction(add->fusedActivationFunction());
+
+ loco::replace(add).with(dwconv);
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool ReplaceMulAddWithDepthwiseConvPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto add = dynamic_cast<luci::CircleAdd *>(node);
+ if (not add)
+ continue;
+
+ if (replace_mul_add_with_dwconv(add))
+ {
+ changed = true;
+ break;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
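
The rewrite is valid because a channel-wise Mul followed by a channel-wise Add computes y = x * gamma[c] + beta[c], which is exactly what a 1x1 DepthwiseConv2D with gamma as its filter and beta as its bias computes. A small reference sketch of that arithmetic, assuming channel-last layout (mul_add_reference is a hypothetical helper for illustration):

#include <cstdint>
#include <vector>

// Per-element arithmetic that makes the Mul/Add -> DepthwiseConv2D rewrite
// valid. With channel-last layout the channel index is the innermost one.
std::vector<float> mul_add_reference(const std::vector<float> &x,
                                     const std::vector<float> &gamma,
                                     const std::vector<float> &beta, uint32_t channels)
{
  std::vector<float> y(x.size());
  for (size_t i = 0; i < x.size(); ++i)
  {
    size_t c = i % channels; // channel index in a flattened NHWC buffer
    y[i] = x[i] * gamma[c] + beta[c];
  }
  return y;
}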
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Simple graph for test
+ *
+ * BEFORE
+ *
+ * [Node] [gamma]
+ * | /
+ * [Mul] [beta]
+ * | /
+ * [Add]
+ *
+ * AFTER
+ *
+ * [Node] [weights] [bias]
+ * \ / /
+ * [DepthwiseConv2D]
+ */
+class SimpleGraph
+{
+public:
+ SimpleGraph()
+ {
+ input = g.nodes()->create<luci::CircleInput>();
+ mul = g.nodes()->create<luci::CircleMul>();
+ gamma = g.nodes()->create<luci::CircleConst>();
+ add = g.nodes()->create<luci::CircleAdd>();
+ beta = g.nodes()->create<luci::CircleConst>();
+ output = g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ input->dtype(loco::DataType::FLOAT32);
+ mul->dtype(loco::DataType::FLOAT32);
+ gamma->dtype(loco::DataType::FLOAT32);
+ add->dtype(loco::DataType::FLOAT32);
+ beta->dtype(loco::DataType::FLOAT32);
+ output->dtype(loco::DataType::FLOAT32);
+
+ uint32_t channel_size = 16;
+ input->shape({1, 4, 4, channel_size});
+ mul->shape({1, 4, 4, channel_size});
+ gamma->shape({channel_size});
+ add->shape({1, 4, 4, channel_size});
+ beta->shape({channel_size});
+ output->shape({1, 4, 4, channel_size});
+
+ gamma->size<loco::DataType::FLOAT32>(channel_size);
+ beta->size<loco::DataType::FLOAT32>(channel_size);
+ for (uint32_t i = 0; i < channel_size; i++)
+ {
+ gamma->at<loco::DataType::FLOAT32>(i) = i;
+ beta->at<loco::DataType::FLOAT32>(i) = i;
+ }
+
+ mul->x(input);
+ mul->y(gamma);
+ add->x(mul);
+ add->y(beta);
+ output->from(add);
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleMul *mul = nullptr;
+ luci::CircleConst *gamma = nullptr;
+ luci::CircleAdd *add = nullptr;
+ luci::CircleConst *beta = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+} // namespace
+
+TEST(ReplaceMulAddWithDepthwiseConv, simple)
+{
+ SimpleGraph g;
+
+ luci::ReplaceMulAddWithDepthwiseConvPass pass;
+ while (pass.run(&g.g))
+ ;
+
+ auto dwconv = dynamic_cast<luci::CircleDepthwiseConv2D *>(g.output->from());
+ EXPECT_NE(nullptr, dwconv);
+
+ uint32_t channel_size = 16;
+ auto weights = dynamic_cast<luci::CircleConst *>(dwconv->filter());
+ auto bias = dynamic_cast<luci::CircleConst *>(dwconv->bias());
+ EXPECT_NE(nullptr, weights);
+ EXPECT_EQ(4, weights->rank());
+ EXPECT_EQ(channel_size, weights->dim(3).value());
+ EXPECT_NE(nullptr, bias);
+ EXPECT_EQ(1, bias->rank());
+ EXPECT_EQ(channel_size, bias->dim(0).value());
+
+  for (uint32_t i = 0; i < channel_size; i++)
+ {
+ EXPECT_FLOAT_EQ(i, weights->at<loco::DataType::FLOAT32>(i));
+ EXPECT_FLOAT_EQ(i, bias->at<loco::DataType::FLOAT32>(i));
+ }
+}
+
+TEST(ReplaceMulAddWithDepthwiseConv, wrong_op_NEG)
+{
+ SimpleGraph g;
+ // swap mul/add (changed to add->mul)
+ g.add->x(g.input);
+ loco::replace(g.add).with(g.mul);
+ g.mul->x(g.add);
+
+ luci::ReplaceMulAddWithDepthwiseConvPass pass;
+ auto changed = pass.run(&g.g);
+
+ EXPECT_EQ(false, changed);
+}
namespace luci
{
+bool ShapeInferencePass::run(luci::Module *m)
+{
+ bool changed = false;
+
+ for (size_t g = 0; g < m->size(); ++g)
+ {
+ if (run(m->graph(g)))
+ changed = true;
+ }
+
+ return changed;
+}
+
bool ShapeInferencePass::run(loco::Graph *g)
{
loco::CanonicalShapeInferenceRule canonical_rule;
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ShapeSignatureInferencePass.h"
+
+#include <luci/IR/CircleShapeSignature.h>
+#include <luci/Service/CircleShapeSignatureInference.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool ShapeSignatureInferencePass::run(luci::Module *m)
+{
+ bool changed = false;
+
+ for (size_t g = 0; g < m->size(); ++g)
+ {
+ if (run(m->graph(g)))
+ changed = true;
+ }
+
+ return changed;
+}
+
+bool ShapeSignatureInferencePass::run(loco::Graph *g)
+{
+ luci::ssinf::Rule signature_inference_rule;
+ bool changed = false;
+
+ for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
+ {
+ luci::ShapeSignature shape_signature;
+
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (signature_inference_rule.infer(circle_node, shape_signature))
+ {
+ if (!(circle_node->shape_signature() == shape_signature))
+ {
+ circle_node->shape_signature(shape_signature);
+ changed = true;
+ }
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <cassert>
+#include <vector>
+
+namespace
+{
+
+bool satisfy_precondition(luci::CircleFullyConnected *fc)
+{
+ // check if it's already been shuffled
+ if (fc->weights_format() != luci::CircleFullyConnected::WeightsFormat::DEFAULT)
+ return false;
+
+ // check if its data type is FLOAT32
+ if (fc->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ auto weights = loco::must_cast<luci::CircleConst *>(fc->weights());
+ // rank must be 2
+ if (weights->rank() != 2)
+ return false;
+
+ // check if it has sparsity parameter
+ if (weights->sparsityparam())
+ return false;
+
+  // check if the number of rows of the FullyConnected weight is a multiple of 16
+ const uint32_t MULTIPLE = 16;
+ uint32_t rows = weights->dim(0).value();
+ if (rows % MULTIPLE)
+ return false;
+
+ return true;
+}
+
+// Collect FullyConnected ops that share the same weight tensor
+void get_FCs_having_same_tensor(std::vector<luci::CircleFullyConnected *> &fc_vec, loco::Graph *g,
+ luci::CircleFullyConnected *fc)
+{
+ auto the_tensor = fc->weights();
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+    // NOTE Use a distinct name to avoid shadowing the `fc` parameter
+    auto cur_fc = dynamic_cast<luci::CircleFullyConnected *>(node);
+    if (not cur_fc)
+      continue;
+
+    if (cur_fc->weights() == the_tensor)
+      fc_vec.push_back(cur_fc);
+ }
+}
+
+luci::CircleConst *shuffle_weight(luci::CircleFullyConnected *fc)
+{
+ auto the_weights = loco::must_cast<luci::CircleConst *>(fc->weights());
+
+ // create CircleConst where shuffled data will be stored
+ luci::CircleConst *new_weights = fc->graph()->nodes()->create<luci::CircleConst>();
+ new_weights->dtype(loco::DataType::FLOAT32);
+ new_weights->size<loco::DataType::FLOAT32>(the_weights->size<loco::DataType::FLOAT32>());
+ new_weights->rank(the_weights->rank());
+ new_weights->shape_status(the_weights->shape_status());
+ for (uint32_t r = 0; r < new_weights->rank(); r++)
+ {
+ new_weights->dim(r).set(the_weights->dim(r).value());
+ }
+
+  // shuffle weight
+ const uint32_t MULTIPLE = 16;
+ const uint32_t rows = the_weights->dim(0).value();
+ const uint32_t cols = the_weights->dim(1).value();
+ const uint32_t r_step = rows / MULTIPLE;
+ uint32_t index = 0;
+ for (uint32_t r = 0; r < r_step; r++)
+ {
+ for (uint32_t c = 0; c < cols; c++)
+ {
+ for (uint32_t i = 0; i < MULTIPLE; i++)
+ {
+ new_weights->at<loco::DataType::FLOAT32>(index++) =
+ the_weights->at<loco::DataType::FLOAT32>((r * MULTIPLE + i) * cols + c);
+ }
+ }
+ }
+
+ return new_weights;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool ShuffleWeightTo16x1Float32Pass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto fc = dynamic_cast<luci::CircleFullyConnected *>(node);
+ if (not fc)
+ continue;
+
+ if (not satisfy_precondition(fc))
+ continue;
+
+ std::vector<luci::CircleFullyConnected *> fc_vec;
+ get_FCs_having_same_tensor(fc_vec, g, fc);
+ auto new_weights = shuffle_weight(fc);
+
+    // replace the weights of every FC that shares the tensor
+    for (const auto fc : fc_vec)
+    {
+      fc->weights(new_weights);
+      fc->weights_format(luci::CircleFullyConnected::WeightsFormat::SHUFFLED16x1FLOAT32);
+    }
+
+    changed = true;
+  }
+
+ return changed;
+}
+
+} // namespace luci
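
shuffle_weight() regroups the weight matrix into blocks of 16 rows and stores each block column-major, the layout expected by the SHUFFLED16x1FLOAT32 weights format. The same index mapping over a plain row-major float buffer, as a sketch (shuffle_16x1 is illustrative, not part of the pass):

#include <cassert>
#include <cstdint>
#include <vector>

// Rows are processed in groups of 16; within each group the data is written
// column by column, matching the loop in shuffle_weight() above.
std::vector<float> shuffle_16x1(const std::vector<float> &src, uint32_t rows, uint32_t cols)
{
  assert(rows % 16 == 0 && src.size() == static_cast<size_t>(rows) * cols);
  std::vector<float> dst(src.size());
  uint32_t index = 0;
  for (uint32_t r = 0; r < rows / 16; ++r)
    for (uint32_t c = 0; c < cols; ++c)
      for (uint32_t i = 0; i < 16; ++i)
        dst[index++] = src[(r * 16 + i) * cols + c];
  return dst;
}

// With rows = 16 and cols = 2 (as in the test below), element 1 of the
// shuffled buffer holds src[2], element 2 holds src[4], and so on.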
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+void create_fc_net(loco::Graph *g)
+{
+ assert(g);
+
+ const uint32_t ROW = 16;
+ const uint32_t COL = 2;
+ const uint32_t elements_num = ROW * COL;
+
+ // input
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto graph_input = g->inputs()->create();
+ input->index(graph_input->index());
+
+ // fc weights
+ auto weights = g->nodes()->create<luci::CircleConst>();
+ weights->dtype(loco::DataType::FLOAT32);
+ weights->size<loco::DataType::FLOAT32>(elements_num);
+ weights->rank(2);
+ weights->dim(0).set(ROW);
+ weights->dim(1).set(COL);
+ for (uint32_t idx = 0; idx < elements_num; idx++)
+ {
+ weights->at<loco::DataType::FLOAT32>(idx) = idx;
+ }
+
+ // fc
+ auto fc = g->nodes()->create<luci::CircleFullyConnected>();
+ fc->dtype(loco::DataType::FLOAT32);
+ fc->input(input);
+ fc->weights(weights);
+
+ // output
+ auto output = g->nodes()->create<luci::CircleOutput>();
+ output->from(fc);
+ auto graph_output = g->outputs()->create();
+ output->index(graph_output->index());
+}
+
+TEST(ShuffleWeightTo16x1Float32PassTest, SimpleTest1)
+{
+ auto graph = loco::make_graph();
+ create_fc_net(graph.get());
+
+ luci::CircleFullyConnected *fc_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ auto fc = dynamic_cast<luci::CircleFullyConnected *>(node);
+ if (not fc)
+ continue;
+
+ fc_node = fc;
+ break;
+ }
+ ASSERT_NE(fc_node, nullptr);
+ auto weights = loco::must_cast<luci::CircleConst *>(fc_node->weights());
+ // before
+ ASSERT_EQ(0, weights->at<loco::DataType::FLOAT32>(0));
+ ASSERT_EQ(1, weights->at<loco::DataType::FLOAT32>(1));
+ ASSERT_EQ(2, weights->at<loco::DataType::FLOAT32>(2));
+ ASSERT_EQ(3, weights->at<loco::DataType::FLOAT32>(3));
+ ASSERT_EQ(4, weights->at<loco::DataType::FLOAT32>(4));
+ ASSERT_EQ(5, weights->at<loco::DataType::FLOAT32>(5));
+ ASSERT_EQ(6, weights->at<loco::DataType::FLOAT32>(6));
+ ASSERT_EQ(7, weights->at<loco::DataType::FLOAT32>(7));
+ ASSERT_EQ(8, weights->at<loco::DataType::FLOAT32>(8));
+ ASSERT_EQ(9, weights->at<loco::DataType::FLOAT32>(9));
+ ASSERT_EQ(10, weights->at<loco::DataType::FLOAT32>(10));
+ ASSERT_EQ(11, weights->at<loco::DataType::FLOAT32>(11));
+ ASSERT_EQ(12, weights->at<loco::DataType::FLOAT32>(12));
+ ASSERT_EQ(13, weights->at<loco::DataType::FLOAT32>(13));
+ ASSERT_EQ(14, weights->at<loco::DataType::FLOAT32>(14));
+ ASSERT_EQ(15, weights->at<loco::DataType::FLOAT32>(15));
+
+ luci::ShuffleWeightTo16x1Float32Pass pass;
+ while (pass.run(graph.get()))
+ ;
+
+ weights = loco::must_cast<luci::CircleConst *>(fc_node->weights());
+ // after
+ ASSERT_EQ(0, weights->at<loco::DataType::FLOAT32>(0));
+ ASSERT_EQ(2, weights->at<loco::DataType::FLOAT32>(1));
+ ASSERT_EQ(4, weights->at<loco::DataType::FLOAT32>(2));
+ ASSERT_EQ(6, weights->at<loco::DataType::FLOAT32>(3));
+ ASSERT_EQ(8, weights->at<loco::DataType::FLOAT32>(4));
+ ASSERT_EQ(10, weights->at<loco::DataType::FLOAT32>(5));
+ ASSERT_EQ(12, weights->at<loco::DataType::FLOAT32>(6));
+ ASSERT_EQ(14, weights->at<loco::DataType::FLOAT32>(7));
+ ASSERT_EQ(16, weights->at<loco::DataType::FLOAT32>(8));
+ ASSERT_EQ(18, weights->at<loco::DataType::FLOAT32>(9));
+ ASSERT_EQ(20, weights->at<loco::DataType::FLOAT32>(10));
+ ASSERT_EQ(22, weights->at<loco::DataType::FLOAT32>(11));
+ ASSERT_EQ(24, weights->at<loco::DataType::FLOAT32>(12));
+ ASSERT_EQ(26, weights->at<loco::DataType::FLOAT32>(13));
+ ASSERT_EQ(28, weights->at<loco::DataType::FLOAT32>(14));
+ ASSERT_EQ(30, weights->at<loco::DataType::FLOAT32>(15));
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/SubstitutePackToReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+bool substitute_pack_to_reshape(luci::CircleNode *node)
+{
+ auto target_node = dynamic_cast<luci::CirclePack *>(node);
+ if (target_node == nullptr)
+ return false;
+ if (target_node->values_count() != 1)
+ return false;
+ auto value_node = loco::must_cast<luci::CircleNode *>(target_node->values(0));
+ if (value_node->shape_status() != luci::ShapeStatus::VALID)
+ return false;
+ int32_t axis = target_node->axis();
+ if (axis < 0)
+ axis = axis + static_cast<int32_t>(value_node->rank()) + 1;
+
+ auto graph = target_node->graph();
+ auto reshape_node = graph->nodes()->create<luci::CircleReshape>();
+ reshape_node->tensor(value_node);
+
+ auto const_node = graph->nodes()->create<luci::CircleConst>();
+ const_node->dtype(loco::DataType::S32);
+ const_node->size<loco::DataType::S32>(value_node->rank() + 1);
+ const_node->shape_status(luci::ShapeStatus::VALID);
+ const_node->rank(1);
+ const_node->dim(0).set(value_node->rank() + 1);
+ for (int32_t i = 0; i < static_cast<int32_t>(value_node->rank()) + 1; i++)
+ {
+ if (i == axis)
+ {
+ const_node->at<loco::DataType::S32>(i) = 1;
+ }
+ else if (i < axis)
+ {
+ const_node->at<loco::DataType::S32>(i) = value_node->dim(i).value();
+ }
+ else
+ {
+ const_node->at<loco::DataType::S32>(i) = value_node->dim(i - 1).value();
+ }
+ }
+ reshape_node->shape(const_node);
+ replace(target_node).with(reshape_node);
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ * |
+ * [CircleNode]
+ * |
+ * [CirclePack]
+ * |
+ * [CircleNode]
+ * |
+ *
+ * AFTER
+ * |
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleReshape]
+ * |
+ * [CircleNode]
+ * |
+ *
+ */
+bool SubstitutePackToReshapePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (substitute_pack_to_reshape(circle_node))
+ {
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
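
Packing a single tensor along `axis` only inserts a new dimension of size 1, so the Pack can be replaced by a Reshape whose target shape is the input shape with a 1 inserted at `axis`. A sketch of that shape computation over plain vectors (packed_shape is a hypothetical helper; the pass materializes the same values into a CircleConst):

#include <cstdint>
#include <vector>

// Insert a dimension of size 1 at `axis`, resolving negative axes the same
// way substitute_pack_to_reshape() does.
std::vector<int32_t> packed_shape(const std::vector<int32_t> &input_shape, int32_t axis)
{
  std::vector<int32_t> shape = input_shape;
  if (axis < 0)
    axis += static_cast<int32_t>(input_shape.size()) + 1;
  shape.insert(shape.begin() + axis, 1);
  return shape;
}

// Example: input shape {1, 2, 3, 4} with axis 0 becomes {1, 1, 2, 3, 4};
// with axis -1 it becomes {1, 2, 3, 4, 1}, matching the tests below.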
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/SubstitutePackToReshapePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * BEFORE
+ * |
+ * [CircleNode]
+ * |
+ * [CirclePack]
+ * |
+ * [CircleNode]
+ * |
+ *
+ * AFTER
+ * |
+ * [CircleNode] [CircleConst]
+ * \ /
+ * [CircleReshape]
+ * |
+ * [CircleNode]
+ * |
+ *
+ */
+void create_substitute_pack_to_reshape(loco::Graph *g, const std::initializer_list<uint32_t> shape,
+ int32_t axis)
+{
+ assert(g);
+
+  // Create input.
+ auto input = g->nodes()->create<luci::CircleInput>();
+ auto graph_input = g->inputs()->create();
+ input->index(graph_input->index());
+ input->shape_status(luci::ShapeStatus::VALID);
+ input->rank(shape.size());
+ input->shape(shape);
+
+  // Create Pack node.
+ auto pack = g->nodes()->create<luci::CirclePack>(1);
+ pack->values(0, input);
+ pack->axis(axis);
+
+  // Connect output.
+ auto output = g->nodes()->create<luci::CircleOutput>();
+ output->from(pack);
+ auto graph_output = g->outputs()->create();
+ output->index(graph_output->index());
+
+ return;
+}
+
+} // namespace
+
+TEST(SubstitutePackToReshapePass, simple_case)
+{
+ auto graph = loco::make_graph();
+ create_substitute_pack_to_reshape(graph.get(), {1, 2, 3, 4}, 0);
+ luci::SubstitutePackToReshapePass pass;
+ while (pass.run(graph.get()))
+ ;
+ luci::CircleReshape *reshape_node = nullptr;
+ luci::CirclePack *pack_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ if (auto reshape = dynamic_cast<luci::CircleReshape *>(node))
+ reshape_node = reshape;
+ else if (auto pack = dynamic_cast<luci::CirclePack *>(node))
+ pack_node = pack;
+ }
+ ASSERT_NE(nullptr, reshape_node);
+ ASSERT_EQ(nullptr, pack_node);
+ auto new_shape = loco::must_cast<luci::CircleConst *>(reshape_node->shape());
+ ASSERT_EQ(1, new_shape->at<loco::DataType::S32>(0));
+ ASSERT_EQ(1, new_shape->at<loco::DataType::S32>(1));
+ ASSERT_EQ(2, new_shape->at<loco::DataType::S32>(2));
+ ASSERT_EQ(3, new_shape->at<loco::DataType::S32>(3));
+ ASSERT_EQ(4, new_shape->at<loco::DataType::S32>(4));
+}
+
+TEST(SubstitutePackToReshapePass, simple_case_neg_axis)
+{
+ auto graph = loco::make_graph();
+ create_substitute_pack_to_reshape(graph.get(), {1, 2, 3, 4}, -1);
+ luci::SubstitutePackToReshapePass pass;
+ while (pass.run(graph.get()))
+ ;
+ luci::CircleReshape *reshape_node = nullptr;
+ luci::CirclePack *pack_node = nullptr;
+ for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+ {
+ if (auto reshape = dynamic_cast<luci::CircleReshape *>(node))
+ reshape_node = reshape;
+ else if (auto pack = dynamic_cast<luci::CirclePack *>(node))
+ pack_node = pack;
+ }
+ ASSERT_NE(nullptr, reshape_node);
+ ASSERT_EQ(nullptr, pack_node);
+ auto new_shape = loco::must_cast<luci::CircleConst *>(reshape_node->shape());
+ ASSERT_EQ(1, new_shape->at<loco::DataType::S32>(0));
+ ASSERT_EQ(2, new_shape->at<loco::DataType::S32>(1));
+ ASSERT_EQ(3, new_shape->at<loco::DataType::S32>(2));
+ ASSERT_EQ(4, new_shape->at<loco::DataType::S32>(3));
+ ASSERT_EQ(1, new_shape->at<loco::DataType::S32>(4));
+}
namespace luci
{
+bool TypeInferencePass::run(luci::Module *m)
+{
+ bool changed = false;
+
+ for (size_t g = 0; g < m->size(); ++g)
+ {
+ if (run(m->graph(g)))
+ changed = true;
+ }
+
+ return changed;
+}
+
bool TypeInferencePass::run(loco::Graph *g)
{
loco::CanonicalTypeInferenceRule canonical_rule;
#include <loco/IR/Nodes.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Service/CircleShapeInferenceHelper.h>
+
namespace luci
{
static ShapeDescription get(loco::Node *node);
};
+namespace sinf // namespace for Shape Inference
+{
+
+struct Rule
+{
+ bool infer(const luci::CircleNode *, loco::TensorShape &) const;
+};
+
+class Algorithm final : public luci::CircleNodeVisitor<loco::TensorShape>
+{
+public:
+  // TODO Remove this when all visit functions are implemented
+ loco::TensorShape visit(const luci::CircleNode *node) final { return sinf::circle_shape(node); }
+
+ // loco::TensorShape visit(const luci::CircleAbs *node) final;
+ // loco::TensorShape visit(const luci::CircleAdd *node) final;
+ // loco::TensorShape visit(const luci::CircleAddN *node) final;
+ // loco::TensorShape visit(const luci::CircleArgMax *node) final;
+ // loco::TensorShape visit(const luci::CircleArgMin *node) final;
+ // loco::TensorShape visit(const luci::CircleAveragePool2D *node) final;
+ // loco::TensorShape visit(const luci::CircleBatchMatMul *node) final;
+ // loco::TensorShape visit(const luci::CircleBatchToSpaceND *node) final;
+ // loco::TensorShape visit(const luci::CircleCast *node) final;
+ // loco::TensorShape visit(const luci::CircleCeil *node) final;
+ // loco::TensorShape visit(const luci::CircleConcatenation *node) final;
+ // loco::TensorShape visit(const luci::CircleConst *node) final;
+ // loco::TensorShape visit(const luci::CircleConv2D *node) final;
+ // loco::TensorShape visit(const luci::CircleCos *node) final;
+ // loco::TensorShape visit(const luci::CircleCustom *node) final;
+ // loco::TensorShape visit(const luci::CircleDepthToSpace *node) final;
+ // loco::TensorShape visit(const luci::CircleDepthwiseConv2D *node) final;
+ // loco::TensorShape visit(const luci::CircleDequantize *node) final;
+ // loco::TensorShape visit(const luci::CircleDiv *node) final;
+ // loco::TensorShape visit(const luci::CircleElu *node) final;
+ // loco::TensorShape visit(const luci::CircleEqual *node) final;
+ // loco::TensorShape visit(const luci::CircleExp *node) final;
+ // loco::TensorShape visit(const luci::CircleExpandDims *node) final;
+ // loco::TensorShape visit(const luci::CircleFill *node) final;
+ // loco::TensorShape visit(const luci::CircleFloor *node) final;
+ // loco::TensorShape visit(const luci::CircleFloorDiv *node) final;
+ // loco::TensorShape visit(const luci::CircleFloorMod *node) final;
+ // loco::TensorShape visit(const luci::CircleFullyConnected *node) final;
+ // loco::TensorShape visit(const luci::CircleGather *node) final;
+ // loco::TensorShape visit(const luci::CircleGatherNd *node) final;
+ // loco::TensorShape visit(const luci::CircleGreater *node) final;
+ // loco::TensorShape visit(const luci::CircleGreaterEqual *node) final;
+ // loco::TensorShape visit(const luci::CircleIf *node) final;
+ // loco::TensorShape visit(const luci::CircleL2Normalize *node) final;
+ // loco::TensorShape visit(const luci::CircleL2Pool2D *node) final;
+ // loco::TensorShape visit(const luci::CircleLeakyRelu *node) final;
+ // loco::TensorShape visit(const luci::CircleLess *node) final;
+ // loco::TensorShape visit(const luci::CircleLessEqual *node) final;
+ // loco::TensorShape visit(const luci::CircleLocalResponseNormalization *node) final;
+ // loco::TensorShape visit(const luci::CircleLog *node) final;
+ // loco::TensorShape visit(const luci::CircleLogicalAnd *node) final;
+ // loco::TensorShape visit(const luci::CircleLogicalNot *node) final;
+ // loco::TensorShape visit(const luci::CircleLogicalOr *node) final;
+ // loco::TensorShape visit(const luci::CircleLogistic *node) final;
+ // loco::TensorShape visit(const luci::CircleLogSoftmax *node) final;
+ // loco::TensorShape visit(const luci::CircleMatrixDiag *node) final;
+ // loco::TensorShape visit(const luci::CircleMatrixSetDiag *node) final;
+ // loco::TensorShape visit(const luci::CircleMaximum *node) final;
+ // loco::TensorShape visit(const luci::CircleMaxPool2D *node) final;
+ // loco::TensorShape visit(const luci::CircleMean *node) final;
+ // loco::TensorShape visit(const luci::CircleMinimum *node) final;
+ // loco::TensorShape visit(const luci::CircleMirrorPad *node) final;
+ // loco::TensorShape visit(const luci::CircleNeg *node) final;
+ // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV4 *node) final;
+ // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV5 *node) final;
+ // loco::TensorShape visit(const luci::CircleNotEqual *node) final;
+ // loco::TensorShape visit(const luci::CirclePack *node) final;
+ // loco::TensorShape visit(const luci::CirclePad *node) final;
+ // loco::TensorShape visit(const luci::CirclePadV2 *node) final;
+ // loco::TensorShape visit(const luci::CirclePow *node) final;
+ // loco::TensorShape visit(const luci::CirclePRelu *node) final;
+ // loco::TensorShape visit(const luci::CircleRange *node) final;
+ // loco::TensorShape visit(const luci::CircleRank *node) final;
+ // loco::TensorShape visit(const luci::CircleMul *node) final;
+ // loco::TensorShape visit(const luci::CircleOneHot *node) final;
+ // loco::TensorShape visit(const luci::CircleReduceAny *node) final;
+ // loco::TensorShape visit(const luci::CircleReduceMax *node) final;
+ // loco::TensorShape visit(const luci::CircleReduceMin *node) final;
+ // loco::TensorShape visit(const luci::CircleReduceProd *node) final;
+ // loco::TensorShape visit(const luci::CircleRelu *node) final;
+ // loco::TensorShape visit(const luci::CircleRelu6 *node) final;
+ // loco::TensorShape visit(const luci::CircleReluN1To1 *node) final;
+ // loco::TensorShape visit(const luci::CircleReshape *node) final;
+ // loco::TensorShape visit(const luci::CircleResizeBilinear *node) final;
+ // loco::TensorShape visit(const luci::CircleResizeNearestNeighbor *node) final;
+ // loco::TensorShape visit(const luci::CircleReverseSequence *node) final;
+ // loco::TensorShape visit(const luci::CircleReverseV2 *node) final;
+ // loco::TensorShape visit(const luci::CircleRound *node) final;
+ // loco::TensorShape visit(const luci::CircleRsqrt *node) final;
+ // loco::TensorShape visit(const luci::CircleScatterNd *node) final;
+ // loco::TensorShape visit(const luci::CircleSegmentSum *node) final;
+ // loco::TensorShape visit(const luci::CircleSelect *node) final;
+ // loco::TensorShape visit(const luci::CircleSelectV2 *node) final;
+ // loco::TensorShape visit(const luci::CircleShape *node) final;
+ // loco::TensorShape visit(const luci::CircleSin *node) final;
+ // loco::TensorShape visit(const luci::CircleSlice *node) final;
+ // loco::TensorShape visit(const luci::CircleSoftmax *node) final;
+ // loco::TensorShape visit(const luci::CircleSpaceToBatchND *node) final;
+ // loco::TensorShape visit(const luci::CircleSpaceToDepth *node) final;
+ // loco::TensorShape visit(const luci::CircleSparseToDense *node) final;
+ // loco::TensorShape visit(const luci::CircleSplit *node) final;
+ // loco::TensorShape visit(const luci::CircleSplitV *node) final;
+ // loco::TensorShape visit(const luci::CircleSqrt *node) final;
+ // loco::TensorShape visit(const luci::CircleSquare *node) final;
+ // loco::TensorShape visit(const luci::CircleSquaredDifference *node) final;
+ // loco::TensorShape visit(const luci::CircleSqueeze *node) final;
+ // loco::TensorShape visit(const luci::CircleStridedSlice *node) final;
+ // loco::TensorShape visit(const luci::CircleSub *node) final;
+ // loco::TensorShape visit(const luci::CircleSum *node) final;
+ // loco::TensorShape visit(const luci::CircleTanh *node) final;
+ // loco::TensorShape visit(const luci::CircleTile *node) final;
+ // loco::TensorShape visit(const luci::CircleTopKV2 *node) final;
+ // loco::TensorShape visit(const luci::CircleTranspose *node) final;
+ // loco::TensorShape visit(const luci::CircleTransposeConv *node) final;
+ // loco::TensorShape visit(const luci::CircleUnidirectionalSequenceLSTM *node) final;
+ // loco::TensorShape visit(const luci::CircleUnique *node) final;
+ // loco::TensorShape visit(const luci::CircleUnpack *node) final;
+ // loco::TensorShape visit(const luci::CircleWhere *node) final;
+ // loco::TensorShape visit(const luci::CircleWhile *node) final;
+ // loco::TensorShape visit(const luci::CircleZerosLike *node) final;
+
+ // Circle Only
+ // loco::TensorShape visit(const luci::CircleBCQFullyConnected *node) final;
+ // loco::TensorShape visit(const luci::CircleBCQGather *node) final;
+ // loco::TensorShape visit(const luci::CircleInstanceNorm *node) final;
+
+ // Virtual
+ // loco::TensorShape visit(const luci::CircleInput *node) final;
+ // loco::TensorShape visit(const luci::CircleOutput *node) final;
+ // loco::TensorShape visit(const luci::CircleOutputDummy *node) final;
+ // loco::TensorShape visit(const luci::CircleOutputExclude *node) final;
+ // loco::TensorShape visit(const luci::CircleCustomOut *node) final;
+ // loco::TensorShape visit(const luci::CircleIfOut *node) final;
+ // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV4Out *node) final;
+ // loco::TensorShape visit(const luci::CircleNonMaxSuppressionV5Out *node) final;
+ // loco::TensorShape visit(const luci::CircleSplitOut *node) final;
+ // loco::TensorShape visit(const luci::CircleSplitVOut *node) final;
+ // loco::TensorShape visit(const luci::CircleTopKV2Out *node) final;
+ // loco::TensorShape visit(const luci::CircleUniqueOut *node) final;
+ // loco::TensorShape visit(const luci::CircleUnpackOut *node) final;
+ // loco::TensorShape visit(const luci::CircleWhileOut *node) final;
+};
+
+} // namespace sinf
+
} // namespace luci
#endif // __LUCI_CIRCLE_SHAPE_INFERENCE_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__
+#define __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__
+
+#include <loco/IR/TensorShape.h>
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleShapeSignature.h>
+
+namespace luci
+{
+namespace sinf // Namespace for Shape Inference
+{
+
+// Return shape of circle node as loco::TensorShape
+loco::TensorShape circle_shape(const luci::CircleNode *node);
+
+} // namespace sinf
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_SHAPE_INFERENCE_HELPER_H__
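
circle_shape() is only declared here. A plausible sketch of such a helper, copying the node's recorded rank and dimensions into a loco::TensorShape, is shown below; this is an assumption about the behaviour, not the actual luci implementation:

#include <loco/IR/TensorShape.h>
#include <luci/IR/CircleNodes.h>

// Assumed behaviour only: copy the rank and dimensions recorded on the node
// into a fresh loco::TensorShape.
loco::TensorShape circle_shape_sketch(const luci::CircleNode *node)
{
  loco::TensorShape shape;
  shape.rank(node->rank());
  for (uint32_t i = 0; i < node->rank(); ++i)
    shape.dim(i) = node->dim(i);
  return shape;
}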
* limitations under the License.
*/
-#ifndef __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_RULE_H__
-#define __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_RULE_H__
+#ifndef __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_H__
+#define __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_H__
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
#include <luci/IR/CircleShapeSignature.h>
+#include <luci/Service/CircleShapeSignatureInferenceHelper.h>
namespace luci
{
-struct CircleShapeSignatureInferenceRule
+namespace ssinf // namespace for Shape Signature Inference
+{
+
+struct Rule
{
bool infer(const luci::CircleNode *, ShapeSignature &) const;
};
-class ShapeSignatureInferenceAlgorithm final : public luci::CircleNodeVisitor<ShapeSignature>
+class Algorithm final : public luci::CircleNodeVisitor<ShapeSignature>
{
public:
// TODO Remove this when visit function is implemented for all the operations.
// ShapeSignature visit(const luci::CircleMatrixSetDiag *node) final;
// ShapeSignature visit(const luci::CircleMaximum *node) final;
// ShapeSignature visit(const luci::CircleMaxPool2D *node) final;
- // ShapeSignature visit(const luci::CircleMean *node) final;
+ ShapeSignature visit(const luci::CircleMean *node) final;
// ShapeSignature visit(const luci::CircleMinimum *node) final;
// ShapeSignature visit(const luci::CircleMirrorPad *node) final;
// ShapeSignature visit(const luci::CircleNeg *node) final;
// ShapeSignature visit(const luci::CircleRank *node) final;
// ShapeSignature visit(const luci::CircleMul *node) final;
// ShapeSignature visit(const luci::CircleOneHot *node) final;
- // ShapeSignature visit(const luci::CircleReduceAny *node) final;
- // ShapeSignature visit(const luci::CircleReduceMax *node) final;
- // ShapeSignature visit(const luci::CircleReduceMin *node) final;
- // ShapeSignature visit(const luci::CircleReduceProd *node) final;
- // ShapeSignature visit(const luci::CircleRelu *node) final;
- // ShapeSignature visit(const luci::CircleRelu6 *node) final;
- // ShapeSignature visit(const luci::CircleReluN1To1 *node) final;
+ ShapeSignature visit(const luci::CircleReduceAny *node) final;
+ ShapeSignature visit(const luci::CircleReduceMax *node) final;
+ ShapeSignature visit(const luci::CircleReduceMin *node) final;
+ ShapeSignature visit(const luci::CircleReduceProd *node) final;
+ ShapeSignature visit(const luci::CircleRelu *node) final;
+ ShapeSignature visit(const luci::CircleRelu6 *node) final;
+ ShapeSignature visit(const luci::CircleReluN1To1 *node) final;
// ShapeSignature visit(const luci::CircleReshape *node) final;
// ShapeSignature visit(const luci::CircleResizeBilinear *node) final;
// ShapeSignature visit(const luci::CircleResizeNearestNeighbor *node) final;
// ShapeSignature visit(const luci::CircleSqueeze *node) final;
// ShapeSignature visit(const luci::CircleStridedSlice *node) final;
// ShapeSignature visit(const luci::CircleSub *node) final;
- // ShapeSignature visit(const luci::CircleSum *node) final;
+ ShapeSignature visit(const luci::CircleSum *node) final;
// ShapeSignature visit(const luci::CircleTanh *node) final;
// ShapeSignature visit(const luci::CircleTile *node) final;
// ShapeSignature visit(const luci::CircleTopKV2 *node) final;
// ShapeSignature visit(const luci::CircleInstanceNorm *node) final;
// Virtual
- // ShapeSignature visit(const luci::CircleInput *node) final;
- // ShapeSignature visit(const luci::CircleOutput *node) final;
- // ShapeSignature visit(const luci::CircleOutputDummy *node) final;
- // ShapeSignature visit(const luci::CircleOutputExclude *node) final;
+ ShapeSignature visit(const luci::CircleInput *node) final;
+ ShapeSignature visit(const luci::CircleOutput *node) final;
+ ShapeSignature visit(const luci::CircleOutputDummy *node) final;
+ ShapeSignature visit(const luci::CircleOutputExclude *node) final;
// ShapeSignature visit(const luci::CircleCustomOut *node) final;
// ShapeSignature visit(const luci::CircleIfOut *node) final;
// ShapeSignature visit(const luci::CircleNonMaxSuppressionV4Out *node) final;
// ShapeSignature visit(const luci::CircleWhileOut *node) final;
};
+} // namespace ssinf
+
} // namespace luci
-#endif // __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_RULE_H__
+#endif // __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_HELPER_H__
+#define __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_HELPER_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleShapeSignature.h>
+
+namespace luci
+{
+
+namespace ssinf // Namespace for Shape Signature Inference
+{
+
+// Return an empty signature if all dimensions are known.
+// If at least one dimension is unknown, return the signature unchanged.
+ShapeSignature legalized_signature(const luci::ShapeSignature &signature);
+
+// Return reduced input_signature with indices and keep_dims.
+// - indices : reduction index
+// - keep_dims : If true, rank is not changed. If false, rank is reduced along indices.
+ShapeSignature reduced_signature(const loco::Node *node, const loco::Node *indices, bool keep_dims);
+
+// Return signature of index-th argument of node.
+ShapeSignature input_arg_signature(const luci::CircleNode *node, uint32_t index);
+
+} // namespace ssinf
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_SHAPE_SIGNATURE_INFERENCE_HELPER_H__
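
reduced_signature() is likewise only declared above. A sketch of the contract described in its comment, over a plain vector of dimensions (reduce_signature_sketch is illustrative, assumes negative reduction indices may appear, and is not the luci implementation):

#include <cstdint>
#include <vector>

// Dimensions listed in `indices` are kept as 1 (keep_dims == true) or dropped
// entirely (keep_dims == false); all other dimensions are copied through.
std::vector<int32_t> reduce_signature_sketch(const std::vector<int32_t> &signature,
                                             const std::vector<int32_t> &indices, bool keep_dims)
{
  auto rank = static_cast<int32_t>(signature.size());
  std::vector<int32_t> result;
  for (int32_t d = 0; d < rank; ++d)
  {
    bool reduced = false;
    for (auto i : indices)
      reduced = reduced || (i == d) || (i + rank == d); // allow negative indices
    if (not reduced)
      result.push_back(signature[d]);
    else if (keep_dims)
      result.push_back(1);
  }
  return result;
}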
#include <mio/circle/schema_generated.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Service/CircleTypeInferenceHelper.h>
+
namespace luci
{
static circle::TensorType get(loco::Node *node);
};
+namespace tinf // namespace for Type Inference
+{
+
+struct Rule
+{
+ bool infer(const luci::CircleNode *, loco::DataType &) const;
+};
+
+class Algorithm final : public luci::CircleNodeVisitor<loco::DataType>
+{
+public:
+  // TODO Remove this when all visit functions are implemented
+ loco::DataType visit(const luci::CircleNode *node) final { return node->dtype(); }
+
+ // loco::DataType visit(const luci::CircleAbs *node) final;
+ // loco::DataType visit(const luci::CircleAdd *node) final;
+ // loco::DataType visit(const luci::CircleAddN *node) final;
+ // loco::DataType visit(const luci::CircleArgMax *node) final;
+ // loco::DataType visit(const luci::CircleArgMin *node) final;
+ // loco::DataType visit(const luci::CircleAveragePool2D *node) final;
+ // loco::DataType visit(const luci::CircleBatchMatMul *node) final;
+ // loco::DataType visit(const luci::CircleBatchToSpaceND *node) final;
+ // loco::DataType visit(const luci::CircleCast *node) final;
+ // loco::DataType visit(const luci::CircleCeil *node) final;
+ // loco::DataType visit(const luci::CircleConcatenation *node) final;
+ // loco::DataType visit(const luci::CircleConst *node) final;
+ // loco::DataType visit(const luci::CircleConv2D *node) final;
+ // loco::DataType visit(const luci::CircleCos *node) final;
+ // loco::DataType visit(const luci::CircleCustom *node) final;
+ // loco::DataType visit(const luci::CircleDepthToSpace *node) final;
+ // loco::DataType visit(const luci::CircleDepthwiseConv2D *node) final;
+ // loco::DataType visit(const luci::CircleDequantize *node) final;
+ // loco::DataType visit(const luci::CircleDiv *node) final;
+ // loco::DataType visit(const luci::CircleElu *node) final;
+ // loco::DataType visit(const luci::CircleEqual *node) final;
+ // loco::DataType visit(const luci::CircleExp *node) final;
+ // loco::DataType visit(const luci::CircleExpandDims *node) final;
+ // loco::DataType visit(const luci::CircleFill *node) final;
+ // loco::DataType visit(const luci::CircleFloor *node) final;
+ // loco::DataType visit(const luci::CircleFloorDiv *node) final;
+ // loco::DataType visit(const luci::CircleFloorMod *node) final;
+ // loco::DataType visit(const luci::CircleFullyConnected *node) final;
+ // loco::DataType visit(const luci::CircleGather *node) final;
+ // loco::DataType visit(const luci::CircleGatherNd *node) final;
+ // loco::DataType visit(const luci::CircleGreater *node) final;
+ // loco::DataType visit(const luci::CircleGreaterEqual *node) final;
+ // loco::DataType visit(const luci::CircleIf *node) final;
+ // loco::DataType visit(const luci::CircleL2Normalize *node) final;
+ // loco::DataType visit(const luci::CircleL2Pool2D *node) final;
+ // loco::DataType visit(const luci::CircleLeakyRelu *node) final;
+ // loco::DataType visit(const luci::CircleLess *node) final;
+ // loco::DataType visit(const luci::CircleLessEqual *node) final;
+ // loco::DataType visit(const luci::CircleLocalResponseNormalization *node) final;
+ // loco::DataType visit(const luci::CircleLog *node) final;
+ // loco::DataType visit(const luci::CircleLogicalAnd *node) final;
+ // loco::DataType visit(const luci::CircleLogicalNot *node) final;
+ // loco::DataType visit(const luci::CircleLogicalOr *node) final;
+ // loco::DataType visit(const luci::CircleLogistic *node) final;
+ // loco::DataType visit(const luci::CircleLogSoftmax *node) final;
+ // loco::DataType visit(const luci::CircleMatrixDiag *node) final;
+ // loco::DataType visit(const luci::CircleMatrixSetDiag *node) final;
+ // loco::DataType visit(const luci::CircleMaximum *node) final;
+ // loco::DataType visit(const luci::CircleMaxPool2D *node) final;
+ // loco::DataType visit(const luci::CircleMean *node) final;
+ // loco::DataType visit(const luci::CircleMinimum *node) final;
+ // loco::DataType visit(const luci::CircleMirrorPad *node) final;
+ // loco::DataType visit(const luci::CircleNeg *node) final;
+ // loco::DataType visit(const luci::CircleNonMaxSuppressionV4 *node) final;
+ // loco::DataType visit(const luci::CircleNonMaxSuppressionV5 *node) final;
+ // loco::DataType visit(const luci::CircleNotEqual *node) final;
+ // loco::DataType visit(const luci::CirclePack *node) final;
+ // loco::DataType visit(const luci::CirclePad *node) final;
+ // loco::DataType visit(const luci::CirclePadV2 *node) final;
+ // loco::DataType visit(const luci::CirclePow *node) final;
+ // loco::DataType visit(const luci::CirclePRelu *node) final;
+ // loco::DataType visit(const luci::CircleRange *node) final;
+ // loco::DataType visit(const luci::CircleRank *node) final;
+ // loco::DataType visit(const luci::CircleMul *node) final;
+ // loco::DataType visit(const luci::CircleOneHot *node) final;
+ // loco::DataType visit(const luci::CircleReduceAny *node) final;
+ // loco::DataType visit(const luci::CircleReduceMax *node) final;
+ // loco::DataType visit(const luci::CircleReduceMin *node) final;
+ // loco::DataType visit(const luci::CircleReduceProd *node) final;
+ // loco::DataType visit(const luci::CircleRelu *node) final;
+ // loco::DataType visit(const luci::CircleRelu6 *node) final;
+ // loco::DataType visit(const luci::CircleReluN1To1 *node) final;
+ // loco::DataType visit(const luci::CircleReshape *node) final;
+ // loco::DataType visit(const luci::CircleResizeBilinear *node) final;
+ // loco::DataType visit(const luci::CircleResizeNearestNeighbor *node) final;
+ // loco::DataType visit(const luci::CircleReverseSequence *node) final;
+ // loco::DataType visit(const luci::CircleReverseV2 *node) final;
+ // loco::DataType visit(const luci::CircleRound *node) final;
+ // loco::DataType visit(const luci::CircleRsqrt *node) final;
+ // loco::DataType visit(const luci::CircleScatterNd *node) final;
+ // loco::DataType visit(const luci::CircleSegmentSum *node) final;
+ // loco::DataType visit(const luci::CircleSelect *node) final;
+ // loco::DataType visit(const luci::CircleSelectV2 *node) final;
+ // loco::DataType visit(const luci::CircleShape *node) final;
+ // loco::DataType visit(const luci::CircleSin *node) final;
+ // loco::DataType visit(const luci::CircleSlice *node) final;
+ // loco::DataType visit(const luci::CircleSoftmax *node) final;
+ // loco::DataType visit(const luci::CircleSpaceToBatchND *node) final;
+ // loco::DataType visit(const luci::CircleSpaceToDepth *node) final;
+ // loco::DataType visit(const luci::CircleSparseToDense *node) final;
+ // loco::DataType visit(const luci::CircleSplit *node) final;
+ // loco::DataType visit(const luci::CircleSplitV *node) final;
+ // loco::DataType visit(const luci::CircleSqrt *node) final;
+ // loco::DataType visit(const luci::CircleSquare *node) final;
+ // loco::DataType visit(const luci::CircleSquaredDifference *node) final;
+ // loco::DataType visit(const luci::CircleSqueeze *node) final;
+ // loco::DataType visit(const luci::CircleStridedSlice *node) final;
+ // loco::DataType visit(const luci::CircleSub *node) final;
+ // loco::DataType visit(const luci::CircleSum *node) final;
+ // loco::DataType visit(const luci::CircleTanh *node) final;
+ // loco::DataType visit(const luci::CircleTile *node) final;
+ // loco::DataType visit(const luci::CircleTopKV2 *node) final;
+ // loco::DataType visit(const luci::CircleTranspose *node) final;
+ // loco::DataType visit(const luci::CircleTransposeConv *node) final;
+ // loco::DataType visit(const luci::CircleUnidirectionalSequenceLSTM *node) final;
+ // loco::DataType visit(const luci::CircleUnique *node) final;
+ // loco::DataType visit(const luci::CircleUnpack *node) final;
+ // loco::DataType visit(const luci::CircleWhere *node) final;
+ // loco::DataType visit(const luci::CircleWhile *node) final;
+ // loco::DataType visit(const luci::CircleZerosLike *node) final;
+
+ // Circle Only
+ // loco::DataType visit(const luci::CircleBCQFullyConnected *node) final;
+ // loco::DataType visit(const luci::CircleBCQGather *node) final;
+ // loco::DataType visit(const luci::CircleInstanceNorm *node) final;
+
+ // Virtual
+ // loco::DataType visit(const luci::CircleInput *node) final;
+ // loco::DataType visit(const luci::CircleOutput *node) final;
+ // loco::DataType visit(const luci::CircleOutputDummy *node) final;
+ // loco::DataType visit(const luci::CircleOutputExclude *node) final;
+ // loco::DataType visit(const luci::CircleCustomOut *node) final;
+ // loco::DataType visit(const luci::CircleIfOut *node) final;
+ // loco::DataType visit(const luci::CircleNonMaxSuppressionV4Out *node) final;
+ // loco::DataType visit(const luci::CircleNonMaxSuppressionV5Out *node) final;
+ // loco::DataType visit(const luci::CircleSplitOut *node) final;
+ // loco::DataType visit(const luci::CircleSplitVOut *node) final;
+ // loco::DataType visit(const luci::CircleTopKV2Out *node) final;
+ // loco::DataType visit(const luci::CircleUniqueOut *node) final;
+ // loco::DataType visit(const luci::CircleUnpackOut *node) final;
+ // loco::DataType visit(const luci::CircleWhileOut *node) final;
+};
+
+} // namespace tinf
+
} // namespace luci
#endif // __LUCI_CIRCLE_TYPE_INFERENCE_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__
+#define __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__
+
+#include <luci/IR/CircleNodes.h>
+
+#include <loco/IR/DataType.h>
+
+namespace luci
+{
+namespace tinf // Namespace for Type Inference
+{
+
+// Helper functions will be added
+
+} // namespace tinf
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_TYPE_INFERENCE_HELPER_H__
#include <loco/IR/PermutingCodec.h>
#include <loco/IR/NodeShape.h>
+#include <luci/IR/CircleNodes.h>
+
#include <cstdint>
#include <vector>
};
// TODO remove these when CircleDialect is fully functional
+ShapeDescription to_shape_description(const luci::CircleNode *node);
ShapeDescription to_shape_description(const loco::TensorShape &shape);
ShapeDescription to_shape_description(const loco::FeatureShape &shape);
ShapeDescription to_shape_description(const loco::FilterShape &shape);
#include <loco.h>
#include <loco/Service/ShapeInference.h>
+#include <luci/Log.h>
+
#include <cassert>
+#include <iostream>
namespace luci
{
}
} // namespace luci
+
+namespace
+{
+
+std::ostream &operator<<(std::ostream &os, const loco::TensorShape &tensor_shape)
+{
+ os << "[";
+ for (uint32_t r = 0; r < tensor_shape.rank(); ++r)
+ {
+ if (r)
+ os << ",";
+ os << tensor_shape.dim(r).value();
+ }
+ os << "]";
+ return os;
+}
+
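+// Returns true only when every input of the node has an inferred shape (ShapeStatus is not UNDEFINED)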
+bool inputs_shape_ready(const luci::CircleNode *node)
+{
+ for (uint32_t arity = 0; arity < node->arity(); ++arity)
+ {
+ auto node_input = loco::must_cast<luci::CircleNode *>(node->arg(arity));
+ if (node_input->shape_status() == luci::ShapeStatus::UNDEFINED)
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+namespace sinf
+{
+
+bool Rule::infer(const luci::CircleNode *circle_node, loco::TensorShape &shape) const
+{
+ LOGGER(l);
+ VERBOSE(l, 1) << "[CircleShapeInference] " << circle_node->name();
+ VERBOSE(l, 1) << " before: " << circle_shape(circle_node);
+
+ if (!inputs_shape_ready(circle_node))
+ {
+ VERBOSE(l, 1) << " after: Some inputs are not ready for inference";
+ return false;
+ }
+
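+ // All inputs have known shapes: run the per-node shape inference algorithm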
+ Algorithm alg;
+ shape = circle_node->accept(&alg);
+ VERBOSE(l, 1) << " after: " << shape;
+
+ return true;
+}
+
+} // namespace sinf
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleShapeInferenceHelper.h"
+
+namespace luci
+{
+namespace sinf
+{
+
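+// Build a loco::TensorShape from the rank/dim values stored in the node itself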
+loco::TensorShape circle_shape(const luci::CircleNode *node)
+{
+ loco::TensorShape shape;
+ shape.rank(node->rank());
+ for (uint32_t r = 0; r < node->rank(); ++r)
+ shape.dim(r) = loco::Dimension(node->dim(r).value());
+ return shape;
+}
+
+} // namespace sinf
+} // namespace luci
};
/**
- * @breif Expand shape x and y to same rank by align right and filling with 1
+ * @brief Expand shape x and y to same rank by align right and filling with 1
*/
void expand_rank(loco::TensorShape &x, loco::TensorShape &y)
{
}
/**
- * @breif Returns shape of expanded dimension of input x and y having same rank
+ * @brief Returns shape of expanded dimension of input x and y having same rank
*/
loco::TensorShape expand_dimension(const loco::TensorShape &x, const loco::TensorShape &y)
{
* limitations under the License.
*/
-#include "luci/Service/CircleShapeSignatureInferenceRule.h"
+#include "luci/Service/CircleShapeSignatureInference.h"
#include <luci/Log.h>
namespace luci
{
-bool CircleShapeSignatureInferenceRule::infer(const luci::CircleNode *circle_node,
- ShapeSignature &shape_signature) const
+namespace ssinf
+{
+
+bool Rule::infer(const luci::CircleNode *circle_node, ShapeSignature &shape_signature) const
{
LOGGER(l);
// There is nothing to check before ShapeSignatureInference.
- ShapeSignatureInferenceAlgorithm alg;
+ Algorithm alg;
shape_signature = circle_node->accept(&alg);
return true;
}
+} // namespace ssinf
+
} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleShapeSignatureInferenceHelper.h"
+
+#include <loco.h>
+
+#include <luci/Log.h>
+
+#include <oops/InternalExn.h>
+
+namespace luci
+{
+
+namespace ssinf
+{
+
+luci::ShapeSignature legalized_signature(const luci::ShapeSignature &signature)
+{
+ // If shape signature has at least one -1, it is not static.
+ for (uint32_t i = 0; i < signature.rank(); ++i)
+ if (signature.dim(i) == -1)
+ return signature;
+
+ // If all dimensions are static, return empty shape signature.
+ return luci::ShapeSignature();
+}
+
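+// Infer the output shape signature of a reduction operation (Mean, Sum, ReduceMax, ...)
+// from its input node, reduction indices and keep_dims flag.
+// e.g. an input signature (-1, 3, 4) reduced over axis 1 yields (-1, 4), or (-1, 1, 4) when keep_dims is true.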
+ShapeSignature reduced_signature(const loco::Node *node, const loco::Node *indices, bool keep_dims)
+{
+ LOGGER(l);
+
+ ShapeSignature input_signature;
+ ShapeSignature output_signature;
+
+ auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ if (circle_node->shape_signature().rank() > 0)
+ input_signature = circle_node->shape_signature();
+ else
+ {
+ input_signature.rank(circle_node->rank());
+ for (uint32_t i = 0; i < circle_node->rank(); ++i)
+ input_signature.dim(i) = circle_node->dim(i).value();
+ }
+
+ // If the input rank is 0, one of the following cases has occurred.
+ // - Input is a scalar : the result is always a scalar
+ // - Input shape signature is not inferred yet : the output shape signature cannot be inferred
+ // Therefore, when the input signature rank is 0, always return an empty signature.
+ if (input_signature.rank() == 0)
+ return output_signature;
+
+ // When reduction_indices is not constant
+ auto reduction_indices = dynamic_cast<const luci::CircleConst *>(indices);
+ if (reduction_indices == nullptr)
+ {
+ if (keep_dims)
+ {
+ // If keep_dims is true, rank is not changed.
+ output_signature.rank(input_signature.rank());
+ for (uint32_t i = 0; i < output_signature.rank(); ++i)
+ output_signature.dim(i) = -1;
+ }
+ else
+ {
+ // There is no way to infer the signature in this case.
+ // Do nothing and return an empty signature.
+ INFO(l) << "[CircleShapeSignatureInferenceHelper] " << circle_node->name() << std::endl;
+ INFO(l) << " reduced_signature : cannot infer because of non-constant node" << std::endl;
+ }
+
+ return output_signature;
+ }
+
+ std::vector<int32_t> reduction_values;
+ if (reduction_indices->dtype() == loco::DataType::S32)
+ {
+ auto reduction_size = reduction_indices->size<loco::DataType::S32>();
+ for (uint32_t i = 0; i < reduction_size; ++i)
+ {
+ int32_t axis = reduction_indices->at<loco::DataType::S32>(i);
+ if (axis < 0)
+ axis += input_signature.rank();
+
+ if (!(0 <= axis && axis < static_cast<int32_t>(input_signature.rank())))
+ INTERNAL_EXN_V("Invalid reduction axis for REDUCER", oops::to_uint32(axis));
+
+ reduction_values.push_back(axis);
+ }
+ }
+ else if (reduction_indices->dtype() == loco::DataType::S64)
+ {
+ auto reduction_size = reduction_indices->size<loco::DataType::S64>();
+ for (uint32_t i = 0; i < reduction_size; ++i)
+ {
+ int32_t axis = static_cast<int32_t>(reduction_indices->at<loco::DataType::S64>(i));
+ if (axis < 0)
+ axis += input_signature.rank();
+
+ if (!(0 <= axis && axis < static_cast<int32_t>(input_signature.rank())))
+ INTERNAL_EXN_V("Invalid reduction axis for REDUCER", oops::to_uint32(axis));
+
+ reduction_values.push_back(axis);
+ }
+ }
+ else
+ {
+ INTERNAL_EXN("Wrong reduction axis type, Only INT32, INT64 supported.");
+ }
+
+ if (keep_dims)
+ {
+ output_signature.rank(input_signature.rank());
+ for (uint32_t i = 0; i < input_signature.rank(); ++i)
+ output_signature.dim(i) = input_signature.dim(i);
+ for (uint32_t i = 0; i < reduction_values.size(); ++i)
+ output_signature.dim(reduction_values.at(i)) = 1;
+ }
+ else
+ {
+ std::vector<bool> check_reduce(input_signature.rank(), false);
+ for (uint32_t i = 0; i < reduction_values.size(); ++i)
+ check_reduce.at(reduction_values.at(i)) = true;
+
+ uint32_t reduce_cnt = 0;
+ for (uint32_t i = 0; i < check_reduce.size(); ++i)
+ if (check_reduce.at(i))
+ ++reduce_cnt;
+
+ output_signature.rank(input_signature.rank() - reduce_cnt);
+ for (uint32_t i = 0, j = 0; i < check_reduce.size(); ++i)
+ if (check_reduce.at(i) == false)
+ output_signature.dim(j++) = input_signature.dim(i);
+ }
+
+ return output_signature;
+}
+
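+// Return the shape signature of the index-th input of the given node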
+ShapeSignature input_arg_signature(const luci::CircleNode *node, uint32_t index)
+{
+ auto circle_input = loco::must_cast<luci::CircleNode *>(node->arg(index));
+ return circle_input->shape_signature();
+}
+
+} // namespace ssinf
+
+} // namespace luci
#include "luci/Service/CircleTypeInference.h"
+#include <luci/Log.h>
+
#include <loco.h>
#include <loco/Service/TypeInference.h>
}
} // namespace luci
+
+namespace
+{
+
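+// Returns true only when every input of the node has a known data type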
+bool inputs_dtype_ready(const luci::CircleNode *node)
+{
+ for (uint32_t arity = 0; arity < node->arity(); ++arity)
+ {
+ auto node_input = loco::must_cast<luci::CircleNode *>(node->arg(arity));
+ if (node_input->dtype() == loco::DataType::Unknown)
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+namespace tinf
+{
+
+bool Rule::infer(const luci::CircleNode *circle_node, loco::DataType &dtype) const
+{
+ LOGGER(l);
+ VERBOSE(l, 1) << "[CircleTypeInference] " << circle_node->name();
+ VERBOSE(l, 1) << " before: " << static_cast<int>(circle_node->dtype());
+
+ if (!inputs_dtype_ready(circle_node))
+ {
+ VERBOSE(l, 1) << " after: Some inputs are not ready for inference";
+ return false;
+ }
+
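+ // All inputs have known dtypes: run the per-node type inference algorithm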
+ Algorithm alg;
+ dtype = circle_node->accept(&alg);
+
+ VERBOSE(l, 1) << " after: " << static_cast<int>(dtype);
+
+ return true;
+}
+
+} // namespace tinf
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleTypeInferenceHelper.h"
+
+namespace luci
+{
+namespace tinf
+{
+
+// Helper functions will be added
+
+} // namespace tinf
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/Service/CircleShapeSignatureInference.h>
+
+namespace luci
+{
+
+ShapeSignature ssinf::Algorithm::visit(const luci::CircleInput *node)
+{
+ return node->shape_signature();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/Service/CircleShapeSignatureInference.h>
+
+namespace luci
+{
+
+ShapeSignature ssinf::Algorithm::visit(const luci::CircleMean *node)
+{
+ return legalized_signature(
+ reduced_signature(node->input(), node->reduction_indices(), node->keep_dims()));
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/Service/CircleShapeSignatureInference.h>
+
+namespace luci
+{
+
+ShapeSignature ssinf::Algorithm::visit(const luci::CircleOutput *node)
+{
+ return input_arg_signature(node, 0);
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/Service/CircleShapeSignatureInference.h>
+
+namespace luci
+{
+
+ShapeSignature ssinf::Algorithm::visit(const luci::CircleOutputDummy *) { return ShapeSignature(); }
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/Service/CircleShapeSignatureInference.h>
+
+namespace luci
+{
+
+ShapeSignature ssinf::Algorithm::visit(const luci::CircleOutputExclude *)
+{
+ return ShapeSignature();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/Service/CircleShapeSignatureInference.h>
+
+namespace luci
+{
+
+ShapeSignature ssinf::Algorithm::visit(const luci::CircleReduceAny *node)
+{
+ return legalized_signature(
+ reduced_signature(node->input(), node->reduction_indices(), node->keep_dims()));
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/Service/CircleShapeSignatureInference.h>
+
+namespace luci
+{
+
+ShapeSignature ssinf::Algorithm::visit(const luci::CircleReduceMax *node)
+{
+ return legalized_signature(
+ reduced_signature(node->input(), node->reduction_indices(), node->keep_dims()));
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/Service/CircleShapeSignatureInference.h>
+
+namespace luci
+{
+
+ShapeSignature ssinf::Algorithm::visit(const luci::CircleReduceMin *node)
+{
+ return legalized_signature(
+ reduced_signature(node->input(), node->reduction_indices(), node->keep_dims()));
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/Service/CircleShapeSignatureInference.h>
+
+namespace luci
+{
+
+ShapeSignature ssinf::Algorithm::visit(const luci::CircleReduceProd *node)
+{
+ return legalized_signature(
+ reduced_signature(node->input(), node->reduction_indices(), node->keep_dims()));
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/Service/CircleShapeSignatureInference.h>
+
+namespace luci
+{
+
+ShapeSignature ssinf::Algorithm::visit(const luci::CircleRelu *node)
+{
+ return input_arg_signature(node, 0);
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/Service/CircleShapeSignatureInference.h>
+
+namespace luci
+{
+
+ShapeSignature ssinf::Algorithm::visit(const luci::CircleRelu6 *node)
+{
+ return input_arg_signature(node, 0);
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/Service/CircleShapeSignatureInference.h>
+
+namespace luci
+{
+
+ShapeSignature ssinf::Algorithm::visit(const luci::CircleReluN1To1 *node)
+{
+ return input_arg_signature(node, 0);
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/Service/CircleShapeSignatureInference.h>
+
+namespace luci
+{
+
+ShapeSignature ssinf::Algorithm::visit(const luci::CircleSum *node)
+{
+ return legalized_signature(
+ reduced_signature(node->input(), node->reduction_indices(), node->keep_dims()));
+}
+
+} // namespace luci
namespace luci
{
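+// Build a ShapeDescription directly from the node's own rank/dim values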
+ShapeDescription to_shape_description(const luci::CircleNode *circle_node)
+{
+ ShapeDescription res;
+
+ res._rank_known = true;
+
+ res._dims.resize(circle_node->rank());
+ for (uint32_t i = 0; i < circle_node->rank(); ++i)
+ res._dims.at(i) = circle_node->dim(i).value();
+
+ return res;
+}
+
ShapeDescription to_shape_description(const loco::TensorShape &shape)
{
ShapeDescription res;
return os;
}
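+// Stream a CircleNode's own dimensions as "[d0,d1,...]" for logging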
+std::ostream &operator<<(std::ostream &os, const luci::CircleNode *circle_node)
+{
+ os << "[";
+ for (uint32_t r = 0; r < circle_node->rank(); ++r)
+ {
+ if (r)
+ os << ",";
+ os << circle_node->dim(r).value();
+ }
+ os << "]";
+ return os;
+}
+
/**
* @brief returns a node that is CircleOutput with index is out_index in nodes
*/
if (dynamic_cast<luci::CircleOutputExclude *>(circle_node))
continue;
- assert(loco::shape_known(circle_node));
+ assert(circle_node->shape_status() != luci::ShapeStatus::UNDEFINED);
// check if output node shape is same as graph output shape
- auto co_tensor_shape = loco::shape_get(circle_node).as<loco::TensorShape>();
auto go_tensor_shape = graph_out->shape();
assert(go_tensor_shape);
- if (!(co_tensor_shape == *go_tensor_shape))
+
+ bool is_shape_valid = (circle_node->rank() == go_tensor_shape->rank());
+ for (uint32_t i = 0; is_shape_valid && i < circle_node->rank(); ++i)
+ if (circle_node->dim(i).value() != go_tensor_shape->dim(i).value())
+ is_shape_valid = false;
+
+ if (is_shape_valid == false)
{
INFO(l) << "[luci] Shape for output #" << out_index << " not same " << std::endl;
- INFO(l) << "[luci] " << circle_node->name() << " " << co_tensor_shape << " vs "
+ INFO(l) << "[luci] " << circle_node->name() << " " << circle_node << " vs "
<< *go_tensor_shape << std::endl;
return false;
}
// check if data type match
- assert(loco::dtype_known(circle_node));
- if (graph_out->dtype() != loco::dtype_get(circle_node))
+ assert(circle_node->dtype() != loco::DataType::Unknown);
+ if (graph_out->dtype() != circle_node->dtype())
{
INFO(l) << "[luci] Type for output #" << out_index << " not same " << std::endl;
return false;
return true;
}
+bool validate_shape_signature(loco::Graph *g)
+{
+ LOGGER(l);
+
+ for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ const auto shape_signature = circle_node->shape_signature();
+
+ if (shape_signature.rank() == 0)
+ continue;
+
+ // Rank of shape and shape signature should be the same
+ if (circle_node->rank() != shape_signature.rank())
+ {
+ INFO(l) << "[luci] Rank of shape signature for " << circle_node->name() << " does not match"
+ << std::endl;
+ return false;
+ }
+
+ bool has_unknown = false;
+
+ // If a shape signature dimension is not -1, it should match the corresponding shape dimension
+ for (uint32_t d = 0; d < shape_signature.rank(); ++d)
+ {
+ if (shape_signature.dim(d) != -1 &&
+ shape_signature.dim(d) != (int32_t)(circle_node->dim(d).value()))
+ {
+ INFO(l) << "[luci] Dimension " << d << " of shape signature for " << circle_node->name()
+ << " does not match" << std::endl;
+ return false;
+ }
+
+ if (shape_signature.dim(d) == -1)
+ has_unknown = true;
+ }
+
+ // Shape signature should have at least one -1 value.
+ if (!has_unknown)
+ {
+ INFO(l) << "[luci] Shape signature in " << circle_node->name()
+ << " does not have an unknown dimension" << std::endl;
+ return false;
+ }
+ }
+
+ return true;
+}
+
} // namespace
namespace luci
if (!validate_shape_dtype(g))
return false;
+ if (!validate_shape_signature(g))
+ return false;
+
// TODO add more validation
return true;
#include <luci/Pass/ShapeInferencePass.h>
#include <luci/Pass/TypeInferencePass.h>
+// Following passes will be removed after refactoring is finished
+#include <luci/Pass/MigrateLegacyShapeDtypePass.h>
+
#include <iostream>
#include <map>
#include <string>
while (pass.run(graph) == true)
;
}
+ {
+ // This pass will be removed after refactoring is finished
+ luci::MigrateLegacyShapeDtypePass pass;
+ while (pass.run(graph) == true)
+ ;
+ }
if (!luci::validate(graph))
return 255;
#include <luci/CircleExporter.h>
#include <oops/InternalExn.h>
+// Following passes will be removed after refactoring is finished
+#include <luci/Pass/MigrateLegacyShapeDtypePass.h>
+
#include <fstream>
#include <iostream>
#include <map>
while (pass.run(graph) == true)
;
}
+ {
+ // This pass will be removed after refactoring is finished
+ luci::MigrateLegacyShapeDtypePass pass;
+ while (pass.run(graph) == true)
+ ;
+ }
if (!luci::validate(graph))
return 255;
};
/**
- * @breif Expand shape x and y to same rank by align right and filling with 1
+ * @brief Expand shape x and y to same rank by align right and filling with 1
*/
void expand_rank(loco::TensorShape &x, loco::TensorShape &y)
{
}
/**
- * @breif Returns shape of expanded dimension of input x and y having same rank
+ * @brief Returns shape of expanded dimension of input x and y having same rank
*/
loco::TensorShape expand_dimension(const loco::TensorShape &x, const loco::TensorShape &y)
{
*/
/**
- * @breif absolute path to installation directory of *nnc* project
+ * @brief absolute path to installation directory of *nnc* project
*/
#define NNC_ROOT_PATH "@NNC_INSTALL_PATH@"
/**
- * @breif absolute path to directory contains libraries
+ * @brief absolute path to directory contains libraries
*/
#define NNC_LIB_PATH "@NNC_INSTALL_LIB_PATH@"
- make_batchnorm_gamma_positive: This makes negative gamma of batch normalization into a small positive value (1e-10).
Note that this pass can change the execution result of the model.
So, use it only when the impact is known to be acceptable.
+- replace_cw_mul_add_with_depthwise_conv: This will replace channel-wise Mul/Add with DepthwiseConv2D.
- resolve_customop_add: This will convert Custom(Add) to normal Add operator
- resolve_customop_batchmatmul: This will convert Custom(BatchMatMul) to
normal BatchMatMul operator
# verify arguments
_verify_arg(parser, args)
- # get file path to log
+ # make a command to run given backend driver
dir_path = os.path.dirname(os.path.realpath(__file__))
- logfile_path = os.path.realpath(args.output_path) + '.log'
-
- with open(logfile_path, 'wb') as f:
- # make a command to run given backend driver
- codegen_path = os.path.join(dir_path, getattr(args, 'backend') + '-compile')
- codegen_cmd = [codegen_path] + unknown_args
-
- f.write((' '.join(codegen_cmd) + '\n').encode())
-
- # run backend driver
- with subprocess.Popen(
- codegen_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
- bufsize=1) as p:
- for line in p.stdout:
- sys.stdout.buffer.write(line)
- f.write(line)
+ codegen_path = os.path.join(dir_path, getattr(args, 'backend') + '-compile')
+ codegen_cmd = [codegen_path] + unknown_args
+ if _utils._is_valid_attr(args, 'command'):
+ codegen_cmd += getattr(args, 'command').split()
+
+ # run backend driver
+ with subprocess.Popen(
+ codegen_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+ bufsize=1) as p:
+ for line in p.stdout:
+ sys.stdout.buffer.write(line)
if __name__ == '__main__':
converter_version.add_argument(
'--v1',
action='store_const',
- dest='converter_version',
+ dest='converter_version_cmd',
const='--v1',
help='use TensorFlow Lite Converter 1.x')
converter_version.add_argument(
'--v2',
action='store_const',
- dest='converter_version',
+ dest='converter_version_cmd',
const='--v2',
help='use TensorFlow Lite Converter 2.x')
const='--v2',
help='use TensorFlow Lite Converter 2.x')
- #converter_version.set_defaults(converter_version='--v1')
-
parser.add_argument('--converter_version', type=str, help=argparse.SUPPRESS)
# input model format
circle2circle_group.add_argument(
'--fuse_instnorm', action='store_true', help='fuse ops to InstanceNorm operator')
circle2circle_group.add_argument(
+ '--replace_cw_mul_add_with_depthwise_conv',
+ action='store_true',
+ help='replace channel-wise Mul/Add with DepthwiseConv2D')
+ circle2circle_group.add_argument(
'--resolve_customop_add',
action='store_true',
help='convert Custom(Add) op to Add op')
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
-v2=True
+converter_version=v2
[one-optimize]
input_path=inception_v3.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
-v2=True
+converter_version=v2
[one-optimize]
input_path=inception_v3.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
-v2=True
+converter_version=v2
[one-optimize]
input_path=inception_v3.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
-v2=True
+converter_version=v2
[one-optimize]
input_path=inception_v3.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
-v2=True
+converter_version=v2
[one-optimize]
input_path=inception_v3.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
-v2=True
+converter_version=v2
--- /dev/null
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+model_format=saved_model
+input_path=test_saved_model
+output_path=test_saved_model.circle
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# import of TF 2.x saved model
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-import_003.cfg"
+outputfile="test_saved_model.circle"
+
+rm -f ${outputfile}
+
+# run test
+one-import tf -C ${configfile} > /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+model_format=keras_model
+input_path=test_keras_model.h5
+output_path=test_keras_model.circle
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# import of TF 2.x keras model
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-import_004.cfg"
+outputfile="test_keras_model.circle"
+
+rm -f ${outputfile}
+
+# run test
+one-import tf -C ${configfile} > /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
--output_path inception_v3_test_data.h5
fi
+if [[ ! -d "test_saved_model" ]]; then
+ rm -rf test_saved_model.zip
+ wget https://github.com/Samsung/ONE/files/5516226/test_saved_model.zip
+ unzip test_saved_model.zip
+ # https://github.com/Samsung/ONE/issues/4268#issuecomment-724578237
+fi
+
+if [[ ! -s "test_keras_model.h5" ]]; then
+ rm -rf test_keras_model.zip
+ wget https://github.com/Samsung/ONE/files/5520777/test_keras_model.zip
+ unzip test_keras_model.zip
+ # https://github.com/Samsung/ONE/issues/4268#issuecomment-725025805
+fi
+
# prepare 'inception_v3.circle' file used for quantization test
inputfile="./inception_v3.pb"
outputfile="./inception_v3.circle"
{
public:
InternalExn(const char *filename, const int line, const std::string &msg)
- : _filename(filename), _line(line), _msg(msg)
+ : _filename(filename), _line(to_uint32(line)), _msg(msg)
{
construct_full_msg();
}
explicit InternalExn(const char *filename, const int line, const std::string &msg, uint32_t val)
- : _filename(filename), _line(line), _msg(msg + ": " + std::to_string(val))
+ : _filename(filename), _line(to_uint32(line)), _msg(msg + ": " + std::to_string(val))
{
construct_full_msg();
}
explicit InternalExn(const char *filename, const int line, const std::string &msg,
const std::string &val)
- : _filename(filename), _line(line), _msg(msg + ": " + val)
+ : _filename(filename), _line(to_uint32(line)), _msg(msg + ": " + val)
{
construct_full_msg();
}
unset(QUANTIZATION_VALUE_TEST)
unset(QUANTIZATION_VALUE_TEST_WITH_PARAM)
+nnas_find_package(FlatBuffers QUIET)
+if(NOT FlatBuffers_FOUND)
+ message(STATUS "Build pota-quantization-value-test: FAILED (missing FlatBuffers)")
+ return()
+endif(NOT FlatBuffers_FOUND)
+
macro(addTest NAME GRANULARITY DTYPE)
list(APPEND QUANTIZATION_VALUE_TEST ${NAME})
list(APPEND QUANTIZATION_VALUE_TEST_WITH_PARAM ${NAME} ${GRANULARITY} ${DTYPE})
unset(TEST_DEPS)
get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+get_target_property(SCHEMA_BIN_PATH mio_circle BINARY_DIR)
+
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/gen_h5_explicit_inputs.py"
+ "${CMAKE_CURRENT_BINARY_DIR}/gen_h5_explicit_inputs.py" COPYONLY)
-set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_1_13_2")
+set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_3_0")
###
### Generate test.config
COMMENT "Generate test configuration"
)
-list(APPEND TEST_DEPS "${TEST_CONFIG}")
+###
+### Generate python interface for circle schema
+###
+set(CIRCLE_SCHEMA_PYTHON_DIR "${CMAKE_CURRENT_BINARY_DIR}/circle")
+
+add_custom_command(
+ OUTPUT ${CIRCLE_SCHEMA_PYTHON_DIR}
+ COMMAND ${CMAKE_COMMAND} -E remove_directory "${CIRCLE_SCHEMA_PYTHON_DIR}"
+ COMMAND "$<TARGET_FILE:flatbuffers::flatc>" --python
+ -o "${CMAKE_CURRENT_BINARY_DIR}" "${SCHEMA_BIN_PATH}/schema.fbs"
+ DEPENDS flatbuffers::flatc
+ COMMENT "Generate python interface for circle schema"
+)
+
+list(APPEND TEST_DEPS "${TEST_CONFIG}" "${CIRCLE_SCHEMA_PYTHON_DIR}")
# This enforces CMake to generate all the dependencies during "build" phase
add_custom_target(pota_quantization_value_test_deps ALL DEPENDS ${TEST_DEPS})
--- /dev/null
+{
+ "weights": [
+ 1,
+ 0,
+ 1,
+ 1
+ ],
+ "scale": [
+ 0.7023000121116638,
+ 0.3091999888420105,
+ 0.7552000284194946,
+ 0.2728999853134155
+ ],
+ "zero_point": [
+ 0,
+ 1,
+ 0,
+ 0
+ ]
+}
--- /dev/null
+{
+ "weights": [
+ 1,
+ 0,
+ 1,
+ 0
+ ],
+ "scale": [
+ 0.012299999594688416,
+ 0.33239999413490295,
+ 0.23240000009536743,
+ 3.3359999656677246
+ ],
+ "zero_point": [
+ 0,
+ 1,
+ 0,
+ 1
+ ]
+}
--- /dev/null
+{
+ "scale": 0.003919127397239208,
+ "zero_point": 0.0
+}
--- /dev/null
+{
+ "scale": 0.051219820976257324,
+ "zero_point": 104.0
+}
--- /dev/null
+{
+ "min": 0.006417479291558266,
+ "max": 0.9993774032592774
+}
--- /dev/null
+{
+ "min": -5.316554107666015,
+ "max": 7.744499607086182
+}
--- /dev/null
+{
+ "weights": [
+ 242,
+ 0,
+ 255,
+ 139
+ ],
+ "scale": 0.004174117464572191,
+ "zero_point": 74.0
+}
--- /dev/null
+{
+ "weights": [
+ 239,
+ 214,
+ 255,
+ 0
+ ],
+ "scale": 0.013993725180625916,
+ "zero_point": 238.0
+}
--- /dev/null
+{
+ "scale": 0.003914226312190294,
+ "zero_point": 0.0
+}
--- /dev/null
+{
+ "scale": 0.04870154336094856,
+ "zero_point": 122.0
+}
--- /dev/null
+{
+ "min": 0.011221568882465362,
+ "max": 0.9981276893615723
+}
--- /dev/null
+{
+ "min": -5.94246238708496,
+ "max": 6.4764308166503906
+}
"weights": [
[
[
- 6553,
- 19660,
- 32767
+ 1,
+ 1,
+ 1
]
]
],
- "scale": 1.5259254723787308e-05,
- "zero_point": 0.0
+ "scale": [
+ 0.10000000149011612,
+ 0.30000001192092896,
+ 0.5
+ ],
+ "zero_point": [
+ 0,
+ 0,
+ 0
+ ]
}
{
- "scale": 0.0001509107678430155,
+ "scale": 0.00015214986342471093,
"zero_point": 0.0
}
{
- "scale": 0.00015084103506524116,
+ "scale": 0.00015159364556893706,
"zero_point": 0.0
}
{
- "min": -4.944893226623535,
- "max": 4.942608108520508
+ "min": -4.985494499206543,
+ "max": 4.967269058227539
}
{
- "min": -2.451441249847412,
- "max": 4.942608108520508
+ "min": -2.4895002365112306,
+ "max": 4.967269058227539
}
--- /dev/null
+{
+ "weights": [
+ [
+ [
+ 1,
+ 1,
+ 1
+ ]
+ ]
+ ],
+ "scale": [
+ 0.10000000149011612,
+ 0.30000001192092896,
+ 0.5
+ ],
+ "zero_point": [
+ 0,
+ 0,
+ 0
+ ]
+}
--- /dev/null
+{
+ "scale": 0.03893596678972244,
+ "zero_point": 128.0
+}
--- /dev/null
+{
+ "scale": 0.029139429330825806,
+ "zero_point": 85.0
+}
--- /dev/null
+{
+ "min": -4.977406520843505,
+ "max": 4.951265411376953
+}
--- /dev/null
+{
+ "min": -2.4792890548706055,
+ "max": 4.951265411376953
+}
#!/usr/bin/env python3
import h5py as h5
import numpy as np
-import tensorflow as tf
+from circle.Model import Model
+from circle.TensorType import TensorType
import argparse
import glob
#
-# This script generates a pack of random input data (.h5) expected by the input tflite model
+# This script generates a pack of random input data (.h5) expected by the input circle model
#
# Basic usage:
# gen_h5_explicit_inputs.py --model <path/to/model/file> --input <path/to/input/directory> --output <path/to/output/file>
-# ex: gen_h5_explicit_inputs.py --model Add_000.tflite --input Add_000 --output Add_000.input.h5
+# ex: gen_h5_explicit_inputs.py --model Add_000.circle --input Add_000 --output Add_000.input.h5
# (This will create Add_000.input.h5)
#
# The input directory should be organized as follows
input = args.input
output = args.output
-# Build TFLite interpreter. (to get the information of model input)
-interpreter = tf.lite.Interpreter(model)
-input_details = interpreter.get_input_details()
+with open(model, 'rb') as f:
+ buf = f.read()
+ circle_model = Model.GetRootAsModel(buf, 0)
+
+# Assume one subgraph
+assert (circle_model.SubgraphsLength() == 1)
+graph = circle_model.Subgraphs(0)
+inputs = graph.InputsAsNumpy()
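+# 'inputs' holds the tensor indices of the subgraph inputs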
# Create h5 file
h5_file = h5.File(output, 'w')
group = h5_file.create_group("value")
group.attrs['desc'] = "Input data for " + model
+
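+# Map a circle TensorType to the corresponding numpy dtype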
+def toNumpyType(circle_type):
+ if circle_type == TensorType.UINT8:
+ return np.uint8
+ if circle_type == TensorType.FLOAT32:
+ return np.float32
+ if circle_type == TensorType.INT16:
+ return np.int16
+ raise SystemExit('Unsupported circle tensor type: ' + str(circle_type))
+
+
# Input files
records = sorted(glob.glob(input + "/*.txt"))
for i, record in enumerate(records):
lines = f.readlines()
for j, line in enumerate(lines):
data = np.array(line.split(','))
- input_detail = input_details[j]
- input_data = np.array(
- data.reshape(input_detail["shape"]), input_detail["dtype"])
+ input_index = inputs[j]
+ tensor = graph.Tensors(input_index)
+ np_type = toNumpyType(tensor.Type())
+ input_data = np.array(data.reshape(tensor.ShapeAsNumpy()), np_type)
sample.create_dataset(str(j), data=input_data)
h5_file.close()
addTest(FullyConnected_003 channel uint8)
addTest(FullyConnected_003 channel int16)
addTest(FullyConnected_003 layer uint8)
+addTest(InstanceNorm_001 layer uint8)
+addTest(InstanceNorm_001 channel uint8)
addTest(Mean_000 layer uint8)
addTest(Mean_000 channel int16)
addTest(MaxPool2D_000 layer uint8)
addTest(Mul_001 layer uint8)
addTest(Mul_001 channel int16)
addTest(PRelu_001 layer uint8)
+addTest(PRelu_001 channel uint8)
addTest(PRelu_001 channel int16)
addTest(ReLU_000 layer uint8)
addTest(ReLU_000 channel int16)
--- /dev/null
+0.15500909,0.32379007,0.12717001,0.60674316,0.07691418,0.437071 ,0.3737046 ,0.798342 ,0.65901846,0.40579247,0.15460491,0.80063623,0.591834 ,0.6617658 ,0.5617774 ,0.44884747,0.7996519 ,0.75895494,0.6239346 ,0.56500244,0.8955974 ,0.32503998,0.05756519,0.11889575,0.19635268,0.33958906,0.916527 ,0.16366032,0.51954055,0.2615102 ,0.07677322,0.6970092 ,0.27848312,0.97694606,0.73990864,0.96292055
--- /dev/null
+0.85332185,0.03102963,0.54344934,0.6300742 ,0.3323267 ,0.1701224 ,0.36199054,0.23949413,0.11960976,0.668403 ,0.7907452 ,0.4377144 ,0.87145853,0.75605077,0.37314144,0.3622036 ,0.4321453 ,0.8770253 ,0.10936793,0.0734281 ,0.2922192 ,0.5829591 ,0.5422962 ,0.84274834,0.48475483,0.23154257,0.20037153,0.27911612,0.30018023,0.23753181,0.98804647,0.61455756,0.90376633,0.8255312 ,0.21020697,0.6272272
--- /dev/null
+0.29736656,0.5712386 ,0.55447775,0.9014779 ,0.6208391 ,0.3413809 ,0.043885 ,0.5474101 ,0.8642339 ,0.05225753,0.36101478,0.15561381,0.776422 ,0.9997885 ,0.35188794,0.23418508,0.0882741 ,0.5797471 ,0.99945694,0.22190607,0.12337059,0.3701574 ,0.65161157,0.9830193 ,0.46270686,0.10077237,0.23681253,0.8734158 ,0.8358533 ,0.08817147,0.3845248 ,0.12799203,0.66830546,0.14838815,0.90201443,0.21123447
--- /dev/null
+0.92424273,0.35776526,0.0776509 ,0.93697083,0.6559925 ,0.78421926,0.7511033 ,0.71389145,0.52217877,0.41876563,0.3560251 ,0.5862293 ,0.53027606,0.32203177,0.24654935,0.55851364,0.35312092,0.38102064,0.21245371,0.87299466,0.94972914,0.54950166,0.3445233 ,0.98951054,0.37458083,0.3778964 ,0.64035404,0.10410193,0.18511558,0.1942945 ,0.07018933,0.6113747 ,0.38076922,0.08337755,0.98258 ,0.91440874
--- /dev/null
+0.3790198 ,0.6347678 ,0.42544237,0.37033263,0.08057033,0.49041638,0.61705315,0.15411597,0.6455052 ,0.6857795 ,0.9613043 ,0.60357374,0.57679754,0.22550431,0.05105425,0.8641173 ,0.65559083,0.18274343,0.8963692 ,0.22369736,0.3133119 ,0.27507883,0.00539197,0.6846556 ,0.5969273 ,0.78488904,0.87746257,0.15459861,0.23133573,0.59048635,0.07172906,0.28935516,0.02084327,0.09926946,0.02687503,0.7306079
--- /dev/null
+0.641226 ,0.68639857,0.87044334,0.9448475 ,0.21544299,0.5202749 ,0.5077167 ,0.23931624,0.5712026 ,0.4167988 ,0.56711906,0.52392703,0.42762014,0.5277072 ,0.03028643,0.18017273,0.8823869 ,0.5752544 ,0.09368648,0.50277 ,0.784248 ,0.04220072,0.55217946,0.75145644,0.7957966 ,0.6563401 ,0.54975605,0.17231019,0.4219812 ,0.27839735,0.5850074 ,0.24070603,0.00957893,0.3669335 ,0.03722228,0.8705231
--- /dev/null
+0.76871806,0.65729177,0.946514 ,0.4308198 ,0.65200335,0.5745432 ,0.2990488 ,0.3156028 ,0.3218111 ,0.44709972,0.9411461 ,0.4828708 ,0.5707792 ,0.10645963,0.74497086,0.3563156 ,0.07986172,0.64869064,0.73329425,0.8848129 ,0.3027897 ,0.8753744 ,0.8884493 ,0.3606782 ,0.88617206,0.20232914,0.10251648,0.6366529 ,0.20422891,0.24426484,0.6952833 ,0.21889713,0.11477511,0.40650114,0.9637219 ,0.9751801
--- /dev/null
+0.5773043 ,0.6733178 ,0.22994593,0.32895002,0.74122405,0.6671442 ,0.1899878 ,0.35264668,0.31084946,0.3864719 ,0.7035006 ,0.46563607,0.44263086,0.2414678 ,0.7430625 ,0.72898006,0.9982008 ,0.8989132 ,0.45622516,0.17876478,0.9356994 ,0.85493064,0.73729265,0.9804242 ,0.8735895 ,0.14825071,0.33990774,0.76397645,0.14657325,0.2492199 ,0.43957144,0.20367876,0.43692476,0.28123745,0.24346785,0.21133597
--- /dev/null
+0.74837255,0.7530814 ,0.05257462,0.06676125,0.26824346,0.05064487,0.23974492,0.5355457 ,0.97374374,0.38518724,0.3781766 ,0.7047476 ,0.95856845,0.09918232,0.36570287,0.5659468 ,0.8793284 ,0.7967468 ,0.99486005,0.11670698,0.42955273,0.25254622,0.06959745,0.5107888 ,0.88106513,0.3649466 ,0.7039582 ,0.8535825 ,0.3979168 ,0.9560912 ,0.17733434,0.69954944,0.35459924,0.28516313,0.75249106,0.7197228
--- /dev/null
+0.73320377,0.33635676,0.05811058,0.7032399 ,0.26380542,0.99637365,0.36622 ,0.47471517,0.5940316 ,0.39782768,0.46486765,0.5167471 ,0.61612487,0.93076104,0.8955697 ,0.5320168 ,0.41166067,0.29174343,0.07476811,0.60023075,0.0961028 ,0.77073896,0.17360727,0.48763612,0.31430086,0.37943754,0.7456216 ,0.16767363,0.9368368 ,0.09397154,0.68992966,0.5829225 ,0.7521187 ,0.06086114,0.13137193,0.22886442
- 0.5590226 ,-0.2806683 ,-1.6237477 ,-0.9041292 ,-2.2877202 , 3.4275887 , 0.7413508 ,-2.4284103 ,-0.39940628, 2.431437 ,-3.681079 ,-0.24288087, 3.3011584 ,-4.9507365 , 0.63297826, 3.0742207 ,-4.407745 ,-3.1469536 , 0.28014645, 1.7506292 ,-2.2447422 ,-0.5647249 , 4.763762 ,-1.9554822 ,-1.0236452 , 1.4784483 ,-0.15040281, 3.009691 , 4.0685706 ,-4.3577633 , 3.9074588 , 3.3200462 , 0.7937705 ,-4.491444 ,-1.5227276 ,-4.907054 , 3.0078046 ,-3.3134713 ,-4.180262 , 0.42208448,-4.764361 , 1.7373432 ,-2.4944234 , 1.3338212 , 0.5318029 , 2.0201192 , 1.274291 ,-3.891372
+-1.9927613e+00,-1.7386111e+00, 4.0895696e+00, 3.7818990e+00, 1.9420158e+00, 2.8482721e+00, 1.9165717e+00, 3.0059583e+00, 1.8346788e+00,-1.9055414e-03, 4.9277787e+00,-2.2794118e+00, 4.4005270e+00, 4.9703922e+00,-4.5275192e+00,-4.0446317e-01,-4.9363256e+00, 4.9506269e+00, 5.5874938e-01, 3.9949589e+00,-3.8152415e-01,-4.1024357e-01,-3.8472393e+00, 4.2956004e+00, 4.8097472e+00, 1.7960385e+00, 1.6767026e+00,-2.2773645e+00, 2.6808765e+00,-3.7214172e+00, 4.0978761e+00, 3.6202488e+00,-3.3211513e+00, 3.6200387e+00,-3.6106458e+00,-3.9778764e+00, 3.8779631e+00,-4.8502750e+00,-2.1901150e+00, 3.1800017e+00, 4.6261444e+00, 3.5151103e+00, 2.8659137e-02, 4.5340648e+00, 1.9836371e+00,-2.1751235e+00,-4.6762753e+00,-3.6951694e+00
--2.5172353 , 1.8682998 , 2.6845884 , 1.8813597 ,-4.6693754 ,-3.2414548 ,-3.1801097 ,-1.5670214 , 1.9862102 , 3.857179 ,-3.0402668 ,-1.4183347 ,-2.7983398 ,-4.087585 ,-1.1274861 , 1.8738103 ,-2.563316 ,-2.973781 ,-0.872552 ,-4.4504313 ,-0.9188538 , 4.5734954 , 1.3559026 , 4.943204 ,-3.6803703 , 4.577067 ,-0.6116983 , 4.5055084 , 2.5480487 , 3.7308915 ,-0.3163238 ,-0.00772368, 3.0286303 ,-0.43645218, 0.87748104,-2.6953583 , 0.21743219, 2.431181 ,-1.2284794 , 0.35975334, 0.87034357,-2.5191767 , 4.030477 ,-1.2849646 ,-4.537441 ,-0.8822066 , 4.5059347 ,-0.9273924
+-4.7488093 , 4.805902 ,-0.29828382, 0.57486725,-4.864297 , 1.1832287 ,-1.7611881 ,-2.7058024 , 2.707353 ,-3.9832466 , 3.1243927 ,-4.795229 , 1.9835415 , 3.2291937 , 2.4303932 ,-3.556881 , 4.316894 ,-0.6444627 ,-3.8289468 , 4.012964 , 0.7878584 ,-1.8921386 , 2.779619 ,-3.762597 , 3.4239094 ,-0.9103423 ,-3.9791772 ,-2.5613685 ,-4.4910364 , 0.19411987, 4.6296096 ,-0.6827259 , 3.7645729 , 1.5309091 , 3.5163064 , 3.4726381 , 3.5372822 , 1.7671971 , 1.4374614 , 3.5783768 ,-2.4927518 , 3.9427729 , 2.431568 , 2.6959393 , 3.8100271 ,-2.099064 , 3.3663592 ,-2.0818436
- 4.523605 ,-2.1303053 , 2.7449381 ,-4.449816 ,-1.4482541 , 4.643309 ,-2.5644886 , 4.3115034 ,-4.7736797 ,-1.9451635 ,-2.1877592 , 2.3639698 ,-1.8480709 ,-4.560132 ,-0.40588248, 4.368528 ,-0.25666243, 1.1258887 , 2.33142 ,-3.8270295 ,-4.337086 ,-0.6709232 , 4.9283085 ,-3.5181348 , 2.225021 ,-0.0831629 , 2.0482597 , 3.161154 ,-0.49435407, 2.9382129 ,-1.248886 ,-3.7053974 , 1.6736145 ,-1.3524985 ,-1.4007242 ,-4.291275 ,-3.391911 , 4.803692 , 1.631321 , 0.13381048,-2.9587808 , 3.9878602 ,-3.3585925 , 4.6802793 ,-1.7605352 , 3.4168313 , 1.2318416 ,-4.40287
+ 4.279912 ,-2.2746763 , 4.0609813 , 4.5353827 , 3.624241 ,-3.9593613 , 4.189409 ,-3.9370356 ,-2.7063863 ,-1.9987059 , 4.172294 ,-4.5454354 , 4.362368 , 2.2204642 ,-4.9866576 , 3.31571 , 0.12623785, 4.7834573 ,-1.3521448 ,-1.5408021 ,-4.6578984 ,-2.93307 ,-1.5684534 ,-1.6875995 ,-0.4278419 , 1.1314197 ,-2.9655704 ,-0.48032767,-1.9200082 , 1.3321692 , 0.87586147,-0.1761448 , 3.939337 ,-1.0270193 ,-4.807054 , 2.8373904 ,-1.1184337 ,-0.8979197 , 2.1442132 ,-2.8509672 ,-3.3741531 , 3.6592414 , 0.7632272 ,-4.11465 , 4.892313 , 4.715815 ,-4.6481915 , 0.24676175
- 1.249105 ,-3.2594535 ,-1.7899538 ,-4.804654 ,-2.0324056 ,-1.9959925 , 3.5215054 , 0.5371311 , 1.9365969 ,-3.130136 ,-2.3590457 ,-4.653209 ,-2.0184708 , 3.5759254 ,-1.3521014 , 1.910826 , 3.8221822 ,-2.8988552 , 0.6571995 , 1.0839036 , 3.5422468 , 2.4680734 , 0.6148754 ,-3.4008195 , 4.558109 , 2.0105803 , 0.58087206, 1.3398736 , 2.770545 , 0.29666626, 4.1851935 , 0.04321287, 2.7680604 , 4.5661645 , 4.0127945 ,-4.8027678 , 4.1711125 ,-0.24452859, 0.4101852 , 1.5963763 ,-2.8356924 , 1.2876563 , 0.90424466, 2.965566 ,-1.9058269 , 4.759825 ,-2.2063546 ,-1.1309439
+-2.0949495 ,-1.1370499 , 4.6457314 ,-2.243915 ,-1.7996464 , 1.2268789 ,-4.938172 ,-3.2802615 , 1.8788282 , 4.4162655 ,-4.8805113 , 3.1269526 , 3.2644348 , 0.89842725,-1.4484432 ,-0.28381723, 3.046261 ,-1.0718596 ,-3.996107 ,-4.9575796 ,-2.2279077 , 1.5326967 , 4.4588428 ,-2.042381 , 4.6604958 , 4.6422915 ,-1.097833 , 3.666126 , 0.4735639 ,-4.480704 ,-4.831033 ,-0.27288163, 4.588138 , 4.5297036 , 4.3675694 ,-1.6098841 ,-3.4147859 , 2.1168516 ,-1.9529305 ,-0.12548867, 3.4388335 ,-1.4071734 , 0.9507897 , 4.8206787 , 1.676873 ,-1.7102181 , 1.7746873 , 0.02711739
--3.0078897 , 1.6800234 , 4.350201 , 0.22538732, 2.9894316 ,-4.234071 , 2.733158 ,-3.8551323 , 3.9647048 , 1.4266169 , 0.78519976,-0.5334222 , 0.6681823 , 2.8409274 , 2.335872 ,-3.757666 ,-3.321705 , 2.9423573 , 1.3080943 , 1.0453726 , 3.222387 , 3.1813147 ,-1.8588669 ,-3.2523947 ,-4.4175825 , 3.7631783 ,-3.4176416 , 1.2141145 , 1.3725096 ,-1.2283872 ,-2.9829195 ,-3.6383085 ,-2.0126016 ,-3.7627625 , 4.916868 , 0.73052526,-0.02047114,-3.9506733 , 2.3569562 ,-4.247723 ,-1.8913685 , 1.7365774 , 4.59158 , 3.654596 ,-4.2133813 ,-4.6193404 ,-1.3968121 ,-3.580963
+-4.707647 ,-4.0921726 , 3.5813692 ,-4.71081 , 3.157816 ,-3.0034213 ,-0.21858999,-1.1736552 ,-1.6042249 ,-3.93102 ,-4.0407577 , 3.7350774 ,-4.9545655 ,-1.5413756 , 0.34996858, 2.0339615 , 0.99290746,-3.9916334 ,-4.149016 ,-3.2332835 , 3.6728513 , 2.4537466 ,-3.103485 ,-0.4829316 , 4.8046784 ,-1.753812 , 4.878712 ,-1.4039769 , 1.6640003 ,-1.2041731 , 0.8046477 , 0.9196048 ,-0.6475092 , 1.1409346 , 2.0324717 ,-0.04227797,-0.5379897 , 3.205104 , 3.3556423 , 4.8447986 ,-1.9695646 ,-2.6304977 ,-3.7261262 ,-4.725599 , 2.1162436 ,-0.5631174 ,-0.5820323 , 0.8398242
--- /dev/null
+ 0.29413325,-0.5246354 , 2.5049045 , 4.9534087 , 0.9885207 ,-4.9603324 ,-2.534284 ,-1.2587626 ,-4.6054525 ,-4.0071754 , 3.204513 , 1.9254771 ,-3.0781755 ,-2.225973 , 3.3524523 , 3.817767 , 3.4921055 , 4.3435416 , 3.0849605 ,-1.4030998 ,-1.0506575 ,-0.42979953,-2.2500112 , 3.4057455 , 4.5414543 , 2.9366746 , 4.8639297 ,-0.1028097 , 2.3421814 , 0.6463296 ,-4.906506 ,-0.7544193 ,-4.0089574 , 2.3837643 ,-0.62171113,-3.349577 , 0.63758767,-3.6872568 ,-2.4398334 ,-1.1556609 ,-3.116043 ,-1.9698795 , 0.7246678 , 2.1801088 ,-2.5762403 , 2.5748649 ,-2.8637013 , 2.8755338
--- /dev/null
+-3.5664022e+00, 3.7696166e+00,-2.0404069e+00,-3.2197843e+00, 2.0149478e-01, 4.1116104e+00, 1.9678035e+00,-7.5975507e-01,-2.1460054e+00, 4.6308274e+00,-1.8927828e+00, 3.0689645e+00,-7.0773923e-01,-6.7477709e-01,-1.6248076e+00, 2.7095401e+00, 2.9545853e+00, 8.5142839e-01,-2.7683893e-01,-2.0586762e+00,-3.5001924e+00,-1.7622359e+00, 2.2262762e+00,-4.0617161e+00,-2.4704919e+00,-3.6333869e+00, 2.3401244e+00,-4.6641917e+00,-4.0812837e-03, 1.1013873e+00, 1.4518824e-01, 2.4135842e+00, 4.1183419e+00, 3.0343807e+00,-3.7195799e-01,-9.7189492e-01,-3.0425618e+00, 4.6822820e+00,-1.7649661e+00, 3.9648254e+00,-3.1084957e+00,-7.3071235e-01,-5.1578474e-01,-3.5188673e+00,-4.7018051e+00,-4.1592669e+00,-3.5443991e-01, 1.3961188e+00
--- /dev/null
+ 4.2618856 , 0.4364266 , 0.5258691 , 3.5147502 ,-4.025428 , 3.143039 , 1.3707066 , 4.7792606 , 1.1539228 , 3.785161 ,-1.9495047 , 2.7047534 , 0.5673139 ,-0.5191105 ,-2.5284607 , 4.076998 , 2.9433093 ,-2.1924984 , 1.1020935 ,-2.126009 , 0.7586875 , 1.1708144 ,-4.594603 ,-3.252912 ,-3.057344 , 3.8008513 ,-4.9164753 ,-4.560891 , 1.724639 ,-3.0877826 , 0.55354726,-3.969067 , 4.17461 ,-1.901139 ,-4.8903475 , 4.7866077 ,-1.3506653 ,-4.2624874 , 0.8842832 , 4.672003 ,-2.5649548 ,-3.6606123 ,-1.6794366 ,-2.0534387 ,-2.9902222 , 3.078469 , 2.846819 , 1.2788221
--- /dev/null
+-2.6751792 ,-2.5436802 , 0.30533552, 1.0443643 ,-4.4327927 , 2.813772 ,-4.27514 , 2.5894637 , 2.8684394 ,-2.2010357 , 1.5827026 , 0.01609957, 0.38605672,-4.978118 ,-0.30794173, 0.7372266 ,-1.2931277 , 2.8435483 , 2.8204155 , 1.5801594 , 0.853025 , 1.0665054 ,-2.3281817 ,-4.2512784 , 2.379218 , 2.6335719 , 0.17575608,-2.7761426 ,-2.8164017 , 1.8392245 , 2.6495574 , 0.82702005, 3.8548648 ,-3.179834 , 0.25908127, 2.4930098 , 0.71019745,-3.193962 ,-1.1381371 ,-3.5847874 ,-1.3353258 , 2.942422 , 0.11944559,-3.0676606 , 3.534187 , 0.86664987,-1.4781127 , 4.8873277
--- /dev/null
+ 4.2327642 , 4.644095 ,-2.8978996 , 4.39419 , 2.897952 ,-3.330613 ,-3.9131684 ,-1.4672462 ,-3.9219787 , 2.1286428 ,-4.313653 , 2.65426 ,-4.201722 , 2.5390174 ,-3.821772 ,-1.9420135 , 3.3508427 ,-1.2804624 , 4.899826 ,-4.165279 ,-0.38920662, 3.594253 ,-2.367396 , 3.8604352 , 0.40077925, 3.7654843 ,-2.7208197 , 3.4325044 ,-2.921729 , 2.0519714 ,-0.6181836 ,-0.12342291,-4.1059036 ,-3.653849 ,-3.5340316 ,-0.2782715 , 0.32330513, 3.360021 , 2.5673623 , 2.1614027 ,-4.438277 , 3.3010736 , 0.3992392 , 0.82871836,-2.8720777 , 0.29633927, 0.25286415,-4.191315
# work_dir : build directory of quantization-value-test (ex: build/compiler/quantization-value-test)
SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-GEN_SCRIPT_PATH="${SOURCE_PATH}/gen_h5_explicit_inputs.py"
COMPARE_SCRIPT_PATH="${SOURCE_PATH}/compare_tensors.py"
CONFIG_PATH="$1"; shift
BIN_PATH=$(dirname "${CONFIG_PATH}")
TEST_INPUT_PATH="${SOURCE_PATH}/test_inputs"
+GEN_SCRIPT_PATH="${BIN_PATH}/gen_h5_explicit_inputs.py"
WORKDIR="$1"; shift
source "${CONFIG_PATH}"
# Generate h5 input data
source "${VIRTUALENV}/bin/activate"
"${VIRTUALENV}/bin/python" "${GEN_SCRIPT_PATH}" \
- --model "${WORKDIR}/${MODELNAME}.tflite" \
+ --model "${WORKDIR}/${MODELNAME}.circle" \
--input "${TEST_INPUT_PATH}/${MODELNAME}/${GRANULARITY}/${DTYPE}" \
--output "${TESTCASE_FILE}.input.h5"
flex_buffers->Add(1);
flex_buffers->EndVector(start, /*typed=*/true, /*fixed=*/false);
auto output_type = operation.max_pool_with_argmax_options().output_type();
- assert(output_type == tflite::TensorType_INT64 || output_type == tflite::TensorType_INT32);
+ assert(output_type == tflchef::INT64 || output_type == tflchef::INT32);
flex_buffers->Int("Targmax", output_type);
std::string padding = operation.max_pool_with_argmax_options().padding() ? "VALID" : "SAME";
flex_buffers->String("padding", padding);
flex_buffers->Bool("include_batch_in_index",
operation.max_pool_with_argmax_options().include_batch_in_index());
- flex_buffers->Int("T", tflite::TensorType_FLOAT32);
+ flex_buffers->Int("T", tflchef::FLOAT32);
flex_buffers->EndMap(map_start);
flex_buffers->Finish();
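For context, the FlexBuffer built above is the custom-options blob of the MaxPoolWithArgmax custom operator, and it can be read back with the stock flexbuffers reader. In the sketch below, only the four keys ("Targmax", "padding", "include_batch_in_index", "T") come from the code above; the function and variable names are illustrative assumptions.

#include <flatbuffers/flexbuffers.h>
#include <cstdint>
#include <string>
#include <vector>

// Hypothetical reader for the custom-options map serialized above.
void read_custom_options(const std::vector<uint8_t> &blob)
{
  auto map = flexbuffers::GetRoot(blob.data(), blob.size()).AsMap();
  const int32_t targmax = map["Targmax"].AsInt32();            // output_type written above
  const std::string padding = map["padding"].AsString().str(); // "VALID" or "SAME"
  const bool include_batch = map["include_batch_in_index"].AsBool();
  const int32_t t = map["T"].AsInt32();                        // element type, FLOAT32 above
  (void)targmax; (void)padding; (void)include_batch; (void)t;
}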
auto opcodes = reader.opcodes();
auto buffers = reader.buffers();
+ auto metadata = reader.metadata();
// dump operator_codes
os << "Operator Codes: [order] OpCodeName (OpCode Enum)" << std::endl;
}
os << std::endl;
+ // dump metadata
+ if (metadata != nullptr)
+ {
+ os << "metadata : B(index) name" << std::endl;
+ for (uint32_t i = 0; i < metadata->Length(); ++i)
+ {
+ os << "B(" << metadata->Get(i)->buffer() << ") " << metadata->Get(i)->name()->c_str();
+ }
+ os << std::endl;
+ }
+
for (uint32_t sg = 0; sg < num_subgraph; ++sg)
{
reader.select_subgraph(sg);
// There is no Option for LOGISTIC
// There is no Option for LOG_SOFTMAX
_op_map[tflite::BuiltinOperator_MAX_POOL_2D] = make_unique<Pool2DPrinter>();
+ _op_map[tflite::BuiltinOperator_MEAN] = make_unique<ReducerPrinter>();
_op_map[tflite::BuiltinOperator_MIRROR_PAD] = make_unique<MirrorPadPrinter>();
_op_map[tflite::BuiltinOperator_MUL] = make_unique<MulPrinter>();
// There is no Option for NON_MAX_SUPPRESSION_V4
_version = model->version();
_subgraphs = model->subgraphs();
_buffers = model->buffers();
+ _metadata = model->metadata();
auto opcodes = model->operator_codes();
for (const ::tflite::OperatorCode *opcode : *opcodes)
using TFliteBuffers_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>>;
using TFliteTensors_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>>;
using TFliteOperators_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>>;
+ using TFliteMetadata_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Metadata>>;
public:
Reader(const tflite::Model *model);
const TFliteOperators_t *operators() { return _operators; }
const std::vector<int32_t> &inputs() const { return _inputs; }
const std::vector<int32_t> &outputs() const { return _outputs; }
+ const TFliteMetadata_t *metadata() const { return _metadata; }
uint32_t num_subgraph() const { return _subgraphs->Length(); }
const TFliteBuffers_t *_buffers{nullptr};
const TFliteTensors_t *_tensors{nullptr};
const TFliteOperators_t *_operators{nullptr};
+ const TFliteMetadata_t *_metadata{nullptr};
uint32_t _subgraph_index;
std::string _subgraph_name;
if (NOT VCONONE_VERSION)
- set(VCONONE_VERSION 0x00000000000b0001)
+ set(VCONONE_VERSION 0x00000000000c0001)
# NOTE order is [build patch minor major]
# if VCONONE_VERSION is set with -D option, it will be cached
# you may have to remove cache file if you remove -D option
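Following the NOTE above (the hex literal reads left to right as [build patch minor major], 16 bits per field), the new value 0x00000000000c0001 corresponds to version 1.12.0 with build 0. A minimal decoding sketch, purely illustrative and not part of this change:

#include <cstdint>
#include <cstdio>

int main()
{
  const uint64_t v = 0x00000000000c0001ULL; // VCONONE_VERSION set above
  const unsigned build = (v >> 48) & 0xFFFF;
  const unsigned patch = (v >> 32) & 0xFFFF;
  const unsigned minor = (v >> 16) & 0xFFFF;
  const unsigned major = v & 0xFFFF;
  std::printf("%u.%u.%u (build %u)\n", major, minor, patch, build); // prints 1.12.0 (build 0)
  return 0;
}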
--- /dev/null
+../.clang-format.8
\ No newline at end of file
cl::Device _device; /**< Underlying CL device. */
std::string _kernel_path; /**< Path to the kernels folder. */
mutable std::map<std::string, const Program>
- _programs_map; /**< Map with all already loaded program data. */
+ _programs_map; /**< Map with all already loaded program data. */
mutable std::map<std::string, cl::Program>
- _built_programs_map; /**< Map with all already built program data. */
+ _built_programs_map; /**< Map with all already built program data. */
static const std::map<std::string, std::string>
- _kernel_program_map; /**< Map that associates kernel names with programs. */
+ _kernel_program_map; /**< Map that associates kernel names with programs. */
static const std::map<std::string, std::string>
- _program_source_map; /**< Contains sources for all programs.
- Used for compile-time kernel inclusion. >*/
+ _program_source_map; /**< Contains sources for all programs.
+ Used for compile-time kernel inclusion. >*/
};
}
#endif /* __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__ */
class ICLTensor;
/**
-* @brief Class to perform EmbeddingLookup operation with opencl kernel
-*/
+ * @brief Class to perform EmbeddingLookup operation with opencl kernel
+ */
class CLEmbeddingLookupKernel : public ICLKernel
{
public:
class ICLTensor;
/**
-* @brief Class to perform HashtableLookup operation with opencl kernel
-*/
+ * @brief Class to perform HashtableLookup operation with opencl kernel
+ */
class CLHashtableLookupKernel : public ICLKernel
{
public:
const char *name() const override { return "NEOneHotKernel"; }
/** Initialise the kernel's inputs and outputs
*
- * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
- * following types: U32/S32
- * @param[in] depth The tensor for depth of the one hot dimension. Supported tensor rank: up to
- * 3. Must be one of the following types: U32/S32
- * @param[in] on_value On value tensor. Supported tensor rank: only 1. Data type supported:
- * U8/S8/U16/S16/F16/U32/S32/F32
- * @param[in] off_value Off value tensor. Supported tensor rank: only 1. Data type supported: Same
- * as @p on_value
- * @param[out] output Destination tensor. Data type supported: Same as @p on_value
- * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
- * The value must be in range [-indices.rank , indices.rank)
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the
+ * following types: U32/S32
+ * @param[in] depth The tensor for depth of the one hot dimension.
+ * Supported tensor rank: up to 3.
+ * Must be one of the following types: U32/S32
+ * @param[in] on_value On value tensor. Supported tensor rank: only 1.
+ * Data type supported: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor. Supported tensor rank: only 1.
+ * Data type supported: Same as @p on_value
+ * @param[out] output Destination tensor. Data type supported: Same as @p on_value
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around.
+ * Defaults to -1.
+ * The value must be in range [-indices.rank , indices.rank)
*/
void configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
const ITensor *off_value, ITensor *output, int axis = -1);
/** Static function to check if given info will lead to a valid configuration of @ref
- * NEOneHotKernel
+ * NEOneHotKernel
*
- * @param[in] indices Indices tensor info. Supported tensor rank: up to 3. Must be one of the
- * following types: U32/S32
- * @param[in] depth The tensor info for depth of the one hot dimension. Supported tensor rank:
- * up to 3. Must be one of the following types: U32/S32
- * @param[in] on_value On value tensor info. Supported tensor rank: only 1. Data type supported:
- * U8/S8/U16/S16/F16/U32/S32/F32
- * @param[in] off_value Off value tensor info. Supported tensor rank: only 1. Data type supported:
- * Same as @p on_value
- * @param[out] output Destination tensor info. Data type supported: Same as @p on_value
- * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
- * The value must be in range [-indices.rank , indices.rank)
+ * @param[in] indices Indices tensor info. Supported tensor rank: up to 3.
+ * Must be one of the following types: U32/S32
+ * @param[in] depth The tensor info for depth of the one hot dimension.
+ * Supported tensor rank: up to 3.
+ * Must be one of the following types: U32/S32
+ * @param[in] on_value On value tensor info. Supported tensor rank: only 1.
+ * Data type supported: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor info. Supported tensor rank: only 1.
+ * Data type supported: Same as @p on_value
+ * @param[out] output Destination tensor info. Data type supported: Same as @p on_value
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * The value must be in range [-indices.rank , indices.rank)
*
* @return a status
*/
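The Doxygen block above fully specifies the configure/validate contract (tensor ranks, supported data types, and the axis range). A usage sketch under assumed conditions follows; the include paths, the tensor setup, and the final scheduling step are assumptions for illustration only:

#include "arm_compute/core/Error.h"
#include "arm_compute/core/NEON/kernels/NEOneHotKernel.h" // path assumed
#include "arm_compute/runtime/Tensor.h"

// Hypothetical helper: the caller owns tensors already initialised per the contract above
// (indices/depth: S32; on_value/off_value: rank-1, same type; output: same type as on_value).
void configure_one_hot(arm_compute::NEOneHotKernel &kernel, arm_compute::Tensor &indices,
                       arm_compute::Tensor &depth, arm_compute::Tensor &on_value,
                       arm_compute::Tensor &off_value, arm_compute::Tensor &output)
{
  // axis = -1 fills the innermost dimension; it must lie in [-indices.rank, indices.rank).
  ARM_COMPUTE_ERROR_THROW_ON(arm_compute::NEOneHotKernel::validate(
    indices.info(), depth.info(), on_value.info(), off_value.info(), output.info(), -1));
  kernel.configure(&indices, &depth, &on_value, &off_value, &output, -1);
  // Running the kernel (e.g. through NEScheduler or a function-level wrapper) is not shown here.
}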
* @return the calculated shape
*/
inline TensorShape compute_transposeconv_upsampled_shape(
- const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &info,
- std::pair<unsigned int, unsigned int> &out_dims, unsigned int invalid_right,
- unsigned int invalid_bottom, unsigned int &pad_left, unsigned int &pad_right,
- unsigned int &pad_top, unsigned int &pad_bottom)
+ const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &info,
+ std::pair<unsigned int, unsigned int> &out_dims, unsigned int invalid_right,
+ unsigned int invalid_bottom, unsigned int &pad_left, unsigned int &pad_right,
+ unsigned int &pad_top, unsigned int &pad_bottom)
{
unsigned int sx = info.stride().first;
unsigned int sy = info.stride().second;
unsigned int padx_all_except_invallid = padx + info.pad_left() + info.pad_right() - invalid_right;
unsigned int pady_all_except_invallid =
- pady + info.pad_top() + info.pad_bottom() - invalid_bottom;
+ pady + info.pad_top() + info.pad_bottom() - invalid_bottom;
pad_left = (padx_all_except_invallid + 1) / 2 - info.pad_left();
pad_right = pady_all_except_invallid / 2 - info.pad_right() + invalid_right;
pad_top = (padx_all_except_invallid + 1) / 2 - info.pad_top();
const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
const int channel_idx =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+ get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
const int batch_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
TensorShape out_shape{input_shape};
const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
const int idx_channel =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+ get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
TensorShape output_shape{input->tensor_shape()};
output_shape.set(idx_width, input->dimension(idx_width) * block);
CLDirectTransposeConvLayer &operator=(CLDirectTransposeConvLayer &&) = default;
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs.
- * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension.
- * Data type supported: Should match @p input data type, except for
- * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
- * @param[out] output Output tensor. The output has the same number of dimensions as the
- * @p input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this
- * is decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
- * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Should match @p input data type,
+ * except for input of QASYMM8 and QASYMM8_SIGNED type
+ * where biases should be of S32 type
+ * @param[out] output Output tensor.
+ * The output has the same number of dimensions as the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for
+ * @ref CLConvolutionLayer, specifies if the weights tensor has been reshaped with
+ * @ref CLWeightsReshapeKernel.
*
*/
void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
/** Set the input, weights, biases and output tensors.
*
* @param[in] compile_context The compile context to be used.
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and
- * an optional 4th dimension for batch of inputs.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
* Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data
- * type supported: Same as @p input.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
* @param[in] bias (Optional) The biases have one dimension.
* Data type supported: Should match @p input data type, except for
- * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+ * input of QASYMM8 and QASYMM8_SIGNED type
+ * where biases should be of S32 type
* @param[out] output Output tensor. The output has the same number of dimensions as
- * the @p input.
+ * the @p input.
* @param[in] info Contains padding and policies to be used in the deconvolution,
- * this is decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref
- * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
- * CLWeightsReshapeKernel.
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for
+ * @ref CLConvolutionLayer, specifies if the weights tensor has
+ * been reshaped with @ref CLWeightsReshapeKernel.
*
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights,
unsigned int invalid_right, unsigned int invalid_bottom,
const WeightsInfo &weights_info = WeightsInfo());
/** Static function to check if given info will lead to a valid configuration of @ref
- * CLDirectTransposeConvLayer
+ * CLDirectTransposeConvLayer
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs.
- * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data
- * type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension.
- * Data type supported: Should match @p input data type, except for input
- * of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
- * @param[in] output Output tensor info. The output has the same number of dimensions as the
- * @p input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this is
- * decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
- * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Should match @p input data type,
+ * except for input of QASYMM8 and QASYMM8_SIGNED type
+ * where biases should be of S32 type
+ * @param[in] output Output tensor info. The output has the same number of dimensions
+ * as the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped
+ * with @ref CLWeightsReshapeKernel.
*
* @return a status
*/
CLConvertFullyConnectedWeights _convert_weights;
weights_transformations::CLConvertFullyConnectedWeightsManaged _convert_weights_managed;
weights_transformations::CLFullyConnectedLayerReshapeWeightsExManaged
- _reshape_weights_managed_function;
+ _reshape_weights_managed_function;
CLFlattenLayer _flatten_layer;
CLFullyConnectedLayerReshapeWeightsEx _reshape_weights_function;
CLGEMM _mm_gemm;
public:
CLFullyConnectedReshapingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr)
- : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{},
- _memory_manager{memory_manager}, _cl_fc{nullptr}, _cl_reshape{}, _needs_reshape(false)
+ : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{},
+ _memory_manager{memory_manager}, _cl_fc{nullptr}, _cl_reshape{}, _needs_reshape(false)
{
// DO NOTHING
}
* @param[out] output The output tensor, Data types supported: same as @p input.
* @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0
* @return N/A
- */
+ */
void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
/**
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same
- * as @p input.
- * @param[out] output Output tensor. The output has the same number of dimensions as the
- * @p input.
- * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this
- * is described in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
- * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Same as @p input.
+ * @param[out] output Output tensor. The output has the same number of dimensions
+ * as the @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for
+ * @ref CLConvolutionLayer, specifies if the weights tensor has
+ * been reshaped with @ref CLWeightsReshapeKernel.
*
*/
void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
/** Set the input, weights, biases and output tensors.
*
* @param[in] compile_context The compile context to be used.
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and
- * an optional 4th dimension for batch of inputs. Data types supported:
- * QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data
- * type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported:
- * Same as @p input.
- * @param[out] output Output tensor. The output has the same number of dimensions as
- * the @p input.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Same as @p input.
+ * @param[out] output Output tensor. The output has the same number of dimensions
+ * as the @p input.
* @param[in] deconv_info Contains padding and policies to be used in the deconvolution,
- * this is described in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref
- * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
- * CLWeightsReshapeKernel.
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for
+ * @ref CLConvolutionLayer, specifies if the weights tensor has
+ * been reshaped with @ref CLWeightsReshapeKernel.
*
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights,
unsigned int invalid_right, unsigned int invalid_bottom,
const WeightsInfo &weights_info = WeightsInfo());
/** Static function to check if given info will lead to a valid configuration of @ref
- * CLTransposeConvLayer
+ * CLTransposeConvLayer
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data
- * type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as
- * @p input.
- * @param[in] output Output tensor info. The output has the same number of dimensions as the
- * @p input.
- * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is
- * described in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
- * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Same as @p input.
+ * @param[in] output Output tensor info. The output has the same number of dimensions
+ * as the @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with
+ * @ref CLWeightsReshapeKernel.
*
* @return a status
*/
public:
NEFullyConnectedReshapingLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr)
- : _memory_manager{memory_manager}, _input(nullptr), _weights(nullptr), _biases(nullptr),
- _output(nullptr), _neon_buffer{}, _neon_fc{nullptr}, _neon_reshape{}, _needs_reshape(false)
+ : _memory_manager{memory_manager}, _input(nullptr), _weights(nullptr), _biases(nullptr),
+ _output(nullptr), _neon_buffer{}, _neon_fc{nullptr}, _neon_reshape{}, _needs_reshape(false)
{
// DO NOTHING
}
void configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
const ITensor *off_value, ITensor *output, int axis = -1);
/** Static function to check if given info will lead to a valid configuration of @ref
- * NEOneHotKernel
+ * NEOneHotKernel
*
- * @param[in] indices Indices tensor info. Supported tensor rank: up to 3. Must be one of the
- * following types: U32/S32
- * @param[in] depth The tensor info for depth of the one hot dimension. Supported tensor rank:
- * up to 3. Must be one of the following types: U32/S32
- * @param[in] on_value On value tensor info. Supported tensor rank: only 1. Data type supported:
- * U8/S8/U16/S16/F16/U32/S32/F32
- * @param[in] off_value Off value tensor info. Supported tensor rank: only 1. Data type supported:
- * Same as @p on_value
- * @param[out] output Destination tensor info. Data type supported: Same as @p on_value
- * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
- * The value must be in range [-indices.rank , indices.rank)
+ * @param[in] indices Indices tensor info. Supported tensor rank: up to 3.
+ * Must be one of the following types: U32/S32
+ * @param[in] depth The tensor info for depth of the one hot dimension.
+ * Supported tensor rank: up to 3.
+ * Must be one of the following types: U32/S32
+ * @param[in] on_value On value tensor info. Supported tensor rank: only 1.
+ * Data type supported: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] off_value Off value tensor info. Supported tensor rank: only 1.
+ * Data type supported: Same as @p on_value
+ * @param[out] output Destination tensor info. Data type supported: Same as @p on_value
+ * @param[in] axis (Optional) The axis to fill. Negative values wrap around. Defaults to -1.
+ * The value must be in range [-indices.rank , indices.rank)
*
* @return a status
*/
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
- * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type
- * supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16
- * for F16 input.
- * @param[out] output Output tensor. The output has the same number of dimensions as the @p
- * input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this is
- * decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias Optional, ignored if NULL. The biases have one dimension.
+ * Data types supported: S32 for QASYMM8 and
+ * QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
+ * @param[out] output Output tensor. The output has the same number of dimensions as
+ * the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
*
*/
void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output,
const PadStrideInfo &info, unsigned int invalid_right,
unsigned int invalid_bottom);
/** Static function to check if given info will lead to a valid configuration of @ref
- * NETransposeConvLayer
+ * NETransposeConvLayer
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
- * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types
- * supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
- * @param[in] output Output tensor info. The output has the same number of dimensions as the @p
- * input.
- * @param[in] info Contains padding and policies to be used in the deconvolution, this is
- * decribed in @ref PadStrideInfo.
- * @param[in] innvalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input,
+ * and an optional 4th dimension for batch of inputs.
+ * Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
+ * Data type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input,
+ * F32 for F32 input, F16 for F16 input.
+ * @param[in] output Output tensor info. The output has the same number of dimensions as
+ * the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] innvalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
*
* @return a status
*/
using namespace arm_compute;
const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map = {
- // ARMComputeEx kernels
- {"arg_min_max_ex_x", "arg_min_max_ex.cl"},
- {"arg_min_max_ex_y", "arg_min_max_ex.cl"},
- {"arg_min_max_ex_z", "arg_min_max_ex.cl"},
- {"arg_min_max_ex_w", "arg_min_max_ex.cl"},
- {"binary_logical_op", "binary_logical_op.cl"},
- {"cast_bool", "cast.cl"},
- {"embedding_lookup", "embedding_lookup.cl"},
- {"gather_ex", "gather_ex.cl"},
- {"gather_ex_1d", "gather_ex.cl"},
- {"gather_ex_1d_out", "gather_ex.cl"},
- {"gemmlowp_mm_midgard_ex", "gemmlowp_ex.cl"},
- {"hashtable_lookup", "hashtable_lookup.cl"},
- {"instance_normalization_ex", "instance_normalization_ex.cl"},
- {"multiply_scale_factor", "multiply_scale_factor.cl"},
- {"neg_tensor", "neg_tensor.cl"},
- {"one_hot", "one_hot.cl"},
- {"one_hot_only_on_value", "one_hot.cl"},
- {"quantization_symm8", "quantization_symm8.cl"},
- {"reduce_min_max", "reduce_operation.cl"},
- {"reduce_sum_mean", "reduce_operation.cl"},
- {"topkv2_init", "topkv2.cl"},
- {"topkv2_find_first_negative", "topkv2.cl"},
- {"topkv2_reorder_negatives", "topkv2.cl"},
- {"topkv2_store", "topkv2.cl"},
- {"radixsort_histogram", "topkv2_radixsort.cl"},
- {"radixsort_scanhistograms", "topkv2_radixsort.cl"},
- {"radixsort_pastehistograms", "topkv2_radixsort.cl"},
- {"radixsort_reorder", "topkv2_radixsort.cl"},
- {"topkv2_quicksort", "topkv2_quicksort.cl"},
- {"scale_factor_symm8", "scale_factor.cl"},
+ // ARMComputeEx kernels
+ {"arg_min_max_ex_x", "arg_min_max_ex.cl"},
+ {"arg_min_max_ex_y", "arg_min_max_ex.cl"},
+ {"arg_min_max_ex_z", "arg_min_max_ex.cl"},
+ {"arg_min_max_ex_w", "arg_min_max_ex.cl"},
+ {"binary_logical_op", "binary_logical_op.cl"},
+ {"cast_bool", "cast.cl"},
+ {"embedding_lookup", "embedding_lookup.cl"},
+ {"gather_ex", "gather_ex.cl"},
+ {"gather_ex_1d", "gather_ex.cl"},
+ {"gather_ex_1d_out", "gather_ex.cl"},
+ {"gemmlowp_mm_midgard_ex", "gemmlowp_ex.cl"},
+ {"hashtable_lookup", "hashtable_lookup.cl"},
+ {"instance_normalization_ex", "instance_normalization_ex.cl"},
+ {"multiply_scale_factor", "multiply_scale_factor.cl"},
+ {"neg_tensor", "neg_tensor.cl"},
+ {"one_hot", "one_hot.cl"},
+ {"one_hot_only_on_value", "one_hot.cl"},
+ {"quantization_symm8", "quantization_symm8.cl"},
+ {"reduce_min_max", "reduce_operation.cl"},
+ {"reduce_sum_mean", "reduce_operation.cl"},
+ {"topkv2_init", "topkv2.cl"},
+ {"topkv2_find_first_negative", "topkv2.cl"},
+ {"topkv2_reorder_negatives", "topkv2.cl"},
+ {"topkv2_store", "topkv2.cl"},
+ {"radixsort_histogram", "topkv2_radixsort.cl"},
+ {"radixsort_scanhistograms", "topkv2_radixsort.cl"},
+ {"radixsort_pastehistograms", "topkv2_radixsort.cl"},
+ {"radixsort_reorder", "topkv2_radixsort.cl"},
+ {"topkv2_quicksort", "topkv2_quicksort.cl"},
+ {"scale_factor_symm8", "scale_factor.cl"},
};
const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map = {
#ifdef EMBEDDED_KERNELS
- {
- "arg_min_max_ex.cl",
+ {
+ "arg_min_max_ex.cl",
#include "./cl_kernels/arg_min_max_ex.clembed"
- },
- {
- "cast.cl",
+ },
+ {
+ "cast.cl",
#include "./cl_kernels/cast.clembed"
- },
- {
- "embedding_lookup.cl",
+ },
+ {
+ "embedding_lookup.cl",
#include "./cl_kernels/embedding_lookup.clembed"
- },
- {
- "gather_ex.cl",
+ },
+ {
+ "gather_ex.cl",
#include "./cl_kernels/gather_ex.clembed"
- },
- {
- "gemmlowp_ex.cl",
+ },
+ {
+ "gemmlowp_ex.cl",
#include "./cl_kernels/gemmlowp_ex.clembed"
- },
- {
- "hashtable_lookup.cl",
+ },
+ {
+ "hashtable_lookup.cl",
#include "./cl_kernels/hashtable_lookup.clembed"
- },
- {
- "helpers.h",
+ },
+ {
+ "helpers.h",
#include "./cl_kernels/helpers.hembed"
- },
- {
- "helpers_asymm.h",
+ },
+ {
+ "helpers_asymm.h",
#include "./cl_kernels/helpers_asymm.hembed"
- },
- {
- "instance_normalization_ex.cl",
+ },
+ {
+ "instance_normalization_ex.cl",
#include "./cl_kernels/instance_normalization_ex.clembed"
- },
- {
- "binary_logical_op.cl",
+ },
+ {
+ "binary_logical_op.cl",
#include "./cl_kernels/binary_logical_op.clembed"
- },
- {
- "multiply_scale_factor.cl",
+ },
+ {
+ "multiply_scale_factor.cl",
#include "./cl_kernels/multiply_scale_factor.clembed"
- },
- {
- "neg_tensor.cl",
+ },
+ {
+ "neg_tensor.cl",
#include "./cl_kernels/neg_tensor.clembed"
- },
- {
- "one_hot.cl",
+ },
+ {
+ "one_hot.cl",
#include "./cl_kernels/one_hot.clembed"
- },
- {
- "quantization_symm8.cl",
+ },
+ {
+ "quantization_symm8.cl",
#include "./cl_kernels/quantization_symm8.clembed"
- },
- {
- "reduce_operation.cl",
+ },
+ {
+ "reduce_operation.cl",
#include "./cl_kernels/reduce_operation.clembed"
- },
- {
- "scale_factor.cl",
+ },
+ {
+ "scale_factor.cl",
#include "./cl_kernels/scale_factor.clembed"
- },
- {
- "topkv2.cl",
+ },
+ {
+ "topkv2.cl",
#include "./cl_kernels/topkv2.clembed"
- },
- {
- "topkv2_radixsort.cl",
+ },
+ {
+ "topkv2_radixsort.cl",
#include "./cl_kernels/topkv2_radixsort.clembed"
- },
- {
- "topkv2_quicksort.cl",
+ },
+ {
+ "topkv2_quicksort.cl",
#include "./cl_kernels/topkv2_quicksort.clembed"
- },
+ },
#endif /* EMBEDDED_KERNELS */
};
CLKernelLibraryEx::CLKernelLibraryEx()
- : _context(), _device(), _kernel_path("."), _programs_map(), _built_programs_map()
+ : _context(), _device(), _kernel_path("."), _programs_map(), _built_programs_map()
{
opencl_is_available(); // Make sure the OpenCL symbols are initialised *before* the
// CLKernelLibraryEx is built
size_t err = kernel.getWorkGroupInfo(_device, CL_KERNEL_WORK_GROUP_SIZE, &result);
ARM_COMPUTE_ERROR_ON_MSG(
- err != 0,
- "clGetKernelWorkGroupInfo failed to return the maximum workgroup size for the kernel");
+ err != 0,
+ "clGetKernelWorkGroupInfo failed to return the maximum workgroup size for the kernel");
ARM_COMPUTE_UNUSED(err);
return result;
in.s01234567 = select(in.s89abcdef, in.s01234567, idx_sel);
res.s01234567 = select(res.s89abcdef, res.s01234567, CONVERT(idx_sel, int8));
- idx_sel.s0123 = (in.s0123 < in.s4567) ||
- (in.s0123 == in.s4567 &&
- CONVERT((res.s0123 < res.s4567), VEC_DATA_TYPE(DATA_TYPE_SELECT, 4)));
+ idx_sel.s0123 =
+ (in.s0123 < in.s4567) ||
+ (in.s0123 == in.s4567 && CONVERT((res.s0123 < res.s4567), VEC_DATA_TYPE(DATA_TYPE_SELECT, 4)));
in.s0123 = select(in.s4567, in.s0123, idx_sel.s0123);
res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel.s0123, int4));
idx_sel.s01 =
- (in.s01 < in.s23) ||
- (in.s01 == in.s23 && CONVERT((res.s01 < res.s23), VEC_DATA_TYPE(DATA_TYPE_SELECT, 2)));
+ (in.s01 < in.s23) ||
+ (in.s01 == in.s23 && CONVERT((res.s01 < res.s23), VEC_DATA_TYPE(DATA_TYPE_SELECT, 2)));
in.s01 = select(in.s23, in.s01, idx_sel.s01);
res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, int2));
in.s01234567 = select(in.s89abcdef, in.s01234567, idx_sel);
res.s01234567 = select(res.s89abcdef, res.s01234567, CONVERT(idx_sel, int8));
- idx_sel.s0123 = (in.s0123 > in.s4567) ||
- (in.s0123 == in.s4567 &&
- CONVERT((res.s0123 < res.s4567), VEC_DATA_TYPE(DATA_TYPE_SELECT, 4)));
+ idx_sel.s0123 =
+ (in.s0123 > in.s4567) ||
+ (in.s0123 == in.s4567 && CONVERT((res.s0123 < res.s4567), VEC_DATA_TYPE(DATA_TYPE_SELECT, 4)));
in.s0123 = select(in.s4567, in.s0123, idx_sel.s0123);
res.s0123 = select(res.s4567, res.s0123, CONVERT(idx_sel.s0123, int4));
idx_sel.s01 =
- (in.s01 > in.s23) ||
- (in.s01 == in.s23 && CONVERT((res.s01 < res.s23), VEC_DATA_TYPE(DATA_TYPE_SELECT, 2)));
+ (in.s01 > in.s23) ||
+ (in.s01 == in.s23 && CONVERT((res.s01 < res.s23), VEC_DATA_TYPE(DATA_TYPE_SELECT, 2)));
in.s01 = select(in.s23, in.s01, idx_sel.s01);
res.s01 = select(res.s23, res.s01, CONVERT(idx_sel.s01, int2));
const uint x_idx = get_global_id(0);
const uint y_idx = get_global_id(1);
const __global DATA_TYPE *src_in_row =
- (const __global DATA_TYPE *)(src_ptr + src_offset_first_element_in_bytes +
- y_idx * src_step_y);
+ (const __global DATA_TYPE *)(src_ptr + src_offset_first_element_in_bytes + y_idx * src_step_y);
for (unsigned int y = 0; y < get_local_size(1); ++y)
{
#if defined(ARG_MAX)
#if defined(PREV_OUTPUT)
- local_results[lid] = arg_idx_max_prev_out(
- src_in_row, (__global DATA_TYPE_OUTPUT *)offset(&prev_res, 0, y), x_idx);
+ local_results[lid] =
+ arg_idx_max_prev_out(src_in_row, (__global DATA_TYPE_OUTPUT *)offset(&prev_res, 0, y), x_idx);
#else // !defined(PREV_OUTPUT)
local_results[lid] = arg_idx_max((__global DATA_TYPE *)offset(&src, 0, y), x_idx);
#endif // defined(PREV_OUTPUT)
#else // defined(ARG_MIN)
#if defined(PREV_OUTPUT)
- local_results[lid] = arg_idx_min_prev_out(
- src_in_row, (__global DATA_TYPE_OUTPUT *)offset(&prev_res, 0, y), x_idx);
+ local_results[lid] =
+ arg_idx_min_prev_out(src_in_row, (__global DATA_TYPE_OUTPUT *)offset(&prev_res, 0, y), x_idx);
#else // !defined(PREV_OUTPUT)
local_results[lid] = arg_idx_min((__global DATA_TYPE *)offset(&src, 0, y), x_idx);
#endif // defined(PREV_OUTPUT)
DATA_TYPE tmp1 = *(src_in_row + local_results[lid + i]);
#if defined(ARG_MAX)
condition_check3 =
- ((tmp0 == tmp1) && (local_results[lid + i] < local_results[lid])) || (tmp0 < tmp1);
+ ((tmp0 == tmp1) && (local_results[lid + i] < local_results[lid])) || (tmp0 < tmp1);
local_results[lid] = select(local_results[lid], local_results[lid + i], condition_check3);
#else // defined(ARG_MIN)
local_results[lid] = select(
- local_results[lid], local_results[lid + i],
- ((tmp0 == tmp1) && (local_results[lid + i] < local_results[lid])) || (tmp0 > tmp1));
+ local_results[lid], local_results[lid + i],
+ ((tmp0 == tmp1) && (local_results[lid + i] < local_results[lid])) || (tmp0 > tmp1));
#endif // defined(ARG_MAX) || defined(ARG_MIN)
}
barrier(CLK_LOCAL_MEM_FENCE);
{
VEC_DATA_TYPE(DATA_TYPE, 16)
in =
- CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE, 16));
+ CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE, 16));
VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16)
cond_conv = CONVERT(CONDITION_TO_USE(in, res), VEC_DATA_TYPE(DATA_TYPE_OUTPUT, 16));
#if OP_CODE == 1 // LOGICAL AND
VSTORE(VEC_SIZE)
(CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input1.ptr) &&
- VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr),
+ VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr),
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)),
0, (__global DATA_TYPE *)output.ptr);
#elif OP_CODE == 2 // LOGICAL OR
VSTORE(VEC_SIZE)
(CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input1.ptr) ||
- VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr),
+ VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr),
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)),
0, (__global DATA_TYPE *)output.ptr);
// lookup ids for based on the tensor dimensions
int lup_id[4] = {0};
- lup_id[0] = (NUM_DIMS == 1) ? *((__global int *)vector_offset(&lups, get_global_id(0)))
- : get_global_id(0);
- lup_id[1] = (NUM_DIMS == 2) ? *((__global int *)vector_offset(&lups, get_global_id(1)))
- : get_global_id(1);
+ lup_id[0] =
+ (NUM_DIMS == 1) ? *((__global int *)vector_offset(&lups, get_global_id(0))) : get_global_id(0);
+ lup_id[1] =
+ (NUM_DIMS == 2) ? *((__global int *)vector_offset(&lups, get_global_id(1))) : get_global_id(1);
lup_id[2] = (NUM_DIMS == 3) ? *((__global int *)vector_offset(&lups, get_global_id(2)))
: get_global_id(2) % DEPTH_OUT;
lup_id[3] = (NUM_DIMS == 4)
- ? *((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT))
- : get_global_id(2) / DEPTH_OUT;
+ ? *((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT))
+ : get_global_id(2) / DEPTH_OUT;
in.ptr += input_offset_first_element_in_bytes + lup_id[0] * input_step_x +
lup_id[1] * input_step_y + lup_id[2] * input_step_z + lup_id[3] * input_step_w;
#include "helpers.h"
#if defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_Y) && \
- defined(COLS_A)
+ defined(COLS_A)
#define VECTOR_CHAR VEC_DATA_TYPE(char, NUM_ELEMS_PROCESSED_PER_THREAD_X)
#define VECTOR_INT VEC_DATA_TYPE(int, NUM_ELEMS_PROCESSED_PER_THREAD_X)
#define VECTOR_FLOAT VEC_DATA_TYPE(float, NUM_ELEMS_PROCESSED_PER_THREAD_X)
,
uint dst_cross_plane_pad
#endif // REINTERPRET_OUTPUT_AS_3D
- )
+)
{
int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
// Load values from matrix B
VECTOR_CHAR b0 =
- VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1));
+ VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1));
VECTOR_CHAR b1 = VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(
- 0, (__global char *)(src1_ptr + src_addr.s1 + src1_stride_y));
+ 0, (__global char *)(src1_ptr + src_addr.s1 + src1_stride_y));
// Accumulate
acc0 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a0.s0;
#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
// Load values from matrix B
VECTOR_CHAR b0 =
- VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1));
+ VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1));
// Accumulate
acc0 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a0;
int lup_id[4] = {0};
- lup_id[0] = (NUM_DIMS == 1) ? *((__global int *)vector_offset(&lups, get_global_id(0)))
- : get_global_id(0);
- lup_id[1] = (NUM_DIMS == 2) ? *((__global int *)vector_offset(&lups, get_global_id(1)))
- : get_global_id(1);
+ lup_id[0] =
+ (NUM_DIMS == 1) ? *((__global int *)vector_offset(&lups, get_global_id(0))) : get_global_id(0);
+ lup_id[1] =
+ (NUM_DIMS == 2) ? *((__global int *)vector_offset(&lups, get_global_id(1))) : get_global_id(1);
lup_id[2] = (NUM_DIMS == 3) ? *((__global int *)vector_offset(&lups, get_global_id(2)))
: get_global_id(2) % DEPTH_OUT;
lup_id[3] = (NUM_DIMS == 4)
- ? *((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT))
- : get_global_id(2) / DEPTH_OUT;
+ ? *((__global int *)vector_offset(&lups, get_global_id(2) / DEPTH_OUT))
+ : get_global_id(2) / DEPTH_OUT;
if (lup_id[NUM_DIMS - 1] < 0)
{
#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
#if defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && \
- defined(cl_arm_integer_dot_product_accumulate_int8)
+ defined(cl_arm_integer_dot_product_accumulate_int8)
#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable
#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) &&
// defined(cl_arm_integer_dot_product_accumulate_int8)
#define VECTOR_DECLARATION(name) \
__global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, \
- uint name##_offset_first_element_in_bytes
+ uint name##_offset_first_element_in_bytes
#define IMAGE_DECLARATION(name) \
__global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \
- uint name##_step_y, uint name##_offset_first_element_in_bytes
+ uint name##_step_y, uint name##_offset_first_element_in_bytes
#define TENSOR3D_DECLARATION(name) \
__global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \
- uint name##_step_y, uint name##_stride_z, uint name##_step_z, \
- uint name##_offset_first_element_in_bytes
+ uint name##_step_y, uint name##_stride_z, uint name##_step_z, \
+ uint name##_offset_first_element_in_bytes
#define TENSOR4D_DECLARATION(name) \
__global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \
- uint name##_step_y, uint name##_stride_z, uint name##_step_z, uint name##_stride_w, \
- uint name##_step_w, uint name##_offset_first_element_in_bytes
+ uint name##_step_y, uint name##_stride_z, uint name##_step_z, uint name##_stride_w, \
+ uint name##_step_w, uint name##_offset_first_element_in_bytes
#define CONVERT_TO_VECTOR_STRUCT(name) \
update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
uint stride_x, uint step_x)
{
Vector vector = {
- .ptr = ptr,
- .offset_first_element_in_bytes = offset_first_element_in_bytes,
- .stride_x = stride_x,
+ .ptr = ptr,
+ .offset_first_element_in_bytes = offset_first_element_in_bytes,
+ .stride_x = stride_x,
};
vector.ptr += vector.offset_first_element_in_bytes + get_global_id(0) * step_x;
return vector;
.stride_x = stride_x,
.stride_y = stride_y};
img.ptr +=
- img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y;
+ img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y;
return img;
}
*
* @return quantized values
*/
-#define QUANTIZE_IMPL(type, size) \
- inline VEC_DATA_TYPE(type, size) \
- quantize_##type##size(VEC_DATA_TYPE(float, size) input, float offset, float scale) \
- { \
- VEC_DATA_TYPE(float, size) \
- out_f32 = input / (VEC_DATA_TYPE(float, size))(scale) + (VEC_DATA_TYPE(float, size))(offset); \
- VEC_DATA_TYPE(type, size) \
- res = CONVERT_SAT(CONVERT_DOWN_RTE(out_f32, VEC_DATA_TYPE(int, size)), \
- VEC_DATA_TYPE(type, size)); \
- return res; \
+#define QUANTIZE_IMPL(type, size) \
+ inline VEC_DATA_TYPE(type, size) \
+ quantize_##type##size(VEC_DATA_TYPE(float, size) input, float offset, float scale) \
+ { \
+ VEC_DATA_TYPE(float, size) \
+ out_f32 = input / (VEC_DATA_TYPE(float, size))(scale) + (VEC_DATA_TYPE(float, size))(offset); \
+ VEC_DATA_TYPE(type, size) \
+ res = \
+ CONVERT_SAT(CONVERT_DOWN_RTE(out_f32, VEC_DATA_TYPE(int, size)), VEC_DATA_TYPE(type, size)); \
+ return res; \
}
/** Dequantize a vector of values to floating-point
*
* @return dequantized values in floating point
*/
-#define DEQUANTIZE_IMPL(type, size) \
- inline VEC_DATA_TYPE(float, size) \
- dequantize_##type##size(VEC_DATA_TYPE(type, size) input, float offset, float scale) \
- { \
- return (CONVERT(input, VEC_DATA_TYPE(float, size)) - offset) * scale; \
+#define DEQUANTIZE_IMPL(type, size) \
+ inline VEC_DATA_TYPE(float, size) \
+ dequantize_##type##size(VEC_DATA_TYPE(type, size) input, float offset, float scale) \
+ { \
+ return (CONVERT(input, VEC_DATA_TYPE(float, size)) - offset) * scale; \
}
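Restating the mapping that the two macros above implement (this change only re-indents them): with float input x, scale s, and offset o,

  q = \operatorname{clamp}_T\bigl(\operatorname{round}(x / s + o)\bigr), \qquad \hat{x} = (q - o)\, s

where round is CONVERT_DOWN_RTE (round to nearest, ties to even) and clamp_T is the CONVERT_SAT saturation to the target integer type T.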
/** Correctly-rounded-to-nearest division by a power-of-two.
*/
#define ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(size) \
inline VEC_DATA_TYPE(int, size) asymm_rounding_divide_by_POW2_##size( \
- VEC_DATA_TYPE(int, size) x, VEC_DATA_TYPE(int, size) exponent) \
+ VEC_DATA_TYPE(int, size) x, VEC_DATA_TYPE(int, size) exponent) \
{ \
const VEC_DATA_TYPE(int, size) zero = (VEC_DATA_TYPE(int, size))0; \
const VEC_DATA_TYPE(int, size) one = (VEC_DATA_TYPE(int, size))1; \
*
* @return Product of two fixed-point numbers.
*/
-#define ASYMM_MULT_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_mult##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \
- { \
- VEC_DATA_TYPE(int, size) \
- overflow = a == b && a == INT_MIN; \
- VEC_DATA_TYPE(long, size) \
- a_64 = convert_long##size(a); \
- VEC_DATA_TYPE(long, size) \
- b_64 = convert_long##size(b); \
- VEC_DATA_TYPE(long, size) \
- ab_64 = a_64 * b_64; \
- /* Revert COMPMID-907 */ \
- VEC_DATA_TYPE(long, size) \
- mask1 = 1 << 30; \
- VEC_DATA_TYPE(long, size) \
- mask2 = 1 - (1 << 30); \
- VEC_DATA_TYPE(long, size) \
- is_positive_or_zero = ab_64 >= 0; \
- VEC_DATA_TYPE(long, size) \
- nudge = select(mask2, mask1, is_positive_or_zero); \
- VEC_DATA_TYPE(long, size) \
- mask = 1ll << 31; \
- VEC_DATA_TYPE(int, size) \
- ab_x2_high32 = convert_int##size((ab_64 + nudge) / mask); \
- return select(ab_x2_high32, INT_MAX, overflow); \
+#define ASYMM_MULT_IMPL(size) \
+ inline VEC_DATA_TYPE(int, size) \
+ asymm_mult##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \
+ { \
+ VEC_DATA_TYPE(int, size) \
+ overflow = a == b && a == INT_MIN; \
+ VEC_DATA_TYPE(long, size) \
+ a_64 = convert_long##size(a); \
+ VEC_DATA_TYPE(long, size) \
+ b_64 = convert_long##size(b); \
+ VEC_DATA_TYPE(long, size) \
+ ab_64 = a_64 * b_64; \
+ /* Revert COMPMID-907 */ \
+ VEC_DATA_TYPE(long, size) \
+ mask1 = 1 << 30; \
+ VEC_DATA_TYPE(long, size) \
+ mask2 = 1 - (1 << 30); \
+ VEC_DATA_TYPE(long, size) \
+ is_positive_or_zero = ab_64 >= 0; \
+ VEC_DATA_TYPE(long, size) \
+ nudge = select(mask2, mask1, is_positive_or_zero); \
+ VEC_DATA_TYPE(long, size) \
+ mask = 1ll << 31; \
+ VEC_DATA_TYPE(int, size) \
+ ab_x2_high32 = convert_int##size((ab_64 + nudge) / mask); \
+ return select(ab_x2_high32, INT_MAX, overflow); \
}
/** Calculates \f$ exp(x) \f$ for x in [-1/4, 0).
*
* @return Result in fixed-point format Q0.
*/
-#define ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_exp_on_interval_between_negative_one_quarter_and_0_excl##size(VEC_DATA_TYPE(int, size) \
- a) \
- { \
- const VEC_DATA_TYPE(int, size) constant_term = 1895147668; \
- const VEC_DATA_TYPE(int, size) constant_1_over_3 = 715827883; \
- const int k_fractional_bits = 31; \
- VEC_DATA_TYPE(int, size) \
- x = a + (1 << (k_fractional_bits - 3)); \
- VEC_DATA_TYPE(int, size) \
- x2 = ASYMM_MULT(x, x, size); \
- VEC_DATA_TYPE(int, size) \
- x3 = ASYMM_MULT(x2, x, size); \
- VEC_DATA_TYPE(int, size) \
- x4 = ASYMM_MULT(x2, x2, size); \
- VEC_DATA_TYPE(int, size) \
- x4_over_4 = ASYMM_ROUNDING_DIVIDE_BY_POW2(x4, 2, size); \
- VEC_DATA_TYPE(int, size) \
- x4_over_24_plus_x3_over_6_plus_x2 = \
- ASYMM_MULT((x4_over_4 + x3), constant_1_over_3, size) + x2; \
- VEC_DATA_TYPE(int, size) \
- x4_over_24_plus_x3_over_6_plus_x2_over_2 = \
- ASYMM_ROUNDING_DIVIDE_BY_POW2(x4_over_24_plus_x3_over_6_plus_x2, 1, size); \
- return constant_term + \
- ASYMM_MULT(constant_term, x + x4_over_24_plus_x3_over_6_plus_x2_over_2, size); \
+#define ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(size) \
+ inline VEC_DATA_TYPE(int, size) \
+ asymm_exp_on_interval_between_negative_one_quarter_and_0_excl##size(VEC_DATA_TYPE(int, size) \
+ a) \
+ { \
+ const VEC_DATA_TYPE(int, size) constant_term = 1895147668; \
+ const VEC_DATA_TYPE(int, size) constant_1_over_3 = 715827883; \
+ const int k_fractional_bits = 31; \
+ VEC_DATA_TYPE(int, size) \
+ x = a + (1 << (k_fractional_bits - 3)); \
+ VEC_DATA_TYPE(int, size) \
+ x2 = ASYMM_MULT(x, x, size); \
+ VEC_DATA_TYPE(int, size) \
+ x3 = ASYMM_MULT(x2, x, size); \
+ VEC_DATA_TYPE(int, size) \
+ x4 = ASYMM_MULT(x2, x2, size); \
+ VEC_DATA_TYPE(int, size) \
+ x4_over_4 = ASYMM_ROUNDING_DIVIDE_BY_POW2(x4, 2, size); \
+ VEC_DATA_TYPE(int, size) \
+ x4_over_24_plus_x3_over_6_plus_x2 = \
+ ASYMM_MULT((x4_over_4 + x3), constant_1_over_3, size) + x2; \
+ VEC_DATA_TYPE(int, size) \
+ x4_over_24_plus_x3_over_6_plus_x2_over_2 = \
+ ASYMM_ROUNDING_DIVIDE_BY_POW2(x4_over_24_plus_x3_over_6_plus_x2, 1, size); \
+ return constant_term + \
+ ASYMM_MULT(constant_term, x + x4_over_24_plus_x3_over_6_plus_x2_over_2, size); \
}
/** Each bit of the result is set to the corresponding bit of either then_val or
#define EXP_BARREL_SHIFTER_IMPL(size) \
inline VEC_DATA_TYPE(int, size) exp_barrel_shifter##size( \
- VEC_DATA_TYPE(int, size) result, int exponent, int fp_multiplier, int k_integer_bits, \
- int k_fractional_bits, VEC_DATA_TYPE(int, size) remainder) \
+ VEC_DATA_TYPE(int, size) result, int exponent, int fp_multiplier, int k_integer_bits, \
+ int k_fractional_bits, VEC_DATA_TYPE(int, size) remainder) \
{ \
if (k_integer_bits > exponent) \
{ \
const int k_shift_amount = k_integer_bits > exponent ? k_fractional_bits + exponent : 0; \
return ASYMM_SELECT_USING_MASK( \
- ASYMM_MASK_IF_NON_ZERO(remainder & (1 << k_shift_amount), size), \
- ASYMM_MULT(result, fp_multiplier, size), result, size); \
+ ASYMM_MASK_IF_NON_ZERO(remainder & (1 << k_shift_amount), size), \
+ ASYMM_MULT(result, fp_multiplier, size), result, size); \
} \
\
return result; \
*/
#define ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(size) \
inline VEC_DATA_TYPE(int, size) \
- asymm_exp_on_negative_values##size(VEC_DATA_TYPE(int, size) a, int k_integer_bits) \
+ asymm_exp_on_negative_values##size(VEC_DATA_TYPE(int, size) a, int k_integer_bits) \
{ \
const int k_fractional_bits = 31 - k_integer_bits; \
VEC_DATA_TYPE(int, size) \
a_mod_quarter_minus_one_quarter_scaled = a_mod_quarter_minus_one_quarter << k_integer_bits; \
VEC_DATA_TYPE(int, size) \
result = ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL( \
- a_mod_quarter_minus_one_quarter_scaled, size); \
+ a_mod_quarter_minus_one_quarter_scaled, size); \
VEC_DATA_TYPE(int, size) \
remainder = a_mod_quarter_minus_one_quarter - a; \
\
remainder, size); \
result = EXP_BARREL_SHIFTER(result, +2, 39332535, k_integer_bits, k_fractional_bits, \
remainder, size); \
- result = EXP_BARREL_SHIFTER(result, +3, 720401, k_integer_bits, k_fractional_bits, remainder, \
- size); \
result = \
- EXP_BARREL_SHIFTER(result, +4, 242, k_integer_bits, k_fractional_bits, remainder, size); \
+ EXP_BARREL_SHIFTER(result, +3, 720401, k_integer_bits, k_fractional_bits, remainder, size); \
+ result = \
+ EXP_BARREL_SHIFTER(result, +4, 242, k_integer_bits, k_fractional_bits, remainder, size); \
\
if (k_integer_bits > 5) \
{ \
*
* @return Arithmetic left or right shift.
*/
-#define ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_saturating_rounding_mult_by_pow2##size(VEC_DATA_TYPE(int, size) x, int exponent) \
- { \
- if (exponent < 0) \
- { \
- return ASYMM_ROUNDING_DIVIDE_BY_POW2(x, -exponent, size); \
- } \
- \
- const VEC_DATA_TYPE(int, size) min = INT_MIN; \
- const VEC_DATA_TYPE(int, size) max = INT_MAX; \
- int threshold = ((1 << (31 - exponent)) - 1); \
- VEC_DATA_TYPE(int, size) \
- positive_mask = ASYMM_MASK_IF_NON_ZERO(x > threshold, size); \
- VEC_DATA_TYPE(int, size) \
- negative_mask = ASYMM_MASK_IF_NON_ZERO(x < -threshold, size); \
- VEC_DATA_TYPE(int, size) \
- result = x << exponent; \
- result = ASYMM_SELECT_USING_MASK(positive_mask, max, result, size); \
- result = ASYMM_SELECT_USING_MASK(negative_mask, min, result, size); \
- return result; \
+#define ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(size) \
+ inline VEC_DATA_TYPE(int, size) \
+ asymm_saturating_rounding_mult_by_pow2##size(VEC_DATA_TYPE(int, size) x, int exponent) \
+ { \
+ if (exponent < 0) \
+ { \
+ return ASYMM_ROUNDING_DIVIDE_BY_POW2(x, -exponent, size); \
+ } \
+ \
+ const VEC_DATA_TYPE(int, size) min = INT_MIN; \
+ const VEC_DATA_TYPE(int, size) max = INT_MAX; \
+ int threshold = ((1 << (31 - exponent)) - 1); \
+ VEC_DATA_TYPE(int, size) \
+ positive_mask = ASYMM_MASK_IF_NON_ZERO(x > threshold, size); \
+ VEC_DATA_TYPE(int, size) \
+ negative_mask = ASYMM_MASK_IF_NON_ZERO(x < -threshold, size); \
+ VEC_DATA_TYPE(int, size) \
+ result = x << exponent; \
+ result = ASYMM_SELECT_USING_MASK(positive_mask, max, result, size); \
+ result = ASYMM_SELECT_USING_MASK(negative_mask, min, result, size); \
+ return result; \
}
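The macro above is a saturating multiply by 2^exponent: any value whose magnitude exceeds 2^(31 - exponent) - 1 would overflow the left shift and is clamped to INT_MAX or INT_MIN, while a negative exponent falls back to a rounding right shift. A scalar sketch of the non-negative-exponent path (illustrative only):

#include <cstdint>

// Scalar model of asymm_saturating_rounding_mult_by_pow2 for exponent in [0, 31);
// the negative-exponent branch (a rounding right shift) is omitted for brevity.
inline int32_t saturating_mult_by_pow2(int32_t x, int exponent)
{
  const int32_t threshold = static_cast<int32_t>((1ll << (31 - exponent)) - 1);
  if (x > threshold)
    return INT32_MAX;
  if (x < -threshold)
    return INT32_MIN;
  return static_cast<int32_t>(static_cast<int64_t>(x) * (1ll << exponent));
}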
/** Calculates (a+b)/2, rounded to the nearest integer.
*
* @return (a+b)/2, rounded to the nearest integer.
*/
-#define ASYMM_ROUNDING_HALF_SUM_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_rounding_half_sum##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \
- { \
- VEC_DATA_TYPE(long, size) \
- a64 = convert_long##size(a); \
- VEC_DATA_TYPE(long, size) \
- b64 = convert_long##size(b); \
- VEC_DATA_TYPE(long, size) \
- sum = a64 + b64; \
- const VEC_DATA_TYPE(long, size) one = 1; \
- const VEC_DATA_TYPE(long, size) minus_one = -1; \
- VEC_DATA_TYPE(long, size) \
- sign = select(minus_one, one, sum >= 0); \
- return convert_int##size((sum + sign) / 2); \
+#define ASYMM_ROUNDING_HALF_SUM_IMPL(size) \
+ inline VEC_DATA_TYPE(int, size) \
+ asymm_rounding_half_sum##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \
+ { \
+ VEC_DATA_TYPE(long, size) \
+ a64 = convert_long##size(a); \
+ VEC_DATA_TYPE(long, size) \
+ b64 = convert_long##size(b); \
+ VEC_DATA_TYPE(long, size) \
+ sum = a64 + b64; \
+ const VEC_DATA_TYPE(long, size) one = 1; \
+ const VEC_DATA_TYPE(long, size) minus_one = -1; \
+ VEC_DATA_TYPE(long, size) \
+ sign = select(minus_one, one, sum >= 0); \
+ return convert_int##size((sum + sign) / 2); \
}
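asymm_rounding_half_sum widens to 64 bits so a + b cannot overflow, then rounds the halving away from zero by adding the sign of the sum before dividing. A scalar equivalent for reference:

#include <cstdint>

// (a + b) / 2 rounded to the nearest integer, halves rounded away from zero.
inline int32_t rounding_half_sum(int32_t a, int32_t b)
{
  const int64_t sum = static_cast<int64_t>(a) + static_cast<int64_t>(b);
  const int64_t sign = sum >= 0 ? 1 : -1;
  return static_cast<int32_t>((sum + sign) / 2);
}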
/** Calculates \f$ 1 / (1 + x) \f$ for x in (0, 1).
*/
#define ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(size) \
inline VEC_DATA_TYPE(int, size) \
- asymm_one_over_one_plus_x_for_x_in_0_1##size(VEC_DATA_TYPE(int, size) a) \
+ asymm_one_over_one_plus_x_for_x_in_0_1##size(VEC_DATA_TYPE(int, size) a) \
{ \
const VEC_DATA_TYPE(int, size) Q0_one = INT_MAX; \
const VEC_DATA_TYPE(int, size) Q2_one = 1 << (31 - 2); \
#define ASYMM_RESCALE(value, src_integer_bits, dst_integer_bits, size) \
asymm_rescale##size(value, src_integer_bits, dst_integer_bits)
-#define MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- multiply_by_quantized_multiplier##size(VEC_DATA_TYPE(int, size) input, int qmul, int shift) \
- { \
- const int left_shift = shift > 0 ? shift : 0; \
- const int right_shift = shift > 0 ? 0 : -shift; \
- return ASYMM_ROUNDING_DIVIDE_BY_POW2(ASYMM_MULT(input * (1 << left_shift), qmul, size), \
- right_shift, size); \
+#define MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(size) \
+ inline VEC_DATA_TYPE(int, size) \
+ multiply_by_quantized_multiplier##size(VEC_DATA_TYPE(int, size) input, int qmul, int shift) \
+ { \
+ const int left_shift = shift > 0 ? shift : 0; \
+ const int right_shift = shift > 0 ? 0 : -shift; \
+ return ASYMM_ROUNDING_DIVIDE_BY_POW2(ASYMM_MULT(input * (1 << left_shift), qmul, size), \
+ right_shift, size); \
}
#define MULTIPLY_BY_QUANTIZED_MULTIPLIER(input, qmul, shift, size) \
multiply_by_quantized_multiplier##size(input, qmul, shift)
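multiply_by_quantized_multiplier applies the usual integer requantization: a real-valued scale is represented as a Q0.31 multiplier qmul plus a power-of-two shift, with positive shifts applied before the high multiply and negative shifts afterwards as a rounding right shift. A scalar sketch under that reading; ASYMM_ROUNDING_DIVIDE_BY_POW2 is not shown in these hunks, so its model below follows the standard gemmlowp rounding divide-by-power-of-two and is an assumption:

#include <cstdint>

int32_t saturating_rounding_doubling_high_mul(int32_t a, int32_t b); // see the earlier sketch

// Rounding right shift, halves rounded away from zero; assumes exponent in [0, 31)
// and an arithmetic right shift for negative values, as the OpenCL code does.
inline int32_t rounding_divide_by_pow2(int32_t x, int exponent)
{
  const int32_t mask = (1 << exponent) - 1;
  const int32_t remainder = x & mask;
  const int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
  return (x >> exponent) + (remainder > threshold ? 1 : 0);
}

// Scalar model of multiply_by_quantized_multiplier; as in the macro, the caller
// guarantees that input * 2^left_shift stays within int32 range.
inline int32_t multiply_by_quantized_multiplier(int32_t input, int32_t qmul, int shift)
{
  const int left_shift = shift > 0 ? shift : 0;
  const int right_shift = shift > 0 ? 0 : -shift;
  const int32_t prod = saturating_rounding_doubling_high_mul(input * (1 << left_shift), qmul);
  return rounding_divide_by_pow2(prod, right_shift);
}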
#include "helpers.h"
#if defined(VEC_SIZE) && defined(DATA_TYPE) && defined(EPSILON) && defined(DIM_X) && \
- defined(DIM_Y) && defined(DIM_Z)
+ defined(DIM_Y) && defined(DIM_Z)
/** This function normalizes the input 2D tensor across the first dimension with respect to mean and
* standard deviation of the same dimension.
*
TENSOR4D_DECLARATION(output)
#endif /* IN_PLACE */
#ifdef GAMMA
- ,
+ ,
VECTOR_DECLARATION(gamma)
#endif // GAMMA
#ifdef BETA
- ,
+ ,
VECTOR_DECLARATION(beta)
#endif // BETA
- )
+)
{
Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
#ifndef IN_PLACE
for (int i_h = 0; i_h < DIM_Z; ++i_h)
{
__global DATA_TYPE *input_address =
- (__global DATA_TYPE *)tensor4D_offset(&in, ch, i_w, i_h, batch);
+ (__global DATA_TYPE *)tensor4D_offset(&in, ch, i_w, i_h, batch);
#ifdef IN_PLACE
__global DATA_TYPE *output_address = input_address;
#else /* !IN_PLACE */
__global DATA_TYPE *output_address =
- (__global DATA_TYPE *)tensor4D_offset(&out, ch, i_w, i_h, batch);
+ (__global DATA_TYPE *)tensor4D_offset(&out, ch, i_w, i_h, batch);
#endif /* IN_PLACE */
*(output_address) = (*(input_address)-mean) * multip + beta;
}
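The loop bodies above apply the per-element instance-normalization formula output = (input - mean) * multip + beta. How multip is computed is not visible in these hunks; assuming the standard definition implied by the kernel's doc comment, it would be gamma / sqrt(variance + EPSILON), roughly:

#include <cmath>

// Per-element instance normalization as applied in the loops above; deriving
// multip from gamma and the per-channel variance is an assumption here, since
// that code lies outside the shown hunks.
inline float instance_norm_element(float x, float mean, float variance, float gamma, float beta,
                                   float epsilon)
{
  const float multip = gamma / std::sqrt(variance + epsilon);
  return (x - mean) * multip + beta;
}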
for (; x <= (DIM_X - VEC_SIZE); x += VEC_SIZE)
{
__global DATA_TYPE *input_address =
- (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch);
+ (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch);
#ifdef IN_PLACE
__global DATA_TYPE *output_address = input_address;
#else /* !IN_PLACE */
__global DATA_TYPE *output_address =
- (__global DATA_TYPE *)tensor4D_offset(&out, x, y, ch, batch);
+ (__global DATA_TYPE *)tensor4D_offset(&out, x, y, ch, batch);
#endif /* IN_PLACE */
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
for (; x < DIM_X; ++x)
{
__global DATA_TYPE *input_address =
- (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch);
+ (__global DATA_TYPE *)tensor4D_offset(&in, x, y, ch, batch);
#ifdef IN_PLACE
__global DATA_TYPE *output_address = input_address;
#else /* !IN_PLACE */
__global DATA_TYPE *output_address =
- (__global DATA_TYPE *)tensor4D_offset(&out, x, y, ch, batch);
+ (__global DATA_TYPE *)tensor4D_offset(&out, x, y, ch, batch);
#endif /* IN_PLACE */
*(output_address) = (*(input_address)-mean) * multip + beta;
}
(val, 0, (__global DATA_TYPE *)output.ptr);
#else // !defined(VEC_SIZE) || !defined(LAST_ACCESSED_X)
*((__global DATA_TYPE *)(output.ptr)) =
- ((DATA_TYPE)(*((__global int *)(input.ptr)))) *
- *(((__global DATA_TYPE *)(scale_ptr)) + get_global_id(1)) * (DATA_TYPE)(multiplier);
+ ((DATA_TYPE)(*((__global int *)(input.ptr)))) *
+ *(((__global DATA_TYPE *)(scale_ptr)) + get_global_id(1)) * (DATA_TYPE)(multiplier);
#endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
}
#if AXIS == 0
*(__global DATA_TYPE *)tensor4D_offset(&output, index, px, py, pz) =
- *((__global const DATA_TYPE *)on_value_ptr);
+ *((__global const DATA_TYPE *)on_value_ptr);
#elif AXIS == 1
*(__global DATA_TYPE *)tensor4D_offset(&output, px, index, py, pz) =
- *((__global const DATA_TYPE *)on_value_ptr);
+ *((__global const DATA_TYPE *)on_value_ptr);
#elif AXIS == 2
*(__global DATA_TYPE *)tensor4D_offset(&output, px, py, index, pz) =
- *((__global const DATA_TYPE *)on_value_ptr);
+ *((__global const DATA_TYPE *)on_value_ptr);
#elif AXIS == 3
*(__global DATA_TYPE *)tensor4D_offset(&output, px, py, pz, index) =
- *((__global const DATA_TYPE *)on_value_ptr);
+ *((__global const DATA_TYPE *)on_value_ptr);
#endif // AXIS
}
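The AXIS dispatch above scatters on_value to the output coordinate whose AXIS-th component equals the index read for the current position; every other slot along that axis keeps off_value (written either by the full kernel or by the separate only-on-value path). A last-axis sketch of the same semantics (illustrative, not the kernel's API):

#include <cstddef>
#include <cstdint>
#include <vector>

// One-hot along the last axis: out[i][d] is on_value when d == indices[i] and
// off_value otherwise; the OpenCL kernel generalizes the placement to any AXIS.
inline std::vector<std::vector<float>> one_hot(const std::vector<int32_t> &indices, int depth,
                                               float on_value, float off_value)
{
  std::vector<std::vector<float>> out(indices.size(), std::vector<float>(depth, off_value));
  for (std::size_t i = 0; i < indices.size(); ++i)
    if (indices[i] >= 0 && indices[i] < depth)
      out[i][static_cast<std::size_t>(indices[i])] = on_value;
  return out;
}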
// Multiply with a multiplier smaller than 1
out_val =
- ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(out_val, RESULT_MULT_INT, RESULT_SHIFT, 16);
+ ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(out_val, RESULT_MULT_INT, RESULT_SHIFT, 16);
out_val += (VEC_DATA_TYPE(int, 16))(RESULT_OFFSET);
VEC_DATA_TYPE(uchar, 16) res = CONVERT(out_val, VEC_DATA_TYPE(uchar, 16));
// Create scale vector
const VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) vscale =
- *(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1));
+ *(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1));
// Quantize
VEC_DATA_TYPE(int, VEC_SIZE)
(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), 0, (__global DATA_TYPE_OUT *)output.ptr);
#else //! defined(VEC_SIZE) || !defined(LAST_ACCESSED_X)
*((__global DATA_TYPE_OUT *)(output.ptr)) = (DATA_TYPE_OUT)CLAMP(
- CONVERT_RTE((*(__global DATA_TYPE_IN *)input.ptr) /
- (*(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1))),
- int),
- MIN_QUANT_VAL, MAX_QUANT_VAL);
+ CONVERT_RTE((*(__global DATA_TYPE_IN *)input.ptr) /
+ (*(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1))),
+ int),
+ MIN_QUANT_VAL, MAX_QUANT_VAL);
#endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
}
#endif // defined(VEC_SIZE) && defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT)
Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
int indices[4] = {
- get_global_id(0), get_global_id(1), get_global_id(2) % DEPTH_OUT,
- get_global_id(2) / DEPTH_OUT,
+ get_global_id(0),
+ get_global_id(1),
+ get_global_id(2) % DEPTH_OUT,
+ get_global_id(2) / DEPTH_OUT,
};
DATA_TYPE value =
- *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]));
+ *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]));
for (int i = 1; i < dim; ++i)
{
indices[axis] = i;
Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
int indices[4] = {
- get_global_id(0), get_global_id(1), get_global_id(2) % DEPTH_OUT,
- get_global_id(2) / DEPTH_OUT,
+ get_global_id(0),
+ get_global_id(1),
+ get_global_id(2) % DEPTH_OUT,
+ get_global_id(2) / DEPTH_OUT,
};
DATA_TYPE sum_value = (DATA_TYPE)0;
for (int i = 0; i < dim; ++i)
{
indices[axis] = i;
- sum_value += *(
- (__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]));
+ sum_value +=
+ *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]));
}
#if OP_CODE == 3 // REDUCE_SUM
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::S32,
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED, DataType::S32,
DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX &&
- op != ReductionOperation::ARG_IDX_MIN,
+ op != ReductionOperation::ARG_IDX_MIN,
"Only ARG_IDX_MAX and ARG_IDX_MIN are supported");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions,
"Reduction axis greater than max number of dimensions");
output_shape.set(axis, 1);
DataType output_data_type = (prev_output != nullptr) ? (prev_output->data_type()) : DataType::S32;
auto_init_if_empty(*output, input->clone()
- ->set_tensor_shape(output_shape)
- .set_data_type(output_data_type)
- .reset_padding()
- .set_is_resizable(true));
+ ->set_tensor_shape(output_shape)
+ .set_data_type(output_data_type)
+ .reset_padding()
+ .set_is_resizable(true));
- Window win = calculate_max_window((prev_output != nullptr) ? (*prev_output) : (*input),
- Steps(vector_size));
+ Window win =
+ calculate_max_window((prev_output != nullptr) ? (*prev_output) : (*input), Steps(vector_size));
bool window_changed = false;
switch (axis)
}
Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
return std::make_tuple(err, win);
}
} // namespace
CLArgMinMaxLayerKernelEx::CLArgMinMaxLayerKernelEx()
- : _input(nullptr), _prev_output(nullptr), _output(nullptr), _reduction_axis(0),
- _op(ReductionOperation::ARG_IDX_MAX)
+ : _input(nullptr), _prev_output(nullptr), _output(nullptr), _reduction_axis(0),
+ _op(ReductionOperation::ARG_IDX_MAX)
{
}
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input->info(), (prev_output != nullptr) ? prev_output->info() : nullptr,
- output->info(), axis, op));
+ validate_arguments(input->info(), (prev_output != nullptr) ? prev_output->info() : nullptr,
+ output->info(), axis, op));
auto win_config = validate_and_configure_window(
- input->info(), (prev_output != nullptr) ? prev_output->info() : nullptr, output->info(), axis,
- op);
+ input->info(), (prev_output != nullptr) ? prev_output->info() : nullptr, output->info(), axis,
+ op);
ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
_input = input;
ARM_COMPUTE_ERROR("Not supported");
}
_kernel = static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(
- "arg_min_max_ex_" + kernel_axis_name, build_opts.options()));
+ "arg_min_max_ex_" + kernel_axis_name, build_opts.options()));
// Configure kernel window
ICLKernel::configure_internal(std::get<1>(win_config), lws_hint);
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, prev_output, output, axis, op));
ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(
- input->clone().get(), (prev_output != nullptr) ? prev_output->clone().get() : nullptr,
- output->clone().get(), axis, op)));
+ input->clone().get(), (prev_output != nullptr) ? prev_output->clone().get() : nullptr,
+ output->clone().get(), axis, op)));
return Status{};
}
const ITensorInfo *output)
{
const TensorShape &out_shape =
- TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
+ TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QASYMM8);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QASYMM8);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8,
DataType::QASYMM8);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(out_shape, output->tensor_shape(), 0),
- "Wrong shape for output");
+ detail::have_different_dimensions(out_shape, output->tensor_shape(), 0),
+ "Wrong shape for output");
}
return Status{};
}
} // namespace
CLBinaryLogicalOpKernel::CLBinaryLogicalOpKernel()
- : _input1(nullptr), _input2(nullptr), _output(nullptr)
+ : _input1(nullptr), _input2(nullptr), _output(nullptr)
{
}
build_opts.emplace(("-DOP_CODE=" + support::cpp11::to_string(op_code)));
build_opts.emplace(
- ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
+ ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
_kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
+ static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
const std::pair<TensorShape, ValidRegion> broadcast_pair =
- ITensorInfo::broadcast_shape_and_valid_region(*input1->info(), *input2->info());
+ ITensorInfo::broadcast_shape_and_valid_region(*input1->info(), *input2->info());
const ValidRegion &valid_region = broadcast_pair.second;
AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
update_window_and_padding(win_input1, input1_access) ||
- update_window_and_padding(win_input2, input2_access) ||
- update_window_and_padding(win, output_access);
+ update_window_and_padding(win_input2, input2_access) ||
+ update_window_and_padding(win, output_access);
output_access.set_valid_region(win, valid_region);
if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1)
{
can_collapse =
- (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ);
+ (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ);
for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++)
{
can_collapse = (in_shape1[d] == in_shape2[d]);
bool has_collapsed = false;
Window collapsed =
- can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed)
- : window;
+ can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed)
+ : window;
const TensorShape &in_shape1_collapsed =
- has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1;
+ has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1;
const TensorShape &in_shape2_collapsed =
- has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2;
+ has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2;
Window slice = collapsed.first_slice_window_3D();
Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed);
BorderSize CLBinaryLogicalOpKernel::border_size() const
{
const unsigned int replicateSize =
- _output->info()->dimension(0) -
- std::min(_input1->info()->dimension(0), _input2->info()->dimension(0));
+ _output->info()->dimension(0) -
+ std::min(_input1->info()->dimension(0), _input2->info()->dimension(0));
const unsigned int border =
- std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize);
+ std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize);
return BorderSize(0, border, 0, 0);
}
// Create kernel
const std::string kernel_name = "cast_bool";
_kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options()));
+ CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options()));
// Configure kernel
ICLSimple2DKernel::configure(input, output, num_elems_processed_per_iteration);
input_access.set_valid_region(win, output->valid_region());
Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
return std::make_pair(err, win);
}
} // namespace
CLEmbeddingLookupKernel::CLEmbeddingLookupKernel()
- : _input(nullptr), _output(nullptr), _lookups(nullptr)
+ : _input(nullptr), _output(nullptr), _lookups(nullptr)
{
}
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, lookups);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
build_opts.emplace("-DNUM_DIMS=" + support::cpp11::to_string(_input->info()->num_dimensions()));
// Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts));
+ _kernel =
+ static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts));
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info());
ARM_COMPUTE_RETURN_ERROR_ON(actual_axis >= input->num_dimensions());
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape_ex(
- input->tensor_shape(), indices->tensor_shape(), actual_axis);
+ input->tensor_shape(), indices->tensor_shape(), actual_axis);
ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size());
}
const uint32_t actual_axis = wrap_around(axis, static_cast<int>(input->num_dimensions()));
std::unique_ptr<ITensorInfo> output_info = input->clone();
output_info->set_tensor_shape(arm_compute::misc::shape_calculator::compute_gather_shape_ex(
- input->tensor_shape(), indices->tensor_shape(), actual_axis));
+ input->tensor_shape(), indices->tensor_shape(), actual_axis));
// Output auto initialization if not yet initialized
auto_init_if_empty((*output), output_info->tensor_shape(), 1, input->data_type());
} // namespace
CLGatherExKernel::CLGatherExKernel()
- : _input(nullptr), _indices(nullptr), _output(nullptr), _axis(0)
+ : _input(nullptr), _indices(nullptr), _output(nullptr), _axis(0)
{
}
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, indices);
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input->info(), indices->info(), output->info(), axis));
+ validate_arguments(input->info(), indices->info(), output->info(), axis));
// Configure kernel window
auto win_config =
- validate_and_configure_window(input->info(), indices->info(), output->info(), axis);
+ validate_and_configure_window(input->info(), indices->info(), output->info(), axis);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
_input = input;
// Create kernel
_kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("gather_ex", build_opts.options()));
+ CLKernelLibraryEx::get().create_kernel("gather_ex", build_opts.options()));
ICLKernel::configure_internal(win_config.second);
}
ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(),
indices->clone().get(),
output->clone().get(), axis)
- .first);
+ .first);
return Status{};
}
input_access.set_valid_region(win, output->valid_region());
Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
return std::make_pair(err, win);
}
} // namespace
{
ARM_COMPUTE_ERROR_ON_NULLPTR(lookups, keys, input, output, hits);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(keys, 1, DataType::S32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(hits, 1, DataType::U8, DataType::QASYMM8);
const ICLTensor *input, ICLTensor *output, ICLTensor *hits)
{
ARM_COMPUTE_ERROR_THROW_ON(
- validate(lookups->info(), keys->info(), input->info(), output->info(), hits->info()));
+ validate(lookups->info(), keys->info(), input->info(), output->info(), hits->info()));
_lookups = lookups;
_keys = keys;
// Make _lookup_indices tensor
_lookup_indices = support::cpp14::make_unique<CLTensor>();
_lookup_indices->allocator()->init(
- TensorInfo(lookups->info()->tensor_shape(), lookups->info()->num_channels(), DataType::S32));
+ TensorInfo(lookups->info()->tensor_shape(), lookups->info()->num_channels(), DataType::S32));
_lookup_indices->allocator()->allocate();
// Set kernel build options
build_opts.emplace("-DNUM_DIMS=" + support::cpp11::to_string(_input->info()->num_dimensions()));
// Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts));
+ _kernel =
+ static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts));
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info());
// Set values of hits
const int32_t *lookups_buf =
- reinterpret_cast<int32_t *>(const_cast<ICLTensor *>(_lookups)->buffer());
+ reinterpret_cast<int32_t *>(const_cast<ICLTensor *>(_lookups)->buffer());
const int32_t *keys_buf = reinterpret_cast<int32_t *>(const_cast<ICLTensor *>(_keys)->buffer());
uint8_t *hits_buf = reinterpret_cast<uint8_t *>(_hits->buffer());
int32_t *lookup_indices_buf = reinterpret_cast<int32_t *>(_lookup_indices->buffer());
} // namespace
CLInstanceNormalizationLayerKernelEx::CLInstanceNormalizationLayerKernelEx()
- : _input(nullptr), _output(nullptr), _gamma(nullptr), _beta(nullptr), _epsilon(1e-12),
- _run_in_place(false)
+ : _input(nullptr), _output(nullptr), _gamma(nullptr), _beta(nullptr), _epsilon(1e-12),
+ _run_in_place(false)
{
}
// Create kernel
_kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("instance_normalization_ex", build_opts.options()));
+ CLKernelLibraryEx::get().create_kernel("instance_normalization_ex", build_opts.options()));
// Configure kernel window
auto win_config = validate_and_configure_window(_input->info(), _output->info());
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, gamma, beta, epsilon));
ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(
- input->clone().get(), (output == nullptr ? input->clone().get() : output->clone().get()))));
+ input->clone().get(), (output == nullptr ? input->clone().get() : output->clone().get()))));
return Status{};
}
} // namespace
CLMultiplyScaleFactorKernel::CLMultiplyScaleFactorKernel()
- : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f)
+ : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f)
{
}
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input->info(), scale_factor->info(), output->info()));
+ validate_arguments(input->info(), scale_factor->info(), output->info()));
_input = input;
_scale_factor = scale_factor;
Window win = calculate_max_window(*output->info());
if (multi_access_x)
{
- win.set(Window::DimX,
- Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x),
- vec_size_x));
+ win.set(
+ Window::DimX,
+ Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
}
ICLKernel::configure_internal(win);
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type()));
build_opts.add_option_if(
- multi_access_x, "-DLAST_ACCESSED_X=" +
- support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
+ multi_access_x, "-DLAST_ACCESSED_X=" +
+ support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
_kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("multiply_scale_factor", build_opts.options()));
+ CLKernelLibraryEx::get().create_kernel("multiply_scale_factor", build_opts.options()));
}
Status CLMultiplyScaleFactorKernel::validate(const ITensorInfo *input,
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, scale_factor, output));
ARM_COMPUTE_RETURN_ON_ERROR(
- std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
+ std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
return Status{};
}
std::set<std::string> build_opts;
build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
build_opts.emplace(
- ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
+ ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
_kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("neg_tensor", build_opts));
+ static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("neg_tensor", build_opts));
// Configure window
Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(on_value, output);
TensorShape output_shape = arm_compute::misc::shape_calculator::compute_onehot_shape_ex(
- indices->tensor_shape(), static_cast<uint32_t>(depth), actual_axis);
+ indices->tensor_shape(), static_cast<uint32_t>(depth), actual_axis);
ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size());
}
return Status{};
const uint32_t actual_axis = wrap_around(axis, static_cast<int>(output->num_dimensions()));
// Output auto initialization if not yet initialized
TensorShape output_shape = arm_compute::misc::shape_calculator::compute_onehot_shape_ex(
- indices->tensor_shape(), static_cast<uint32_t>(depth), actual_axis);
+ indices->tensor_shape(), static_cast<uint32_t>(depth), actual_axis);
auto_init_if_empty((*output), output_shape, 1, on_value->data_type());
// Create window
Window win = calculate_max_window(*output, Steps());
}
} // namespace
CLOneHotKernel::CLOneHotKernel()
- : _indices(nullptr), _on_value(nullptr), _off_value(nullptr), _output(nullptr),
- _is_off_value_memset(false)
+ : _indices(nullptr), _on_value(nullptr), _off_value(nullptr), _output(nullptr),
+ _is_off_value_memset(false)
{
}
void CLOneHotKernel::configure(const ICLTensor *indices, const ICLTensor *on_value,
ICLTensor *output, int depth, int axis)
{
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(indices->info(), on_value->info(), output->info(), depth, axis));
+ validate_arguments(indices->info(), on_value->info(), output->info(), depth, axis));
// Configure kernel window
auto win_config =
- validate_and_configure_window(indices->info(), on_value->info(), output->info(), depth, axis);
+ validate_and_configure_window(indices->info(), on_value->info(), output->info(), depth, axis);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
if (_is_off_value_memset)
{
// Set build options
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(
- data_size_from_type(on_value->info()->data_type())));
+ data_size_from_type(on_value->info()->data_type())));
build_opts.add_option("-DAXIS=" + support::cpp11::to_string(actual_axis));
build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(depth));
build_opts.add_option("-DOUTPUT_DIM_Z=" +
// Create kernel
const std::string kernel_name = _is_off_value_memset ? "one_hot_only_on_value" : "one_hot";
_kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options()));
+ CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options()));
ICLKernel::configure_internal(win_config.second);
}
Status CLOneHotKernel::validate(const ITensorInfo *indices, const ITensorInfo *on_value,
ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(indices->clone().get(),
on_value->clone().get(),
output->clone().get(), depth, axis)
- .first);
+ .first);
return Status{};
}
Status CLOneHotKernel::validate(const ITensorInfo *indices, const ITensorInfo *on_value,
ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(indices->clone().get(),
on_value->clone().get(),
output->clone().get(), depth, axis)
- .first);
+ .first);
return Status{};
}
void CLOneHotKernel::run(const Window &window, cl::CommandQueue &queue)
if (multi_access_x)
{
- win.set(Window::DimX,
- Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x),
- vec_size_x));
+ win.set(
+ Window::DimX,
+ Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
}
Coordinates coord;
} // namespace
CLQuantizationSymmetricKernel::CLQuantizationSymmetricKernel()
- : _input(nullptr), _scale_factor(nullptr), _output(nullptr)
+ : _input(nullptr), _scale_factor(nullptr), _output(nullptr)
{
}
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, scale_factor, output);
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input->info(), scale_factor->info(), output->info()));
+ validate_arguments(input->info(), scale_factor->info(), output->info()));
_input = input;
_scale_factor = scale_factor;
build_opts.add_option("-DDATA_TYPE_OUT=" +
get_cl_type_from_data_type(output->info()->data_type()));
build_opts.add_option_if(
- multi_access_x, "-DLAST_ACCESSED_X=" +
- support::cpp11::to_string(std::max<int>(input_width_x - vec_size_x, 0)));
+ multi_access_x,
+ "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(input_width_x - vec_size_x, 0)));
_kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("quantization_symm8", build_opts.options()));
+ CLKernelLibraryEx::get().create_kernel("quantization_symm8", build_opts.options()));
}
Status CLQuantizationSymmetricKernel::validate(const ITensorInfo *input,
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, scale_factor, output));
ARM_COMPUTE_RETURN_ON_ERROR(
- validate_and_configure_window(input->clone().get(), output->clone().get()).first);
+ validate_and_configure_window(input->clone().get(), output->clone().get()).first);
return Status{};
}
// Create kernel
_kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
+ static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
// Configure kernel window
Window win = calculate_max_window(*output_info, Steps());
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
return std::make_tuple(err, win);
}
} // namespace
// Create kernel
_kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("scale_factor_symm8", build_opts));
+ CLKernelLibraryEx::get().create_kernel("scale_factor_symm8", build_opts));
auto win_config = validate_and_configure_window(input->info(), output->info());
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
ARM_COMPUTE_RETURN_ON_ERROR(
- std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
+ std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
return Status{};
}
using namespace arm_compute;
template <typename InputScalarType, typename OutputScalarType, typename InputVectorType>
void elementwise_op_templ(
- const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
- OutputScalarType (*scalar_func)(const InputScalarType &, const InputScalarType &),
- int (*broadcast_func)(int, int, int, const InputScalarType *, const InputScalarType &,
- OutputScalarType *, const bool),
- int (*neon_func)(int, int, int, const InputScalarType *, const InputScalarType *,
- OutputScalarType *))
+ const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
+ OutputScalarType (*scalar_func)(const InputScalarType &, const InputScalarType &),
+ int (*broadcast_func)(int, int, int, const InputScalarType *, const InputScalarType &,
+ OutputScalarType *, const bool),
+ int (*neon_func)(int, int, int, const InputScalarType *, const InputScalarType *,
+ OutputScalarType *))
{
// Create input windows
Window input1_win = window.broadcast_if_dimension_le_one(in1->info()->tensor_shape());
Iterator non_broadcast_input(non_broadcast_tensor, non_broadcast_win);
Iterator output(out, win);
- execute_window_loop(win,
- [&](const Coordinates &) {
- auto output_ptr = reinterpret_cast<OutputScalarType *>(output.ptr());
- const auto non_broadcast_input_ptr =
- reinterpret_cast<const InputScalarType *>(non_broadcast_input.ptr());
- const InputScalarType broadcast_value =
- *reinterpret_cast<const InputScalarType *>(broadcast_input.ptr());
-
- int x = (*broadcast_func)(window_start_x, window_end_x, window_step_x,
- non_broadcast_input_ptr, broadcast_value,
- output_ptr, !is_broadcast_input_2);
- for (; x < window_end_x; ++x)
- {
- const auto a = *(non_broadcast_input_ptr + x);
- *(output_ptr + x) =
- (*scalar_func)(!is_broadcast_input_2 ? broadcast_value : a,
- !is_broadcast_input_2 ? a : broadcast_value);
- }
- },
- broadcast_input, non_broadcast_input, output);
+ execute_window_loop(
+ win,
+ [&](const Coordinates &) {
+ auto output_ptr = reinterpret_cast<OutputScalarType *>(output.ptr());
+ const auto non_broadcast_input_ptr =
+ reinterpret_cast<const InputScalarType *>(non_broadcast_input.ptr());
+ const InputScalarType broadcast_value =
+ *reinterpret_cast<const InputScalarType *>(broadcast_input.ptr());
+
+ int x =
+ (*broadcast_func)(window_start_x, window_end_x, window_step_x, non_broadcast_input_ptr,
+ broadcast_value, output_ptr, !is_broadcast_input_2);
+ for (; x < window_end_x; ++x)
+ {
+ const auto a = *(non_broadcast_input_ptr + x);
+ *(output_ptr + x) = (*scalar_func)(!is_broadcast_input_2 ? broadcast_value : a,
+ !is_broadcast_input_2 ? a : broadcast_value);
+ }
+ },
+ broadcast_input, non_broadcast_input, output);
}
else
{
Iterator input2(in2, input2_win);
Iterator output(out, win);
- execute_window_loop(win,
- [&](const Coordinates &) {
- auto output_ptr = reinterpret_cast<OutputScalarType *>(output.ptr());
- const auto input1_ptr =
- reinterpret_cast<const InputScalarType *>(input1.ptr());
- const auto input2_ptr =
- reinterpret_cast<const InputScalarType *>(input2.ptr());
-
- int x = (*neon_func)(window_start_x, window_end_x, window_step_x,
- input1_ptr, input2_ptr, output_ptr);
- for (; x < window_end_x; ++x)
- {
- const auto a = *(input1_ptr + x);
- const auto b = *(input2_ptr + x);
- *(output_ptr + x) = (*scalar_func)(a, b);
- }
- },
- input1, input2, output);
+ execute_window_loop(
+ win,
+ [&](const Coordinates &) {
+ auto output_ptr = reinterpret_cast<OutputScalarType *>(output.ptr());
+ const auto input1_ptr = reinterpret_cast<const InputScalarType *>(input1.ptr());
+ const auto input2_ptr = reinterpret_cast<const InputScalarType *>(input2.ptr());
+
+ int x = (*neon_func)(window_start_x, window_end_x, window_step_x, input1_ptr, input2_ptr,
+ output_ptr);
+ for (; x < window_end_x; ++x)
+ {
+ const auto a = *(input1_ptr + x);
+ const auto b = *(input2_ptr + x);
+ *(output_ptr + x) = (*scalar_func)(a, b);
+ }
+ },
+ input1, input2, output);
}
}
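elementwise_op_templ walks the output window and picks one of three callbacks: a vectorized inner loop (neon_func) when both inputs have full extent along X, a broadcast variant (broadcast_func) when one input contributes a single value along X, and scalar_func for the left-over tail in either case. A stripped-down, scalar-only analogue of that dispatch (hypothetical names, no NEON, inputs assumed equal-sized unless b is a single broadcast value):

#include <cstddef>
#include <functional>
#include <vector>

// Simplified model of the broadcast-vs-elementwise dispatch above; the real
// kernel runs a vectorized inner loop before falling back to the scalar tail.
inline std::vector<float> elementwise(const std::vector<float> &a, const std::vector<float> &b,
                                      const std::function<float(float, float)> &scalar_func)
{
  const bool broadcast_b = (b.size() == 1);
  std::vector<float> out(a.size());
  for (std::size_t i = 0; i < a.size(); ++i)
    out[i] = scalar_func(a[i], broadcast_b ? b[0] : b[i]);
  return out;
}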
inline uint8x16x4_t elementwise_logic_op(const uint8x16x4_t &a, const uint8x16x4_t &b)
{
uint8x16x4_t out = {{
- elementwise_logic_op<op>(a.val[0], b.val[0]), elementwise_logic_op<op>(a.val[1], b.val[1]),
- elementwise_logic_op<op>(a.val[2], b.val[2]), elementwise_logic_op<op>(a.val[3], b.val[3]),
+ elementwise_logic_op<op>(a.val[0], b.val[0]),
+ elementwise_logic_op<op>(a.val[1], b.val[1]),
+ elementwise_logic_op<op>(a.val[2], b.val[2]),
+ elementwise_logic_op<op>(a.val[3], b.val[3]),
}};
return out;
}
}
std::function<void(const ITensor *, const ITensor *, ITensor *, const Window &)> configure_func(
- const ITensor *input1, const ITensor *input2, ITensor *output,
- std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> map_function)
+ const ITensor *input1, const ITensor *input2, ITensor *output,
+ std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> map_function)
{
std::string function_to_call("op_");
function_to_call += string_from_data_type(input1->info()->data_type()) + "_";
configure_logic_func(const ITensor *input1, const ITensor *input2, ITensor *output)
{
static std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> map_function = {
- {"op_U8_U8_U8", &elementwise_logic_op<op, uint8_t, uint8x16_t>},
- {"op_QASYMM8_QASYMM8_QASYMM8", &elementwise_logic_op<op, uint8_t, uint8x16_t>}};
+ {"op_U8_U8_U8", &elementwise_logic_op<op, uint8_t, uint8x16_t>},
+ {"op_QASYMM8_QASYMM8_QASYMM8", &elementwise_logic_op<op, uint8_t, uint8x16_t>}};
return configure_func(input1, input2, output, map_function);
}
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &input2);
const TensorShape out_shape =
- TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
+ TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0,
"Inputs are not broadcast compatible");
if (output.total_size() > 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(out_shape, output.tensor_shape(), 0),
- "Wrong shape for output");
+ detail::have_different_dimensions(out_shape, output.tensor_shape(), 0),
+ "Wrong shape for output");
}
return Status{};
case DataType::S8:
{
/* Conversion U8 -> S8 */
- execute_window_loop(win,
- [&](const Coordinates &) {
- const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
- const auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());
-
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
-
- vst1q_s8(output_ptr + x, vreinterpretq_s8_u8(vandq_u8(
- texels_u8, vdupq_n_u8(true_val))));
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- *(output_ptr + x) = static_cast<int8_t>(*(input_ptr + x) & true_val);
- }
- },
- input, output);
+ execute_window_loop(
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ vst1q_s8(output_ptr + x,
+ vreinterpretq_s8_u8(vandq_u8(texels_u8, vdupq_n_u8(true_val))));
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ *(output_ptr + x) = static_cast<int8_t>(*(input_ptr + x) & true_val);
+ }
+ },
+ input, output);
break;
}
case DataType::S16:
{
/* Up-conversion U8 -> S16 */
execute_window_loop(
- win,
- [&](const Coordinates &) {
- const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
- const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());
-
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
-
- const int16x8x2_t texels = {
- {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
- vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
-
- vst1q_s16(output_ptr + x, texels.val[0]);
- vst1q_s16(output_ptr + x + 8, texels.val[1]);
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- *(output_ptr + x) = static_cast<int32_t>(*(input_ptr + x) & true_val);
- }
- },
- input, output);
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ const int16x8x2_t texels = {
+ {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
+ vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
+
+ vst1q_s16(output_ptr + x, texels.val[0]);
+ vst1q_s16(output_ptr + x + 8, texels.val[1]);
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ *(output_ptr + x) = static_cast<int32_t>(*(input_ptr + x) & true_val);
+ }
+ },
+ input, output);
break;
}
case DataType::S32:
{
/* Up-conversion U8 -> S32 */
execute_window_loop(
- win,
- [&](const Coordinates &) {
- const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
- const auto output_ptr = reinterpret_cast<int32_t *>(output.ptr());
-
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
-
- const int16x8x2_t texels = {
- {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
- vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
-
- vst1q_s32(output_ptr + x, vmovl_s16(vget_low_s16(texels.val[0])));
- vst1q_s32(output_ptr + x + 4, vmovl_s16(vget_high_s16(texels.val[0])));
- vst1q_s32(output_ptr + x + 8, vmovl_s16(vget_low_s16(texels.val[1])));
- vst1q_s32(output_ptr + x + 12, vmovl_s16(vget_high_s16(texels.val[1])));
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- *(output_ptr + x) = static_cast<uint32_t>(*(input_ptr + x) & true_val);
- }
- },
- input, output);
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<int32_t *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ const int16x8x2_t texels = {
+ {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
+ vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
+
+ vst1q_s32(output_ptr + x, vmovl_s16(vget_low_s16(texels.val[0])));
+ vst1q_s32(output_ptr + x + 4, vmovl_s16(vget_high_s16(texels.val[0])));
+ vst1q_s32(output_ptr + x + 8, vmovl_s16(vget_low_s16(texels.val[1])));
+ vst1q_s32(output_ptr + x + 12, vmovl_s16(vget_high_s16(texels.val[1])));
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ *(output_ptr + x) = static_cast<uint32_t>(*(input_ptr + x) & true_val);
+ }
+ },
+ input, output);
break;
}
case DataType::F32:
{
/* Up-conversion U8 -> F32 */
execute_window_loop(
- win,
- [&](const Coordinates &) {
- const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
- const auto output_ptr = reinterpret_cast<float *>(output.ptr());
-
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
-
- const int16x8x2_t texels = {
- {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
- vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
- vst1q_f32(output_ptr + x, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[0]))));
- vst1q_f32(output_ptr + x + 4, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[0]))));
- vst1q_f32(output_ptr + x + 8, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[1]))));
- vst1q_f32(output_ptr + x + 12,
- vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[1]))));
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- auto in = static_cast<uint32_t>(*(input_ptr + x) & true_val);
- *(output_ptr + x) = static_cast<float>(in);
- }
- },
- input, output);
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<float *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ const int16x8x2_t texels = {
+ {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
+ vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
+ vst1q_f32(output_ptr + x, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[0]))));
+ vst1q_f32(output_ptr + x + 4, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[0]))));
+ vst1q_f32(output_ptr + x + 8, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[1]))));
+ vst1q_f32(output_ptr + x + 12, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[1]))));
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ auto in = static_cast<uint32_t>(*(input_ptr + x) & true_val);
+ *(output_ptr + x) = static_cast<float>(in);
+ }
+ },
+ input, output);
break;
}
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
{
/* Up-conversion U8 -> F16 */
execute_window_loop(
- win,
- [&](const Coordinates &) {
- const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
- const auto output_ptr = reinterpret_cast<float16_t *>(output.ptr());
-
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
-
- const int16x8x2_t texels = {
- {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
- vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
- vst1q_f16(output_ptr + x, vcvtq_f16_s16(texels.val[0]));
- vst1q_f16(output_ptr + x + 8, vcvtq_f16_s16(texels.val[1]));
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- *(output_ptr + x) = static_cast<float16_t>(*(input_ptr + x) & true_val);
- }
- },
- input, output);
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<float16_t *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ const int16x8x2_t texels = {
+ {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
+ vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
+ vst1q_f16(output_ptr + x, vcvtq_f16_s16(texels.val[0]));
+ vst1q_f16(output_ptr + x + 8, vcvtq_f16_s16(texels.val[1]));
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ *(output_ptr + x) = static_cast<float16_t>(*(input_ptr + x) & true_val);
+ }
+ },
+ input, output);
break;
}
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::U8:
{
/* Conversion U8 -> S8 */
- execute_window_loop(win,
- [&](const Coordinates &) {
- const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
- const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
-
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
-
- vst1q_u8(output_ptr + x, vandq_u8(texels_u8, vdupq_n_u8(true_val)));
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- *(output_ptr + x) = static_cast<uint8_t>(*(input_ptr + x) & true_val);
- }
- },
- input, output);
+ execute_window_loop(
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ vst1q_u8(output_ptr + x, vandq_u8(texels_u8, vdupq_n_u8(true_val)));
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ *(output_ptr + x) = static_cast<uint8_t>(*(input_ptr + x) & true_val);
+ }
+ },
+ input, output);
break;
}
case DataType::U16:
{
/* Up-conversion U8 -> U16 */
execute_window_loop(
- win,
- [&](const Coordinates &) {
- const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
- const auto output_ptr = reinterpret_cast<uint16_t *>(output.ptr());
-
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
-
- const uint16x8x2_t texels = {{vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool)),
- vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool))}};
-
- vst1q_u16(output_ptr + x, texels.val[0]);
- vst1q_u16(output_ptr + x + 8, texels.val[1]);
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- *(output_ptr + x) = static_cast<uint16_t>(*(input_ptr + x) & true_val);
- }
- },
- input, output);
+ win,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<uint16_t *>(output.ptr());
+
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);
+
+ const uint16x8x2_t texels = {{vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool)),
+ vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool))}};
+
+ vst1q_u16(output_ptr + x, texels.val[0]);
+ vst1q_u16(output_ptr + x + 8, texels.val[1]);
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ *(output_ptr + x) = static_cast<uint16_t>(*(input_ptr + x) & true_val);
+ }
+ },
+ input, output);
break;
}
default:
using namespace arm_compute;
NEEmbeddingLookupKernel::NEEmbeddingLookupKernel()
- : _input(nullptr), _lookups(nullptr), _output(nullptr)
+ : _input(nullptr), _lookups(nullptr), _output(nullptr)
{
}
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, lookups);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32);
ARM_COMPUTE_ERROR_ON(input->num_dimensions() < 2 && input->num_dimensions() > 4);
{
Iterator output_it(_output, out_slice);
- execute_window_loop(out_slice,
- [&](const Coordinates &id) {
- const int32_t lookup = *reinterpret_cast<int32_t *>(
- _lookups->ptr_to_element(Coordinates{id[lookup_dim]}));
- Coordinates input_id{id};
- input_id.set(lookup_dim, lookup);
- memcpy(output_it.ptr(), _input->ptr_to_element(input_id),
- _output->info()->dimension(0) * _output->info()->element_size());
- },
- output_it);
+ execute_window_loop(
+ out_slice,
+ [&](const Coordinates &id) {
+ const int32_t lookup =
+ *reinterpret_cast<int32_t *>(_lookups->ptr_to_element(Coordinates{id[lookup_dim]}));
+ Coordinates input_id{id};
+ input_id.set(lookup_dim, lookup);
+ memcpy(output_it.ptr(), _input->ptr_to_element(input_id),
+ _output->info()->dimension(0) * _output->info()->element_size());
+ },
+ output_it);
} while (window.slide_window_slice_4D(out_slice));
}
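The run loop above copies, for each coordinate of the output slice, the input row selected by the lookup index, i.e. output row i is input row lookups[i] along the lookup dimension. A plain C++ restatement of that semantics (illustrative only):

#include <cstddef>
#include <cstdint>
#include <vector>

// Row-wise embedding lookup: each output row is a copy of the table row
// addressed by the corresponding lookup index.
inline std::vector<std::vector<float>>
embedding_lookup(const std::vector<std::vector<float>> &table, const std::vector<int32_t> &lookups)
{
  std::vector<std::vector<float>> out;
  out.reserve(lookups.size());
  for (int32_t idx : lookups)
    out.push_back(table.at(static_cast<std::size_t>(idx))); // at() guards bad indices
  return out;
}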
} // namespace
NEGatherKernelEx::NEGatherKernelEx()
- : _input{}, _indices{}, _axis{}, _indices_rank{}, _output{}, _func{}
+ : _input{}, _indices{}, _axis{}, _indices_rank{}, _output{}, _func{}
{
}
Iterator output_it(_output, window);
execute_window_loop(
- window,
- [&](const Coordinates &id) {
- Coordinates gather_id(id);
- gather_id.collapse(_indices_rank);
-
- U new_index;
- switch (_indices_rank)
- {
- case 1:
- new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0]))));
- break;
- case 2:
- new_index =
- *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0], id[1]))));
- break;
- case 3:
- new_index = *(
- reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0], id[1], id[2]))));
- break;
- default:
- ARM_COMPUTE_ERROR("Wrong num of dimensions");
- break;
- }
-
- gather_id.set(0, new_index);
-
- std::copy_n(_input->ptr_to_element(gather_id), _output->info()->element_size(),
- output_it.ptr());
- },
- output_it);
+ window,
+ [&](const Coordinates &id) {
+ Coordinates gather_id(id);
+ gather_id.collapse(_indices_rank);
+
+ U new_index;
+ switch (_indices_rank)
+ {
+ case 1:
+ new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0]))));
+ break;
+ case 2:
+ new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0], id[1]))));
+ break;
+ case 3:
+ new_index =
+ *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[0], id[1], id[2]))));
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Wrong num of dimensions");
+ break;
+ }
+
+ gather_id.set(0, new_index);
+
+ std::copy_n(_input->ptr_to_element(gather_id), _output->info()->element_size(),
+ output_it.ptr());
+ },
+ output_it);
}
template <typename U>
Iterator output_it(_output, output_window);
execute_window_loop(
- output_window,
- [&](const Coordinates &id) {
- Coordinates gather_id(id);
- gather_id.collapse(_indices_rank, _axis);
-
- U new_index;
- switch (_indices_rank)
- {
- case 1:
- new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[_axis]))));
- break;
- case 2:
- new_index = *(reinterpret_cast<U *>(
- _indices->ptr_to_element(Coordinates(id[_axis], id[_axis + 1]))));
- break;
- case 3:
- new_index = *(reinterpret_cast<U *>(
- _indices->ptr_to_element(Coordinates(id[_axis], id[_axis + 1], id[_axis + 2]))));
- break;
- default:
- ARM_COMPUTE_ERROR("Wrong num of dimensions");
- break;
- }
-
- gather_id.set(_axis, new_index);
-
- std::copy_n(_input->ptr_to_element(gather_id),
- _input->info()->dimension(0) * _output->info()->element_size(),
- output_it.ptr());
- },
- output_it);
+ output_window,
+ [&](const Coordinates &id) {
+ Coordinates gather_id(id);
+ gather_id.collapse(_indices_rank, _axis);
+
+ U new_index;
+ switch (_indices_rank)
+ {
+ case 1:
+ new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[_axis]))));
+ break;
+ case 2:
+ new_index = *(
+ reinterpret_cast<U *>(_indices->ptr_to_element(Coordinates(id[_axis], id[_axis + 1]))));
+ break;
+ case 3:
+ new_index = *(reinterpret_cast<U *>(
+ _indices->ptr_to_element(Coordinates(id[_axis], id[_axis + 1], id[_axis + 2]))));
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Wrong num of dimensions");
+ break;
+ }
+
+ gather_id.set(_axis, new_index);
+
+ std::copy_n(_input->ptr_to_element(gather_id),
+ _input->info()->dimension(0) * _output->info()->element_size(), output_it.ptr());
+ },
+ output_it);
}
void NEGatherKernelEx::configure(const ITensor *input, const ITensor *indices, ITensor *output,
ARM_COMPUTE_ERROR_ON(indices->info()->num_dimensions() > 3);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32, DataType::S32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
_input = input;
_indices = indices;
}
// Output auto initialization if not yet initialized
TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape_ex(
- input->info()->tensor_shape(), indices->info()->tensor_shape(), _axis);
+ input->info()->tensor_shape(), indices->info()->tensor_shape(), _axis);
auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
// Create window
ARM_COMPUTE_RETURN_ERROR_ON(0 > axis || axis >= static_cast<int32_t>(input->num_dimensions()));
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape_ex(
- input->tensor_shape(), indices->tensor_shape(), axis);
+ input->tensor_shape(), indices->tensor_shape(), axis);
ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size());
}
} // namespace
NEHashtableLookupKernel::NEHashtableLookupKernel()
- : _lookups(nullptr), _keys(nullptr), _input(nullptr), _output(nullptr), _hits{nullptr}
+ : _lookups(nullptr), _keys(nullptr), _input(nullptr), _output(nullptr), _hits{nullptr}
{
}
{
ARM_COMPUTE_ERROR_ON_NULLPTR(lookups, keys, input, output, hits);
ARM_COMPUTE_ERROR_THROW_ON(
- validate(lookups->info(), keys->info(), input->info(), output->info(), hits->info()));
+ validate(lookups->info(), keys->info(), input->info(), output->info(), hits->info()));
_lookups = lookups;
_keys = keys;
{
ARM_COMPUTE_ERROR_ON_NULLPTR(lookups, keys, input, output, hits);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
+ DataType::U32, DataType::S32, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(keys, 1, DataType::S32);
const size_t lookup_dim = _output->info()->num_dimensions() - 1;
const int const_0 = _output->info()->data_type() == DataType::QASYMM8
- ? _output->info()->quantization_info().uniform().offset
- : 0;
+ ? _output->info()->quantization_info().uniform().offset
+ : 0;
std::unordered_map<int32_t, size_t> key_index_map;
for (size_t n = 0; n < _keys->info()->dimension(0); ++n)
{
Iterator output_it(_output, out_slice);
- execute_window_loop(out_slice,
- [&](const Coordinates &id) {
- const auto lookup = lookup_indices.at(id[lookup_dim]);
- if (lookup == NOT_HIT)
- {
- memset(output_it.ptr(), const_0,
- _output->info()->dimension(0) * _output->info()->element_size());
- }
- else
- {
- Coordinates input_id{id};
- input_id.set(lookup_dim, lookup);
- memcpy(output_it.ptr(), _input->ptr_to_element(input_id),
- _output->info()->dimension(0) * _output->info()->element_size());
- }
-
- },
- output_it);
+ execute_window_loop(
+ out_slice,
+ [&](const Coordinates &id) {
+ const auto lookup = lookup_indices.at(id[lookup_dim]);
+ if (lookup == NOT_HIT)
+ {
+ memset(output_it.ptr(), const_0,
+ _output->info()->dimension(0) * _output->info()->element_size());
+ }
+ else
+ {
+ Coordinates input_id{id};
+ input_id.set(lookup_dim, lookup);
+ memcpy(output_it.ptr(), _input->ptr_to_element(input_id),
+ _output->info()->dimension(0) * _output->info()->element_size());
+ }
+ },
+ output_it);
} while (window.slide_window_slice_4D(out_slice));
}
{
/** NEON vector tag type. */
using ExactTagType =
- typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
+ typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
// Clear X/Y dimensions on execution window as we handle the planes manually
Window win = window;
constexpr int window_step_x = 16 / sizeof(T);
const unsigned int elements_plane = input->info()->dimension(0) * output->info()->dimension(1);
const auto channel_idx =
- get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL);
+ get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL);
Iterator input_it(input, win);
execute_window_loop(
- win,
- [&](const Coordinates &id) {
- Window win_plane = window;
- win_plane.set(Window::DimX, Window::Dimension(0, 1, 1));
- win_plane.set(Window::DimZ, Window::Dimension(id[2], id[2] + 1, 1));
- win_plane.set(3, Window::Dimension(id[3], id[3] + 1, 1));
-
- Iterator input_plane_it(input, win_plane);
- Iterator output_plane_it(output, win_plane);
-
- auto sum_h_w = static_cast<T>(0.f);
- auto sum_squares_h_w = static_cast<T>(0.f);
-
- execute_window_loop(
- win_plane,
- [&](const Coordinates &) {
- const auto input_ptr = reinterpret_cast<const T *>(input_plane_it.ptr());
-
- auto vec_sum_h_w = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
- auto vec_sum_squares_h_w = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
-
- // Compute S elements per iteration
- int x = window.x().start();
- for (; x <= (window.x().end() - window_step_x); x += window_step_x)
- {
- auto vec_input_val = wrapper::vloadq(input_ptr + x);
- vec_sum_h_w = wrapper::vadd(vec_sum_h_w, vec_input_val);
- vec_sum_squares_h_w =
- wrapper::vadd(vec_sum_squares_h_w, wrapper::vmul(vec_input_val, vec_input_val));
- }
-
- auto vec2_sum_h_w =
- wrapper::vpadd(wrapper::vgethigh(vec_sum_h_w), wrapper::vgetlow(vec_sum_h_w));
- auto vec2_sum_squares_h_w = wrapper::vpadd(wrapper::vgethigh(vec_sum_squares_h_w),
- wrapper::vgetlow(vec_sum_squares_h_w));
- for (int i = 0; i < window_step_x / 4; ++i)
- {
- vec2_sum_h_w = wrapper::vpadd(vec2_sum_h_w, vec2_sum_h_w);
- vec2_sum_squares_h_w = wrapper::vpadd(vec2_sum_squares_h_w, vec2_sum_squares_h_w);
- }
- sum_h_w += wrapper::vgetlane(vec2_sum_h_w, 0);
- sum_squares_h_w += wrapper::vgetlane(vec2_sum_squares_h_w, 0);
-
- // Compute left-over elements
- for (; x < window.x().end(); ++x)
- {
- const auto value = *(input_ptr + x);
- sum_h_w += value;
- sum_squares_h_w += value * value;
- }
- },
- input_plane_it, output_plane_it);
-
- const auto mean_h_w = sum_h_w / elements_plane;
- const auto var_h_w = sum_squares_h_w / elements_plane - mean_h_w * mean_h_w;
-
- auto gamma_val = 1.0f;
- if (gamma != nullptr)
- {
- gamma_val = *reinterpret_cast<T *>(gamma->ptr_to_element({id[channel_idx]}));
- }
- const auto multip_h_w = gamma_val / std::sqrt(var_h_w + epsilon);
- const auto vec_mean_h_w = wrapper::vdup_n(static_cast<T>(mean_h_w), ExactTagType{});
- const auto vec_multip_h_w = wrapper::vdup_n(static_cast<T>(multip_h_w), ExactTagType{});
- auto beta_val = 0.0f;
- if (beta != nullptr)
- {
- beta_val = *reinterpret_cast<T *>(beta->ptr_to_element({id[channel_idx]}));
- }
- const auto vec_beta = wrapper::vdup_n(static_cast<T>(beta_val), ExactTagType{});
-
- execute_window_loop(
- win_plane,
- [&](const Coordinates &) {
- auto input_ptr = reinterpret_cast<T *>(input_plane_it.ptr());
- auto output_ptr = reinterpret_cast<T *>(output_plane_it.ptr());
-
- // Compute S elements per iteration
- int x = window.x().start();
- auto vec_val = wrapper::vdup_n(static_cast<T>(0.0f), ExactTagType{});
- for (; x <= (window.x().end() - window_step_x); x += window_step_x)
- {
- vec_val = wrapper::vloadq(input_ptr + x);
- vec_val = wrapper::vadd(
- wrapper::vmul(wrapper::vsub(vec_val, vec_mean_h_w), vec_multip_h_w), vec_beta);
- wrapper::vstore(output_ptr + x, vec_val);
- }
-
- // Compute left-over elements
- for (; x < window.x().end(); ++x)
- {
- *(output_ptr + x) = ((*(input_ptr + x)) - mean_h_w) * multip_h_w + beta_val;
- }
- },
- input_plane_it, output_plane_it);
- },
- input_it);
+ win,
+ [&](const Coordinates &id) {
+ Window win_plane = window;
+ win_plane.set(Window::DimX, Window::Dimension(0, 1, 1));
+ win_plane.set(Window::DimZ, Window::Dimension(id[2], id[2] + 1, 1));
+ win_plane.set(3, Window::Dimension(id[3], id[3] + 1, 1));
+
+ Iterator input_plane_it(input, win_plane);
+ Iterator output_plane_it(output, win_plane);
+
+ auto sum_h_w = static_cast<T>(0.f);
+ auto sum_squares_h_w = static_cast<T>(0.f);
+
+ execute_window_loop(
+ win_plane,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const T *>(input_plane_it.ptr());
+
+ auto vec_sum_h_w = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
+ auto vec_sum_squares_h_w = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
+
+ // Compute S elements per iteration
+ int x = window.x().start();
+ for (; x <= (window.x().end() - window_step_x); x += window_step_x)
+ {
+ auto vec_input_val = wrapper::vloadq(input_ptr + x);
+ vec_sum_h_w = wrapper::vadd(vec_sum_h_w, vec_input_val);
+ vec_sum_squares_h_w =
+ wrapper::vadd(vec_sum_squares_h_w, wrapper::vmul(vec_input_val, vec_input_val));
+ }
+
+ auto vec2_sum_h_w =
+ wrapper::vpadd(wrapper::vgethigh(vec_sum_h_w), wrapper::vgetlow(vec_sum_h_w));
+ auto vec2_sum_squares_h_w = wrapper::vpadd(wrapper::vgethigh(vec_sum_squares_h_w),
+ wrapper::vgetlow(vec_sum_squares_h_w));
+ for (int i = 0; i < window_step_x / 4; ++i)
+ {
+ vec2_sum_h_w = wrapper::vpadd(vec2_sum_h_w, vec2_sum_h_w);
+ vec2_sum_squares_h_w = wrapper::vpadd(vec2_sum_squares_h_w, vec2_sum_squares_h_w);
+ }
+ sum_h_w += wrapper::vgetlane(vec2_sum_h_w, 0);
+ sum_squares_h_w += wrapper::vgetlane(vec2_sum_squares_h_w, 0);
+
+ // Compute left-over elements
+ for (; x < window.x().end(); ++x)
+ {
+ const auto value = *(input_ptr + x);
+ sum_h_w += value;
+ sum_squares_h_w += value * value;
+ }
+ },
+ input_plane_it, output_plane_it);
+
+ const auto mean_h_w = sum_h_w / elements_plane;
+ const auto var_h_w = sum_squares_h_w / elements_plane - mean_h_w * mean_h_w;
+
+ auto gamma_val = 1.0f;
+ if (gamma != nullptr)
+ {
+ gamma_val = *reinterpret_cast<T *>(gamma->ptr_to_element({id[channel_idx]}));
+ }
+ const auto multip_h_w = gamma_val / std::sqrt(var_h_w + epsilon);
+ const auto vec_mean_h_w = wrapper::vdup_n(static_cast<T>(mean_h_w), ExactTagType{});
+ const auto vec_multip_h_w = wrapper::vdup_n(static_cast<T>(multip_h_w), ExactTagType{});
+ auto beta_val = 0.0f;
+ if (beta != nullptr)
+ {
+ beta_val = *reinterpret_cast<T *>(beta->ptr_to_element({id[channel_idx]}));
+ }
+ const auto vec_beta = wrapper::vdup_n(static_cast<T>(beta_val), ExactTagType{});
+
+ execute_window_loop(
+ win_plane,
+ [&](const Coordinates &) {
+ auto input_ptr = reinterpret_cast<T *>(input_plane_it.ptr());
+ auto output_ptr = reinterpret_cast<T *>(output_plane_it.ptr());
+
+ // Compute S elements per iteration
+ int x = window.x().start();
+ auto vec_val = wrapper::vdup_n(static_cast<T>(0.0f), ExactTagType{});
+ for (; x <= (window.x().end() - window_step_x); x += window_step_x)
+ {
+ vec_val = wrapper::vloadq(input_ptr + x);
+ vec_val = wrapper::vadd(
+ wrapper::vmul(wrapper::vsub(vec_val, vec_mean_h_w), vec_multip_h_w), vec_beta);
+ wrapper::vstore(output_ptr + x, vec_val);
+ }
+
+ // Compute left-over elements
+ for (; x < window.x().end(); ++x)
+ {
+ *(output_ptr + x) = ((*(input_ptr + x)) - mean_h_w) * multip_h_w + beta_val;
+ }
+ },
+ input_plane_it, output_plane_it);
+ },
+ input_it);
}
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, gamma);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(get_data_layout_dimension_index(
- input->data_layout(), DataLayoutDimension::CHANNEL)) !=
- gamma->dimension(0),
+ input->data_layout(), DataLayoutDimension::CHANNEL)) !=
+ gamma->dimension(0),
"Gamma's size must be the same as size of input's channel");
}
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, beta);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(get_data_layout_dimension_index(
- input->data_layout(), DataLayoutDimension::CHANNEL)) !=
- beta->dimension(0),
+ input->data_layout(), DataLayoutDimension::CHANNEL)) !=
+ beta->dimension(0),
"Beta's size must be the same as size of input's channel");
}
} // namespace
NEInstanceNormalizationLayerKernelEx::NEInstanceNormalizationLayerKernelEx()
- : _func(nullptr), _input(nullptr), _output(nullptr), _gamma(nullptr), _beta(nullptr),
- _epsilon(1e-12)
+ : _func(nullptr), _input(nullptr), _output(nullptr), _gamma(nullptr), _beta(nullptr),
+ _epsilon(1e-12)
{
}
_epsilon = epsilon;
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(_input->info(), _output->info(), gamma->info(), beta->info(), epsilon));
+ validate_arguments(_input->info(), _output->info(), gamma->info(), beta->info(), epsilon));
if (_input->info()->data_type() == DataType::F32)
{
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, gamma, beta, epsilon));
ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(
- input->clone().get(), (output == nullptr ? input->clone().get() : output->clone().get()))));
+ input->clone().get(), (output == nullptr ? input->clone().get() : output->clone().get()))));
return Status{};
}
const float32x4_t vscale = vdupq_n_f32(scale);
const float32x4x4_t ret = {{
- vmulq_f32(vcvtq_f32_s32(iv.val[0]), vscale), vmulq_f32(vcvtq_f32_s32(iv.val[1]), vscale),
- vmulq_f32(vcvtq_f32_s32(iv.val[2]), vscale), vmulq_f32(vcvtq_f32_s32(iv.val[3]), vscale),
+ vmulq_f32(vcvtq_f32_s32(iv.val[0]), vscale),
+ vmulq_f32(vcvtq_f32_s32(iv.val[1]), vscale),
+ vmulq_f32(vcvtq_f32_s32(iv.val[2]), vscale),
+ vmulq_f32(vcvtq_f32_s32(iv.val[3]), vscale),
}};
return ret;
}
} // namespace
NEMultiplyScaleFactorKernel::NEMultiplyScaleFactorKernel()
- : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f)
+ : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f)
{
}
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input->info(), scale_factor->info(), output->info()));
+ validate_arguments(input->info(), scale_factor->info(), output->info()));
_input = input;
_scale_factor = scale_factor;
Iterator output(_output, win_collapsed);
win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
execute_window_loop(
- win_collapsed,
- [&](const Coordinates &id) {
- auto scale = *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()}));
- scale *= _multiplier;
-
- const auto input_ptr = reinterpret_cast<const int32_t *>(input.ptr());
- auto output_ptr = reinterpret_cast<T *>(output.ptr());
- int x = window_start_x;
- for (; x <= (window_end_x - window_step); x += window_step)
- {
- store_result<float>(&output_ptr[x], multiply_scale_vec(load_value(&input_ptr[x]), scale));
- }
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- output_ptr[x] = input_ptr[x] * scale;
- }
- },
- input, output);
+ win_collapsed,
+ [&](const Coordinates &id) {
+ auto scale = *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()}));
+ scale *= _multiplier;
+
+ const auto input_ptr = reinterpret_cast<const int32_t *>(input.ptr());
+ auto output_ptr = reinterpret_cast<T *>(output.ptr());
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step); x += window_step)
+ {
+ store_result<float>(&output_ptr[x], multiply_scale_vec(load_value(&input_ptr[x]), scale));
+ }
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ output_ptr[x] = input_ptr[x] * scale;
+ }
+ },
+ input, output);
}
void NEMultiplyScaleFactorKernel::run(const Window &window, const ThreadInfo &info)
} // namespace
NEOneHotKernel::NEOneHotKernel()
- : _indices{nullptr}, _depth{nullptr}, _on_value{nullptr}, _off_value{nullptr}, _axis{-1},
- _output{nullptr}, _func{}
+ : _indices{nullptr}, _depth{nullptr}, _on_value{nullptr},
+ _off_value{nullptr}, _axis{-1}, _output{nullptr}, _func{}
{
}
Iterator output_it(_output, output_window);
const U off_value = *reinterpret_cast<U *>(_off_value->buffer());
execute_window_loop(
- output_window,
- [&](const Coordinates &id) {
- std::fill_n(output_it.ptr(),
- _output->info()->dimension(0) * _output->info()->element_size(), off_value);
- Coordinates indices_id(id);
- indices_id.remove(0);
- const U new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(indices_id)));
- if (isOnValue(new_index, *(reinterpret_cast<U *>(_depth->buffer()))))
- {
- Coordinates onehot_id(id);
- onehot_id.set(0, new_index);
- std::copy_n(_on_value->buffer(), _output->info()->element_size(),
- _output->ptr_to_element(onehot_id));
- }
- },
- output_it);
+ output_window,
+ [&](const Coordinates &id) {
+ std::fill_n(output_it.ptr(), _output->info()->dimension(0) * _output->info()->element_size(),
+ off_value);
+ Coordinates indices_id(id);
+ indices_id.remove(0);
+ const U new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(indices_id)));
+ if (isOnValue(new_index, *(reinterpret_cast<U *>(_depth->buffer()))))
+ {
+ Coordinates onehot_id(id);
+ onehot_id.set(0, new_index);
+ std::copy_n(_on_value->buffer(), _output->info()->element_size(),
+ _output->ptr_to_element(onehot_id));
+ }
+ },
+ output_it);
}
template <typename U>
// Validate that the indices are not negative
validate_depth<U>(_depth, _output, _axis);
Iterator output_it(_output, window);
- execute_window_loop(window,
- [&](const Coordinates &id) {
- Coordinates indices_id(id);
- indices_id.remove(_axis);
- const U new_index =
- *(reinterpret_cast<U *>(_indices->ptr_to_element(indices_id)));
- if (isOnValue(new_index, *(reinterpret_cast<U *>(_depth->buffer()))))
- {
- Coordinates onehot_id(id);
- onehot_id.set(_axis, new_index);
- std::copy_n(static_cast<U>(id[_axis]) == new_index ? _on_value->buffer()
- : _off_value->buffer(),
- _output->info()->element_size(), output_it.ptr());
- }
- },
- output_it);
+ execute_window_loop(
+ window,
+ [&](const Coordinates &id) {
+ Coordinates indices_id(id);
+ indices_id.remove(_axis);
+ const U new_index = *(reinterpret_cast<U *>(_indices->ptr_to_element(indices_id)));
+ if (isOnValue(new_index, *(reinterpret_cast<U *>(_depth->buffer()))))
+ {
+ Coordinates onehot_id(id);
+ onehot_id.set(_axis, new_index);
+ std::copy_n(static_cast<U>(id[_axis]) == new_index ? _on_value->buffer()
+ : _off_value->buffer(),
+ _output->info()->element_size(), output_it.ptr());
+ }
+ },
+ output_it);
}
void NEOneHotKernel::configure(const ITensor *indices, const ITensor *depth,
const ITensorInfo *output, int axis)
{
ARM_COMPUTE_RETURN_ON_ERROR(
- validate_arguments(indices, depth, on_value, off_value, output, axis));
+ validate_arguments(indices, depth, on_value, off_value, output, axis));
return Status{};
}
const int32x4x4_t rf = {{
#ifdef __aarch64__
- vminq_s32(vposend,
- vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[0], vinvscale))))),
- vminq_s32(vposend,
- vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[1], vinvscale))))),
- vminq_s32(vposend,
- vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[2], vinvscale))))),
- vminq_s32(vposend,
- vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[3], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[0], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[1], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[2], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtnq_s32_f32(round(vmulq_f32(fv.val[3], vinvscale))))),
#else //__aarch64__
- vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[0], vinvscale))))),
- vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[1], vinvscale))))),
- vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[2], vinvscale))))),
- vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[3], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[0], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[1], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[2], vinvscale))))),
+ vminq_s32(vposend, vmaxq_s32(vnagend, vcvtq_s32_f32(round(vmulq_f32(fv.val[3], vinvscale))))),
#endif //__aarch64__
}};
const int8x8_t pa = vqmovn_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])));
} // namespace
NEQuantizationSymmetricKernel::NEQuantizationSymmetricKernel()
- : _input(nullptr), _output(nullptr), _scale_factor(nullptr)
+ : _input(nullptr), _output(nullptr), _scale_factor(nullptr)
{
}
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input->info(), output->info(), scale_factor->info()));
+ validate_arguments(input->info(), output->info(), scale_factor->info()));
_input = input;
_output = output;
const auto dim_x = _input->info()->dimension(0);
win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
execute_window_loop(
- win_collapsed,
- [&](const Coordinates &id) {
- const auto start = reinterpret_cast<const T *>(input.ptr());
- const auto min_max = std::minmax_element(start, start + dim_x);
- const auto int8_scale = 127;
- auto range = std::max(std::abs(*min_max.first), std::abs(*min_max.second));
- if (range == 0)
- {
- *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()})) = 1;
- range = 1;
- }
- else
- {
- *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()})) = range / int8_scale;
- }
- const auto scale_factor_inv = int8_scale / range;
-
- auto input_ptr = reinterpret_cast<const T *>(input.ptr());
- auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());
- int x = window_start_x;
- for (; x <= (window_end_x - window_step); x += window_step)
- {
- wrapper::vstore(&output_ptr[x],
- vquantizeSymm(load_value(&input_ptr[x]), scale_factor_inv, int8_scale));
- }
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- int quantized = arm_compute::round(input_ptr[x] * scale_factor_inv, rounding_policy);
- quantized = std::min(int8_scale, std::max(quantized, -int8_scale));
- output_ptr[x] = static_cast<int8_t>(quantized);
- }
- },
- input, output);
+ win_collapsed,
+ [&](const Coordinates &id) {
+ const auto start = reinterpret_cast<const T *>(input.ptr());
+ const auto min_max = std::minmax_element(start, start + dim_x);
+ const auto int8_scale = 127;
+ auto range = std::max(std::abs(*min_max.first), std::abs(*min_max.second));
+ if (range == 0)
+ {
+ *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()})) = 1;
+ range = 1;
+ }
+ else
+ {
+ *reinterpret_cast<T *>(_scale_factor->ptr_to_element({id.y()})) = range / int8_scale;
+ }
+ const auto scale_factor_inv = int8_scale / range;
+
+ auto input_ptr = reinterpret_cast<const T *>(input.ptr());
+ auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step); x += window_step)
+ {
+ wrapper::vstore(&output_ptr[x],
+ vquantizeSymm(load_value(&input_ptr[x]), scale_factor_inv, int8_scale));
+ }
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ int quantized = arm_compute::round(input_ptr[x] * scale_factor_inv, rounding_policy);
+ quantized = std::min(int8_scale, std::max(quantized, -int8_scale));
+ output_ptr[x] = static_cast<int8_t>(quantized);
+ }
+ },
+ input, output);
}
void NEQuantizationSymmetricKernel::run(const Window &window, const ThreadInfo &info)
namespace arm_compute
{
CLArgMinMaxLayerEx::CLArgMinMaxLayerEx(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _results_vector(), _not_reshaped_output(),
- _reduction_kernels_vector(), _reshape_kernel(), _num_of_stages(), _reduction_axis()
+ : _memory_group(std::move(memory_manager)), _results_vector(), _not_reshaped_output(),
+ _reduction_kernels_vector(), _reshape_kernel(), _num_of_stages(), _reduction_axis()
{
}
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX &&
- op != ReductionOperation::ARG_IDX_MIN,
+ op != ReductionOperation::ARG_IDX_MIN,
"Invalid reduction operation");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= static_cast<int>(TensorShape::num_max_dimensions),
"Reduction axis greater than max number of dimensions");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");
const unsigned int num_of_stages =
- calculate_number_of_stages_only_x_axis(input->dimension(0), axis);
+ calculate_number_of_stages_only_x_axis(input->dimension(0), axis);
DataType output_data_type = DataType::S32;
TensorInfo not_reshaped_output;
if (output->total_size() != 0)
{
output_data_type = output->data_type();
- const TensorInfo expected_output_shape = output->clone()->set_tensor_shape(
- arm_compute::misc::shape_calculator::compute_reduced_shape(input->tensor_shape(), axis,
- false));
+ const TensorInfo expected_output_shape =
+ output->clone()->set_tensor_shape(arm_compute::misc::shape_calculator::compute_reduced_shape(
+ input->tensor_shape(), axis, false));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&expected_output_shape, output);
}
auto initialize_tensorinfo = [](TensorInfo &ti, TensorShape shape, DataType data_type,
int num_channels, QuantizationInfo qinfo) {
ti.set_data_type(data_type)
- .set_tensor_shape(shape)
- .set_num_channels(num_channels)
- .set_quantization_info(qinfo);
+ .set_tensor_shape(shape)
+ .set_num_channels(num_channels)
+ .set_quantization_info(qinfo);
};
initialize_tensorinfo(not_reshaped_output, shape_before_reshape, output_data_type,
if (num_of_stages == 1)
{
ARM_COMPUTE_RETURN_ON_ERROR(
- CLArgMinMaxLayerKernelEx::validate(input, nullptr, ¬_reshaped_output, axis, op));
+ CLArgMinMaxLayerKernelEx::validate(input, nullptr, ¬_reshaped_output, axis, op));
}
else
{
// Validate ReductionOperation only on first kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- CLArgMinMaxLayerKernelEx::validate(input, nullptr, &sums_vector[0], axis, op));
+ CLArgMinMaxLayerKernelEx::validate(input, nullptr, &sums_vector[0], axis, op));
// Validate ReductionOperation on intermediate stages
for (unsigned int i = 1; i < num_of_stages - 1; ++i)
{
- ARM_COMPUTE_RETURN_ON_ERROR(CLArgMinMaxLayerKernelEx::validate(input, &sums_vector[i - 1],
- &sums_vector[i], axis, op));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLArgMinMaxLayerKernelEx::validate(input, &sums_vector[i - 1], &sums_vector[i], axis, op));
}
// Validate ReductionOperation on the last stage
const unsigned int last_stage = num_of_stages - 1;
ARM_COMPUTE_RETURN_ON_ERROR(CLArgMinMaxLayerKernelEx::validate(
- input, &sums_vector[last_stage - 1], ¬_reshaped_output, axis, op));
+ input, &sums_vector[last_stage - 1], ¬_reshaped_output, axis, op));
}
ARM_COMPUTE_RETURN_ON_ERROR(CLReshapeLayerKernel::validate(¬_reshaped_output, output));
return Status{};
_reduction_axis = axis;
const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(
- input->info()->tensor_shape(), axis, false);
+ input->info()->tensor_shape(), axis, false);
DataType output_data_type = (output->info()->data_type() == DataType::UNKNOWN)
- ? DataType::S32
- : output->info()->data_type();
+ ? DataType::S32
+ : output->info()->data_type();
auto_init_if_empty(*output->info(), input->info()
- ->clone()
- ->set_tensor_shape(output_shape)
- .set_data_type(output_data_type)
- .reset_padding()
- .set_is_resizable(true));
+ ->clone()
+ ->set_tensor_shape(output_shape)
+ .set_data_type(output_data_type)
+ .reset_padding()
+ .set_is_resizable(true));
// Configure reduction operation kernels
_reduction_kernels_vector.resize(_num_of_stages);
TensorShape output_shape{input->info()->tensor_shape()};
output_shape.set(axis, 1);
auto_init_if_empty(*_not_reshaped_output.info(), input->info()
- ->clone()
- ->set_tensor_shape(output_shape)
- .set_data_type(output_data_type)
- .reset_padding()
- .set_is_resizable(true));
+ ->clone()
+ ->set_tensor_shape(output_shape)
+ .set_data_type(output_data_type)
+ .reset_padding()
+ .set_is_resizable(true));
_not_reshaped_output.info()->set_tensor_shape(output_shape);
_reduction_kernels_vector[0].configure(input, nullptr, &_not_reshaped_output, axis, op);
}
{
shape.set(0, ceil(shape.x() / 128.f));
_results_vector[i].allocator()->init(
- input->info()->clone()->set_tensor_shape(shape).set_data_type(output_data_type));
+ input->info()->clone()->set_tensor_shape(shape).set_data_type(output_data_type));
}
// Apply ReductionOperation only on first kernel
using namespace arm_compute::misc::shape_calculator;
CLDirectTransposeConvLayer::CLDirectTransposeConvLayer(
- std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
- : _memory_group(std::move(memory_manager)),
- _scale_f(),
- _conv_f(),
- _flip_weights(),
- _scaled_output(),
- _original_weights(nullptr),
- _weights_flipped(),
- _flip_axis(),
- _is_prepared(false)
+ std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+ : _memory_group(std::move(memory_manager)), _scale_f(), _conv_f(), _flip_weights(),
+ _scaled_output(), _original_weights(nullptr), _weights_flipped(), _flip_axis(),
+ _is_prepared(false)
{
}
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32);
+ input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
const DataLayout data_layout = input->data_layout();
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1);
auto out_dims = transposeconv_output_dimensions(
- input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w),
- weights->dimension(idx_h), info, invalid_right, invalid_bottom);
+ input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w),
+ weights->dimension(idx_h), info, invalid_right, invalid_bottom);
const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights);
unsigned int pad_right = 0;
unsigned int pad_top = 0;
unsigned int pad_bottom = 0;
- const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top,
- pad_bottom);
+ const TensorShape scale_out_shape =
+ compute_transposeconv_upsampled_shape(*input, *weights, info, out_dims, invalid_right,
+ invalid_bottom, pad_left, pad_right, pad_top, pad_bottom);
TensorInfo scale_out_info(input->clone()
- ->set_is_resizable(true)
- .reset_padding()
- .set_tensor_shape(scale_out_shape)
- .set_data_layout(data_layout));
+ ->set_is_resizable(true)
+ .reset_padding()
+ .set_tensor_shape(scale_out_shape)
+ .set_data_layout(data_layout));
const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionLayerUpsample::validate(input, &scale_out_info, info));
- ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayer::validate(&scale_out_info, weights, bias, output,
- conv_info, weights_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLConvolutionLayer::validate(&scale_out_info, weights, bias, output, conv_info, weights_info));
return Status{};
}
_flip_weights.configure(compile_context, weights, &_weights_flipped, &_flip_axis);
auto out_dims = transposeconv_output_dimensions(
- input->info()->dimension(idx_w), input->info()->dimension(idx_h),
- weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right,
- invalid_bottom);
+ input->info()->dimension(idx_w), input->info()->dimension(idx_h),
+ weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right,
+ invalid_bottom);
const TensorShape output_shape =
- compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
+ compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
// Output auto initialization if not yet initialized
auto_init_if_empty(
- *output->info(),
- input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout));
+ *output->info(),
+ input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout));
// Perform validation step
ARM_COMPUTE_ERROR_THROW_ON(CLDirectTransposeConvLayer::validate(
- input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(),
- info, invalid_right, invalid_bottom));
+ input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(), info,
+ invalid_right, invalid_bottom));
_is_prepared = weights_info.retain_internal_weights();
// Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order
// to match output shape
const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
- pad_right, pad_top, pad_bottom);
+ *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
+ pad_right, pad_top, pad_bottom);
TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
input->info()->quantization_info());
ARM_COMPUTE_UNUSED(weights);
ARM_COMPUTE_UNUSED(output);
ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
+ CLGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
return Status{};
}
}
CLFullyConnectedHybridLayer::CLFullyConnectedHybridLayer(
- std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(memory_manager), _reshape_weights_kernel(), _quant_input_kernel(),
- _mm_gemmlowp(memory_manager), _multiply_scale_kernel(), _accumulate_biases_kernel(),
- _reshape_weights_output(), _quantized_input(), _scale_factor(), _gemmlowp_output(),
- _are_weights_reshaped(true), _accumulate_biases(false), _is_prepared(false),
- _original_weights(nullptr)
+ std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(memory_manager), _reshape_weights_kernel(), _quant_input_kernel(),
+ _mm_gemmlowp(memory_manager), _multiply_scale_kernel(), _accumulate_biases_kernel(),
+ _reshape_weights_output(), _quantized_input(), _scale_factor(), _gemmlowp_output(),
+ _are_weights_reshaped(true), _accumulate_biases(false), _is_prepared(false),
+ _original_weights(nullptr)
{
}
void CLFullyConnectedHybridLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights,
// Perform validate step
ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedHybridLayer::validate(
- input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
- fc_info));
+ input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
+ fc_info));
_are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
_accumulate_biases = false;
bool is_fc_after_conv = false;
if (is_batched_fc_layer)
{
- is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
- (std::equal(input->info()->tensor_shape().cbegin() + 3,
- input->info()->tensor_shape().cend(),
- output->info()->tensor_shape().cbegin() + 1));
+ is_fc_after_conv =
+ (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
}
else
{
{
// Reshape the weights
_reshape_weights_output.allocator()->init(
- weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights->info())));
+ weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights->info())));
_reshape_weights_kernel.configure(weights_to_use, &_reshape_weights_output);
weights_to_use = &_reshape_weights_output;
}
// Extract scale factor
_scale_factor.allocator()->init(
- TensorInfo(TensorShape{output->info()->dimension(1)}, 1, input->info()->data_type()));
+ TensorInfo(TensorShape{output->info()->dimension(1)}, 1, input->info()->data_type()));
_memory_group.manage(&_scale_factor);
_scale_factor_kernel.configure(input, &_scale_factor);
// Quantize input
_quantized_input.allocator()->init(
- input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::QASYMM8_SIGNED));
+ input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
+ DataType::QASYMM8_SIGNED));
_memory_group.manage(&_quantized_input);
_quant_input_kernel.configure(input, &_scale_factor, &_quantized_input);
// GEMMLowp
_gemmlowp_output.allocator()->init(
- output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
_memory_group.manage(&_gemmlowp_output);
configure_mm(&_quantized_input, weights_to_use, &_gemmlowp_output,
fc_info.retain_internal_weights);
const GPUTarget gpu_target = CLScheduler::get().target();
const ITensorInfo &reshaped_weights =
- TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights)));
+ TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights)));
// Configure accumulate biases kernel for non quantized asymmetric types
if (biases != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMMMatrixAccumulateBiasesKernel::validate(output, biases, gpu_target));
+ CLGEMMMatrixAccumulateBiasesKernel::validate(output, biases, gpu_target));
}
// With the Fully Connected layer we can have 4 different cases:
{
// Validate reshape weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- CLFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights));
+ CLFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights));
weights_to_use = &reshaped_weights;
}
// Validate Scale factor kernel
const ITensorInfo &scale_factor =
- TensorInfo(TensorShape{output->dimension(1)}, 1, input->data_type());
+ TensorInfo(TensorShape{output->dimension(1)}, 1, input->data_type());
ARM_COMPUTE_RETURN_ON_ERROR(CLScaleFactorSymm8Kernel::validate(input, &scale_factor));
// Validate quantization symm8 kernel
- const ITensorInfo &quantized_input =
- TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::QASYMM8_SIGNED));
+ const ITensorInfo &quantized_input = TensorInfo(
+ input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::QASYMM8_SIGNED));
ARM_COMPUTE_RETURN_ON_ERROR(
- CLQuantizationSymmetricKernel::validate(input, &scale_factor, &quantized_input));
+ CLQuantizationSymmetricKernel::validate(input, &scale_factor, &quantized_input));
// Fully Connected layer after a Fully Connected Layer without batches
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
// Validate matrix multiply kernel
const ITensorInfo &gemmlowp_output = TensorInfo(
- output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output));
// Multiply scale
ARM_COMPUTE_RETURN_ON_ERROR(
- CLMultiplyScaleFactorKernel::validate(&gemmlowp_output, &scale_factor, output));
+ CLMultiplyScaleFactorKernel::validate(&gemmlowp_output, &scale_factor, output));
return Status{};
}
int output_multiplier = 0;
int output_shift = 0;
ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one(
- multiplier, &output_multiplier, &output_shift));
+ multiplier, &output_multiplier, &output_shift));
// Set the GEMMLowp output stage info
gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset;
{
GEMMLowpOutputStageInfo gemmlowp_output_stage;
ARM_COMPUTE_RETURN_ON_ERROR(
- construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage));
+ construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage));
const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped
false, // is_b_reshaped
// Validate gemmlowp function
ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyCore::validate(
- &input.clone()->set_quantization_info(input_quantization_info),
- &weights.clone()->set_quantization_info(weights_quantization_info), bias, &output,
- gemm_info));
+ &input.clone()->set_quantization_info(input_quantization_info),
+ &weights.clone()->set_quantization_info(weights_quantization_info), bias, &output,
+ gemm_info));
}
else
{
ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMM::validate(&input, &weights, bias, &output, 1.f, 1.f, gemm_info));
+ CLGEMM::validate(&input, &weights, bias, &output, 1.f, 1.f, gemm_info));
}
return Status{};
CLFullyConnectedLayerEx::CLFullyConnectedLayerEx(std::shared_ptr<IMemoryManager> memory_manager,
IWeightsManager *weights_manager)
- : _memory_group(memory_manager), _weights_manager(weights_manager), _convert_weights(),
- _convert_weights_managed(), _reshape_weights_managed_function(), _flatten_layer(),
- _reshape_weights_function(), _mm_gemm(memory_manager, weights_manager),
- _mm_gemmlowp(memory_manager), _flatten_output(), _converted_weights_output(),
- _reshape_weights_output(), _are_weights_converted(true), _are_weights_reshaped(true),
- _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr)
+ : _memory_group(memory_manager), _weights_manager(weights_manager), _convert_weights(),
+ _convert_weights_managed(), _reshape_weights_managed_function(), _flatten_layer(),
+ _reshape_weights_function(), _mm_gemm(memory_manager, weights_manager),
+ _mm_gemmlowp(memory_manager), _flatten_output(), _converted_weights_output(),
+ _reshape_weights_output(), _are_weights_converted(true), _are_weights_reshaped(true),
+ _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr)
{
}
void CLFullyConnectedLayerEx::configure_mm(const ICLTensor *input, const ICLTensor *weights,
const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
input->info()->set_quantization_info(QuantizationInfo(
- input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+ input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
weights->info()->set_quantization_info(QuantizationInfo(
- weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
+ weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
// Configure gemmlowp function
_mm_gemmlowp.configure(input, weights, bias, output, gemm_info);
const FullyConnectedLayerInfo &fc_info)
{
ARM_COMPUTE_ERROR_ON(
- (weights->info()->dimension(1) !=
- (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
+ (weights->info()->dimension(1) !=
+ (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
// If the fully connected layer is called after a convolution layer, the input tensor must be
// linearized
// Initialize output tensor for flatten
TensorShape shape_flatten = compute_flatten_shape(input->info());
_flatten_output.allocator()->init(input->info()
- ->clone()
- ->set_is_resizable(true)
- .reset_padding()
- .set_tensor_shape(shape_flatten)
- .set_data_layout(DataLayout::NCHW));
+ ->clone()
+ ->set_is_resizable(true)
+ .reset_padding()
+ .set_tensor_shape(shape_flatten)
+ .set_data_layout(DataLayout::NCHW));
// Configure flatten kernel
_memory_group.manage(&_flatten_output);
// Perform validate step
ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedLayerEx::validate(
- input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
- fc_info));
+ input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
+ fc_info));
_are_weights_converted = true;
_are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
if (is_batched_fc_layer)
{
- _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
- (std::equal(input->info()->tensor_shape().cbegin() + 3,
- input->info()->tensor_shape().cend(),
- output->info()->tensor_shape().cbegin() + 1));
+ _is_fc_after_conv =
+ (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
}
else
{
{
_reshape_weights_managed_function.configure(weights);
weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(
- _weights_manager->acquire(weights, &_reshape_weights_managed_function));
+ _weights_manager->acquire(weights, &_reshape_weights_managed_function));
}
else
{
_convert_weights_managed.configure(weights_to_use, input->info()->tensor_shape(),
fc_info.weights_trained_layout);
weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(
- _weights_manager->acquire(weights, &_convert_weights_managed));
+ _weights_manager->acquire(weights, &_convert_weights_managed));
}
else
{
bool is_fc_after_conv = true;
const ITensorInfo &flatten_input = TensorInfo(input->clone()
- ->set_is_resizable(true)
- .reset_padding()
- .set_tensor_shape(compute_flatten_shape(input))
- .set_data_layout(DataLayout::NCHW));
+ ->set_is_resizable(true)
+ .reset_padding()
+ .set_tensor_shape(compute_flatten_shape(input))
+ .set_data_layout(DataLayout::NCHW));
const ITensorInfo &reshaped_weights =
- TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights)));
+ TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights)));
const ITensorInfo &converted_weights =
- weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
- : TensorInfo(*reshaped_weights.clone());
+ weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
+ : TensorInfo(*reshaped_weights.clone());
// With the Fully Connected layer we can have 4 different cases:
// 1) Convolution layer -> Fully Connected layer without batches
{
// Validate reshape weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- CLFullyConnectedLayerReshapeWeightsEx::validate(weights, &reshaped_weights));
+ CLFullyConnectedLayerReshapeWeightsEx::validate(weights, &reshaped_weights));
weights_to_use = &reshaped_weights;
}
{
// Validate convert weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(CLConvertFullyConnectedWeights::validate(
- weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout));
+ weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout));
weights_to_use = &converted_weights;
}
{
// Fully Connected layer after a Convolution Layer without batches
ARM_COMPUTE_RETURN_ERROR_ON(
- (weights_to_use->dimension(1) !=
- (input->dimension(0) * input->dimension(1) * input->dimension(2))));
+ (weights_to_use->dimension(1) !=
+ (input->dimension(0) * input->dimension(1) * input->dimension(2))));
// Validate flatten kernel
ARM_COMPUTE_RETURN_ON_ERROR(CLFlattenLayer::validate(input, &flatten_input));
// Validate matrix multiply kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- validate_mm(*input_to_use, *weights_to_use, biases, *output, fc_info));
+ validate_mm(*input_to_use, *weights_to_use, biases, *output, fc_info));
return Status{};
}
if (_weights_manager && _weights_manager->are_weights_managed(cur_weights))
{
_original_weights = utils::cast::polymorphic_downcast<ICLTensor *>(
- _weights_manager->run(cur_weights, &_reshape_weights_managed_function));
+ _weights_manager->run(cur_weights, &_reshape_weights_managed_function));
}
else
{
// reshape
auto_init_if_empty(*_cl_buffer.info(),
_input->info()->clone()->set_tensor_shape(reshape).set_data_layout(
- _input->info()->data_layout()));
+ _input->info()->data_layout()));
_cl_reshape.configure(_input, &_cl_buffer);
input_to_use = &_cl_buffer;
}
{
bool is_hybrid = (input->info()->data_type() == DataType::F32 ||
input->info()->data_type() == DataType::F16) &&
- (weights->info()->data_type() == DataType::S8 ||
+ (weights->info()->data_type() == DataType::QSYMM8 ||
weights->info()->data_type() == DataType::QASYMM8_SIGNED);
if (is_hybrid)
{
throw std::runtime_error("CLFullyConnectedReshapingLayer: Unsupported kernel type");
}
-
}();
if (_needs_reshape)
using namespace arm_compute;
CLReduceOperation::CLReduceOperation(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _input(nullptr), _output(nullptr), _axis(),
- _keep_dims(false), _interm_tensors(), _reduce_kernels(), _reshape()
+ : _memory_group(std::move(memory_manager)), _input(nullptr), _output(nullptr), _axis(),
+ _keep_dims(false), _interm_tensors(), _reduce_kernels(), _reshape()
{
}
for (size_t i = 0; i < num_of_kernels; ++i, ++it)
{
ARM_COMPUTE_RETURN_ON_ERROR(
- CLReduceOperationKernel::validate(tensors[i], tensors[i + 1], *it, op));
+ CLReduceOperationKernel::validate(tensors[i], tensors[i + 1], *it, op));
}
if (!keep_dims)
{
ARM_COMPUTE_RETURN_ON_ERROR(
- CLReshapeLayer::validate(&interm_tensors[num_of_interm_tensors - 1], output));
+ CLReshapeLayer::validate(&interm_tensors[num_of_interm_tensors - 1], output));
}
return Status{};
// Output auto inizialitation if not yet initialized
TensorInfo tmp_output_info = *output->info()->clone();
auto_init_if_empty(
- tmp_output_info,
- input->info()->clone()->set_is_resizable(true).set_tensor_shape(output_shape));
+ tmp_output_info,
+ input->info()->clone()->set_is_resizable(true).set_tensor_shape(output_shape));
// Update coordinate on axis
start_coords.set(split_dim, axis_offset);
} // namespace
CLSplitVEx::CLSplitVEx()
- : _input(nullptr), _size_splits(nullptr), _outputs(), _num_splits(0), _slice_functions()
+ : _input(nullptr), _size_splits(nullptr), _outputs(), _num_splits(0), _slice_functions()
{
}
{
CLTopKV2::CLTopKV2()
- : _k(0), _total_bits(0), _bits(0), _radix(0), _hist_buf_size(0), _glob_sum_buf_size(0), _n(0),
- _input(nullptr), _values(nullptr), _indices(nullptr), _qs_idx_buf(), _qs_temp_buf(),
- _hist_buf(), _glob_sum_buf(), _temp_buf(), _first_negative_idx_buf(), _in_key_buf(),
- _out_key_buf(), _in_ind_buf(), _out_ind_buf(), _p_in_key_buf(nullptr),
- _p_out_key_buf(nullptr), _p_in_ind_buf(nullptr), _p_out_ind_buf(nullptr) /*, _qs_kernel(),
- _init_kernel(), _hist_kernel(), _scan_hist_kernel(), _glob_scan_hist_kernel(),
- _paste_hist_kernel(), _reorder_kernel(), _find_first_negative_kernel(),
- _reorder_negatives_kernel(), _store_kernel()*/
+ : _k(0), _total_bits(0), _bits(0), _radix(0), _hist_buf_size(0), _glob_sum_buf_size(0), _n(0),
+ _input(nullptr), _values(nullptr), _indices(nullptr), _qs_idx_buf(), _qs_temp_buf(),
+ _hist_buf(), _glob_sum_buf(), _temp_buf(), _first_negative_idx_buf(), _in_key_buf(),
+ _out_key_buf(), _in_ind_buf(), _out_ind_buf(), _p_in_key_buf(nullptr), _p_out_key_buf(nullptr),
+ _p_in_ind_buf(nullptr), _p_out_ind_buf(nullptr) /*, _qs_kernel(),
+ _init_kernel(), _hist_kernel(), _scan_hist_kernel(), _glob_scan_hist_kernel(),
+ _paste_hist_kernel(), _reorder_kernel(), _find_first_negative_kernel(),
+ _reorder_negatives_kernel(), _store_kernel()*/
{
}
using namespace arm_compute::misc::shape_calculator;
CLTransposeConvLayer::CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_manager(std::move(memory_manager)), _function()
+ : _memory_manager(std::move(memory_manager)), _function()
{
}
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
switch (CLTransposeConvLayer::get_deconvolution_method(
- input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info))
+ input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info))
{
case DeconvolutionMethod::DIRECT:
{
// Validate direct convolution layer
ARM_COMPUTE_RETURN_ON_ERROR(CLDirectTransposeConvLayer::validate(
- input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info));
+ input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info));
break;
}
case DeconvolutionMethod::GEMM:
{
// Validate gemm-based convolution layer
ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMMDeconvolutionLayer::validate(input, weights, bias, output, deconv_info));
+ CLGEMMDeconvolutionLayer::validate(input, weights, bias, output, deconv_info));
break;
}
default:
}
DeconvolutionMethod CLTransposeConvLayer::get_deconvolution_method(
- const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias,
- ITensorInfo *output, const PadStrideInfo &deconv_info, unsigned int invalid_right,
- unsigned int invalid_bottom, const WeightsInfo &weights_info)
+ const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias,
+ ITensorInfo *output, const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom, const WeightsInfo &weights_info)
{
ARM_COMPUTE_UNUSED(output, bias, weights_info);
Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output)
{
ARM_COMPUTE_RETURN_ON_ERROR(
- NEGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
+ NEGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
return Status{};
}
}
NEFullyConnectedHybridLayer::NEFullyConnectedHybridLayer(
- std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reshape_weights_function(), _quant_input_kernel(),
- _mm_gemmlowp(), _accumulate_biases_kernel(), _reshape_weights_output(), _quantized_input(),
- _scale_factor(), _original_weights(nullptr), _are_weights_reshaped(false),
- _accumulate_biases(false), _is_prepared(false)
+ std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _reshape_weights_function(), _quant_input_kernel(),
+ _mm_gemmlowp(), _accumulate_biases_kernel(), _reshape_weights_output(), _quantized_input(),
+ _scale_factor(), _original_weights(nullptr), _are_weights_reshaped(false),
+ _accumulate_biases(false), _is_prepared(false)
{
}
// Perform validate step
ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedHybridLayer::validate(
- input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
- fc_info));
+ input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
+ fc_info));
_are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
_accumulate_biases = false;
bool _is_fc_after_conv;
if (is_batched_fc_layer)
{
- _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
- (std::equal(input->info()->tensor_shape().cbegin() + 3,
- input->info()->tensor_shape().cend(),
- output->info()->tensor_shape().cbegin() + 1));
+ _is_fc_after_conv =
+ (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
}
else
{
{
// Reshape the weights
_reshape_weights_output.allocator()->init(
- weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights->info())));
+ weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights->info())));
_reshape_weights_function.configure(weights_to_use, &_reshape_weights_output);
weights_to_use = &_reshape_weights_output;
}
// Quantize input
_quantized_input.allocator()->init(
- input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::QASYMM8_SIGNED));
+ input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
+ DataType::QASYMM8_SIGNED));
_scale_factor.allocator()->init(
- TensorInfo(TensorShape{output->info()->dimension(1)}, 1, DataType::F32));
+ TensorInfo(TensorShape{output->info()->dimension(1)}, 1, DataType::F32));
_quant_input_kernel.configure(input, &_quantized_input, &_scale_factor);
// GEMM
_gemmlowp_output.allocator()->init(
- output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
configure_mm(&_quantized_input, weights_to_use, &_gemmlowp_output);
// Multiply scale
bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
const ITensorInfo &reshaped_weights =
- TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights)));
+ TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights)));
// Configure accumulate biases kernel for non quantized asymmetric types
if (biases != nullptr)
{
// Validate reshape weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- NEFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights));
+ NEFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights));
weights_to_use = &reshaped_weights;
}
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
// Validate quantization kernel
- const ITensorInfo &quantized_input =
- TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::QASYMM8_SIGNED));
+ const ITensorInfo &quantized_input = TensorInfo(
+ input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::QASYMM8_SIGNED));
const ITensorInfo &scale_factor = TensorInfo(TensorShape{output->dimension(1)}, 1, DataType::F32);
ARM_COMPUTE_RETURN_ON_ERROR(
- NEQuantizationSymmetricKernel::validate(input, &quantized_input, &scale_factor));
+ NEQuantizationSymmetricKernel::validate(input, &quantized_input, &scale_factor));
const ITensorInfo &gemmlowp_output = TensorInfo(
- output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
// Validate matrix multiply kernel
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output));
ARM_COMPUTE_RETURN_ON_ERROR(NEMultiplyScaleFactorKernel::validate(
- &gemmlowp_output, &scale_factor, output, weights->quantization_info().uniform().scale));
+ &gemmlowp_output, &scale_factor, output, weights->quantization_info().uniform().scale));
return Status{};
}
// Validate gemmlowp function
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate(
- &input.clone()->set_quantization_info(input_quantization_info),
- &weights.clone()->set_quantization_info(weights_quantization_info), nullptr, &output));
+ &input.clone()->set_quantization_info(input_quantization_info),
+ &weights.clone()->set_quantization_info(weights_quantization_info), nullptr, &output));
}
else
{
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMM::validate(
- &input, &weights, nullptr, &output, 1.f, 0.0f,
- GEMMInfo(false, false, false /* Reshape weights only for the first run */)));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ NEGEMM::validate(&input, &weights, nullptr, &output, 1.f, 0.0f,
+ GEMMInfo(false, false, false /* Reshape weights only for the first run */)));
}
return Status{};
} // namespace
NEFullyConnectedLayerEx::NEFullyConnectedLayerEx(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _flatten_kernel(), _convert_weights(),
- _reshape_weights_function(), _mm_gemm(), _mm_gemmlowp(), _gemmlowp_output_stage(),
- _accumulate_biases_kernel(), _flatten_output(), _gemmlowp_output(),
- _converted_weights_output(), _reshape_weights_output(), _original_weights(nullptr),
- _are_weights_converted(true), _are_weights_reshaped(false), _is_fc_after_conv(false),
- _accumulate_biases(false), _is_quantized(false), _is_prepared(false)
+ : _memory_group(std::move(memory_manager)), _flatten_kernel(), _convert_weights(),
+ _reshape_weights_function(), _mm_gemm(), _mm_gemmlowp(), _gemmlowp_output_stage(),
+ _accumulate_biases_kernel(), _flatten_output(), _gemmlowp_output(), _converted_weights_output(),
+ _reshape_weights_output(), _original_weights(nullptr), _are_weights_converted(true),
+ _are_weights_reshaped(false), _is_fc_after_conv(false), _accumulate_biases(false),
+ _is_quantized(false), _is_prepared(false)
{
}
const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
input->info()->set_quantization_info(QuantizationInfo(
- input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+ input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
weights->info()->set_quantization_info(QuantizationInfo(
- weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
+ weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
// Configure gemmlowp function
_mm_gemmlowp.configure(input, weights, nullptr, output);
ITensor *output)
{
ARM_COMPUTE_ERROR_ON(
- (weights->info()->dimension(1) !=
- (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
+ (weights->info()->dimension(1) !=
+ (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
// If the fully connected layer is called after a convolution layer, the input tensor must be
// linearized
// Initialize output tensor for flatten
TensorShape shape_flatten = compute_flatten_shape(input->info());
_flatten_output.allocator()->init(
- input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- shape_flatten));
+ input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_flatten));
// Configure flatten kernel
_memory_group.manage(&_flatten_output);
// Perform validate step
ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedLayerEx::validate(
- input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
- fc_info));
+ input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
+ fc_info));
_are_weights_converted = true;
_are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
if (_is_quantized)
{
_gemmlowp_output.allocator()->init(
- output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
- DataType::S32));
+ output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
}
// Configure accumulate biases kernel for non quantized asymmetric types
const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
if (is_batched_fc_layer)
{
- _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
- (std::equal(input->info()->tensor_shape().cbegin() + 3,
- input->info()->tensor_shape().cend(),
- output->info()->tensor_shape().cbegin() + 1));
+ _is_fc_after_conv =
+ (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
}
else
{
bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
const ITensorInfo &flatten_input =
- TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_flatten_shape(input)));
+ TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_flatten_shape(input)));
const ITensorInfo &reshaped_weights =
- TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
- compute_transposed_shape(*weights)));
+ TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights)));
const ITensorInfo &converted_weights =
- weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
- : TensorInfo(*reshaped_weights.clone());
+ weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
+ : TensorInfo(*reshaped_weights.clone());
const ITensorInfo &gemmlowp_output = TensorInfo(
- output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
// Configure accumulate biases kernel for non quantized asymmetric types
if (biases != nullptr && !is_quantized)
{
// Validate reshape weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(
- NEFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights));
+ NEFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights));
weights_to_use = &reshaped_weights;
}
{
// Validate convert weights kernel
ARM_COMPUTE_RETURN_ON_ERROR(NEConvertFullyConnectedWeights::validate(
- weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout));
+ weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout));
weights_to_use = &converted_weights;
}
{
// Fully Connected layer after a Convolution Layer without batches
ARM_COMPUTE_RETURN_ERROR_ON(
- (weights_to_use->dimension(1) !=
- (input->dimension(0) * input->dimension(1) * input->dimension(2))));
+ (weights_to_use->dimension(1) !=
+ (input->dimension(0) * input->dimension(1) * input->dimension(2))));
// Validate flatten kernel
ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayerKernel::validate(input, &flatten_input));
if (is_quantized)
{
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(
- &gemmlowp_output, biases, output));
+ &gemmlowp_output, biases, output));
}
return Status{};
assert(kernel_type == KernelType::PREPROCESSED_WEIGHTS);
bool is_hybrid = input->info()->data_type() == DataType::F32 &&
- (weights->info()->data_type() == DataType::S8 ||
+ (weights->info()->data_type() == DataType::QSYMM8 ||
weights->info()->data_type() == DataType::QASYMM8_SIGNED);
if (is_hybrid)
namespace arm_compute
{
NEInstanceNormalizationLayerEx::NEInstanceNormalizationLayerEx(
- std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _normalization_kernel(), _is_nchw(false),
- _permute_input(), _permute_output(), _permuted_input(), _permuted_output()
+ std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _normalization_kernel(), _is_nchw(false),
+ _permute_input(), _permute_output(), _permuted_input(), _permuted_output()
{
}
float epsilon)
{
return NEInstanceNormalizationLayerKernelEx::validate(
- &input->clone()->set_data_layout(DataLayout::NCHW),
- &output->clone()->set_data_layout(DataLayout::NCHW), gamma, beta, epsilon);
+ &input->clone()->set_data_layout(DataLayout::NCHW),
+ &output->clone()->set_data_layout(DataLayout::NCHW), gamma, beta, epsilon);
}
void NEInstanceNormalizationLayerEx::run()
using namespace arm_compute;
NEReduceOperation::NEReduceOperation(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(),
- _reduction_ops(), _keep_dims()
+ : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(),
+ _reduction_ops(), _keep_dims()
{
}
for (unsigned int i = 0; i < _reduction_ops; ++i)
{
TensorShape out_shape =
- i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
+ i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
out_shape.set(axis_local[i], 1);
auto in = (i == 0) ? input : (&_reduced_outs[i - 1]);
using namespace arm_compute;
NEReduceSum::NEReduceSum(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(),
- _reduction_ops(), _keep_dims()
+ : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(),
+ _reduction_ops(), _keep_dims()
{
}
for (unsigned int i = 0; i < _reduction_ops; ++i)
{
TensorShape out_shape =
- i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
+ i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
out_shape.set(axis_local[i], 1);
auto in = (i == 0) ? input : (&_reduced_outs[i - 1]);
_reduced_outs[i].allocator()->init(TensorInfo(out_shape, input->info()->num_channels(),
input->info()->data_type(),
input->info()->quantization_info())
- .set_data_layout(input->info()->data_layout()));
+ .set_data_layout(input->info()->data_layout()));
_memory_group.manage(&_reduced_outs[i]);
_reduction_kernels[i].configure(in, &_reduced_outs[i], axis_local[i],
ReductionOperation::SUM);
{
NETransposeConvLayer::NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
- : _memory_group(std::move(memory_manager)),
- _conv_f(),
- _upsample_f(),
- _flip_weights(),
- _scaled_output(),
- _weights_flipped(),
- _flip_axis(),
- _original_weights(nullptr),
- _input(nullptr),
- _info(),
- _is_prepared(false)
+ : _memory_group(std::move(memory_manager)), _conv_f(), _upsample_f(), _flip_weights(),
+ _scaled_output(), _weights_flipped(), _flip_axis(), _original_weights(nullptr), _input(nullptr),
+ _info(), _is_prepared(false)
{
}
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input);
const unsigned int width_idx =
- get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH);
+ get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH);
const unsigned int height_idx =
- get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT);
+ get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT);
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) != weights->dimension(height_idx));
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) < 1);
auto out_dims = transposeconv_output_dimensions(
- input->dimension(width_idx), input->dimension(height_idx), weights->dimension(width_idx),
- weights->dimension(height_idx), info, invalid_right, invalid_bottom);
+ input->dimension(width_idx), input->dimension(height_idx), weights->dimension(width_idx),
+ weights->dimension(height_idx), info, invalid_right, invalid_bottom);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
if (bias != nullptr)
unsigned int pad_right = 0;
unsigned int pad_top = 0;
unsigned int pad_bottom = 0;
- const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top,
- pad_bottom);
+ const TensorShape scale_out_shape =
+ compute_transposeconv_upsampled_shape(*input, *weights, info, out_dims, invalid_right,
+ invalid_bottom, pad_left, pad_right, pad_top, pad_bottom);
TensorInfo scale_out_info(
- input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape));
+ input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape));
const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
const unsigned int batches_idx =
- get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES);
+ get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES);
const unsigned int channel_idx =
- get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::CHANNEL);
+ get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::CHANNEL);
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(batches_idx) !=
scale_out_info.dimension(batches_idx));
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(channel_idx) !=
scale_out_info.dimension(channel_idx));
- ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayer::validate(&scale_out_info, weights, bias, output,
- conv_info, WeightsInfo()));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ NEConvolutionLayer::validate(&scale_out_info, weights, bias, output, conv_info, WeightsInfo()));
return Status{};
}
// Perform validation step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_ERROR_THROW_ON(NETransposeConvLayer::validate(
- input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(),
- info, invalid_right, invalid_bottom));
+ input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(),
+ info, invalid_right, invalid_bottom));
const DataLayout data_layout = input->info()->data_layout();
const unsigned int width_idx =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const unsigned int height_idx =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
auto out_dims = transposeconv_output_dimensions(
- input->info()->dimension(width_idx), input->info()->dimension(height_idx),
- weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info,
- invalid_right, invalid_bottom);
+ input->info()->dimension(width_idx), input->info()->dimension(height_idx),
+ weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info,
+ invalid_right, invalid_bottom);
const TensorShape output_shape =
- compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
+ compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
_input = input;
_original_weights = weights;
const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
- pad_right, pad_top, pad_bottom);
+ *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
+ pad_right, pad_top, pad_bottom);
const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
DimensionRoundingType::FLOOR);
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_CPU_BACKEND_THREADPOOL_H_
+#define __NNFW_CKER_CPU_BACKEND_THREADPOOL_H_
+
+#include <ruy/context.h> // from @ruy
+#include <ruy/thread_pool.h> // from @ruy
+
+namespace nnfw
+{
+namespace cker
+{
+namespace cpu_backend_threadpool
+{
+
+using Task = ruy::Task;
+
+template <typename TaskType>
+void Execute(int tasks_count, TaskType *tasks, ruy::Context *ruy_context)
+{
+ assert(tasks_count <= ruy_context->max_num_threads());
+ ruy_context->mutable_thread_pool()->Execute(tasks_count, tasks);
+}
+
+} // namespace cpu_backend_threadpool
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_CPU_BACKEND_THREADPOOL_H_
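A minimal usage sketch of the Execute() helper added above. The RowSumTask type, the SumRows() wrapper and the include path are illustrative assumptions and not part of this change; only cpu_backend_threadpool::Task, Execute() and the ruy::Context calls they rely on come from the new header itself. Tasks derive from ruy::Task, override Run(), and the task count passed to Execute() must not exceed ruy_context->max_num_threads().

#include <algorithm>
#include <vector>

#include <ruy/context.h>            // from @ruy
#include "CpuBackendThreadpool.h"   // hypothetical path to the header added above

// One task sums a contiguous block of rows; Run() is invoked by ruy's thread
// pool, either on a worker thread or inline on the calling thread.
struct RowSumTask : public nnfw::cker::cpu_backend_threadpool::Task
{
  const float *matrix = nullptr;
  int cols = 0;
  int row_begin = 0;
  int row_end = 0;
  float *row_sums = nullptr;

  void Run() override
  {
    for (int r = row_begin; r < row_end; ++r)
    {
      float acc = 0.f;
      for (int c = 0; c < cols; ++c)
        acc += matrix[r * cols + c];
      row_sums[r] = acc;
    }
  }
};

void SumRows(const float *matrix, int rows, int cols, float *row_sums, ruy::Context *ctx)
{
  // Execute() asserts tasks_count <= ctx->max_num_threads(), so never create
  // more tasks than the context allows.
  const int num_tasks = std::max(1, std::min(rows, ctx->max_num_threads()));
  const int rows_per_task = (rows + num_tasks - 1) / num_tasks;

  std::vector<RowSumTask> tasks(num_tasks);
  for (int t = 0; t < num_tasks; ++t)
  {
    tasks[t].matrix = matrix;
    tasks[t].cols = cols;
    tasks[t].row_begin = std::min(t * rows_per_task, rows);
    tasks[t].row_end = std::min((t + 1) * rows_per_task, rows);
    tasks[t].row_sums = row_sums;
  }
  nnfw::cker::cpu_backend_threadpool::Execute(num_tasks, tasks.data(), ctx);
}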
const int kWeightsPerUint32 = 4;
int8 *shuffled_vectors = reinterpret_cast<int8 *>(
- aligned_alloc(kWeightsPerUint32, n_batch * m_cols, shuffled_vectors_free));
+ aligned_alloc(kWeightsPerUint32, n_batch * m_cols, shuffled_vectors_free));
for (int i = 0; i < n_batch; i += 4)
{
while (unshuffled_vec0_ptr != end_vec0_ptr)
{
asm volatile(
- // This code path requires that (n_cols % 16) == 0 so we can safely
- // read in 16-byte chunks from each row.
- "ld1 {v0.16b}, [%[unshuffled_vec0_ptr]], #16\n"
- "ld1 {v1.16b}, [%[unshuffled_vec1_ptr]], #16\n"
- "ld1 {v2.16b}, [%[unshuffled_vec2_ptr]], #16\n"
- "ld1 {v3.16b}, [%[unshuffled_vec3_ptr]], #16\n"
-
- "st4 {v0.s, v1.s, v2.s, v3.s}[0], [%[shuffled_vectors_ptr]], #16\n"
- "st4 {v0.s, v1.s, v2.s, v3.s}[1], [%[shuffled_vectors_ptr]], #16\n"
- "st4 {v0.s, v1.s, v2.s, v3.s}[2], [%[shuffled_vectors_ptr]], #16\n"
- "st4 {v0.s, v1.s, v2.s, v3.s}[3], [%[shuffled_vectors_ptr]], #16\n"
-
- : [unshuffled_vec0_ptr] "+r"(unshuffled_vec0_ptr),
- [unshuffled_vec1_ptr] "+r"(unshuffled_vec1_ptr),
- [unshuffled_vec2_ptr] "+r"(unshuffled_vec2_ptr),
- [unshuffled_vec3_ptr] "+r"(unshuffled_vec3_ptr),
- [shuffled_vectors_ptr] "+r"(shuffled_vectors_ptr)
- :
- : "v0", "v1", "v2", "v3", "cc", "memory");
+ // This code path requires that (n_cols % 16) == 0 so we can safely
+ // read in 16-byte chunks from each row.
+ "ld1 {v0.16b}, [%[unshuffled_vec0_ptr]], #16\n"
+ "ld1 {v1.16b}, [%[unshuffled_vec1_ptr]], #16\n"
+ "ld1 {v2.16b}, [%[unshuffled_vec2_ptr]], #16\n"
+ "ld1 {v3.16b}, [%[unshuffled_vec3_ptr]], #16\n"
+
+ "st4 {v0.s, v1.s, v2.s, v3.s}[0], [%[shuffled_vectors_ptr]], #16\n"
+ "st4 {v0.s, v1.s, v2.s, v3.s}[1], [%[shuffled_vectors_ptr]], #16\n"
+ "st4 {v0.s, v1.s, v2.s, v3.s}[2], [%[shuffled_vectors_ptr]], #16\n"
+ "st4 {v0.s, v1.s, v2.s, v3.s}[3], [%[shuffled_vectors_ptr]], #16\n"
+
+ : [ unshuffled_vec0_ptr ] "+r"(unshuffled_vec0_ptr),
+ [ unshuffled_vec1_ptr ] "+r"(unshuffled_vec1_ptr),
+ [ unshuffled_vec2_ptr ] "+r"(unshuffled_vec2_ptr),
+ [ unshuffled_vec3_ptr ] "+r"(unshuffled_vec3_ptr),
+ [ shuffled_vectors_ptr ] "+r"(shuffled_vectors_ptr)
+ :
+ : "v0", "v1", "v2", "v3", "cc", "memory");
}
}
const int8 *mat_ptr3 = matrix + ((row + 3) * m_cols);
asm volatile(
- // Zero out the accumulator registers.
- "dup v0.4s, wzr\n"
- "dup v1.4s, wzr\n"
- "dup v2.4s, wzr\n"
- "dup v3.4s, wzr\n"
-
- "1:\n" // batch_cols_loop
-
- // Read 16 more bytes from a pair of matrix rows.
- "ld1 {v12.16b}, [%[mat_ptr0]], #16\n"
-
- // Prefetch two rows ahead.
- "prfm pldl1strm, [%[mat_ptr2]]\n"
- "prfm pldl1strm, [%[mat_ptr3]]\n"
-
- // Read from input vectors 4 times; 64 bytes total.
- // Each 16-byte register contains parts of 4 vectors; see the
- // shuffle logic above.
-
- // From Benoit, places to look in the future:
- // - Move load instructions further from sdot
- // - Switch loop use-then-reload
- // - Do partial unrolling to use register space better
- "ld1 {v8.16b}, [%[vec_ptr]], #16\n"
- ".word 0x4f8ce100 // sdot v0.4s, v8.16b, v12.4b[0]\n"
- "ld1 {v9.16b}, [%[vec_ptr]], #16\n"
- ".word 0x4face121 // sdot v1.4s, v9.16b, v12.4b[1]\n"
- "ld1 {v10.16b}, [%[vec_ptr]], #16\n"
- ".word 0x4f8ce940 // sdot v0.4s, v10.16b, v12.4b[2]\n"
- "ld1 {v11.16b}, [%[vec_ptr]], #16\n"
- ".word 0x4face961 // sdot v1.4s, v11.16b, v12.4b[3]\n"
-
- // Update prefetch pointers.
- "add %[mat_ptr2], %[mat_ptr2], #16\n"
- "add %[mat_ptr3], %[mat_ptr3], #16\n"
-
- // Re-use those vectors for the next row as well.
- "ld1 {v13.16b}, [%[mat_ptr1]], #16\n"
- ".word 0x4f8de102 // sdot v2.4s, v8.16b, v13.4b[0]\n"
- ".word 0x4fade123 // sdot v3.4s, v9.16b, v13.4b[1]\n"
- ".word 0x4f8de942 // sdot v2.4s, v10.16b, v13.4b[2]\n"
- ".word 0x4fade963 // sdot v3.4s, v11.16b, v13.4b[3]\n"
-
- // If we're not done with these rows, continue.
- "cmp %[mat_ptr0], %[mat_ptr0_end]\n"
- "bne 1b\n" // batch_cols_loop
-
- // Done with the rows, sum the results.
- "add v0.4s, v0.4s, v1.4s\n"
- "add v2.4s, v2.4s, v3.4s\n"
-
- // Convert the per-vector sums to floating point.
- "scvtf v0.4s, v0.4s\n"
- "scvtf v1.4s, v2.4s\n"
-
- // Fetch scale factors.
- "ld1 {v4.4s}, [%[scaling_factors_ptr]]\n"
-
- // Multiply scale factors times sums.
- "fmul v0.4s, v4.4s, v0.4s\n"
- "fmul v1.4s, v4.4s, v1.4s\n"
-
- // Load previous result values.
- // The result position is:
- // result[batch * m_rows + row]
- // Here that is factored into:
- // result_ptr = result + row
- // *result_ptr = res[0]
- // (uint8*)result_ptr += (m_rows * sizeof(float))
- // *result_ptr = res[1]
- // ...
- // Since we're reading two rows at a time, though, we read both
- // result[batch * m_rows + row]
- // and
- // result[batch * m_rows + row + 1]
- "ld2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n"
- "ld2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n"
- "ld2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n"
- "ld2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n"
-
- // Go back to the starting position (subtract wide_rows * 4).
- "sub %[result_ptr], %[result_ptr], %[wide_rows], lsl #2\n"
-
- // Add previous result values.
- "fadd v9.4s, v9.4s, v0.4s\n"
- "fadd v10.4s, v10.4s, v1.4s\n"
-
- // Store results.
- "st2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n"
- "st2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n"
- "st2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n"
- "st2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n"
- : [mat_ptr0] "+r"(mat_ptr0), [mat_ptr1] "+r"(mat_ptr1), [vec_ptr] "+r"(vec_ptr),
- [result_ptr] "+r"(result_ptr), [mat_ptr2] "+r"(mat_ptr2), [mat_ptr3] "+r"(mat_ptr3)
- : [mat_ptr0_end] "r"(mat_ptr0_end), [scaling_factors_ptr] "r"(scaling_factors_ptr),
- [wide_rows] "r"(wide_rows)
- : "x0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12",
- "v13", "cc", "memory");
+ // Zero out the accumulator registers.
+ "dup v0.4s, wzr\n"
+ "dup v1.4s, wzr\n"
+ "dup v2.4s, wzr\n"
+ "dup v3.4s, wzr\n"
+
+ "1:\n" // batch_cols_loop
+
+ // Read 16 more bytes from a pair of matrix rows.
+ "ld1 {v12.16b}, [%[mat_ptr0]], #16\n"
+
+ // Prefetch two rows ahead.
+ "prfm pldl1strm, [%[mat_ptr2]]\n"
+ "prfm pldl1strm, [%[mat_ptr3]]\n"
+
+ // Read from input vectors 4 times; 64 bytes total.
+ // Each 16-byte register contains parts of 4 vectors; see the
+ // shuffle logic above.
+
+ // From Benoit, places to look in the future:
+ // - Move load instructions further from sdot
+ // - Switch loop use-then-reload
+ // - Do partial unrolling to use register space better
+ "ld1 {v8.16b}, [%[vec_ptr]], #16\n"
+ ".word 0x4f8ce100 // sdot v0.4s, v8.16b, v12.4b[0]\n"
+ "ld1 {v9.16b}, [%[vec_ptr]], #16\n"
+ ".word 0x4face121 // sdot v1.4s, v9.16b, v12.4b[1]\n"
+ "ld1 {v10.16b}, [%[vec_ptr]], #16\n"
+ ".word 0x4f8ce940 // sdot v0.4s, v10.16b, v12.4b[2]\n"
+ "ld1 {v11.16b}, [%[vec_ptr]], #16\n"
+ ".word 0x4face961 // sdot v1.4s, v11.16b, v12.4b[3]\n"
+
+ // Update prefetch pointers.
+ "add %[mat_ptr2], %[mat_ptr2], #16\n"
+ "add %[mat_ptr3], %[mat_ptr3], #16\n"
+
+ // Re-use those vectors for the next row as well.
+ "ld1 {v13.16b}, [%[mat_ptr1]], #16\n"
+ ".word 0x4f8de102 // sdot v2.4s, v8.16b, v13.4b[0]\n"
+ ".word 0x4fade123 // sdot v3.4s, v9.16b, v13.4b[1]\n"
+ ".word 0x4f8de942 // sdot v2.4s, v10.16b, v13.4b[2]\n"
+ ".word 0x4fade963 // sdot v3.4s, v11.16b, v13.4b[3]\n"
+
+ // If we're not done with these rows, continue.
+ "cmp %[mat_ptr0], %[mat_ptr0_end]\n"
+ "bne 1b\n" // batch_cols_loop
+
+ // Done with the rows, sum the results.
+ "add v0.4s, v0.4s, v1.4s\n"
+ "add v2.4s, v2.4s, v3.4s\n"
+
+ // Convert the per-vector sums to floating point.
+ "scvtf v0.4s, v0.4s\n"
+ "scvtf v1.4s, v2.4s\n"
+
+ // Fetch scale factors.
+ "ld1 {v4.4s}, [%[scaling_factors_ptr]]\n"
+
+ // Multiply scale factors times sums.
+ "fmul v0.4s, v4.4s, v0.4s\n"
+ "fmul v1.4s, v4.4s, v1.4s\n"
+
+ // Load previous result values.
+ // The result position is:
+ // result[batch * m_rows + row]
+ // Here that is factored into:
+ // result_ptr = result + row
+ // *result_ptr = res[0]
+ // (uint8*)result_ptr += (m_rows * sizeof(float))
+ // *result_ptr = res[1]
+ // ...
+ // Since we're reading two rows at a time, though, we read both
+ // result[batch * m_rows + row]
+ // and
+ // result[batch * m_rows + row + 1]
+ "ld2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n"
+ "ld2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n"
+ "ld2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n"
+ "ld2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n"
+
+ // Go back to the starting position (subtract wide_rows * 4).
+ "sub %[result_ptr], %[result_ptr], %[wide_rows], lsl #2\n"
+
+ // Add previous result values.
+ "fadd v9.4s, v9.4s, v0.4s\n"
+ "fadd v10.4s, v10.4s, v1.4s\n"
+
+ // Store results.
+ "st2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n"
+ "st2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n"
+ "st2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n"
+ "st2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n"
+ : [ mat_ptr0 ] "+r"(mat_ptr0), [ mat_ptr1 ] "+r"(mat_ptr1), [ vec_ptr ] "+r"(vec_ptr),
+ [ result_ptr ] "+r"(result_ptr), [ mat_ptr2 ] "+r"(mat_ptr2), [ mat_ptr3 ] "+r"(mat_ptr3)
+ : [ mat_ptr0_end ] "r"(mat_ptr0_end), [ scaling_factors_ptr ] "r"(scaling_factors_ptr),
+ [ wide_rows ] "r"(wide_rows)
+ : "x0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12",
+ "v13", "cc", "memory");
}
}
}
static void DotprodMatrixBatchFourVectorMultiplyAccumulate(
- const int8_t *__restrict__ matrix, const int m_rows, const int m_cols, const int8_t *vectors,
- const float *scaling_factors, int n_batch, float *__restrict__ result,
- const float *per_channel_scale, const int32_t *input_offset, int32_t *row_sums)
+ const int8_t *__restrict__ matrix, const int m_rows, const int m_cols, const int8_t *vectors,
+ const float *scaling_factors, int n_batch, float *__restrict__ result,
+ const float *per_channel_scale, const int32_t *input_offset, int32_t *row_sums)
{
void *shuffled_vectors_free;
const int8_t *shuffled_vectors = ShuffleVectors(vectors, n_batch, m_cols, &shuffled_vectors_free);
const int32_t *batch_offsets_ptr = input_offset + batch;
const int32_t is_channel_scale_nullptr = per_channel_scale == nullptr;
const int32_t is_row_sums_nullptr = row_sums_ptr == nullptr;
- asm volatile("dup v0.4s, wzr\n"
- "dup v1.4s, wzr\n"
- "dup v2.4s, wzr\n"
- "dup v3.4s, wzr\n"
- // Load zero points.
- "ld1 {v7.4s}, [%[batch_offsets_ptr]]\n"
- "ld1 {v4.4s}, [%[scaling_factors_ptr]]\n"
- // Zero out zero point accumulators.
- "dup v14.4s, wzr\n"
- "dup v15.4s, wzr\n"
-
- // Load per channel scales if not null.
- "cmp %w[is_channel_scale_nullptr], #0\n"
- "bne 1f\n"
- "ld1r {v16.4s}, [%[channel_scales_ptr]], #4\n"
- "ld1r {v17.4s}, [%[channel_scales_ptr]]\n"
- "fmul v16.4s, v16.4s, v4.4s\n"
- "fmul v17.4s, v17.4s, v4.4s\n"
- "b 2f\n"
- "1:\n"
- "mov v16.16b, v4.16b\n"
- "mov v17.16b, v4.16b\n"
- "2:\n"
- "ld1 {v12.16b}, [%[mat_ptr0]], #16\n"
- "ld1 {v8.16b}, [%[vec_ptr]], #16\n"
- ".word 0x4f8ce100 // sdot v0.4s, v8.16b, v12.4b[0]\n"
- "ld1 {v9.16b}, [%[vec_ptr]], #16\n"
- ".word 0x4face121 // sdot v1.4s, v9.16b, v12.4b[1]\n"
- "ld1 {v10.16b}, [%[vec_ptr]], #16\n"
- ".word 0x4f8ce940 // sdot v0.4s, v10.16b, v12.4b[2]\n"
- "ld1 {v11.16b}, [%[vec_ptr]], #16\n"
- ".word 0x4face961 // sdot v1.4s, v11.16b, v12.4b[3]\n"
- "ld1 {v13.16b}, [%[mat_ptr1]], #16\n"
- ".word 0x4f8de102 // sdot v2.4s, v8.16b, v13.4b[0]\n"
- ".word 0x4fade123 // sdot v3.4s, v9.16b, v13.4b[1]\n"
- ".word 0x4f8de942 // sdot v2.4s, v10.16b, v13.4b[2]\n"
- ".word 0x4fade963 // sdot v3.4s, v11.16b, v13.4b[3]\n"
- "cmp %w[is_row_sums_nullptr], #1\n"
- "bne 3f\n"
- // Accumulate row_sums for zero point calculations.
- "saddlp v12.8h, v12.16b\n"
- "saddlp v13.8h, v13.16b\n"
- "sadalp v14.4s, v12.8h\n"
- "sadalp v15.4s, v13.8h\n"
- "3:\n"
- "cmp %[mat_ptr0], %[mat_ptr0_end]\n"
- "bne 2b\n"
- "add v0.4s, v0.4s, v1.4s\n"
- "add v2.4s, v2.4s, v3.4s\n"
-
- "cmp %w[is_row_sums_nullptr], #1\n"
- "bne 4f\n"
- // Calculate zero point offsets.
- "addv s14, v14.4s\n"
- "addv s15, v15.4s\n"
- "dup v14.4s, v14.s[0]\n"
- "dup v15.4s, v15.s[0]\n"
- "b 5f\n"
- "4:\n"
- "ld1r {v14.4s}, [%[row_sums_ptr]], #4\n"
- "ld1r {v15.4s}, [%[row_sums_ptr]]\n"
- "5:\n"
-
- "mul v14.4s, v14.4s, v7.4s\n"
- "mul v15.4s, v15.4s, v7.4s\n"
- "sub v0.4s, v0.4s, v14.4s\n"
- "sub v2.4s, v2.4s, v15.4s\n"
-
- "scvtf v0.4s, v0.4s\n"
- "scvtf v1.4s, v2.4s\n"
-
- // Multiply scale.
- "fmul v0.4s, v16.4s, v0.4s\n"
- "fmul v1.4s, v17.4s, v1.4s\n"
-
- "ld2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n"
- "ld2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n"
- "ld2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n"
- "ld2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n"
- "sub %[result_ptr], %[result_ptr], %[wide_rows], lsl #2\n"
- "fadd v9.4s, v9.4s, v0.4s\n"
- "fadd v10.4s, v10.4s, v1.4s\n"
- "st2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n"
- "st2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n"
- "st2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n"
- "st2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n"
- : [mat_ptr0] "+r"(mat_ptr0), [mat_ptr1] "+r"(mat_ptr1), [vec_ptr] "+r"(vec_ptr),
- [result_ptr] "+r"(result_ptr), [row_sums_ptr] "+r"(row_sums_ptr)
- : [mat_ptr0_end] "r"(mat_ptr0_end),
- [scaling_factors_ptr] "r"(scaling_factors_ptr), [wide_rows] "r"(wide_rows),
- [channel_scales_ptr] "r"(channel_scales_ptr),
- [batch_offsets_ptr] "r"(batch_offsets_ptr),
- [is_channel_scale_nullptr] "r"(is_channel_scale_nullptr),
- [is_row_sums_nullptr] "r"(is_row_sums_nullptr)
- : "x0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11",
- "v12", "v13", "v14", "v15", "v16", "v17", "w0", "w1", "cc", "memory");
+ asm volatile(
+ "dup v0.4s, wzr\n"
+ "dup v1.4s, wzr\n"
+ "dup v2.4s, wzr\n"
+ "dup v3.4s, wzr\n"
+ // Load zero points.
+ "ld1 {v7.4s}, [%[batch_offsets_ptr]]\n"
+ "ld1 {v4.4s}, [%[scaling_factors_ptr]]\n"
+ // Zero out zero point accumulators.
+ "dup v14.4s, wzr\n"
+ "dup v15.4s, wzr\n"
+
+ // Load per channel scales if not null.
+ "cmp %w[is_channel_scale_nullptr], #0\n"
+ "bne 1f\n"
+ "ld1r {v16.4s}, [%[channel_scales_ptr]], #4\n"
+ "ld1r {v17.4s}, [%[channel_scales_ptr]]\n"
+ "fmul v16.4s, v16.4s, v4.4s\n"
+ "fmul v17.4s, v17.4s, v4.4s\n"
+ "b 2f\n"
+ "1:\n"
+ "mov v16.16b, v4.16b\n"
+ "mov v17.16b, v4.16b\n"
+ "2:\n"
+ "ld1 {v12.16b}, [%[mat_ptr0]], #16\n"
+ "ld1 {v8.16b}, [%[vec_ptr]], #16\n"
+ ".word 0x4f8ce100 // sdot v0.4s, v8.16b, v12.4b[0]\n"
+ "ld1 {v9.16b}, [%[vec_ptr]], #16\n"
+ ".word 0x4face121 // sdot v1.4s, v9.16b, v12.4b[1]\n"
+ "ld1 {v10.16b}, [%[vec_ptr]], #16\n"
+ ".word 0x4f8ce940 // sdot v0.4s, v10.16b, v12.4b[2]\n"
+ "ld1 {v11.16b}, [%[vec_ptr]], #16\n"
+ ".word 0x4face961 // sdot v1.4s, v11.16b, v12.4b[3]\n"
+ "ld1 {v13.16b}, [%[mat_ptr1]], #16\n"
+ ".word 0x4f8de102 // sdot v2.4s, v8.16b, v13.4b[0]\n"
+ ".word 0x4fade123 // sdot v3.4s, v9.16b, v13.4b[1]\n"
+ ".word 0x4f8de942 // sdot v2.4s, v10.16b, v13.4b[2]\n"
+ ".word 0x4fade963 // sdot v3.4s, v11.16b, v13.4b[3]\n"
+ "cmp %w[is_row_sums_nullptr], #1\n"
+ "bne 3f\n"
+ // Accumulate row_sums for zero point calculations.
+ "saddlp v12.8h, v12.16b\n"
+ "saddlp v13.8h, v13.16b\n"
+ "sadalp v14.4s, v12.8h\n"
+ "sadalp v15.4s, v13.8h\n"
+ "3:\n"
+ "cmp %[mat_ptr0], %[mat_ptr0_end]\n"
+ "bne 2b\n"
+ "add v0.4s, v0.4s, v1.4s\n"
+ "add v2.4s, v2.4s, v3.4s\n"
+
+ "cmp %w[is_row_sums_nullptr], #1\n"
+ "bne 4f\n"
+ // Calculate zero point offsets.
+ "addv s14, v14.4s\n"
+ "addv s15, v15.4s\n"
+ "dup v14.4s, v14.s[0]\n"
+ "dup v15.4s, v15.s[0]\n"
+ "b 5f\n"
+ "4:\n"
+ "ld1r {v14.4s}, [%[row_sums_ptr]], #4\n"
+ "ld1r {v15.4s}, [%[row_sums_ptr]]\n"
+ "5:\n"
+
+ "mul v14.4s, v14.4s, v7.4s\n"
+ "mul v15.4s, v15.4s, v7.4s\n"
+ "sub v0.4s, v0.4s, v14.4s\n"
+ "sub v2.4s, v2.4s, v15.4s\n"
+
+ "scvtf v0.4s, v0.4s\n"
+ "scvtf v1.4s, v2.4s\n"
+
+ // Multiply scale.
+ "fmul v0.4s, v16.4s, v0.4s\n"
+ "fmul v1.4s, v17.4s, v1.4s\n"
+
+ "ld2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n"
+ "ld2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n"
+ "ld2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n"
+ "ld2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n"
+ "sub %[result_ptr], %[result_ptr], %[wide_rows], lsl #2\n"
+ "fadd v9.4s, v9.4s, v0.4s\n"
+ "fadd v10.4s, v10.4s, v1.4s\n"
+ "st2 {v9.s, v10.s}[0], [%[result_ptr]], %[wide_rows]\n"
+ "st2 {v9.s, v10.s}[1], [%[result_ptr]], %[wide_rows]\n"
+ "st2 {v9.s, v10.s}[2], [%[result_ptr]], %[wide_rows]\n"
+ "st2 {v9.s, v10.s}[3], [%[result_ptr]], %[wide_rows]\n"
+ : [ mat_ptr0 ] "+r"(mat_ptr0), [ mat_ptr1 ] "+r"(mat_ptr1), [ vec_ptr ] "+r"(vec_ptr),
+ [ result_ptr ] "+r"(result_ptr), [ row_sums_ptr ] "+r"(row_sums_ptr)
+ : [ mat_ptr0_end ] "r"(mat_ptr0_end), [ scaling_factors_ptr ] "r"(scaling_factors_ptr),
+ [ wide_rows ] "r"(wide_rows), [ channel_scales_ptr ] "r"(channel_scales_ptr),
+ [ batch_offsets_ptr ] "r"(batch_offsets_ptr),
+ [ is_channel_scale_nullptr ] "r"(is_channel_scale_nullptr),
+ [ is_row_sums_nullptr ] "r"(is_row_sums_nullptr)
+ : "x0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12",
+ "v13", "v14", "v15", "v16", "v17", "w0", "w1", "cc", "memory");
}
}
// We don't use this kernel when n_batch = 1 because the baseline kernel
// is fine for that case.
inline void DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate(
- const int8_t *__restrict__ matrix, const int m_rows, const int m_cols, const int8_t *vectors,
- const float *scaling_factors, int n_batch, float *__restrict__ result,
- const float *per_channel_scale, const int32_t *input_offset, int32_t *row_sums)
+ const int8_t *__restrict__ matrix, const int m_rows, const int m_cols, const int8_t *vectors,
+ const float *scaling_factors, int n_batch, float *__restrict__ result,
+ const float *per_channel_scale, const int32_t *input_offset, int32_t *row_sums)
{
const int kWeightsPerUint32 = 4;
void *padded_vectors_free;
const int padded_vectors_size = batch_round_up * m_cols;
int8_t *padded_vectors = reinterpret_cast<int8_t *>(
- aligned_alloc(kWeightsPerUint32, padded_vectors_size, &padded_vectors_free));
+ aligned_alloc(kWeightsPerUint32, padded_vectors_size, &padded_vectors_free));
memset(padded_vectors, 0, padded_vectors_size);
void *padded_result_free;
const int result_size = n_batch * m_rows * sizeof(float);
const int padded_result_size = batch_round_up * m_rows * sizeof(float);
float *padded_result = reinterpret_cast<float *>(
- aligned_alloc(kWeightsPerUint32, padded_result_size, &padded_result_free));
+ aligned_alloc(kWeightsPerUint32, padded_result_size, &padded_result_free));
memcpy(padded_result, result, result_size);
memset(reinterpret_cast<char *>(padded_result) + result_size, 0,
padded_result_size - result_size);
void *padded_scaling_factors_free;
const int padded_scaling_factors_size = batch_round_up * sizeof(float);
float *padded_scaling_factors = reinterpret_cast<float *>(
- aligned_alloc(kWeightsPerUint32, padded_scaling_factors_size, &padded_scaling_factors_free));
+ aligned_alloc(kWeightsPerUint32, padded_scaling_factors_size, &padded_scaling_factors_free));
assert(static_cast<int>(n_batch * sizeof(float)) <= padded_scaling_factors_size);
assert(static_cast<int>(batch_round_up * sizeof(float)) <= padded_scaling_factors_size);
memset(padded_scaling_factors, 0, batch_round_up * sizeof(float));
void *padded_input_offset_free;
const int padded_input_offset_size = batch_round_up * sizeof(int32_t);
int32_t *padded_input_offset = reinterpret_cast<int32_t *>(
- aligned_alloc(kWeightsPerUint32, padded_input_offset_size, &padded_input_offset_free));
+ aligned_alloc(kWeightsPerUint32, padded_input_offset_size, &padded_input_offset_free));
assert(static_cast<int>(n_batch * sizeof(int32_t)) <= padded_input_offset_size);
assert(static_cast<int>(batch_round_up * sizeof(int32_t)) <= padded_input_offset_size);
memset(padded_input_offset, 0, batch_round_up * sizeof(int32_t));
// Call the main kernel.
DotprodMatrixBatchFourVectorMultiplyAccumulate(
- matrix, m_rows, m_cols, padded_vectors, padded_scaling_factors, batch_round_up,
- padded_result, per_channel_scale, padded_input_offset, row_sums);
+ matrix, m_rows, m_cols, padded_vectors, padded_scaling_factors, batch_round_up, padded_result,
+ per_channel_scale, padded_input_offset, row_sums);
free(padded_input_offset_free);
}
}
inline void DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate(
- const int8_t *__restrict__ matrix, const int m_rows, const int m_cols, const int8_t *vectors,
- const float *scaling_factors, int n_batch, float *__restrict__ result)
+ const int8_t *__restrict__ matrix, const int m_rows, const int m_cols, const int8_t *vectors,
+ const float *scaling_factors, int n_batch, float *__restrict__ result)
{
DotprodMatrixBatchPaddedFourVectorMultiplyAccumulate(
- matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result,
- /*per_channel_scale=*/nullptr, /*input_offset=*/nullptr,
- /*row_sums=*/nullptr);
+ matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result,
+ /*per_channel_scale=*/nullptr, /*input_offset=*/nullptr,
+ /*row_sums=*/nullptr);
}
#endif // __aarch64__
for (int i = postamble_start; i < size; ++i)
{
const int32_t quantized_value =
- static_cast<int32_t>(std::round(scaling_factor_inv * values[i]));
+ static_cast<int32_t>(std::round(scaling_factor_inv * values[i]));
quantized_values[i] = std::min(kScale, std::max(-kScale, quantized_value));
}
}
// Here the assumption is that each buffer is 4-byte aligned. Otherwise,
// performance may suffer significantly.
assert( // NOLINT
- ((uintptr_t)(&row_ptr[col]) & (kWeightsPerUint32 - 1)) == 0);
+ ((uintptr_t)(&row_ptr[col]) & (kWeightsPerUint32 - 1)) == 0);
const int8x16_t s1_8x16 = vld1q_s8((const int8_t *)(aligned_vec + col));
const int8x16_t s2_8x16 = vld1q_s8((const int8_t *)(row_ptr + col));
// Multiply the low bits (i.e. the lower 8 8bit numbers in the
// Here the assumption is that each buffer is 4-byte aligned.
// Otherwise, performance may suffer significantly.
assert( // NOLINT
- ((uintptr_t)(&row_ptr[col]) & (kWeightsPerUint32 - 1)) == 0);
+ ((uintptr_t)(&row_ptr[col]) & (kWeightsPerUint32 - 1)) == 0);
const int8x8_t s1_8x8 = vld1_s8((const int8_t *)(aligned_vec + col));
const int8x8_t s2_8x8 = vld1_s8((const int8_t *)(row_ptr + col));
const int16x8_t prod_16x8 = vmull_s8(s1_8x8, s2_8x8);
const float32x4_t float_val1 = vcvtq_f32_s32(scratch_val1);
const float32x4_t result0 = vmlaq_f32(vld1q_f32(result), float_val0, scaling_factor0);
const float32x4_t result1 =
- vmlaq_f32(vld1q_f32(result + 4 * result_stride), float_val1, scaling_factor1);
+ vmlaq_f32(vld1q_f32(result + 4 * result_stride), float_val1, scaling_factor1);
vst1q_f32(result, result0);
vst1q_f32(result + 4 * result_stride, result1);
}
for (int i = 0; i < size; ++i)
{
const int32_t quantized_value =
- static_cast<int32_t>(std::round(values[i] * scaling_factor_inv));
+ static_cast<int32_t>(std::round(values[i] * scaling_factor_inv));
// Clamp: just in case some odd numeric offset.
quantized_values[i] = std::min(kScale, std::max(-kScale, quantized_value));
}
int32_t block_size;
};
+struct LeakyReluParams
+{
+ float alpha;
+};
+
enum class Order
{
kColMajor,
// (only those that need perchannel quantization do).
template <typename AccumScalar, typename DstScalar,
QuantizationFlavor quantization_flavor =
- std::is_floating_point<AccumScalar>::value
- ? QuantizationFlavor::kFloatingPoint
- : QuantizationFlavor::kIntegerWithUniformMultiplier>
+ std::is_floating_point<AccumScalar>::value
+ ? QuantizationFlavor::kFloatingPoint
+ : QuantizationFlavor::kIntegerWithUniformMultiplier>
struct GemmParams
{
// Only for non-floating-point cases. The fixed-point part (i.e. the mantissa)
const AccumScalar *bias = nullptr;
// min clamp bound of destination values.
DstScalar clamp_min = std::is_floating_point<DstScalar>::value
- ? -std::numeric_limits<DstScalar>::infinity()
- : std::numeric_limits<DstScalar>::lowest();
+ ? -std::numeric_limits<DstScalar>::infinity()
+ : std::numeric_limits<DstScalar>::lowest();
// max clamp bound of destination values.
DstScalar clamp_max = std::is_floating_point<DstScalar>::value
- ? std::numeric_limits<DstScalar>::infinity()
- : std::numeric_limits<DstScalar>::max();
+ ? std::numeric_limits<DstScalar>::infinity()
+ : std::numeric_limits<DstScalar>::max();
};
// Validates self-consistency of GemmParams.
int left_shift = shift > 0 ? shift : 0;
int right_shift = shift > 0 ? 0 : -shift;
return gemmlowp::RoundingDivideByPOT(
- gemmlowp::SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier),
- right_shift);
+ gemmlowp::SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier),
+ right_shift);
}
inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(int32_t x, int32_t quantized_multiplier,
int left_shift)
{
return gemmlowp::RoundingDivideByPOT(
- gemmlowp::SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift);
+ gemmlowp::SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift);
}
inline int NodeOffset(int b, int h, int w, int height, int width)
const F3 fixedpoint_input = F3::FromRaw(input >> 1);
const F3 fixedpoint_half_input = SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input);
const F3 fixedpoint_half_three =
- GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5);
+ GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5);
// Newton-Raphson iteration
// Naive unoptimized starting guess: x = 1
F3 x = F3::One();
x = Rescale<3>(fixedpoint_half_three * x - fixedpoint_half_input * x3);
}
const F0 fixedpoint_half_sqrt_2 =
- GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.);
+ GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.);
x = x * fixedpoint_half_sqrt_2;
*output_inv_sqrt = x.raw();
if (*output_shift < 0)
{
public:
SequentialTensorWriter(const T *input_data, T *output_data)
- : input_data_(input_data), output_ptr_(output_data)
+ : input_data_(input_data), output_ptr_(output_data)
{
}
// library.
typedef Eigen::TensorMap<Eigen::Tensor<float, 2, Eigen::RowMajor, Eigen::DenseIndex>,
Eigen::Aligned>
- EigenMatrix;
+ EigenMatrix;
typedef Eigen::TensorMap<Eigen::Tensor<const float, 2, Eigen::RowMajor, Eigen::DenseIndex>,
Eigen::Aligned>
- ConstEigenMatrix;
+ ConstEigenMatrix;
typedef Eigen::TensorMap<Eigen::Tensor<float, 4, Eigen::RowMajor, Eigen::DenseIndex>,
Eigen::Aligned>
- EigenTensor;
+ EigenTensor;
typedef Eigen::TensorMap<Eigen::Tensor<const float, 4, Eigen::RowMajor, Eigen::DenseIndex>,
Eigen::Aligned>
- ConstEigenTensor;
+ ConstEigenTensor;
// Utility functions we need for the EigenTensor API.
template <typename Device, typename T> struct MatMulConvFunctor
// Eigen::Map<Eigen::Matrix<const float, ...>>
template <typename Scalar>
using VectorMap = typename std::conditional<
- std::is_const<Scalar>::value,
- Eigen::Map<const Eigen::Matrix<typename std::remove_const<Scalar>::type, Eigen::Dynamic, 1>>,
- Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, 1>>>::type;
+ std::is_const<Scalar>::value,
+ Eigen::Map<const Eigen::Matrix<typename std::remove_const<Scalar>::type, Eigen::Dynamic, 1>>,
+ Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, 1>>>::type;
template <typename Scalar> VectorMap<Scalar> MapAsVector(Scalar *data, const Shape &shape)
{
// above also applies here.
template <typename Scalar>
using MatrixMap = typename std::conditional<
- std::is_const<Scalar>::value,
- Eigen::Map<const Eigen::Matrix<typename std::remove_const<Scalar>::type, Eigen::Dynamic,
- Eigen::Dynamic>>,
- Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>>>::type;
+ std::is_const<Scalar>::value,
+ Eigen::Map<
+ const Eigen::Matrix<typename std::remove_const<Scalar>::type, Eigen::Dynamic, Eigen::Dynamic>>,
+ Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>>>::type;
template <typename Scalar>
MatrixMap<Scalar> MapAsMatrixWithLastDimAsRows(Scalar *data, const Shape &shape)
public:
template <typename TensorEvaluatorT, typename PacketT, typename IndexT>
static auto functionExistsSfinae(
- typename std::enable_if<
- unpacket_traits<PacketT>::masked_load_available &&
- std::is_same<
- PacketT,
- decltype(std::declval<const TensorEvaluatorT>().template partialPacket<PacketT>(
- std::declval<IndexT>(),
- std::declval<typename unpacket_traits<PacketT>::mask_t>()))>::value>::type *)
- -> std::true_type;
+ typename std::enable_if<
+ unpacket_traits<PacketT>::masked_load_available &&
+ std::is_same<PacketT,
+ decltype(std::declval<const TensorEvaluatorT>().template partialPacket<PacketT>(
+ std::declval<IndexT>(),
+ std::declval<typename unpacket_traits<PacketT>::mask_t>()))>::value>::type *)
+ -> std::true_type;
template <typename TensorEvaluatorT, typename PacketT, typename IndexT>
static auto functionExistsSfinae(...) -> std::false_type;
typedef decltype(
- functionExistsSfinae<TensorEvaluatorType, PacketType, IndexType>(nullptr)) status;
+ functionExistsSfinae<TensorEvaluatorType, PacketType, IndexType>(nullptr)) status;
static constexpr bool value = status::value;
};
// [from, to) range. If the mask bit is 1, element will be loaded/stored.
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
- typename std::enable_if<unpacket_traits<Packet>::masked_load_available,
- typename unpacket_traits<Packet>::mask_t>::type
- mask(int from, int to)
+ typename std::enable_if<unpacket_traits<Packet>::masked_load_available,
+ typename unpacket_traits<Packet>::mask_t>::type
+ mask(int from, int to)
{
const Index packet_size = internal::unpacket_traits<Packet>::size;
eigen_assert(0 <= from && to <= (packet_size + 1) && from < to);
typename Scalar_, typename Index, typename nocontract_t, typename contract_t, int Side,
int packet_size, bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
class TensorContractionInputMapper<
- Scalar_, Index, Side,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ Scalar_, Index, Side,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
{
public:
typedef Scalar_ Scalar;
typedef TensorContractionInputMapper<
- Scalar, Index, Side,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
- Self;
+ Scalar, Index, Side,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ Self;
typedef TensorContractionSubMapper<
- Scalar, Index, Side,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
- SubMapper;
+ Scalar, Index, Side,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ SubMapper;
typedef SubMapper VectorMapper;
typedef SubMapper LinearMapper;
EIGEN_DEVICE_FUNC
TensorContractionInputMapper(
- const TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device> &tensor,
- const nocontract_t &, const nocontract_t &, const contract_t &, const contract_t &)
- : m_impl(tensor.impl().impl())
+ const TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>
+ &tensor,
+ const nocontract_t &, const nocontract_t &, const contract_t &, const contract_t &)
+ : m_impl(tensor.impl().impl())
{
Index patch_rows;
Index patch_depth;
EIGEN_DEVICE_FUNC
TensorContractionInputMapper(const TensorContractionInputMapper &base_mapper)
- : m_impl(base_mapper.m_impl)
+ : m_impl(base_mapper.m_impl)
{
m_patch_cols = base_mapper.m_patch_cols;
m_num_patches = base_mapper.m_num_patches;
private:
friend class TensorContractionSubMapper<
- Scalar, Index, Side,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>;
+ Scalar, Index, Side,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>;
// Load coefficient from a patch specified by the "within patch offset"
// (patchId) and the precomputed indices of the first element of the patch.
const Index colOffset = patchOffset / m_fastColStride;
const Index inputCol = colIndex + colOffset * m_in_col_strides;
const Index origInputCol = (m_patch_col_inflate_strides == 1)
- ? inputCol
- : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0);
+ ? inputCol
+ : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0);
const Index rowOffset = patchOffset - colOffset * m_colStride;
const Index inputRow = rowIndex + rowOffset * m_in_row_strides;
const Index origInputRow = (m_patch_row_inflate_strides == 1)
- ? inputRow
- : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0);
+ ? inputRow
+ : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0);
if (origInputCol < 0 || origInputRow < 0 || origInputCol >= m_inputCols ||
origInputRow >= m_inputRows || (inputCol != origInputCol * m_patch_col_inflate_strides) ||
(inputRow != origInputRow * m_patch_row_inflate_strides))
}
const Index depth = patchId - patchOffset * patchDepth();
const Index inputIndex =
- depth + origInputRow * m_rowInputStride + origInputCol * m_colInputStride + otherIndex;
+ depth + origInputRow * m_rowInputStride + origInputCol * m_colInputStride + otherIndex;
return m_impl.coeff(inputIndex);
}
}
const Index depth = patchId - patchOffset * patchDepth();
const Index inputIndex =
- depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex;
+ depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex;
return m_impl.coeff(inputIndex);
}
// span[0] all the way upto (and including) span[1].
const Index depth = patchId - patchOffsets[0] * patchDepth();
const Index inputIndex =
- depth + inputRows[0] * m_rowInputStride + inputCol * m_colInputStride + otherIndex;
+ depth + inputRows[0] * m_rowInputStride + inputCol * m_colInputStride + otherIndex;
return m_impl.template partialPacket<Packet>(inputIndex - span[0],
mask<Packet>(span[0], span[1] + 1));
}
// Load partial packets and do bit-wise OR to generate required packet
return internal::por<Packet>(
- loadPartialPacketStandard(rowIndex, colIndex, otherIndex, patchIds[0], spans[0],
- patchOffsets2Cols[0], colOffsets[0]),
- loadPartialPacketStandard(rowIndex, colIndex, otherIndex, patchIds[1], spans[1],
- patchOffsets2Cols[1], colOffsets[1]));
+ loadPartialPacketStandard(rowIndex, colIndex, otherIndex, patchIds[0], spans[0],
+ patchOffsets2Cols[0], colOffsets[0]),
+ loadPartialPacketStandard(rowIndex, colIndex, otherIndex, patchIds[1], spans[1],
+ patchOffsets2Cols[1], colOffsets[1]));
}
// Helper function to load a packet that is present in a single column.
// no padding
const Index depth = patchId - patchOffsets[0] * patchDepth();
const Index inputIndex =
- depth + inputRows[0] * m_rowInputStride + inputCols[0] * m_colInputStride + otherIndex;
+ depth + inputRows[0] * m_rowInputStride + inputCols[0] * m_colInputStride + otherIndex;
return m_impl.template packet<Unaligned>(inputIndex);
}
return packetWithPossibleZero(patchId, rowIndex, colIndex, otherIndex);
// load.
template <typename PacketT, typename TensorEvaluatorT>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename std::enable_if<
- !TensorEvaluatorHasPartialPacket<TensorEvaluatorT, PacketT, Index>::value, PacketT>::type
+ !TensorEvaluatorHasPartialPacket<TensorEvaluatorT, PacketT, Index>::value, PacketT>::type
loadPacketStandard(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const
{
const Index packetSize = internal::unpacket_traits<Packet>::size;
// packets.
template <typename PacketT, typename TensorEvaluatorT>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename std::enable_if<
- TensorEvaluatorHasPartialPacket<TensorEvaluatorT, PacketT, Index>::value, PacketT>::type
+ TensorEvaluatorHasPartialPacket<TensorEvaluatorT, PacketT, Index>::value, PacketT>::type
loadPacketStandard(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const
{
const Index packetSize = internal::unpacket_traits<PacketT>::size;
// no padding
const Index depth = patchId - patchOffset * patchDepth();
const Index inputIndex =
- depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex;
+ depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex;
return m_impl.template packet<Unaligned>(inputIndex);
}
computeBaseIndices(Index patchIndex, Index &rowIndex, Index &colIndex, Index &otherIndex) const
{
const size_t NumInputDims =
- array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
+ array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
otherIndex = (NumInputDims == 3) ? 0 : patchIndex / m_fastNumPatches;
const Index patch2DIndex =
- (NumInputDims == 3) ? patchIndex : (patchIndex - otherIndex * m_num_patches);
+ (NumInputDims == 3) ? patchIndex : (patchIndex - otherIndex * m_num_patches);
otherIndex *= m_patchInputStride;
colIndex = patch2DIndex / m_fastOutputRows;
rowIndex = patch2DIndex - colIndex * m_outputRows;
typename Scalar, typename Index, typename nocontract_t, typename contract_t, int Side,
int packet_size, bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
class TensorContractionSubMapper<
- Scalar, Index, Side,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ Scalar, Index, Side,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
{
public:
typedef typename packet_traits<Scalar>::type Packet;
typedef typename packet_traits<Scalar>::half HalfPacket;
typedef TensorContractionInputMapper<
- Scalar, Index, Side,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
- ParentMapper;
+ Scalar, Index, Side,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ ParentMapper;
typedef TensorContractionSubMapper<
- Scalar, Index, Side,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
- Self;
+ Scalar, Index, Side,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ Self;
typedef Self LinearMapper;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionSubMapper(const ParentMapper &base_mapper,
Index vert_offset,
Index horiz_offset)
- : m_depth_offset(vert_offset), m_col_offset(horiz_offset), m_base_mapper(base_mapper)
+ : m_depth_offset(vert_offset), m_col_offset(horiz_offset), m_base_mapper(base_mapper)
{
m_base_mapper.computeBaseIndices(m_col_offset, m_rowIndex, m_colIndex, m_otherIndex);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionSubMapper(const Self &base_mapper,
Index vert_offset,
Index horiz_offset)
- : m_depth_offset(vert_offset + base_mapper.m_depth_offset),
- m_col_offset(horiz_offset + base_mapper.m_col_offset),
- m_base_mapper(base_mapper.m_base_mapper)
+ : m_depth_offset(vert_offset + base_mapper.m_depth_offset),
+ m_col_offset(horiz_offset + base_mapper.m_col_offset),
+ m_base_mapper(base_mapper.m_base_mapper)
{
m_base_mapper.computeBaseIndices(m_col_offset, m_rowIndex, m_colIndex, m_otherIndex);
}
{
typedef decltype(m_base_mapper.m_impl) TensorEvaluatorT;
return m_base_mapper.template loadPacketStandard<Packet, TensorEvaluatorT>(
- i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex);
+ i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex);
}
template <typename Packet> EIGEN_DEVICE_FUNC bool aligned(Index) const { return false; }
EIGEN_ALWAYS_INLINE Index maxCol(const Index peeled_k) const
{
const Index max_col =
- (m_depth_offset + (peeled_k == 0 ? 0 : peeled_k - 1)) / fastPatchColStride();
+ (m_depth_offset + (peeled_k == 0 ? 0 : peeled_k - 1)) / fastPatchColStride();
return std::min<Index>(1 + max_col, patchCols());
}
EIGEN_ALWAYS_INLINE Index maxRow(const Index peeled_k, const Index col) const
{
const Index max_row =
- (m_depth_offset + (peeled_k == 0 ? 0 : peeled_k - 1) - col * patchColStride()) /
- fastPatchRowStride();
+ (m_depth_offset + (peeled_k == 0 ? 0 : peeled_k - 1) - col * patchColStride()) /
+ fastPatchRowStride();
return std::min<Index>(1 + max_row, patchRows());
}
}
template <typename PacketT = Packet>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename std::enable_if<
- TensorEvaluatorHasPartialPacket<TensorEvaluatorT, PacketT, Index>::value, PacketT>::type
+ TensorEvaluatorHasPartialPacket<TensorEvaluatorT, PacketT, Index>::value, PacketT>::type
partialPacketNoPadding(const Index depth, const Index baseIndex, Index num_coeffs) const
{
const Index inputIndex = depth + baseIndex;
const Index input_row = m_rowIndex + row * m_base_mapper.m_in_row_strides;
*orig_row = (m_base_mapper.m_patch_row_inflate_strides == 1)
- ? input_row
- : ((input_row >= 0) ? (input_row / m_base_mapper.m_fastInputRowStride) : 0);
+ ? input_row
+ : ((input_row >= 0) ? (input_row / m_base_mapper.m_fastInputRowStride) : 0);
return (*orig_row < 0 || *orig_row >= m_base_mapper.m_inputRows) ||
(input_row != *orig_row * m_base_mapper.m_patch_row_inflate_strides);
const Index input_col = m_colIndex + col * m_base_mapper.m_in_col_strides;
*orig_col = (m_base_mapper.m_patch_col_inflate_strides == 1)
- ? input_col
- : ((input_col >= 0) ? (input_col / m_base_mapper.m_fastInputColStride) : 0);
+ ? input_col
+ : ((input_col >= 0) ? (input_col / m_base_mapper.m_fastInputColStride) : 0);
return (*orig_col < 0 || *orig_col >= m_base_mapper.m_inputCols) ||
(input_col != *orig_col * m_base_mapper.m_patch_col_inflate_strides);
int packet_size, bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment,
int nr>
struct gemm_pack_rhs<
- Scalar, Index,
- TensorContractionSubMapper<
- Scalar, Index, Rhs,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered,
- Alignment>,
- nr, ColMajor, false, false>
+ Scalar, Index,
+ TensorContractionSubMapper<
+ Scalar, Index, Rhs,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>,
+ nr, ColMajor, false, false>
{
typedef TensorContractionSubMapper<
- Scalar, Index, Rhs,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
- SubMapper;
+ Scalar, Index, Rhs,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ SubMapper;
typedef SubMapper DataMapper;
typedef typename packet_traits<Scalar>::type Packet;
const Index idx3 = dm3.baseIndex(r, c);
const Index start_depth =
- ((c == start_col) && (r == start_row)) ? rhs.depthOffset() : 0;
+ ((c == start_col) && (r == start_row)) ? rhs.depthOffset() : 0;
const Index max_depth = rhs.maxDepth(peeled_k - k, start_depth);
eigen_assert((max_depth - start_depth) % packet_size == 0);
typename Scalar, typename Index, typename nocontract_t, typename contract_t,
bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment, int nr>
struct gemm_pack_rhs<
- Scalar, Index,
- TensorContractionSubMapper<
- Scalar, Index, Rhs,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, 2, inner_dim_contiguous, inner_dim_reordered, Alignment>,
- nr, ColMajor, false, false>
+ Scalar, Index,
+ TensorContractionSubMapper<
+ Scalar, Index, Rhs,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, 2, inner_dim_contiguous, inner_dim_reordered, Alignment>,
+ nr, ColMajor, false, false>
{
typedef TensorContractionSubMapper<
- Scalar, Index, Rhs,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, 2, inner_dim_contiguous, inner_dim_reordered, Alignment>
- SubMapper;
+ Scalar, Index, Rhs,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, 2, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ SubMapper;
typedef SubMapper DataMapper;
typedef typename packet_traits<Scalar>::type Packet;
const Index idx3 = dm3.baseIndex(r, c);
const Index start_depth =
- ((c == start_col) && (r == start_row)) ? rhs.depthOffset() : 0;
+ ((c == start_col) && (r == start_row)) ? rhs.depthOffset() : 0;
const Index max_depth = rhs.maxDepth(peeled_k - k, start_depth);
eigen_assert((max_depth - start_depth) % packet_size == 0);
typename Scalar, typename Index, typename nocontract_t, typename contract_t,
bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment, int nr>
struct gemm_pack_rhs<
- Scalar, Index,
- TensorContractionSubMapper<
- Scalar, Index, Rhs,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment>,
- nr, ColMajor, false, false>
+ Scalar, Index,
+ TensorContractionSubMapper<
+ Scalar, Index, Rhs,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment>,
+ nr, ColMajor, false, false>
{
typedef TensorContractionSubMapper<
- Scalar, Index, Rhs,
- TensorEvaluator<
- const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>,
- Device>,
- nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment>
- SubMapper;
+ Scalar, Index, Rhs,
+ TensorEvaluator<
+ const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>,
+ nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment>
+ SubMapper;
typedef SubMapper DataMapper;
EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE)
*/
template <typename Input, typename Kernel, typename OutputKernel = const NoOpOutputKernel>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static const typename internal::conditional<
- internal::traits<Input>::Layout == ColMajor,
- TensorReshapingOp<
- const DSizes<typename internal::traits<Input>::Index,
- internal::traits<Input>::NumDimensions>,
- const TensorContractionOp<
- const array<IndexPair<typename internal::traits<Input>::Index>, 1>,
- const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>,
- const Kernel>,
- const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>,
- const TensorImagePatchOp<Dynamic, Dynamic, const Input>>,
- const OutputKernel>>,
- TensorReshapingOp<
- const DSizes<typename internal::traits<Input>::Index,
- internal::traits<Input>::NumDimensions>,
- const TensorContractionOp<
- const array<IndexPair<typename internal::traits<Input>::Index>, 1>,
- const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>,
- const TensorImagePatchOp<Dynamic, Dynamic, const Input>>,
- const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>,
- const Kernel>,
- const OutputKernel>>>::type
+ internal::traits<Input>::Layout == ColMajor,
+ TensorReshapingOp<
+ const DSizes<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions>,
+ const TensorContractionOp<
+ const array<IndexPair<typename internal::traits<Input>::Index>, 1>,
+ const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>,
+ const Kernel>,
+ const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>,
+ const TensorImagePatchOp<Dynamic, Dynamic, const Input>>,
+ const OutputKernel>>,
+ TensorReshapingOp<
+ const DSizes<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions>,
+ const TensorContractionOp<
+ const array<IndexPair<typename internal::traits<Input>::Index>, 1>,
+ const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>,
+ const TensorImagePatchOp<Dynamic, Dynamic, const Input>>,
+ const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>,
+ const Kernel>,
+ const OutputKernel>>>::type
SpatialConvolution(const Input &input, const Kernel &kernel, const Index row_stride = 1,
const Index col_stride = 1, const PaddingType padding_type = PADDING_SAME,
const Index row_in_stride = 1, const Index col_in_stride = 1,
typedef typename internal::traits<Input>::Index TensorIndex;
TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions,
internal::traits<Input>::Layout, TensorIndex>>
- in(input);
+ in(input);
TensorRef<
- Tensor<typename internal::traits<Kernel>::Scalar, internal::traits<Kernel>::NumDimensions,
- internal::traits<Kernel>::Layout, TensorIndex>>
- kern(kernel);
+ Tensor<typename internal::traits<Kernel>::Scalar, internal::traits<Kernel>::NumDimensions,
+ internal::traits<Kernel>::Layout, TensorIndex>>
+ kern(kernel);
EIGEN_STATIC_ASSERT(internal::traits<Input>::Layout == internal::traits<Kernel>::Layout,
YOU_MADE_A_PROGRAMMING_MISTAKE)
}
if (padding_explicit)
{
- return choose(
- Cond<internal::traits<Input>::Layout == ColMajor>(),
- kernel.reshape(kernel_dims)
- .contract(input
- .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride,
- row_in_stride, col_in_stride,
- /*row_inflate_stride=*/1,
- /*col_inflate_stride=*/1, padding_top,
- padding_bottom, padding_left, padding_right,
- /*padding_value=*/0)
- .reshape(pre_contract_dims),
- contract_dims, output_kernel)
- .reshape(post_contract_dims),
- input
- .extract_image_patches(
- kernelRows, kernelCols, row_stride, col_stride, row_in_stride, col_in_stride,
- /*row_inflate_stride=*/1,
- /*col_inflate_stride=*/1, padding_top, padding_bottom, padding_left, padding_right,
- /*padding_value=*/0)
- .reshape(pre_contract_dims)
- .contract(kernel.reshape(kernel_dims), contract_dims, output_kernel)
- .reshape(post_contract_dims));
+ return choose(Cond<internal::traits<Input>::Layout == ColMajor>(),
+ kernel.reshape(kernel_dims)
+ .contract(input
+ .extract_image_patches(kernelRows, kernelCols, row_stride,
+ col_stride, row_in_stride, col_in_stride,
+ /*row_inflate_stride=*/1,
+ /*col_inflate_stride=*/1, padding_top,
+ padding_bottom, padding_left, padding_right,
+ /*padding_value=*/0)
+ .reshape(pre_contract_dims),
+ contract_dims, output_kernel)
+ .reshape(post_contract_dims),
+ input
+ .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride,
+ row_in_stride, col_in_stride,
+ /*row_inflate_stride=*/1,
+ /*col_inflate_stride=*/1, padding_top, padding_bottom,
+ padding_left, padding_right,
+ /*padding_value=*/0)
+ .reshape(pre_contract_dims)
+ .contract(kernel.reshape(kernel_dims), contract_dims, output_kernel)
+ .reshape(post_contract_dims));
}
else
{
return choose(
- Cond<internal::traits<Input>::Layout == ColMajor>(),
- kernel.reshape(kernel_dims)
- .contract(input
- .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride,
- row_in_stride, col_in_stride, padding_type)
- .reshape(pre_contract_dims),
- contract_dims, output_kernel)
- .reshape(post_contract_dims),
- input
- .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride, row_in_stride,
- col_in_stride, padding_type)
- .reshape(pre_contract_dims)
- .contract(kernel.reshape(kernel_dims), contract_dims, output_kernel)
- .reshape(post_contract_dims));
+ Cond<internal::traits<Input>::Layout == ColMajor>(),
+ kernel.reshape(kernel_dims)
+ .contract(input
+ .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride,
+ row_in_stride, col_in_stride, padding_type)
+ .reshape(pre_contract_dims),
+ contract_dims, output_kernel)
+ .reshape(post_contract_dims),
+ input
+ .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride, row_in_stride,
+ col_in_stride, padding_type)
+ .reshape(pre_contract_dims)
+ .contract(kernel.reshape(kernel_dims), contract_dims, output_kernel)
+ .reshape(post_contract_dims));
}
}
int hpad = h + params.padding_values.height;
int wpad = w + params.padding_values.width;
int h_start =
- (hpad < params.filter_height) ? 0 : (hpad - params.filter_height) / stride_height + 1;
+ (hpad < params.filter_height) ? 0 : (hpad - params.filter_height) / stride_height + 1;
int h_end = std::min(hpad / stride_height + 1, output_height);
int w_start =
- (wpad < params.filter_width) ? 0 : (wpad - params.filter_width) / stride_width + 1;
+ (wpad < params.filter_width) ? 0 : (wpad - params.filter_width) / stride_width + 1;
int w_end = std::min(wpad / stride_width + 1, output_width);
// compute elementwise sum
for (int ph = h_start; ph < h_end; ++ph)
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
const int filter_count =
- (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start);
+ (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start);
memset(acc, 0, tranche_depth * sizeof(acc[0]));
const uint8_t *input_ptr =
- input_data + depth_base +
- depth * (in_x_origin + input_width * (in_y_origin + input_height * batch));
+ input_data + depth_base +
+ depth * (in_x_origin + input_width * (in_y_origin + input_height * batch));
for (int fy = filter_y_start; fy < filter_y_end; fy++)
{
const uint8_t *input_row_ptr = input_ptr + depth * (fy * input_width + filter_x_start);
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
const int filter_count =
- (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start);
+ (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start);
memset(acc, 0, tranche_depth * sizeof(acc[0]));
const uint8_t *input_ptr =
- input_data + depth_base +
- depth * (in_x_origin + input_width * (in_y_origin + input_height * batch));
+ input_data + depth_base +
+ depth * (in_x_origin + input_width * (in_y_origin + input_height * batch));
for (int fy = filter_y_start; fy < filter_y_end; fy++)
{
const uint8_t *input_row_ptr = input_ptr + depth * (fy * input_width + filter_x_start);
// Similarly, (*end_index) * block_shape_dim is rounded up too (note that
// end_index is exclusive).
*end_index =
- std::min(input_dim, (output_dim - spatial_index_dim + block_shape_dim - 1) / block_shape_dim);
+ std::min(input_dim, (output_dim - spatial_index_dim + block_shape_dim - 1) / block_shape_dim);
}
template <typename T>
for (int in_w = in_w_start; in_w < in_w_end; ++in_w)
{
const int out_w =
- in_w * block_shape_width + spatial_offset % block_shape_width - crops_left;
+ in_w * block_shape_width + spatial_offset % block_shape_width - crops_left;
assert(out_w >= 0);
assert(out_w < output_width);
T *out = output_data + Offset(output_shape, out_batch, out_h, out_w, 0);
// From this point it is assumed contractually that corresponding dimensions
// in shape0 and shape1 are either (a) equal or (b) one of them equals 1.
const bool swap_inputs =
- params->broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast;
+ params->broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast;
const Shape *shape_a = swap_inputs ? &extended_shape1 : &extended_shape0;
const Shape *shape_b = swap_inputs ? &extended_shape0 : &extended_shape1;
break;
case nnfw::cker::BinaryArithmeticOpType::MUL:
optimized::BroadcastMulDispatchQuant8(
- params, input1_shape, const_cast<uint8_t *>(input1_data), input2_shape,
- const_cast<uint8_t *>(input2_data), output_shape, output_data);
+ params, input1_shape, const_cast<uint8_t *>(input1_data), input2_shape,
+ const_cast<uint8_t *>(input2_data), output_shape, output_data);
break;
case nnfw::cker::BinaryArithmeticOpType::DIV:
case nnfw::cker::BinaryArithmeticOpType::POW:
break;
case nnfw::cker::BinaryArithmeticOpType::POW:
reference::BroadcastBinaryArithmeticOpSlow<float>(
- params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
- GetBinaryArtithmeticFn<op_type, float>());
+ params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
+ GetBinaryArtithmeticFn<op_type, float>());
break;
default:
assert(false);
for (; i < bias_size; i++)
{
array_ptr[i] =
- ActivationFunctionWithMinMax(array_ptr[i] + bias_data[i], clamp_min, clamp_max);
+ ActivationFunctionWithMinMax(array_ptr[i] + bias_data[i], clamp_min, clamp_max);
}
}
#else // not NEON
for (int i = 0; i < bias_size; i++)
{
array_data[array_offset + i] = ActivationFunctionWithMinMax(
- array_data[array_offset + i] + bias_data[i], clamp_min, clamp_max);
+ array_data[array_offset + i] + bias_data[i], clamp_min, clamp_max);
}
}
#endif
const Shape &output_shape, bool *output_data)
{
const int64_t flatsize = // number of elements
- MatchingFlatSize(input1_shape, input2_shape, output_shape);
+ MatchingFlatSize(input1_shape, input2_shape, output_shape);
for (int64_t i = 0; i < flatsize; ++i)
{
output_data[i] = F(input1_data[i], input2_data[i]);
const int32_t shifted_input1_val = input1_val * (1 << left_shift);
const int32_t shifted_input2_val = input2_val * (1 << left_shift);
const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- shifted_input1_val, input1_multiplier, input1_shift);
+ shifted_input1_val, input1_multiplier, input1_shift);
const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- shifted_input2_val, input2_multiplier, input2_shift);
+ shifted_input2_val, input2_multiplier, input2_shift);
output_data[i] = F(scaled_input1_val, scaled_input2_val);
}
}
for (int c = 0; c < output_shape.Dims(3); ++c)
{
output_data[Offset(output_shape, b, y, x, c)] =
- F(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
- input2_data[SubscriptToIndex(desc2, b, y, x, c)]);
+ F(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
+ input2_data[SubscriptToIndex(desc2, b, y, x, c)]);
}
}
}
for (int c = 0; c < output_shape.Dims(3); ++c)
{
const int32_t input1_val =
- input1_offset + input1_data[SubscriptToIndex(desc1, b, y, x, c)];
+ input1_offset + input1_data[SubscriptToIndex(desc1, b, y, x, c)];
const int32_t input2_val =
- input2_offset + input2_data[SubscriptToIndex(desc2, b, y, x, c)];
+ input2_offset + input2_data[SubscriptToIndex(desc2, b, y, x, c)];
const int32_t shifted_input1_val = input1_val * (1 << left_shift);
const int32_t shifted_input2_val = input2_val * (1 << left_shift);
const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- shifted_input1_val, input1_multiplier, input1_shift);
+ shifted_input1_val, input1_multiplier, input1_shift);
const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- shifted_input2_val, input2_multiplier, input2_shift);
+ shifted_input2_val, input2_multiplier, input2_shift);
output_data[Offset(output_shape, b, y, x, c)] = F(scaled_input1_val, scaled_input2_val);
}
}
}
}
-#define TFLITE_COMPARISON_OP(name) \
- template <typename T> \
- inline void name(const Shape &input1_shape, const T *input1_data, const Shape &input2_shape, \
- const T *input2_data, const Shape &output_shape, bool *output_data) \
- { \
- Comparison<name##Fn>(input1_shape, input1_data, input2_shape, input2_data, output_shape, \
- output_data); \
- } \
- template <typename T> \
- inline void name##NoScaling(const Shape &input1_shape, const T *input1_data, \
- const Shape &input2_shape, const T *input2_data, \
- const Shape &output_shape, bool *output_data) \
- { \
- ComparisonImpl<T, name##Fn>(input1_shape, input1_data, input2_shape, input2_data, \
- output_shape, output_data); \
- } \
- template <typename T> \
- inline void name##WithScaling(ComparisonParams ¶ms, const Shape &input1_shape, \
- const T *input1_data, const Shape &input2_shape, \
- const T *input2_data, const Shape &output_shape, \
- bool *output_data) \
- { \
- ComparisonWithScaling<T, name##Fn>(params, input1_shape, input1_data, input2_shape, \
- input2_data, output_shape, output_data); \
- } \
- template <typename T> \
- inline void Broadcast4DSlow##name##NoScaling(const Shape &input1_shape, const T *input1_data, \
- const Shape &input2_shape, const T *input2_data, \
- const Shape &output_shape, bool *output_data) \
- { \
- BroadcastComparison4DSlowImpl<T, name##Fn>(input1_shape, input1_data, input2_shape, \
- input2_data, output_shape, output_data); \
- } \
- template <typename T> \
- inline void Broadcast4DSlow##name(const Shape &input1_shape, const T *input1_data, \
- const Shape &input2_shape, const T *input2_data, \
- const Shape &output_shape, bool *output_data) \
- { \
- BroadcastComparison4DSlow<T, name##Fn>(input1_shape, input1_data, input2_shape, input2_data, \
- output_shape, output_data); \
- } \
- template <typename T> \
- inline void Broadcast4DSlow##name##WithScaling(ComparisonParams ¶ms, \
- const Shape &input1_shape, const T *input1_data, \
- const Shape &input2_shape, const T *input2_data, \
- const Shape &output_shape, bool *output_data) \
- { \
- BroadcastComparison4DSlowWithScaling<T, name##Fn>( \
- params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data); \
+#define TFLITE_COMPARISON_OP(name) \
+ template <typename T> \
+ inline void name(const Shape &input1_shape, const T *input1_data, const Shape &input2_shape, \
+ const T *input2_data, const Shape &output_shape, bool *output_data) \
+ { \
+ Comparison<name##Fn>(input1_shape, input1_data, input2_shape, input2_data, output_shape, \
+ output_data); \
+ } \
+ template <typename T> \
+ inline void name##NoScaling(const Shape &input1_shape, const T *input1_data, \
+ const Shape &input2_shape, const T *input2_data, \
+ const Shape &output_shape, bool *output_data) \
+ { \
+ ComparisonImpl<T, name##Fn>(input1_shape, input1_data, input2_shape, input2_data, \
+ output_shape, output_data); \
+ } \
+ template <typename T> \
+ inline void name##WithScaling( \
+ ComparisonParams ¶ms, const Shape &input1_shape, const T *input1_data, \
+ const Shape &input2_shape, const T *input2_data, const Shape &output_shape, bool *output_data) \
+ { \
+ ComparisonWithScaling<T, name##Fn>(params, input1_shape, input1_data, input2_shape, \
+ input2_data, output_shape, output_data); \
+ } \
+ template <typename T> \
+ inline void Broadcast4DSlow##name##NoScaling(const Shape &input1_shape, const T *input1_data, \
+ const Shape &input2_shape, const T *input2_data, \
+ const Shape &output_shape, bool *output_data) \
+ { \
+ BroadcastComparison4DSlowImpl<T, name##Fn>(input1_shape, input1_data, input2_shape, \
+ input2_data, output_shape, output_data); \
+ } \
+ template <typename T> \
+ inline void Broadcast4DSlow##name(const Shape &input1_shape, const T *input1_data, \
+ const Shape &input2_shape, const T *input2_data, \
+ const Shape &output_shape, bool *output_data) \
+ { \
+ BroadcastComparison4DSlow<T, name##Fn>(input1_shape, input1_data, input2_shape, input2_data, \
+ output_shape, output_data); \
+ } \
+ template <typename T> \
+ inline void Broadcast4DSlow##name##WithScaling( \
+ ComparisonParams ¶ms, const Shape &input1_shape, const T *input1_data, \
+ const Shape &input2_shape, const T *input2_data, const Shape &output_shape, bool *output_data) \
+ { \
+ BroadcastComparison4DSlowWithScaling<T, name##Fn>( \
+ params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data); \
}
TFLITE_COMPARISON_OP(Equal);
for (int j = 0; j < copy_size; ++j)
{
const int32_t value =
- static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
+ static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
}
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_DEPTH_TO_SPACE_H__
+#define __NNFW_CKER_DEPTH_TO_SPACE_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+template <typename T>
+inline void DepthToSpace(const Shape &unextended_input_shape, const T *input_data,
+ const Shape &unextended_output_shape, T *output_data, int32_t block_size)
+{
+ assert(unextended_input_shape.DimensionsCount() <= 4);
+ assert(unextended_output_shape.DimensionsCount() <= 4);
+ const Shape input_shape = Shape::ExtendedShape(4, unextended_input_shape);
+ const Shape output_shape = Shape::ExtendedShape(4, unextended_output_shape);
+
+ const int input_depth = input_shape.Dims(3);
+ const int input_width = input_shape.Dims(2);
+ const int input_height = input_shape.Dims(1);
+
+ const int output_depth = output_shape.Dims(3);
+ const int batch_size = output_shape.Dims(0);
+
+ // Number of contiguous values that we can copy in one iteration.
+ const int stride = block_size * output_depth;
+
+ for (int batch = 0; batch < batch_size; ++batch)
+ {
+ for (int in_h = 0; in_h < input_height; ++in_h)
+ {
+ const T *input_ptr = input_data + Offset(input_shape, batch, in_h, 0, 0);
+ for (int offset_h = 0; offset_h < block_size; ++offset_h)
+ {
+ const T *src = input_ptr;
+ for (int in_w = 0; in_w < input_width; ++in_w)
+ {
+ memcpy(output_data, src, stride * sizeof(T));
+ output_data += stride;
+ src += input_depth;
+ }
+ input_ptr += stride;
+ }
+ }
+ }
+}
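+// For reference, the copy loop above realizes the standard NHWC depth-to-space
+// mapping (bs = block_size):
+//   output(b, h * bs + bh, w * bs + bw, d) = input(b, h, w, d + output_depth * (bw + bs * bh))
+// and copies block_size * output_depth contiguous values per memcpy.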
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_DEPTH_TO_SPACE_H__
#include "cker/Types.h"
#include "cker/Utils.h"
#include "cker/neon/neon_check.h"
+#include "cker/operation/optimized/DepthwiseConvFloat.h"
#include "cker/operation/optimized/DepthwiseConvUint8.h"
+#include "cker/CpuBackendThreadpool.h"
namespace nnfw
{
namespace cker
{
-inline void DepthwiseConv(const DepthwiseConvParams ¶ms, const Shape &input_shape,
- const uint8_t *input_data, const Shape &filter_shape,
- const uint8_t *filter_data, const Shape &bias_shape,
- const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data)
+// TODO(luwa): add multithread to per-channel depthwise_conv
+// DepthwiseConv can run with multiple threads along the dimension specified by
+// thread_dim. Each thread processes output elements along that dimension in the
+// range [thread_start, thread_end).
+// For example, with thread_start = 2, thread_end = 6, and thread_dim = 1, a
+// thread computes DepthwiseConv for output rows 2 through 5, i.e.
+// output_data[:, 2:6, :, :].
+template <typename T, typename TS> struct DepthwiseConvWorkerTask : cpu_backend_threadpool::Task
{
- const int depth_multiplier = params.depth_multiplier;
- const int32_t output_activation_min = params.quantized_activation_min;
- const int32_t output_activation_max = params.quantized_activation_max;
- const int dilation_width_factor = params.dilation_width_factor;
- const int dilation_height_factor = params.dilation_height_factor;
- assert(dilation_width_factor >= 1);
- assert(dilation_height_factor >= 1);
- UNUSED_RELEASE(dilation_width_factor);
- UNUSED_RELEASE(dilation_height_factor);
- assert(input_shape.DimensionsCount() == 4);
- assert(filter_shape.DimensionsCount() == 4);
- assert(output_shape.DimensionsCount() == 4);
- assert(output_activation_min <= output_activation_max);
- UNUSED_RELEASE(output_activation_min);
- UNUSED_RELEASE(output_activation_max);
- const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
- const int input_depth = input_shape.Dims(3);
- assert(output_depth == input_depth * depth_multiplier);
- assert(bias_shape.FlatSize() == output_depth);
- UNUSED_RELEASE(input_depth);
- UNUSED_RELEASE(output_depth);
- UNUSED_RELEASE(depth_multiplier);
-
-// Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on
-// Jetson TX-2. This compiler does not support the offsetof() macro.
-#if defined(__aarch64__)
-// TODO Use below codes
-
-// const int stride_width = params.stride_width;
-// const int stride_height = params.stride_height;
-// const int pad_width = params.padding_values.width;
-// const int pad_height = params.padding_values.height;
-// const int output_shift = params.output_shift;
-//
-// // Call kernel optimized for depthwise convolutions using 3x3 filters if
-// // parameters are supported.
-// if (Fast3x3FilterKernelSupported(
-// input_shape, filter_shape, stride_width, stride_height,
-// dilation_width_factor, dilation_height_factor, pad_width, pad_height,
-// depth_multiplier, output_shape, output_shift)) {
-// DepthwiseConv3x3Filter(params, input_shape, input_data, filter_shape,
-// filter_data, bias_shape, bias_data, output_shape,
-// output_data);
-// return;
-// }
-#endif
-
- optimized::DepthwiseConvGeneral(params, input_shape, input_data, filter_shape, filter_data,
- bias_shape, bias_data, output_shape, output_data);
+ DepthwiseConvWorkerTask(const DepthwiseConvParams ¶ms, const Shape &input_shape,
+ const T *input_data, const Shape &filter_shape, const T *filter_data,
+ const Shape &bias_shape, const TS *bias_data, const Shape &output_shape,
+ T *output_data, int thread_start, int thread_end, int thread_dim)
+ : params_(params), input_shape_(input_shape), input_data_(input_data),
+ filter_shape_(filter_shape), filter_data_(filter_data), bias_shape_(bias_shape),
+ bias_data_(bias_data), output_shape_(output_shape), output_data_(output_data),
+ thread_start_(thread_start), thread_end_(thread_end), thread_dim_(thread_dim)
+ {
+ }
+
+ void Run() override
+ {
+ optimized::DepthwiseConvImpl(params_, input_shape_, input_data_, filter_shape_, filter_data_,
+ bias_shape_, bias_data_, output_shape_, output_data_,
+ thread_start_, thread_end_, thread_dim_);
+ }
+
+private:
+ const DepthwiseConvParams ¶ms_;
+ const Shape &input_shape_;
+ const T *input_data_;
+ const Shape &filter_shape_;
+ const T *filter_data_;
+ const Shape &bias_shape_;
+ const TS *bias_data_;
+ const Shape &output_shape_;
+ T *output_data_;
+ // const CpuFlags& cpu_flags_;
+ int thread_start_;
+ int thread_end_;
+ int thread_dim_;
+};
+
+inline int HowManyConvThreads(const Shape &output_shape, const Shape &filter_shape)
+{
+ // How many scalar multiplications are needed to make it worth using one
+ // more thread
+ static constexpr int kMinMulPerThread = 1 << 13; // 8k
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int num_muls = output_shape.FlatSize() * filter_height * filter_width;
+ // Try to avoid real runtime divisions if possible by dividing by a
+ // compile-time constant.
+ int thread_count = std::max(1, num_muls / kMinMulPerThread);
+ return thread_count;
+}
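+// Rough sizing sketch: for a 1x32x32x16 output and a 3x3 filter,
+// num_muls = 32 * 32 * 16 * 9 = 147456, so this suggests 147456 / 8192 = 18
+// threads before the caller clamps it to the context's max_num_threads
+// (and to 2 on the float path).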
+
+inline bool MultithreadAlongBatches(int thread_count, int batches)
+{
+ assert(thread_count >= 2);
+ // If there are fewer batch entries than the number of threads we want to use,
+ // it is better to do intra-batch-entry multithreading.
+ if (batches < thread_count)
+ {
+ return false;
+ }
+ // If there are at least 2 batch entries to be handed to each thread, then
+ // it's safe to proceed with batch-wise multithreading: each thread will have
+ // an approximately equal number of batch entries to handle, so the load
+ // balancing will be reasonable, and the amount to which the load is not
+ // perfectly balanced will be offset by the inherent advantages of
+ // batch-wise multithreading (each thread is more efficient thanks to working
+ // on larger buffers with less boundary-handling overhead).
+ if (batches >= 2 * thread_count)
+ {
+ return true;
+ }
+ // In the limit case where there is at least one but not much more than one
+ // batch entry per thread, it may be a good idea to do per-batch
+ // multithreading if the number of batch entries is a multiple of the number
+ // of threads, so that each thread will have the same number of batch entries
+ // to process.
+ return ((batches % thread_count) == 0);
}
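+// For illustration, with thread_count = 4: batches = 3 -> false (fewer batches
+// than threads), batches = 8 -> true (at least 2 per thread), batches = 6 ->
+// false (6 % 4 != 0), and batches = 4 -> true (exactly one batch entry per thread).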
+template <typename T, typename TS>
inline void DepthwiseConv(const DepthwiseConvParams ¶ms, const Shape &input_shape,
- const float *input_data, const Shape &filter_shape,
- const float *filter_data, const Shape &bias_shape, const float *bias_data,
- const Shape &output_shape, float *output_data)
+ const T *input_data, const Shape &filter_shape, const T *filter_data,
+ const Shape &bias_shape, const TS *bias_data, const Shape &output_shape,
+ T *output_data, ruy::Context *ruy_context)
{
- const int stride_width = params.stride_width;
- const int stride_height = params.stride_height;
- const int dilation_width_factor = params.dilation_width_factor;
- const int dilation_height_factor = params.dilation_height_factor;
- const int pad_width = params.padding_values.width;
- const int pad_height = params.padding_values.height;
- const int depth_multiplier = params.depth_multiplier;
- const float output_activation_min = params.float_activation_min;
- const float output_activation_max = params.float_activation_max;
assert(input_shape.DimensionsCount() == 4);
assert(filter_shape.DimensionsCount() == 4);
assert(output_shape.DimensionsCount() == 4);
- const int batches = MatchingDim(input_shape, 0, output_shape, 0);
- const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
- const int input_height = input_shape.Dims(1);
- const int input_width = input_shape.Dims(2);
- const int input_depth = input_shape.Dims(3);
- const int filter_height = filter_shape.Dims(1);
- const int filter_width = filter_shape.Dims(2);
+ int thread_count = HowManyConvThreads(output_shape, filter_shape);
+
+ // NOTE Borrow RuyContext to get max_num_threads setting
+ // TODO Define and use max_num_threads for CPU backend
+ const auto max_threads = (ruy_context == nullptr) ? 1 : ruy_context->max_num_threads();
+
+ thread_count = std::max(1, std::min(thread_count, max_threads));
+ // Cap the number of threads to 2 for the float path to avoid a regression in
+ // performance (b/132294857).
+ if (std::is_floating_point<T>::value)
+ {
+ thread_count = std::min(thread_count, 2);
+ }
+
+ const int output_batches = output_shape.Dims(0);
const int output_height = output_shape.Dims(1);
- const int output_width = output_shape.Dims(2);
- assert(output_depth == input_depth * depth_multiplier);
- assert(bias_shape.FlatSize() == output_depth);
- UNUSED_RELEASE(output_depth);
- UNUSED_RELEASE(bias_shape);
- for (int b = 0; b < batches; ++b)
+ if (thread_count == 1)
+ {
+ optimized::DepthwiseConvImpl(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, 0, output_height,
+ 1);
+ return;
+ }
+
+ int thread_dim, thread_dim_size;
+ if (MultithreadAlongBatches(thread_count, output_batches))
+ {
+ thread_dim = 0;
+ thread_dim_size = output_batches;
+ }
+ else
+ {
+ thread_dim = 1;
+ thread_dim_size = output_height;
+ }
+
+ std::vector<DepthwiseConvWorkerTask<T, TS>> tasks;
+ // TODO(b/131746020) don't create new heap allocations every time.
+ // At least we make it a single heap allocation by using reserve().
+ tasks.reserve(thread_count);
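+ // The loop below splits the dimension as evenly as possible: each task gets
+ // thread_end = thread_start + remaining / threads_left. For example,
+ // thread_dim_size = 10 with thread_count = 3 yields [0,3), [3,6), [6,10).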
+ int thread_start = 0;
+ for (int i = 0; i < thread_count; ++i)
{
- for (int out_y = 0; out_y < output_height; ++out_y)
- {
- for (int out_x = 0; out_x < output_width; ++out_x)
- {
- for (int ic = 0; ic < input_depth; ++ic)
- {
- for (int m = 0; m < depth_multiplier; m++)
- {
- const int oc = m + ic * depth_multiplier;
- const int in_x_origin = (out_x * stride_width) - pad_width;
- const int in_y_origin = (out_y * stride_height) - pad_height;
- float total = 0.f;
- for (int filter_y = 0; filter_y < filter_height; ++filter_y)
- {
- for (int filter_x = 0; filter_x < filter_width; ++filter_x)
- {
- const int in_x = in_x_origin + dilation_width_factor * filter_x;
- const int in_y = in_y_origin + dilation_height_factor * filter_y;
- // If the location is outside the bounds of the input image,
- // use zero as a default value.
- if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
- {
- float input_value = input_data[Offset(input_shape, b, in_y, in_x, ic)];
- float filter_value = filter_data[Offset(filter_shape, 0, filter_y, filter_x, oc)];
- total += (input_value * filter_value);
- }
- }
- }
- float bias_value = 0.0f;
- if (bias_data)
- {
- bias_value = bias_data[oc];
- }
- output_data[Offset(output_shape, b, out_y, out_x, oc)] = ActivationFunctionWithMinMax(
- total + bias_value, output_activation_min, output_activation_max);
- }
- }
- }
- }
+ int thread_end = thread_start + (thread_dim_size - thread_start) / (thread_count - i);
+ tasks.emplace_back(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
+ bias_data, output_shape, output_data, thread_start, thread_end, thread_dim);
+ thread_start = thread_end;
}
+ cpu_backend_threadpool::Execute(tasks.size(), tasks.data(), ruy_context);
}
} // namespace cker
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_ELU_H__
+#define __NNFW_CKER_ELU_H__
+
+#include "cker/Shape.h"
+
+#include <cmath>
+
+namespace nnfw
+{
+namespace cker
+{
+
+inline void ELU(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+ float *output_data)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const float val = input_data[i];
+ output_data[i] = val < 0.0 ? std::exp(val) - 1 : val;
+ }
+}
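+// Note: this is ELU with alpha = 1, i.e. f(x) = x for x >= 0 and
+// f(x) = exp(x) - 1 for x < 0.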
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_ELU_H__
for (int label = 0; label < num_labels; ++label)
{
bool removed = (_output_label_counts[label] == 0);
- bool unique = num_inputs == 1 || _input_label_counts[0][label] == 0 ||
- _input_label_counts[1][label] == 0;
+ bool unique =
+ num_inputs == 1 || _input_label_counts[0][label] == 0 || _input_label_counts[1][label] == 0;
_label_types[label] = getDimensionType(removed, unique);
}
}
if (inputs[i].shape.DimensionsCount() + 1 < (int32_t)labels->size())
{
throw std::runtime_error{"Expected input " + std::to_string(i) + " to have rank at least " +
- std::to_string(labels->size() - 1) + " but got: " +
- std::to_string(inputs[i].shape.DimensionsCount())};
+ std::to_string(labels->size() - 1) +
+ " but got: " + std::to_string(inputs[i].shape.DimensionsCount())};
}
int ellipsis_axis = -1;
const int num_bcast_dims = inputs[i].shape.DimensionsCount() - labels->size() + 1;
}
std::vector<bool>::iterator it_input =
- std::find(_input_has_ellipsis.begin(), _input_has_ellipsis.end(), true);
+ std::find(_input_has_ellipsis.begin(), _input_has_ellipsis.end(), true);
if (it_input == _input_has_ellipsis.end() && !_output_has_ellipsis)
{
return;
// Reduce along the last axis (i.e axis 1) of the rank-2 Tensor.
const int32_t output_size =
- reshape[kBroadcasting] * reshape[kBatch] * reshape[kFree] * reshape[kContract];
+ reshape[kBroadcasting] * reshape[kBatch] * reshape[kFree] * reshape[kContract];
functor::ReduceFunctor<Eigen::ThreadPoolDevice, Reducer>::Reduce(
- device, output->shaped<T, 1>({output_size}),
- input_deduped.shaped<T, 2>({output_size, reshape[kReduce]}), Eigen::array<Index, 1>({1}),
- Reducer());
+ device, output->shaped<T, 1>({output_size}),
+ input_deduped.shaped<T, 2>({output_size, reshape[kReduce]}), Eigen::array<Index, 1>({1}),
+ Reducer());
}
bool shouldSwapFreeAndContract(const Labels &labels,
{
const int32_t count = label_counts[label];
const int current_axis =
- should_inflate ? strided_shape_dims.size() : inflated_shape_dims.size();
+ should_inflate ? strided_shape_dims.size() : inflated_shape_dims.size();
const int32_t dim = input.shape.Dims(current_axis);
strided_shape_dims.push_back(dim);
inflated_shape_dims.insert(inflated_shape_dims.end(), count, dim);
for (size_t i = 0; i < inputs.size(); ++i)
{
const int32_t free_axis =
- inputs[i].shape.DimensionsCount() - (swap_free_and_contract[i] ? 1 : 2);
+ inputs[i].shape.DimensionsCount() - (swap_free_and_contract[i] ? 1 : 2);
output_shape.SetDim(i + old_output_shape.DimensionsCount(), inputs[i].shape.Dims(free_axis));
}
bool adj_x = swap_free_and_contract[0];
}
}
+inline void Sqrt(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+ float *output_data)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ output_data[i] = std::sqrt(input_data[i]);
+ }
+}
+
+inline void Square(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+ float *output_data)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ output_data[i] = input_data[i] * input_data[i];
+ }
+}
+
} // namespace cker
} // namespace nnfw
{
namespace cker
{
-template <typename T>
-inline void Fill(const Shape &input_shape, int *input_data, const T value_data,
- const Shape &output_shape, T output_data)
+template <typename T> inline void Fill(const T value_data, const Shape &output_shape, T output_data)
{
- int input_size = input_shape.FlatSize();
- int output_size = 1;
- for (int i = 0; i < input_size; i++)
+ int output_size = output_shape.FlatSize();
+ for (int i = 0; i < output_size; i++)
{
- output_size *= input_data[i];
- }
-
- if (output_size == output_shape.FlatSize())
- {
- for (int i = 0; i < output_size; i++)
- {
- output_data[i] = *value_data;
- }
- }
- else
- {
- throw std::runtime_error("Cker Fill.h: output's size is not matched inferred size of output");
+ output_data[i] = *value_data;
}
}
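// Usage sketch: with T = float *, value_data pointing at a single float 3.0f and
// output_shape {2, 2}, the loop writes 3.0f to all four output elements.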
const int filter_dim_count = filter_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
const int output_depth =
- MatchingDim(filter_shape, filter_dim_count - 2, output_shape, output_dim_count - 1);
+ MatchingDim(filter_shape, filter_dim_count - 2, output_shape, output_dim_count - 1);
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
for (int b = 0; b < batches; ++b)
{
const int weights_dims_count = weights_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
const int output_depth =
- MatchingDim(weights_shape, weights_dims_count - 2, output_shape, output_dims_count - 1);
+ MatchingDim(weights_shape, weights_dims_count - 2, output_shape, output_dims_count - 1);
const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
UNUSED_RELEASE(bias_shape);
{
int idx_1 = w1_indices[pw1];
output_data[b * output_depth + idx_0] +=
- weights_data[pw1] * input_data[b * accum_depth + idx_1];
+ weights_data[pw1] * input_data[b * accum_depth + idx_1];
}
}
}
const int weights_dims_count = weights_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
const int output_depth =
- MatchingDim(weights_shape, weights_dims_count - 2, output_shape, output_dims_count - 1);
+ MatchingDim(weights_shape, weights_dims_count - 2, output_shape, output_dims_count - 1);
const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
UNUSED_RELEASE(bias_shape);
float rest_size_inv = static_cast<float>(1.0f / static_cast<float>(rest_size));
// This adjustment is for Bessel's correction
float rest_size_adjust =
- static_cast<float>(rest_size) / static_cast<float>(rest_size_minus_one);
+ static_cast<float>(rest_size) / static_cast<float>(rest_size_minus_one);
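+ // (Bessel's correction: multiplying the biased variance by N / (N - 1), with
+ // N = rest_size, yields the unbiased sample variance estimate.)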
Eigen::Tensor<float, 1, Eigen::RowMajor> batch_mean(depth);
Eigen::Tensor<float, 1, Eigen::RowMajor> batch_variance(depth);
batch_variance.device(d) = x_centered.square().sum(reduce_dims) * rest_size_inv;
auto scaling_factor = ((batch_variance + param.epsilon).rsqrt() * scale)
- .eval()
- .reshape(one_by_depth)
- .broadcast(bcast_spec);
+ .eval()
+ .reshape(one_by_depth)
+ .broadcast(bcast_spec);
auto x_scaled = x_centered * scaling_factor;
auto x_shifted =
- (x_scaled + offset.reshape(one_by_depth).broadcast(bcast_spec)).template cast<float>();
+ (x_scaled + offset.reshape(one_by_depth).broadcast(bcast_spec)).template cast<float>();
UNUSED_RELEASE(rest_size_adjust);
* ToDo : This file will be moved into the upper folder when integrated with other
* custom operations.
* It should also be merged with EinsumHelper's BCast.
-**/
+ **/
#include "cker/Shape.h"
#include "cker/eigen/EigenSupport.h"
BCast(const Vec &x, const Vec &y, const bool fewer_dims_optimization = true,
const bool return_flattened_batch_indices = false)
- : BCastList<2>({x, y}, fewer_dims_optimization, return_flattened_batch_indices)
+ : BCastList<2>({x, y}, fewer_dims_optimization, return_flattened_batch_indices)
{
}
// Must have lo < hi
UniformDistribution(int32_t lo, int32_t hi)
- : lo_(lo), range_(static_cast<uint32_t>(hi) - static_cast<uint32_t>(lo))
+ : lo_(lo), range_(static_cast<uint32_t>(hi) - static_cast<uint32_t>(lo))
{
}
// Must have lo < hi
UniformDistribution(int64_t lo, int64_t hi)
- : lo_(lo), range_(static_cast<uint64_t>(hi) - static_cast<uint64_t>(lo))
+ : lo_(lo), range_(static_cast<uint64_t>(hi) - static_cast<uint64_t>(lo))
{
}
template <typename Generator>
class UniformFullIntDistribution<Generator, int32_t>
- : public UniformFullIntDistribution32<Generator, int32_t>
+ : public UniformFullIntDistribution32<Generator, int32_t>
{
};
template <typename Generator>
class UniformFullIntDistribution<Generator, uint32_t>
- : public UniformFullIntDistribution32<Generator, uint32_t>
+ : public UniformFullIntDistribution32<Generator, uint32_t>
{
};
template <typename Generator>
class UniformFullIntDistribution<Generator, int64_t>
- : public UniformFullIntDistribution64<Generator, int64_t>
+ : public UniformFullIntDistribution64<Generator, int64_t>
{
};
template <typename Generator>
class UniformFullIntDistribution<Generator, uint64_t>
- : public UniformFullIntDistribution64<Generator, uint64_t>
+ : public UniformFullIntDistribution64<Generator, uint64_t>
{
};
PHILOX_DEVICE_INLINE
explicit SingleSampleAdapter(Generator *gen)
- : generator_(gen), used_result_index_(Generator::kResultElementCount)
+ : generator_(gen), used_result_index_(Generator::kResultElementCount)
{
}
public:
// The number of elements that will be returned.
static constexpr int kResultElementCount = (SingleSampleGenerator::kNativeElementCount > 1)
- ? SingleSampleGenerator::kNativeElementCount / 2
- : 1;
+ ? SingleSampleGenerator::kNativeElementCount / 2
+ : 1;
// Cost of generation of a single element (in cycles).
static constexpr int kElementCost = 90;
// Indicate that this distribution may take a variable number of samples
{
const int kGroupSize = Distribution::kResultElementCount;
static const int kGeneratorSkipPerOutputGroup =
- kGroupSize * kReservedSamplesPerOutput / PhiloxRandom::kResultElementCount;
+ kGroupSize * kReservedSamplesPerOutput / PhiloxRandom::kResultElementCount;
int64_t offset = 0;
{
// Rank-<NDIMS> tensor of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, NDIMS, Eigen::RowMajor, IndexType>, Eigen::Aligned>
- Tensor;
+ Tensor;
typedef Eigen::TensorMap<Eigen::Tensor<const T, NDIMS, Eigen::RowMajor, IndexType>,
Eigen::Aligned>
- ConstTensor;
+ ConstTensor;
// Unaligned Rank-<NDIMS> tensor of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, NDIMS, Eigen::RowMajor, IndexType>> UnalignedTensor;
typedef Eigen::TensorMap<Eigen::Tensor<const T, NDIMS, Eigen::RowMajor, IndexType>>
- UnalignedConstTensor;
+ UnalignedConstTensor;
typedef Eigen::TensorMap<Eigen::Tensor<T, NDIMS, Eigen::RowMajor, int>, Eigen::Aligned>
- Tensor32Bit;
+ Tensor32Bit;
// Scalar tensor (implemented as a rank-0 tensor) of scalar type T.
typedef Eigen::TensorMap<Eigen::TensorFixedSize<T, Eigen::Sizes<>, Eigen::RowMajor, IndexType>,
Eigen::Aligned>
- Scalar;
+ Scalar;
typedef Eigen::TensorMap<
- Eigen::TensorFixedSize<const T, Eigen::Sizes<>, Eigen::RowMajor, IndexType>, Eigen::Aligned>
- ConstScalar;
+ Eigen::TensorFixedSize<const T, Eigen::Sizes<>, Eigen::RowMajor, IndexType>, Eigen::Aligned>
+ ConstScalar;
// Unaligned Scalar tensor of scalar type T.
typedef Eigen::TensorMap<Eigen::TensorFixedSize<T, Eigen::Sizes<>, Eigen::RowMajor, IndexType>>
- UnalignedScalar;
+ UnalignedScalar;
typedef Eigen::TensorMap<
- Eigen::TensorFixedSize<const T, Eigen::Sizes<>, Eigen::RowMajor, IndexType>>
- UnalignedConstScalar;
+ Eigen::TensorFixedSize<const T, Eigen::Sizes<>, Eigen::RowMajor, IndexType>>
+ UnalignedConstScalar;
// Rank-1 tensor (vector) of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned> Flat;
typedef Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned>
- ConstFlat;
+ ConstFlat;
typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned> Vec;
typedef Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned>
- ConstVec;
+ ConstVec;
// Unaligned Rank-1 tensor (vector) of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType>> UnalignedFlat;
typedef Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType>>
- UnalignedConstFlat;
+ UnalignedConstFlat;
typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType>> UnalignedVec;
typedef Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType>> UnalignedConstVec;
// Rank-2 tensor (matrix) of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::RowMajor, IndexType>, Eigen::Aligned> Matrix;
typedef Eigen::TensorMap<Eigen::Tensor<const T, 2, Eigen::RowMajor, IndexType>, Eigen::Aligned>
- ConstMatrix;
+ ConstMatrix;
// Unaligned Rank-2 tensor (matrix) of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::RowMajor, IndexType>> UnalignedMatrix;
typedef Eigen::TensorMap<Eigen::Tensor<const T, 2, Eigen::RowMajor, IndexType>>
- UnalignedConstMatrix;
+ UnalignedConstMatrix;
};
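The TTypes aliases above only wrap Eigen::TensorMap over a raw buffer; a minimal plain-Eigen sketch (not using the aliases themselves) of what such a map does:

#include <unsupported/Eigen/CXX11/Tensor>

int main()
{
  float buf[6] = {1, 2, 3, 4, 5, 6};
  // Map the buffer as a 2x3 row-major rank-2 tensor, as Matrix/ConstMatrix do.
  Eigen::TensorMap<Eigen::Tensor<float, 2, Eigen::RowMajor>> mat(buf, 2, 3);
  float sum = 0.f;
  for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 3; ++j)
      sum += mat(i, j); // element access reads straight from buf
  return sum == 21.f ? 0 : 1;
}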
typedef typename TTypes<float, 1>::Tensor32Bit::Index Index32;
double input_value = input_data[Offset(output_shape, batch, height, width, channel)];
double output_value = input_value * a + b;
output_data[Offset(output_shape, batch, height, width, channel)] =
- ActivationFunctionWithMinMax((float)output_value, output_activation_min,
- output_activation_max);
+ ActivationFunctionWithMinMax((float)output_value, output_activation_min,
+ output_activation_max);
}
}
}
{
int32_t diff = *input_data - input_zero_point;
int32_t rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
+ 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
int32_t unclamped_output_val = 128 + rescaled_diff;
int32_t output_val = std::min(static_cast<int32_t>(255),
std::max(static_cast<int32_t>(0), unclamped_output_val));
// contiguous, and we manually loop over the batched outputs.
// LINT.IfChange
inline void LstmStepFloat(
- const float *input_ptr, const float *input_to_input_weights_ptr,
- const float *input_to_forget_weights_ptr, const float *input_to_cell_weights_ptr,
- const float *input_to_output_weights_ptr, const float *aux_input_ptr,
- const float *aux_input_to_input_weights_ptr, const float *aux_input_to_forget_weights_ptr,
- const float *aux_input_to_cell_weights_ptr, const float *aux_input_to_output_weights_ptr,
- const float *recurrent_to_input_weights_ptr, const float *recurrent_to_forget_weights_ptr,
- const float *recurrent_to_cell_weights_ptr, const float *recurrent_to_output_weights_ptr,
- const float *cell_to_input_weights_ptr, const float *cell_to_forget_weights_ptr,
- const float *cell_to_output_weights_ptr, const float *input_layer_norm_coefficients_ptr,
- const float *forget_layer_norm_coefficients_ptr, const float *cell_layer_norm_coefficients_ptr,
- const float *output_layer_norm_coefficients_ptr, const float *input_gate_bias_ptr,
- const float *forget_gate_bias_ptr, const float *cell_gate_bias_ptr,
- const float *output_gate_bias_ptr, const float *projection_weights_ptr,
- const float *projection_bias_ptr, const LSTMParams *params, int n_batch, int n_cell,
- int n_input, int n_aux_input, int n_output, int output_batch_leading_dim,
- float *output_state_ptr, float *cell_state_ptr, float *scratch0, float *scratch1,
- float *scratch2, float *scratch3, float *output_ptr)
+ const float *input_ptr, const float *input_to_input_weights_ptr,
+ const float *input_to_forget_weights_ptr, const float *input_to_cell_weights_ptr,
+ const float *input_to_output_weights_ptr, const float *aux_input_ptr,
+ const float *aux_input_to_input_weights_ptr, const float *aux_input_to_forget_weights_ptr,
+ const float *aux_input_to_cell_weights_ptr, const float *aux_input_to_output_weights_ptr,
+ const float *recurrent_to_input_weights_ptr, const float *recurrent_to_forget_weights_ptr,
+ const float *recurrent_to_cell_weights_ptr, const float *recurrent_to_output_weights_ptr,
+ const float *cell_to_input_weights_ptr, const float *cell_to_forget_weights_ptr,
+ const float *cell_to_output_weights_ptr, const float *input_layer_norm_coefficients_ptr,
+ const float *forget_layer_norm_coefficients_ptr, const float *cell_layer_norm_coefficients_ptr,
+ const float *output_layer_norm_coefficients_ptr, const float *input_gate_bias_ptr,
+ const float *forget_gate_bias_ptr, const float *cell_gate_bias_ptr,
+ const float *output_gate_bias_ptr, const float *projection_weights_ptr,
+ const float *projection_bias_ptr, const LSTMParams *params, int n_batch, int n_cell, int n_input,
+ int n_aux_input, int n_output, int output_batch_leading_dim, float *output_state_ptr,
+ float *cell_state_ptr, float *scratch0, float *scratch1, float *scratch2, float *scratch3,
+ float *output_ptr)
{
// Since we have already checked that weights are all there or none, we can
// check the existence of only one to get the condition.
// Check if inputs are all zeros so we can skip some computations.
const bool is_input_all_zeros = IsZeroVector(input_ptr, n_batch * n_input);
const bool is_aux_input_all_zeros =
- (aux_input_ptr == nullptr || IsZeroVector(aux_input_ptr, n_batch * n_aux_input));
+ (aux_input_ptr == nullptr || IsZeroVector(aux_input_ptr, n_batch * n_aux_input));
if (!use_cifg)
{
// Calculate the input gate. (If not CIFG.)
forget_gate_scratch, is_input_all_zeros, is_aux_input_all_zeros);
// Calculate the cell update gate.
CalculateLstmGateFloat(
- input_ptr, input_to_cell_weights_ptr, aux_input_ptr, aux_input_to_cell_weights_ptr,
- output_state_ptr, recurrent_to_cell_weights_ptr, /*cell_state=*/nullptr,
- /*cell_to_gate_weights=*/nullptr, cell_layer_norm_coefficients_ptr, cell_gate_bias_ptr,
- n_batch, n_input, n_aux_input, n_output, n_cell, params->activation, cell_gate_scratch,
- is_input_all_zeros, is_aux_input_all_zeros);
+ input_ptr, input_to_cell_weights_ptr, aux_input_ptr, aux_input_to_cell_weights_ptr,
+ output_state_ptr, recurrent_to_cell_weights_ptr, /*cell_state=*/nullptr,
+ /*cell_to_gate_weights=*/nullptr, cell_layer_norm_coefficients_ptr, cell_gate_bias_ptr, n_batch,
+ n_input, n_aux_input, n_output, n_cell, params->activation, cell_gate_scratch,
+ is_input_all_zeros, is_aux_input_all_zeros);
// Update the cell state.
UpdateLstmCellFloat(n_batch, n_cell, cell_state_ptr, input_gate_scratch, forget_gate_scratch,
cell_gate_scratch, use_cifg, params->cell_clip);
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_LEAKY_RELU_H__
+#define __NNFW_CKER_LEAKY_RELU_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+
+#include <cmath>
+
+namespace nnfw
+{
+namespace cker
+{
+
+inline void LeakyReLU(const LeakyReluParams &params, const Shape &input_shape,
+ const float *input_data, const Shape &output_shape, float *output_data)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ const float val = input_data[i];
+ // Note that alpha might be > 1 or < 0, so we don't use std::max here.
+ output_data[i] = val > 0 ? val : val * params.alpha;
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_LEAKY_RELU_H__
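A minimal usage sketch for the new LeakyReLU kernel. The include path and the initializer-list Shape constructor are assumptions; the alpha field is the one the loop above reads.

#include "cker/operation/LeakyReLU.h" // assumed header location

#include <vector>

int main()
{
  nnfw::cker::LeakyReluParams params;
  params.alpha = 0.2f; // negative slope; may be > 1 or < 0 as noted above
  nnfw::cker::Shape shape{1, 1, 1, 4}; // assumed initializer-list constructor
  std::vector<float> input{-2.f, -0.5f, 0.f, 3.f};
  std::vector<float> output(input.size());
  nnfw::cker::LeakyReLU(params, shape, input.data(), shape, output.data());
  // Negative inputs are scaled by alpha: output is {-0.4, -0.1, 0, 3}.
  return 0;
}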
for (int c = 0; c < depth; ++c)
{
output_data[(i * depth + c) * inner_size + j] =
- (input_data[(i * depth + c) * inner_size + j] - max) * beta - log_sum;
+ (input_data[(i * depth + c) * inner_size + j] - max) * beta - log_sum;
}
}
}
for (int c = 0; c < depth; ++c)
{
const float log_prob =
- scale * input_data[(i * depth + c) * inner_size] * beta - precomputed;
+ scale * input_data[(i * depth + c) * inner_size] * beta - precomputed;
const int32_t prob_quantized = std::rint(log_prob) + params.zero_point;
output_data[(i * depth + c) * inner_size] =
- static_cast<uint8_t>(std::max(std::min(clamp_max, prob_quantized), clamp_min));
+ static_cast<uint8_t>(std::max(std::min(clamp_max, prob_quantized), clamp_min));
}
}
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_LOGICAL_AND_H__
+#define __NNFW_CKER_LOGICAL_AND_H__
+
+#include "cker/Shape.h"
+#include "cker/Utils.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+template <typename T>
+inline void LogicalAndBroadcast(const Shape &unextended_input1_shape, const T *input1_data,
+ const Shape &unextended_input2_shape, const T *input2_data,
+ const Shape &unextended_output_shape, T *output_data)
+{
+ assert(unextended_input1_shape.DimensionsCount() <= 4);
+ assert(unextended_input2_shape.DimensionsCount() <= 4);
+ assert(unextended_output_shape.DimensionsCount() <= 4);
+ const Shape output_shape = Shape::ExtendedShape(4, unextended_output_shape);
+
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1,
+ &desc2);
+
+ for (int b = 0; b < output_shape.Dims(0); ++b)
+ {
+ for (int y = 0; y < output_shape.Dims(1); ++y)
+ {
+ for (int x = 0; x < output_shape.Dims(2); ++x)
+ {
+ for (int c = 0; c < output_shape.Dims(3); ++c)
+ {
+ auto out_idx = Offset(output_shape, b, y, x, c);
+ auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
+ auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
+ auto in1_val = input1_data[in1_idx];
+ auto in2_val = input2_data[in2_idx];
+ output_data[out_idx] = in1_val && in2_val;
+ }
+ }
+ }
+ }
+}
+
+template <typename T>
+inline void LogicalAndElementwise(const Shape &shape, const T *input1_data, const T *input2_data,
+ T *output_data)
+{
+
+ int num_elements = shape.FlatSize();
+
+ for (int t = 0; t < num_elements; t++)
+ {
+ output_data[t] = input1_data[t] && input2_data[t];
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_LOGICAL_AND_H__
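A small sketch of the elementwise variant; the header location and the Shape constructor are assumptions, the function itself is the one added above.

#include "cker/operation/LogicalAnd.h" // assumed header location

#include <cassert>

int main()
{
  nnfw::cker::Shape shape{1, 1, 1, 4}; // assumed initializer-list constructor
  bool in1[4] = {true, true, false, false};
  bool in2[4] = {true, false, true, false};
  bool out[4] = {};
  nnfw::cker::LogicalAndElementwise(shape, in1, in2, out);
  assert(out[0] && !out[1] && !out[2] && !out[3]);
  return 0;
}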
if (!(num_lower_diags <= row_num))
throw std::runtime_error(
- "MatrixBandPart : num_lower must be negative or less or equal to number of rows");
+ "MatrixBandPart : num_lower must be negative or less or equal to number of rows");
if (!(num_upper_diags <= col_num))
throw std::runtime_error(
- "MatrixBandPart : num_upper must be negative or less or equal to number of columns");
+ "MatrixBandPart : num_upper must be negative or less or equal to number of columns");
std::fill(output_data, output_data + output_shape.FlatSize(), 0); // output matrix init
auto input = input_data + (batch * row_num * col_num + row * col_num);
const T band_start =
- num_lower_diags < 0 ? 0 : std::min(col_num, std::max(T{0}, row - num_lower_diags));
- const T band_end = num_upper_diags < 0 ? col_num : std::min(static_cast<T>(col_num),
- row + num_upper_diags + 1);
+ num_lower_diags < 0 ? 0 : std::min(col_num, std::max(T{0}, row - num_lower_diags));
+ const T band_end = num_upper_diags < 0
+ ? col_num
+ : std::min(static_cast<T>(col_num), row + num_upper_diags + 1);
for (T band_idx = band_start; band_idx < band_end; band_idx++)
{
int hpad = h + params.padding_values.height;
int wpad = w + params.padding_values.width;
int h_start =
- (hpad < params.filter_height) ? 0 : (hpad - params.filter_height) / stride_height + 1;
+ (hpad < params.filter_height) ? 0 : (hpad - params.filter_height) / stride_height + 1;
int h_end = std::min(hpad / stride_height + 1, output_height);
int w_start =
- (wpad < params.filter_width) ? 0 : (wpad - params.filter_width) / stride_width + 1;
+ (wpad < params.filter_width) ? 0 : (wpad - params.filter_width) / stride_width + 1;
int w_end = std::min(wpad / stride_width + 1, output_width);
// compute elementwise sum
for (int ph = h_start; ph < h_end; ++ph)
{
int out_offset = NodeOffset(b, ph, pw, output_height, output_width);
out_mat.col(out_offset) =
- out_mat.col(out_offset)
- .cwiseMax(in_mat.col(NodeOffset(b, h, w, input_height, input_width)));
+ out_mat.col(out_offset)
+ .cwiseMax(in_mat.col(NodeOffset(b, h, w, input_height, input_width)));
}
}
}
const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
memset(acc, 0, tranche_depth * sizeof(acc[0]));
const uint8_t *input_ptr =
- input_data + depth_base +
- depth * (in_x_origin + input_width * (in_y_origin + input_height * batch));
+ input_data + depth_base +
+ depth * (in_x_origin + input_width * (in_y_origin + input_height * batch));
for (int fy = filter_y_start; fy < filter_y_end; fy++)
{
const uint8_t *input_row_ptr = input_ptr + depth * (fy * input_width + filter_x_start);
for (int k = 0; k < suffix_dim_size; ++k, ++output_data)
{
*output_data =
- static_cast<int>(indices_data[i * suffix_dim_size + k]) == j ? on_value : off_value;
+ static_cast<int>(indices_data[i * suffix_dim_size + k]) == j ? on_value : off_value;
}
}
}
}
int size = (std::is_integral<T>::value
- ? ((std::abs(limit - start) + std::abs(delta) - 1) / std::abs(delta))
- : std::ceil(std::abs((limit - start) / delta)));
+ ? ((std::abs(limit - start) + std::abs(delta) - 1) / std::abs(delta))
+ : std::ceil(std::abs((limit - start) / delta)));
return size;
}
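The integral branch above is a ceiling division written in integer arithmetic; a standalone check of that identity (hypothetical helper, not cker code):

#include <cassert>
#include <cstdlib>

// ceil(|limit - start| / |delta|) without going through floating point
int range_size_int(int start, int limit, int delta)
{
  return (std::abs(limit - start) + std::abs(delta) - 1) / std::abs(delta);
}

int main()
{
  assert(range_size_int(0, 10, 3) == 4);  // {0, 3, 6, 9}
  assert(range_size_int(10, 0, -4) == 3); // {10, 6, 2}
  return 0;
}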
{
int r_idx = 0;
float tmp_data[4] = {
- 0,
+ 0,
};
float32x4_t tmp_data_32x4 = vld1q_f32(tmp_data);
for (; r_idx <= reduce_size - 32; r_idx += 32)
{
size_t input_offset = ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
size_t output_offset =
- ReducedOutputOffset(input_num_dims, input_dims, input_iter, num_axis, axis);
+ ReducedOutputOffset(input_num_dims, input_dims, input_iter, num_axis, axis);
output_data[output_offset] = reducer(output_data[output_offset], input_data[input_offset]);
} while (NextIndex(input_num_dims, input_dims, input_iter));
return true;
for (size_t idx = 0; idx < num_outputs; ++idx)
{
const U value =
- static_cast<U>(std::round(temp_sum[idx] * scale + bias)) + output_zero_point;
+ static_cast<U>(std::round(temp_sum[idx] * scale + bias)) + output_zero_point;
output_data[idx] = static_cast<T>(value);
}
}
for (size_t idx = 0; idx < num_outputs; ++idx)
{
float float_mean =
- static_cast<float>(temp_sum[idx]) / static_cast<float>(num_elements_in_axis);
+ static_cast<float>(temp_sum[idx]) / static_cast<float>(num_elements_in_axis);
float result = std::min(std::round(float_mean * scale + bias) + output_zero_point,
static_cast<float>(std::numeric_limits<T>::max()));
result = std::max(result, static_cast<float>(std::numeric_limits<T>::min()));
{
size_t input_offset = ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
size_t output_offset =
- ReducedOutputOffset(input_num_dims, input_dims, input_iter, num_axis, axis);
+ ReducedOutputOffset(input_num_dims, input_dims, input_iter, num_axis, axis);
output_data[output_offset] =
- reducer(output_data[output_offset], input_data[input_offset], normalizer);
+ reducer(output_data[output_offset], input_data[input_offset], normalizer);
} while (NextIndex(input_num_dims, input_dims, input_iter));
return true;
}
{
size_t input_offset = ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
size_t output_offset =
- ReducedOutputOffset(input_num_dims, input_dims, input_iter, num_axis, axis);
+ ReducedOutputOffset(input_num_dims, input_dims, input_iter, num_axis, axis);
temp_sum[output_offset] = reducer(temp_sum[output_offset], input_data[input_offset]);
} while (NextIndex(input_num_dims, input_dims, input_iter));
return normalizer;
}
size_t normalizer =
- ReduceSumQuantImpl<In>(input_data, input_shape, resolved_axis_data(), num_resolved_axis,
- temp_index_data(), reducer, _temp_sum.data());
+ ReduceSumQuantImpl<In>(input_data, input_shape, resolved_axis_data(), num_resolved_axis,
+ temp_index_data(), reducer, _temp_sum.data());
if (num_outputs > 0)
{
float scale = input_scale / output_scale;
sum_reducer);
}
+template <typename In, typename Out>
+void MeanAxis1And2(const Shape &input_shape, const In *input_data, const Shape &output_shape,
+ Out *output_data)
+{
+ UNUSED_RELEASE(output_shape);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ const int output_batch = output_shape.Dims(0);
+ const int output_depth = output_shape.Dims(3);
+
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+
+ for (int out_b = 0; out_b < output_batch; ++out_b)
+ {
+ for (int out_d = 0; out_d < output_depth; ++out_d)
+ {
+ float value = 0;
+ for (int in_h = 0; in_h < input_height; ++in_h)
+ {
+ for (int in_w = 0; in_w < input_width; ++in_w)
+ {
+ value += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
+ }
+ }
+ output_data[Offset(output_shape, out_b, 0, 0, out_d)] = value / (input_width * input_height);
+ }
+ }
+}
+
} // namespace cker
} // namespace nnfw
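A minimal sketch of the new MeanAxis1And2 path, which averages over the two spatial axes of an NHWC tensor; the include path and the Shape constructor are assumptions.

#include "cker/operation/ReduceMean.h" // assumed location of MeanAxis1And2

#include <vector>

int main()
{
  // 1 batch, 2x2 spatial, 1 channel: the mean of {1, 2, 3, 4} over H and W is 2.5.
  nnfw::cker::Shape in_shape{1, 2, 2, 1}; // assumed initializer-list constructor
  nnfw::cker::Shape out_shape{1, 1, 1, 1};
  std::vector<float> input{1.f, 2.f, 3.f, 4.f};
  std::vector<float> output(1);
  nnfw::cker::MeanAxis1And2(in_shape, input.data(), out_shape, output.data());
  // output[0] == 2.5f
  return 0;
}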
// Bottom right corner.
output_data[output_offset + output_x_offset + output_y_offset] =
- (output + ((x1y0 + x1y1) / 2)) / 2;
+ (output + ((x1y0 + x1y1) / 2)) / 2;
}
}
&x1);
int32_t input_offset[4] = {
- Offset(input_shape, b, y0, x0, 0), Offset(input_shape, b, y0, x1, 0),
- Offset(input_shape, b, y1, x0, 0), Offset(input_shape, b, y1, x1, 0)};
+ Offset(input_shape, b, y0, x0, 0), Offset(input_shape, b, y0, x1, 0),
+ Offset(input_shape, b, y1, x0, 0), Offset(input_shape, b, y1, x1, 0)};
float scale[4] = {(1 - (input_y - y0)) * (1 - (input_x - x0)),
(1 - (input_y - y0)) * (input_x - x0),
(input_y - y0) * (1 - (input_x - x0)), (input_y - y0) * (input_x - x0)};
{
const T *input_ptr = &input_data[d];
*output_ptr++ = static_cast<T>(
- input_ptr[input_offset[0]] * scale[0] + input_ptr[input_offset[1]] * scale[1] +
- input_ptr[input_offset[2]] * scale[2] + input_ptr[input_offset[3]] * scale[3]);
+ input_ptr[input_offset[0]] * scale[0] + input_ptr[input_offset[1]] * scale[1] +
+ input_ptr[input_offset[2]] * scale[2] + input_ptr[input_offset[3]] * scale[3]);
}
}
}
int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
float height_scale = (params.align_corners && params.output_height > 1)
- ? (static_cast<float>(input_height - 1) / (params.output_height - 1))
- : (static_cast<float>(input_height) / params.output_height);
+ ? (static_cast<float>(input_height - 1) / (params.output_height - 1))
+ : (static_cast<float>(input_height) / params.output_height);
float width_scale = (params.align_corners && params.output_width > 1)
- ? (static_cast<float>(input_width - 1) / (params.output_width - 1))
- : (static_cast<float>(input_width) / params.output_width);
+ ? (static_cast<float>(input_width - 1) / (params.output_width - 1))
+ : (static_cast<float>(input_width) / params.output_width);
ResizeBilinearGenericSmallChannel<uint8_t>(
- batches, input_height, input_width, depth, params.output_height, params.output_width,
- height_scale, width_scale, input_shape, input_data, output_data, params.half_pixel_centers);
+ batches, input_height, input_width, depth, params.output_height, params.output_width,
+ height_scale, width_scale, input_shape, input_data, output_data, params.half_pixel_centers);
}
} // namespace cker
} // namespace nnfw
const T *input_y_data, const Shape &output_shape, T *output_data)
{
const int64_t flatsize =
- MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape);
+ MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape);
for (int64_t i = 0; i < flatsize; ++i)
{
output_data[i] = (input_condition_data[i] != 0) ? input_x_data[i] : input_y_data[i];
const int x_index = SubscriptToIndex(desc_x, b, y, x, c);
const int y_index = SubscriptToIndex(desc_y, b, y, x, c);
output_data[Offset(extended_output_shape, b, y, x, c)] =
- input_condition_data[condition_index] ? input_x_data[x_index] : input_y_data[y_index];
+ input_condition_data[condition_index] ? input_x_data[x_index] : input_y_data[y_index];
}
}
}
: start_b + op_params.size[0];
const int start_h = begin_count < 3 ? 0 : op_params.begin[begin_count - 3];
const int stop_h = (size_count < 3 || op_params.size[size_count - 3] == -1)
- ? input_shape.Dims(1)
- : start_h + op_params.size[size_count - 3];
+ ? input_shape.Dims(1)
+ : start_h + op_params.size[size_count - 3];
const int start_w = begin_count < 2 ? 0 : op_params.begin[begin_count - 2];
const int stop_w = (size_count < 2 || op_params.size[size_count - 2] == -1)
- ? input_shape.Dims(2)
- : start_w + op_params.size[size_count - 2];
+ ? input_shape.Dims(2)
+ : start_w + op_params.size[size_count - 2];
const int start_d = begin_count < 1 ? 0 : op_params.begin[begin_count - 1];
const int stop_d = (size_count < 1 || op_params.size[size_count - 1] == -1)
- ? input_shape.Dims(3)
- : start_d + op_params.size[size_count - 1];
+ ? input_shape.Dims(3)
+ : start_d + op_params.size[size_count - 1];
for (int in_b = start_b; in_b < stop_b; ++in_b)
{
for (int c = 0; c < depth; ++c)
{
output_data[i * depth + c] =
- std::exp((input_data[i * depth + c] - max) * static_cast<float>(params.beta)) / sum;
+ std::exp((input_data[i * depth + c] - max) * static_cast<float>(params.beta)) / sum;
}
}
}
if (input_diff >= diff_min)
{
const int32_t input_diff_rescaled = MultiplyByQuantizedMultiplierGreaterThanOne(
- input_diff, input_beta_multiplier, input_beta_left_shift);
+ input_diff, input_beta_multiplier, input_beta_left_shift);
const FixedPointScaledDiff scaled_diff_f8 =
- FixedPointScaledDiff::FromRaw(input_diff_rescaled);
+ FixedPointScaledDiff::FromRaw(input_diff_rescaled);
sum_of_exps = sum_of_exps + gemmlowp::Rescale<kAccumulationIntegerBits>(
- exp_on_negative_values(scaled_diff_f8));
+ exp_on_negative_values(scaled_diff_f8));
}
}
// no later adjustment will be needed.
int num_bits_over_unit = kAccumulationIntegerBits - headroom_plus_one;
int32_t shifted_sum_minus_one =
- static_cast<int32_t>((static_cast<uint32_t>(fixed_sum_of_exps) << headroom_plus_one) -
- (static_cast<uint32_t>(1) << 31));
+ static_cast<int32_t>((static_cast<uint32_t>(fixed_sum_of_exps) << headroom_plus_one) -
+ (static_cast<uint32_t>(1) << 31));
FixedPoint0 shifted_scale =
- one_over_one_plus_x_for_x_in_0_1(FixedPoint0::FromRaw(shifted_sum_minus_one));
+ one_over_one_plus_x_for_x_in_0_1(FixedPoint0::FromRaw(shifted_sum_minus_one));
for (int c = 0; c < depth; ++c)
{
if (input_diff >= diff_min)
{
const int32_t input_diff_rescaled = MultiplyByQuantizedMultiplierGreaterThanOne(
- input_diff, input_beta_multiplier, input_beta_left_shift);
+ input_diff, input_beta_multiplier, input_beta_left_shift);
const FixedPointScaledDiff scaled_diff_f8 =
- FixedPointScaledDiff::FromRaw(input_diff_rescaled);
+ FixedPointScaledDiff::FromRaw(input_diff_rescaled);
FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8);
int32_t unsat_output = gemmlowp::RoundingDivideByPOT((shifted_scale * exp_in_0).raw(),
num_bits_over_unit + 31 - 8);
output_data[i * depth + c] = static_cast<uint8_t>(
- std::max(std::min(unsat_output, static_cast<int32_t>(255)), static_cast<int32_t>(0)));
+ std::max(std::min(unsat_output, static_cast<int32_t>(255)), static_cast<int32_t>(0)));
}
else
{
else
{
const T *in =
- input_data + Offset(input_shape, input_batch,
- (out_h * block_shape_height + shift_h) - padding_top,
- (out_w * block_shape_width + shift_w) - padding_left, 0);
+ input_data + Offset(input_shape, input_batch,
+ (out_h * block_shape_height + shift_h) - padding_top,
+ (out_w * block_shape_width + shift_w) - padding_left, 0);
memcpy(out, in, depth * sizeof(T));
}
}
GenerateKey(seed_t, &key, &counter);
Fill<Eigen::ThreadPoolDevice, random::UniformDistribution<random::PhiloxRandom, float>>(
- random::PhiloxRandom(counter, key), &output_t);
+ random::PhiloxRandom(counter, key), &output_t);
}
} // namespace cker
} // namespace nnfw
{
int stride_size = 0, tiled_stride_size = 0;
std::tie(stride_size, tiled_stride_size) =
- TileOneDimension(in_dimensions, copy_from_data, multipliers, copy_to_data, dimension + 1);
+ TileOneDimension(in_dimensions, copy_from_data, multipliers, copy_to_data, dimension + 1);
copy_from_data += stride_size;
copy_to_data += tiled_stride_size;
total_stride_size += stride_size;
const int total_size = shrunk_input_shape.FlatSize();
const int non_flatten_size =
- Flatten(shrunk_input_shape, shrunk_output_shape, shrunk_params,
+ Flatten(shrunk_input_shape, shrunk_output_shape, shrunk_params,
- &non_flatten_input_shape, &non_flatten_output_shape, &non_flatten_params);
+ &non_flatten_input_shape, &non_flatten_output_shape, &non_flatten_params);
assert(non_flatten_params.perm[0] != 0);
for (int i = 0; i < total_size; i += non_flatten_size)
(out_y < output_height))
{
float input_value =
- input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
- float filter_value = filter_data[Offset(filter_shape, out_channel, filter_y,
- filter_x, in_channel)];
+ input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
+ float filter_value =
+ filter_data[Offset(filter_shape, out_channel, filter_y, filter_x, in_channel)];
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] +=
- input_value * filter_value;
+ input_value * filter_value;
}
}
}
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- shifted_input1_val, params.input1_multiplier, params.input1_shift);
+ shifted_input1_val, params.input1_multiplier, params.input1_shift);
const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- shifted_input2_val, params.input2_multiplier, params.input2_shift);
+ shifted_input2_val, params.input2_multiplier, params.input2_shift);
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- raw_sum, params.output_multiplier, params.output_shift) +
+ raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32_t clamped_output = std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
const int16x4_t s1_narrowed = vmovn_s32(s1);
const int16x4_t s2_narrowed = vmovn_s32(s2);
const int16x8_t s =
- vaddq_s16(vcombine_s16(s1_narrowed, s2_narrowed), vdupq_n_s16(params.output_offset));
- const uint8x8_t clamped = vmax_u8(output_activation_min_vector,
- vmin_u8(output_activation_max_vector, vqmovun_s16(s)));
+ vaddq_s16(vcombine_s16(s1_narrowed, s2_narrowed), vdupq_n_s16(params.output_offset));
+ const uint8x8_t clamped =
+ vmax_u8(output_activation_min_vector, vmin_u8(output_activation_max_vector, vqmovun_s16(s)));
vst1_u8(output_data + i, clamped);
}
#endif // NEON
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- shifted_input1_val, params.input1_multiplier, params.input1_shift);
+ shifted_input1_val, params.input1_multiplier, params.input1_shift);
const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- shifted_input2_val, params.input2_multiplier, params.input2_shift);
+ shifted_input2_val, params.input2_multiplier, params.input2_shift);
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp(
- raw_sum, params.output_multiplier, params.output_shift) +
+ raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32_t clamped_output = std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
auto a2 = vld1q_f32(input2_data + i);
auto x = OPERATOR::calculate(a1, a2); // vaddq
auto x_clamped =
- ACTIVATION::applyCeiling(ACTIVATION::applyFloor(x, activation_min), activation_max);
+ ACTIVATION::applyCeiling(ACTIVATION::applyFloor(x, activation_min), activation_max);
vst1q_f32(output_data + i, x_clamped);
}
#endif // USE_NEON
{
auto x = OPERATOR::calculate(input1_data[i], input2_data[i]);
output_data[i] = ACTIVATION::applyCeiling(
- ACTIVATION::applyFloor(x, params.float_activation_min), params.float_activation_max);
+ ACTIVATION::applyFloor(x, params.float_activation_min), params.float_activation_max);
}
}
auto a2 = vld1q_f32(input2_data + i);
auto x = OPERATOR::calculate(broadcast_value_dup, a2);
auto x_clamped =
- ACTIVATION::applyCeiling(ACTIVATION::applyFloor(x, activation_min), activation_max);
+ ACTIVATION::applyCeiling(ACTIVATION::applyFloor(x, activation_min), activation_max);
vst1q_f32(output_data + i, x_clamped);
}
#endif // USE_NEON
{
auto x = OPERATOR::calculate(broadcast_value, input2_data[i]);
output_data[i] = ACTIVATION::applyCeiling(
- ACTIVATION::applyFloor(x, params.float_activation_min), params.float_activation_max);
+ ACTIVATION::applyFloor(x, params.float_activation_min), params.float_activation_max);
}
}
using BinaryOpImplFloatFuncs =
- std::pair<void (*)(int, const BinaryArithmeticOpParam &, const float *, const float *, float *),
- void (*)(int, const BinaryArithmeticOpParam &, const float, const float *, float *)>;
+ std::pair<void (*)(int, const BinaryArithmeticOpParam &, const float *, const float *, float *),
+ void (*)(int, const BinaryArithmeticOpParam &, const float, const float *, float *)>;
template <class FUNC>
inline BinaryOpImplFloatFuncs
if (params.broadcast_category == BroadcastableOpCategory::kGenericBroadcast)
{
const std::function<uint8_t(const BinaryArithmeticOpParam &, const uint8_t &, const uint8_t &)>
- fn = [](const BinaryArithmeticOpParam &params, const uint8_t &a,
- const uint8_t &b) -> uint8_t {
+ fn =
+ [](const BinaryArithmeticOpParam &params, const uint8_t &a, const uint8_t &b) -> uint8_t {
return static_cast<uint8_t>(quant8_sum(params, a, b));
};
- reference::BroadcastBinaryArithmeticOpSlowQuant8(params, input1_shape, input1_data,
- input2_shape, input2_data, output_shape,
- output_data, fn);
+ reference::BroadcastBinaryArithmeticOpSlowQuant8(
+ params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, fn);
}
else
{
BinaryBroadcastFiveFold(
- params, params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast,
- input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
- static_cast<void (*)(int, const BinaryArithmeticOpParam &, const uint8_t *, const uint8_t *,
- uint8_t *)>(AddElementwiseQuant8),
- static_cast<void (*)(int, const BinaryArithmeticOpParam &, uint8_t, const uint8_t *,
- uint8_t *)>(AddScalarBroadcastQuant8));
+ params, params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast,
+ input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
+ static_cast<void (*)(int, const BinaryArithmeticOpParam &, const uint8_t *, const uint8_t *,
+ uint8_t *)>(AddElementwiseQuant8),
+ static_cast<void (*)(int, const BinaryArithmeticOpParam &, uint8_t, const uint8_t *,
+ uint8_t *)>(AddScalarBroadcastQuant8));
}
}
if (params.broadcast_category == BroadcastableOpCategory::kGenericBroadcast)
{
const std::function<float(const float &, const float &)> fn =
- [](const float &a, const float &b) -> float { return a + b; };
+ [](const float &a, const float &b) -> float { return a + b; };
reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data, fn);
}
{
auto implFuncs = getBinaryOpWithActivationImplFloat<BinaryOpFuncAddFloat>(params);
- BinaryBroadcastFiveFold(params, params.broadcast_category ==
- BroadcastableOpCategory::kSecondInputBroadcastsFast,
- input1_shape, input1_data, input2_shape, input2_data, output_shape,
- output_data, implFuncs.first, implFuncs.second);
+ BinaryBroadcastFiveFold(
+ params, params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast,
+ input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
+ implFuncs.first, implFuncs.second);
}
}
else if (params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast)
{
auto implFuncs =
- getBinaryOpWithActivationImplFloat<BinaryOpFuncSwapArgs<BinaryOpFuncSubFloat>>(params);
+ getBinaryOpWithActivationImplFloat<BinaryOpFuncSwapArgs<BinaryOpFuncSubFloat>>(params);
BinaryBroadcastFiveFold(params, true, input1_shape, input1_data, input2_shape, input2_data,
output_shape, output_data, implFuncs.first, implFuncs.second);
}
else
{
const std::function<float(const float &, const float &)> fn =
- [](const float &a, const float &b) -> float { return a - b; };
+ [](const float &a, const float &b) -> float { return a - b; };
reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data, fn);
}
const int32_t input1_val = params.input1_offset + input1_data;
const int32_t input2_val = params.input2_offset + input2_data;
const int32_t unclamped_result =
- params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val,
- params.output_multiplier,
- params.output_shift);
+ params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val,
+ params.output_multiplier,
+ params.output_shift);
const int32_t clamped_output = std::min(
- params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result));
+ params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result));
return clamped_output;
}
const auto p1_narrowed = vqmovn_s32(p1);
const auto p2_narrowed = vqmovn_s32(p2);
const auto p = vaddq_s16(vcombine_s16(p1_narrowed, p2_narrowed), output_offset_vector);
- const auto clamped = vmax_u8(output_activation_min_vector,
- vmin_u8(output_activation_max_vector, vqmovun_s16(p)));
+ const auto clamped =
+ vmax_u8(output_activation_min_vector, vmin_u8(output_activation_max_vector, vqmovun_s16(p)));
vst1_u8(output_data + i, clamped);
}
#endif // NEON
const int32_t input1_val = params.input1_offset + input1_data[i];
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t unclamped_result =
- params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val,
- params.output_multiplier,
- params.output_shift);
- const int32_t clamped_output =
- std::min(params.quantized_activation_max,
- std::max(params.quantized_activation_min, unclamped_result));
+ params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val,
+ params.output_multiplier,
+ params.output_shift);
+ const int32_t clamped_output = std::min(
+ params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result));
output_data[i] = static_cast<uint8_t>(clamped_output);
}
}
if (params.broadcast_category == BroadcastableOpCategory::kGenericBroadcast)
{
const std::function<uint8_t(const BinaryArithmeticOpParam &, const uint8_t &, const uint8_t &)>
- fn = [](const BinaryArithmeticOpParam &params, const uint8_t &a,
- const uint8_t &b) -> uint8_t {
+ fn =
+ [](const BinaryArithmeticOpParam &params, const uint8_t &a, const uint8_t &b) -> uint8_t {
return static_cast<uint8_t>(quant8_mul(params, a, b));
};
- reference::BroadcastBinaryArithmeticOpSlowQuant8(params, input1_shape, input1_data,
- input2_shape, input2_data, output_shape,
- output_data, fn);
+ reference::BroadcastBinaryArithmeticOpSlowQuant8(
+ params, input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, fn);
return;
}
BinaryBroadcastFiveFold(
- params, params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast,
- input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
- static_cast<void (*)(int, const BinaryArithmeticOpParam &, const uint8_t *, const uint8_t *,
- uint8_t *)>(MulElementwiseQuant8),
- static_cast<void (*)(int, const BinaryArithmeticOpParam &, uint8_t, const uint8_t *,
- uint8_t *)>(MulSimpleBroadcastQuant8));
+ params, params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast,
+ input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
+ static_cast<void (*)(int, const BinaryArithmeticOpParam &, const uint8_t *, const uint8_t *,
+ uint8_t *)>(MulElementwiseQuant8),
+ static_cast<void (*)(int, const BinaryArithmeticOpParam &, uint8_t, const uint8_t *,
+ uint8_t *)>(MulSimpleBroadcastQuant8));
}
inline void BroadcastMulDispatch(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
{
// TODO: Use GetBinaryArithmeticFn
const std::function<float(const float &, const float &)> fn =
- [](const float &a, const float &b) -> float { return a * b; };
+ [](const float &a, const float &b) -> float { return a * b; };
reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data, fn);
return;
}
auto implFuncs = getBinaryOpWithActivationImplFloat<BinaryOpFuncMulFloat>(params);
- BinaryBroadcastFiveFold(params, params.broadcast_category ==
- BroadcastableOpCategory::kSecondInputBroadcastsFast,
- input1_shape, input1_data, input2_shape, input2_data, output_shape,
- output_data, implFuncs.first, implFuncs.second);
+ BinaryBroadcastFiveFold(
+ params, params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast,
+ input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
+ implFuncs.first, implFuncs.second);
}
inline void Div(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
(*implFuncs.first)(flat_size, params, input1_data, input2_data, output_data);
#else
const std::function<float(const float &, const float &)> fn =
- [](const float &a, const float &b) -> float { return a / b; };
+ [](const float &a, const float &b) -> float { return a / b; };
reference::BinaryArithmeticOp(params, input1_shape, input1_data, input2_shape, input2_data,
output_shape, output_data, fn);
#endif // __aarch64__
else if (params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast)
{
auto implFuncs =
- getBinaryOpWithActivationImplFloat<BinaryOpFuncSwapArgs<BinaryOpFuncDivFloat>>(params);
+ getBinaryOpWithActivationImplFloat<BinaryOpFuncSwapArgs<BinaryOpFuncDivFloat>>(params);
BinaryBroadcastFiveFold(params, true, input1_shape, input1_data, input2_shape, input2_data,
output_shape, output_data, implFuncs.first, implFuncs.second);
}
#endif // __aarch64__
{
const std::function<float(const float &, const float &)> fn =
- [](const float &a, const float &b) -> float { return a / b; };
+ [](const float &a, const float &b) -> float { return a / b; };
reference::BroadcastBinaryArithmeticOpSlow(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data, fn);
}
typedef std::tuple<gemmlowp::OutputStageBiasAddition<ColVectorMap>,
gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent,
gemmlowp::OutputStageClamp, gemmlowp::OutputStageSaturatingCastToUint8>
- Pipeline;
+ Pipeline;
static Pipeline MakeExp(const int32_t *bias_data, int output_rows, int32_t output_offset,
int32_t output_multiplier, int output_left_shift,
int32_t output_activation_min, int32_t output_activation_max)
const int filter_height = filter_shape.Dims(1);
const bool need_dilated_im2col = dilation_width_factor != 1 || dilation_height_factor != 1;
const bool need_im2col =
- stride_width != 1 || stride_height != 1 || filter_width != 1 || filter_height != 1;
+ stride_width != 1 || stride_height != 1 || filter_width != 1 || filter_height != 1;
if (need_dilated_im2col)
{
assert(im2col_data);
// the other calls commented out. This is a partial rollback of cl/196819423.
// const int gemm_input_cols = FlatSizeSkipDim(*gemm_input_shape, 3);
const int gemm_input_cols =
- gemm_input_shape->Dims(0) * gemm_input_shape->Dims(1) * gemm_input_shape->Dims(2);
+ gemm_input_shape->Dims(0) * gemm_input_shape->Dims(1) * gemm_input_shape->Dims(2);
const int filter_rows = filter_shape.Dims(0);
// See b/79927784.
// const int filter_cols = FlatSizeSkipDim(filter_shape, 0);
assert(bias_shape.FlatSize() == output_rows);
UNUSED_RELEASE(bias_shape);
gemmlowp::MatrixMap<const uint8_t, gemmlowp::MapOrder::RowMajor> filter_matrix(
- filter_data, filter_rows, filter_cols);
+ filter_data, filter_rows, filter_cols);
gemmlowp::MatrixMap<const uint8_t, gemmlowp::MapOrder::ColMajor> input_matrix(
- gemm_input_data, gemm_input_rows, gemm_input_cols);
+ gemm_input_data, gemm_input_rows, gemm_input_cols);
gemmlowp::MatrixMap<uint8_t, gemmlowp::MapOrder::ColMajor> output_matrix(output_data, output_rows,
output_cols);
const auto &output_pipeline =
- GemmlowpOutputPipeline::MakeExp(bias_data, output_rows, output_offset, output_multiplier,
- output_shift, output_activation_min, output_activation_max);
+ GemmlowpOutputPipeline::MakeExp(bias_data, output_rows, output_offset, output_multiplier,
+ output_shift, output_activation_min, output_activation_max);
gemmlowp::GemmWithOutputPipeline<uint8_t, uint8_t, gemmlowp::L8R8WithLhsNonzeroBitDepthParams>(
- gemm_context, filter_matrix, input_matrix, &output_matrix, filter_offset, input_offset,
- output_pipeline);
+ gemm_context, filter_matrix, input_matrix, &output_matrix, filter_offset, input_offset,
+ output_pipeline);
}
} // namespace optimized
T *output_data, int output_height, int output_width)
{
const bool is_1x1_kernel =
- (filter_height == 1 && filter_width == 1 && stride_rows == 1 && stride_cols == 1);
+ (filter_height == 1 && filter_width == 1 && stride_rows == 1 && stride_cols == 1);
const bool is_same_height_width =
- (filter_height == input_height && filter_width == input_width && pad_width == 0 &&
- pad_height == 0);
+ (filter_height == input_height && filter_width == input_width && pad_width == 0 &&
+ pad_height == 0);
if (is_1x1_kernel || is_same_height_width)
{
// is_1x1_kernel: For 1x1 kernel, the 2D convolution is reduced to matrix multiplication.
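To make the is_1x1_kernel remark concrete: with 1x1 filters and unit strides, every output pixel is just the input pixel's channel vector multiplied by the [out_channels x in_channels] filter matrix. A hypothetical standalone check (not cker code):

#include <cassert>
#include <vector>

int main()
{
  const int pixels = 3, in_ch = 2, out_ch = 2;
  // Input flattened to [pixel][in_ch]; 1x1 filter stored as [out_ch][in_ch].
  std::vector<float> input{1, 2, 3, 4, 5, 6};
  std::vector<float> filter{1, 0, 0, 1}; // identity channel mapping
  std::vector<float> output(pixels * out_ch, 0.f);
  for (int p = 0; p < pixels; ++p)
    for (int oc = 0; oc < out_ch; ++oc)
      for (int ic = 0; ic < in_ch; ++ic)
        output[p * out_ch + oc] += input[p * in_ch + ic] * filter[oc * in_ch + ic];
  // With an identity filter the "convolution" reproduces the input exactly.
  assert(output == input);
  return 0;
}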
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_OPTIMIZED_DEPTHWISE_CONV_FLOAT_H__
+#define __NNFW_CKER_OPTIMIZED_DEPTHWISE_CONV_FLOAT_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+#include "cker/neon/neon_check.h"
+
+namespace nnfw
+{
+namespace cker
+{
+namespace optimized
+{
+
+// Implementation of float DepthwiseConv
+
+template <bool kAllowStrided, int kFixedInputDepth, int kFixedDepthMultiplier>
+struct FloatDepthwiseConvKernel
+{
+};
+
+#ifdef USE_NEON
+
+template <> struct FloatDepthwiseConvKernel<false, 8, 1>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+ (void)input_ptr_increment;
+ // Load the filters
+ float32x4_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] = vld1q_f32(filter_ptr + 4 * i);
+ }
+ int outp = 0;
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the inputs
+ float32x4_t input[4];
+ for (int i = 0; i < 4; i++)
+ {
+ input[i] = vld1q_f32(input_ptr + 4 * i);
+ }
+ input_ptr += 16;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ acc[0] = vmlaq_f32(acc[0], input[0], filter[0]);
+ acc[1] = vmlaq_f32(acc[1], input[1], filter[1]);
+ acc[2] = vmlaq_f32(acc[2], input[2], filter[0]);
+ acc[3] = vmlaq_f32(acc[3], input[3], filter[1]);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs
+ float32x4_t input[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vld1q_f32(input_ptr + 4 * i);
+ }
+ input_ptr += 8;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vmlaq_f32(acc[i], input[i], filter[i]);
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<false, 2, 1>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+ (void)input_ptr_increment;
+
+ const float32x2_t filters = vld1_f32(filter_ptr);
+ const float32x4_t filters_dup2 = vcombine_f32(filters, filters);
+ int outp = 0;
+ // Handle 8 output pixels at a time.
+ for (; outp <= num_output_pixels - 8; outp += 8)
+ {
+ // Load the inputs
+ float32x4_t input[4];
+ for (int i = 0; i < 4; i++)
+ {
+ input[i] = vld1q_f32(input_ptr + 4 * i);
+ }
+ input_ptr += 16;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vmlaq_f32(acc[i], input[i], filters_dup2);
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle 4 output pixels at a time.
+ for (; outp <= num_output_pixels - 4; outp += 4)
+ {
+ // Load the inputs
+ float32x4_t input[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vld1q_f32(input_ptr + 4 * i);
+ }
+ input_ptr += 8;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vmlaq_f32(acc[i], input[i], filters_dup2);
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ // Handle 2 output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the inputs
+ const float32x4_t input = vld1q_f32(input_ptr);
+ input_ptr += 4;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc = vld1q_f32(acc_buffer_ptr);
+ // Multiply-accumulate
+ acc = vmlaq_f32(acc, input, filters_dup2);
+ // Store the accumulators back to acc_buffer
+ vst1q_f32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+ // Handle 1 output pixel at a time
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs
+ const float32x2_t input = vld1_f32(input_ptr);
+ input_ptr += 2;
+ // Load the accumulators from acc_buffer
+ float32x2_t acc = vld1_f32(acc_buffer_ptr);
+ // Multiply-accumulate
+ acc = vmla_f32(acc, input, filters);
+ // Store the accumulators back to acc_buffer
+ vst1_f32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 2;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 0, 1>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)depth_multiplier;
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ const float *local_filter_ptr = filter_ptr;
+ const float *local_input_ptr = input_ptr;
+ int ic = 0;
+ // Handle 16 input channels at a time.
+ for (; ic <= input_depth - 16; ic += 16)
+ {
+ // Load the filters
+ float32x4_t filter_0 = vld1q_f32(local_filter_ptr + 4 * 0);
+ float32x4_t filter_1 = vld1q_f32(local_filter_ptr + 4 * 1);
+ float32x4_t filter_2 = vld1q_f32(local_filter_ptr + 4 * 2);
+ float32x4_t filter_3 = vld1q_f32(local_filter_ptr + 4 * 3);
+ local_filter_ptr += 16;
+ // Load the inputs
+ float32x4_t input_0 = vld1q_f32(local_input_ptr + 4 * 0);
+ float32x4_t input_1 = vld1q_f32(local_input_ptr + 4 * 1);
+ float32x4_t input_2 = vld1q_f32(local_input_ptr + 4 * 2);
+ float32x4_t input_3 = vld1q_f32(local_input_ptr + 4 * 3);
+ local_input_ptr += 16;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc_0 = vld1q_f32(acc_buffer_ptr + 4 * 0);
+ float32x4_t acc_1 = vld1q_f32(acc_buffer_ptr + 4 * 1);
+ float32x4_t acc_2 = vld1q_f32(acc_buffer_ptr + 4 * 2);
+ float32x4_t acc_3 = vld1q_f32(acc_buffer_ptr + 4 * 3);
+ // Multiply-accumulate
+ acc_0 = vmlaq_f32(acc_0, input_0, filter_0);
+ acc_1 = vmlaq_f32(acc_1, input_1, filter_1);
+ acc_2 = vmlaq_f32(acc_2, input_2, filter_2);
+ acc_3 = vmlaq_f32(acc_3, input_3, filter_3);
+ // Store the accumulators back to acc_buffer
+ vst1q_f32(acc_buffer_ptr + 4 * 0, acc_0);
+ vst1q_f32(acc_buffer_ptr + 4 * 1, acc_1);
+ vst1q_f32(acc_buffer_ptr + 4 * 2, acc_2);
+ vst1q_f32(acc_buffer_ptr + 4 * 3, acc_3);
+ acc_buffer_ptr += 16;
+ }
+ // Handle 4 input channels at a time.
+ for (; ic <= input_depth - 4; ic += 4)
+ {
+ // Load the filters
+ float32x4_t filter;
+ filter = vld1q_f32(local_filter_ptr);
+ local_filter_ptr += 4;
+ // Load the inputs
+ float32x4_t input;
+ input = vld1q_f32(local_input_ptr);
+ local_input_ptr += 4;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc;
+ acc = vld1q_f32(acc_buffer_ptr);
+ // Multiply-accumulate
+ acc = vmlaq_f32(acc, input, filter);
+ // Store the accumulators back to acc_buffer
+ vst1q_f32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+ // Handle one input channel at a time.
+ for (; ic < input_depth; ic++)
+ {
+ const float input_val = *local_input_ptr++;
+ const float filter_val = *local_filter_ptr++;
+ *acc_buffer_ptr++ += filter_val * input_val;
+ }
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 0, 8>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)depth_multiplier;
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ const float *local_filter_ptr = filter_ptr;
+ const float *local_input_ptr = input_ptr;
+ int ic = 0;
+ // Handle 2 input channels at a time.
+ for (; ic <= input_depth - 2; ic += 2)
+ {
+ // Load the filters
+ float32x4_t filter[4];
+ for (int i = 0; i < 4; i++)
+ {
+ filter[i] = vld1q_f32(local_filter_ptr + 4 * i);
+ }
+ local_filter_ptr += 16;
+ // Load the inputs
+ const float32x2_t input = vld1_f32(local_input_ptr);
+ local_input_ptr += 2;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ acc[0] = vmlaq_lane_f32(acc[0], filter[0], input, 0);
+ acc[1] = vmlaq_lane_f32(acc[1], filter[1], input, 0);
+ acc[2] = vmlaq_lane_f32(acc[2], filter[2], input, 1);
+ acc[3] = vmlaq_lane_f32(acc[3], filter[3], input, 1);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle one input channel at a time.
+ for (; ic < input_depth; ic++)
+ {
+ // Load the filters
+ float32x4_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] = vld1q_f32(local_filter_ptr + 4 * i);
+ }
+ local_filter_ptr += 8;
+ // Load the inputs
+ const float input_val = *local_input_ptr++;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vmlaq_n_f32(acc[i], filter[i], input_val);
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+// Note this implementation is very slow for input_depths < 8
+// (e.g. comparable to the reference implementation); see the specializations
+// for input_depth=3 below.
+template <> struct FloatDepthwiseConvKernel<true, 0, 2>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)depth_multiplier;
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ const float *local_filter_ptr = filter_ptr;
+ const float *local_input_ptr = input_ptr;
+ int ic = 0;
+ // Handle 8 input channels at a time.
+ for (; ic <= input_depth - 8; ic += 8)
+ {
+ // Load the filters
+ float32x4_t filter[4];
+ for (int i = 0; i < 4; i++)
+ {
+ filter[i] = vld1q_f32(local_filter_ptr + 4 * i);
+ }
+ local_filter_ptr += 16;
+ // Load the inputs
+ float32x4x2_t input_dup2[2];
+ for (int i = 0; i < 2; i++)
+ {
+ const float32x4_t input = vld1q_f32(local_input_ptr + 4 * i);
+ input_dup2[i] = vzipq_f32(input, input);
+ }
+ local_input_ptr += 8;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ acc[0] = vmlaq_f32(acc[0], filter[0], input_dup2[0].val[0]);
+ acc[1] = vmlaq_f32(acc[1], filter[1], input_dup2[0].val[1]);
+ acc[2] = vmlaq_f32(acc[2], filter[2], input_dup2[1].val[0]);
+ acc[3] = vmlaq_f32(acc[3], filter[3], input_dup2[1].val[1]);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ // Handle 4 input channels at a time.
+ for (; ic <= input_depth - 4; ic += 4)
+ {
+ // Load the filters
+ float32x2_t filter[4];
+ for (int i = 0; i < 4; i++)
+ {
+ filter[i] = vld1_f32(local_filter_ptr + 2 * i);
+ }
+ local_filter_ptr += 8;
+ // Load the inputs
+ const float32x4_t input = vld1q_f32(local_input_ptr);
+ local_input_ptr += 4;
+ // Load the accumulators from acc_buffer
+ float32x2_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1_f32(acc_buffer_ptr + 2 * i);
+ }
+ // Multiply-accumulate
+ acc[0] = vmla_lane_f32(acc[0], filter[0], vget_low_f32(input), 0);
+ acc[1] = vmla_lane_f32(acc[1], filter[1], vget_low_f32(input), 1);
+ acc[2] = vmla_lane_f32(acc[2], filter[2], vget_high_f32(input), 0);
+ acc[3] = vmla_lane_f32(acc[3], filter[3], vget_high_f32(input), 1);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1_f32(acc_buffer_ptr + 2 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ // Handle 2 input channels at a time.
+ for (; ic <= input_depth - 2; ic += 2)
+ {
+ // Load the filters
+ const float32x4_t filter = vld1q_f32(local_filter_ptr);
+ local_filter_ptr += 4;
+ // Load the inputs
+ const float32x2_t input = vld1_f32(local_input_ptr);
+ local_input_ptr += 2;
+ // Load the accumulators from acc_buffer
+ float32x2_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1_f32(acc_buffer_ptr + 2 * i);
+ }
+ // Multiply-accumulate
+ acc[0] = vmla_lane_f32(acc[0], vget_low_f32(filter), input, 0);
+ acc[1] = vmla_lane_f32(acc[1], vget_high_f32(filter), input, 1);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1_f32(acc_buffer_ptr + 2 * i, acc[i]);
+ }
+ acc_buffer_ptr += 4;
+ }
+ // Handle one input channel at a time.
+ for (; ic < input_depth; ic++)
+ {
+ // Load the inputs
+ const float input_val = *local_input_ptr++;
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc_buffer_ptr[i] += local_filter_ptr[i] * input_val;
+ }
+ local_filter_ptr += 2;
+ acc_buffer_ptr += 2;
+ }
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 3, 2>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+
+ // Load the filters
+ float32x2_t filter[3];
+ for (int i = 0; i < 3; i++)
+ {
+ filter[i] = vld1_f32(filter_ptr + 2 * i);
+ }
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ const float32x2_t input01 = vld1_f32(input_ptr);
+ const float32x2_t input2 = vld1_dup_f32(input_ptr + 2);
+ // Load the accumulators from acc_buffer
+ float32x2_t acc[3];
+ for (int i = 0; i < 3; i++)
+ {
+ acc[i] = vld1_f32(acc_buffer_ptr + 2 * i);
+ }
+ // Multiply-accumulate: for each input channel there are 2 outputs
+ acc[0] = vmla_lane_f32(acc[0], filter[0], input01, 0);
+ acc[1] = vmla_lane_f32(acc[1], filter[1], input01, 1);
+ acc[2] = vmla_lane_f32(acc[2], filter[2], input2, 0);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 3; i++)
+ {
+ vst1_f32(acc_buffer_ptr + 2 * i, acc[i]);
+ }
+ acc_buffer_ptr += 6;
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 3, 4>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+
+ // Load the filters
+ float32x4_t filter[3];
+ for (int i = 0; i < 3; i++)
+ {
+ filter[i] = vld1q_f32(filter_ptr + 4 * i);
+ }
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // NOTE: we only want 3 values, so we read it as two ops where
+ // the second op just duplicates the lane
+ const float32x2_t input01 = vld1_f32(input_ptr);
+ const float32x2_t input2 = vld1_dup_f32(input_ptr + 2);
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[3];
+ for (int i = 0; i < 3; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate all outputs.
+ acc[0] = vmlaq_lane_f32(acc[0], filter[0], input01, 0);
+ acc[1] = vmlaq_lane_f32(acc[1], filter[1], input01, 1);
+ acc[2] = vmlaq_lane_f32(acc[2], filter[2], input2, 0);
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 3; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 12;
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 1, 8>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+
+ // Load the filters
+ float32x4_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] = vld1q_f32(filter_ptr + 4 * i);
+ }
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs
+ const float input_val = *input_ptr;
+ input_ptr += input_ptr_increment;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vmlaq_n_f32(acc[i], filter[i], input_val);
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 1, 32>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+
+ // Load the filters
+ float32x4_t filter_0 = vld1q_f32(filter_ptr + 4 * 0);
+ float32x4_t filter_1 = vld1q_f32(filter_ptr + 4 * 1);
+ float32x4_t filter_2 = vld1q_f32(filter_ptr + 4 * 2);
+ float32x4_t filter_3 = vld1q_f32(filter_ptr + 4 * 3);
+ float32x4_t filter_4 = vld1q_f32(filter_ptr + 4 * 4);
+ float32x4_t filter_5 = vld1q_f32(filter_ptr + 4 * 5);
+ float32x4_t filter_6 = vld1q_f32(filter_ptr + 4 * 6);
+ float32x4_t filter_7 = vld1q_f32(filter_ptr + 4 * 7);
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs
+ const float input_val = *input_ptr;
+ input_ptr += input_ptr_increment;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc_0 = vld1q_f32(acc_buffer_ptr + 4 * 0);
+ float32x4_t acc_1 = vld1q_f32(acc_buffer_ptr + 4 * 1);
+ float32x4_t acc_2 = vld1q_f32(acc_buffer_ptr + 4 * 2);
+ float32x4_t acc_3 = vld1q_f32(acc_buffer_ptr + 4 * 3);
+ float32x4_t acc_4 = vld1q_f32(acc_buffer_ptr + 4 * 4);
+ float32x4_t acc_5 = vld1q_f32(acc_buffer_ptr + 4 * 5);
+ float32x4_t acc_6 = vld1q_f32(acc_buffer_ptr + 4 * 6);
+ float32x4_t acc_7 = vld1q_f32(acc_buffer_ptr + 4 * 7);
+ // Multiply-accumulate
+ acc_0 = vmlaq_n_f32(acc_0, filter_0, input_val);
+ acc_1 = vmlaq_n_f32(acc_1, filter_1, input_val);
+ acc_2 = vmlaq_n_f32(acc_2, filter_2, input_val);
+ acc_3 = vmlaq_n_f32(acc_3, filter_3, input_val);
+ acc_4 = vmlaq_n_f32(acc_4, filter_4, input_val);
+ acc_5 = vmlaq_n_f32(acc_5, filter_5, input_val);
+ acc_6 = vmlaq_n_f32(acc_6, filter_6, input_val);
+ acc_7 = vmlaq_n_f32(acc_7, filter_7, input_val);
+ // Store the accumulators back to acc_buffer
+ vst1q_f32(acc_buffer_ptr + 4 * 0, acc_0);
+ vst1q_f32(acc_buffer_ptr + 4 * 1, acc_1);
+ vst1q_f32(acc_buffer_ptr + 4 * 2, acc_2);
+ vst1q_f32(acc_buffer_ptr + 4 * 3, acc_3);
+ vst1q_f32(acc_buffer_ptr + 4 * 4, acc_4);
+ vst1q_f32(acc_buffer_ptr + 4 * 5, acc_5);
+ vst1q_f32(acc_buffer_ptr + 4 * 6, acc_6);
+ vst1q_f32(acc_buffer_ptr + 4 * 7, acc_7);
+ acc_buffer_ptr += 32;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 1, 20>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+
+ // Load the filters
+ float32x4_t filter_0 = vld1q_f32(filter_ptr + 4 * 0);
+ float32x4_t filter_1 = vld1q_f32(filter_ptr + 4 * 1);
+ float32x4_t filter_2 = vld1q_f32(filter_ptr + 4 * 2);
+ float32x4_t filter_3 = vld1q_f32(filter_ptr + 4 * 3);
+ float32x4_t filter_4 = vld1q_f32(filter_ptr + 4 * 4);
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs
+ const float input_val = *input_ptr;
+ input_ptr += input_ptr_increment;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc_0 = vld1q_f32(acc_buffer_ptr + 4 * 0);
+ float32x4_t acc_1 = vld1q_f32(acc_buffer_ptr + 4 * 1);
+ float32x4_t acc_2 = vld1q_f32(acc_buffer_ptr + 4 * 2);
+ float32x4_t acc_3 = vld1q_f32(acc_buffer_ptr + 4 * 3);
+ float32x4_t acc_4 = vld1q_f32(acc_buffer_ptr + 4 * 4);
+ // Multiply-accumulate
+ acc_0 = vmlaq_n_f32(acc_0, filter_0, input_val);
+ acc_1 = vmlaq_n_f32(acc_1, filter_1, input_val);
+ acc_2 = vmlaq_n_f32(acc_2, filter_2, input_val);
+ acc_3 = vmlaq_n_f32(acc_3, filter_3, input_val);
+ acc_4 = vmlaq_n_f32(acc_4, filter_4, input_val);
+ // Store the accumulators back to acc_buffer
+ vst1q_f32(acc_buffer_ptr + 4 * 0, acc_0);
+ vst1q_f32(acc_buffer_ptr + 4 * 1, acc_1);
+ vst1q_f32(acc_buffer_ptr + 4 * 2, acc_2);
+ vst1q_f32(acc_buffer_ptr + 4 * 3, acc_3);
+ vst1q_f32(acc_buffer_ptr + 4 * 4, acc_4);
+ acc_buffer_ptr += 20;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 0, 16>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)depth_multiplier;
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ const float *local_filter_ptr = filter_ptr;
+ const float *local_input_ptr = input_ptr;
+ for (int ic = 0; ic < input_depth; ic++)
+ {
+ // Load the filters
+ float32x4_t filter[4];
+ for (int i = 0; i < 4; i++)
+ {
+ filter[i] = vld1q_f32(local_filter_ptr + 4 * i);
+ }
+ local_filter_ptr += 16;
+ // Load the inputs
+ const float input_val = *local_input_ptr++;
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[4];
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 4; i++)
+ {
+ acc[i] = vmlaq_n_f32(acc[i], filter[i], input_val);
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 4; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 16;
+ }
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 8, 1>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+
+ // Load the filters
+ float32x4_t filter[2];
+ for (int i = 0; i < 2; i++)
+ {
+ filter[i] = vld1q_f32(filter_ptr + 4 * i);
+ }
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs
+ float32x4_t input[2];
+ for (int i = 0; i < 2; i++)
+ {
+ input[i] = vld1q_f32(input_ptr + 4 * i);
+ }
+ // Load the accumulators from acc_buffer
+ float32x4_t acc[2];
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vld1q_f32(acc_buffer_ptr + 4 * i);
+ }
+ // Multiply-accumulate
+ for (int i = 0; i < 2; i++)
+ {
+ acc[i] = vmlaq_f32(acc[i], input[i], filter[i]);
+ }
+ // Store the accumulators back to acc_buffer
+ for (int i = 0; i < 2; i++)
+ {
+ vst1q_f32(acc_buffer_ptr + 4 * i, acc[i]);
+ }
+ acc_buffer_ptr += 8;
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 2, 1>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+
+ float32x2_t filter = vld1_f32(filter_ptr);
+ float32x4_t filter_x4 = vcombine_f32(filter, filter);
+ int outp = 0;
+
+ // Handle two output pixels at a time.
+ for (; outp <= num_output_pixels - 2; outp += 2)
+ {
+ // Load the inputs
+ float32x2_t input_1 = vld1_f32(input_ptr);
+ input_ptr += input_ptr_increment;
+ float32x2_t input_2 = vld1_f32(input_ptr);
+ input_ptr += input_ptr_increment;
+ float32x4_t input = vcombine_f32(input_1, input_2);
+
+ // Load the accumulators from acc_buffer
+ float32x4_t acc = vld1q_f32(acc_buffer_ptr);
+
+ // Multiply-accumulate
+ acc = vmlaq_f32(acc, input, filter_x4);
+
+ // Store the accumulators back to acc_buffer
+ vst1q_f32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ }
+ // Handle one output pixel at a time.
+ for (; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs
+ float32x2_t input = vld1_f32(input_ptr);
+ input_ptr += input_ptr_increment;
+
+ // Load the accumulators from acc_buffer
+ float32x2_t acc = vld1_f32(acc_buffer_ptr);
+
+ // Multiply-accumulate
+ acc = vmla_f32(acc, input, filter);
+
+ // Store the accumulators back to acc_buffer
+ vst1_f32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 2;
+ }
+ }
+};
+
+template <> struct FloatDepthwiseConvKernel<true, 4, 1>
+{
+ static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
+ const float *input_ptr, int input_ptr_increment, const float *filter_ptr,
+ float *acc_buffer_ptr)
+ {
+ (void)input_depth;
+ (void)depth_multiplier;
+
+ float32x4_t filter = vld1q_f32(filter_ptr);
+
+ // Handle one output pixel at a time.
+ for (int outp = 0; outp < num_output_pixels; outp++)
+ {
+ // Load the inputs
+ float32x4_t input = vld1q_f32(input_ptr);
+ // Load the accumulators from acc_buffer
+ float32x4_t acc = vld1q_f32(acc_buffer_ptr);
+ // Multiply-accumulate
+ acc = vmlaq_f32(acc, input, filter);
+ // Store the accumulators back to acc_buffer
+ vst1q_f32(acc_buffer_ptr, acc);
+ acc_buffer_ptr += 4;
+ input_ptr += input_ptr_increment;
+ }
+ }
+};
+#endif
+
+// Accumulates the effect of one row of the filter on a segment of one row
+// of the output, accessing the corresponding row of the input.
+template <bool kAllowStrided, int kFixedInputDepth, int kFixedDepthMultiplier>
+void FloatDepthwiseConvAccumRow(int stride, int dilation_factor, int input_depth, int input_width,
+ const float *input_data, int pad_width, int depth_multiplier,
+ int filter_width, const float *filter_data, int out_x_buffer_start,
+ int out_x_buffer_end, int output_depth, float *acc_buffer)
+{
+ // Sanity check parameters. This is important in particular to ensure
+ // that we keep the number of template instantiations minimal, so we don't
+ // increase binary size unnecessarily.
+ static_assert(kFixedDepthMultiplier || !kFixedInputDepth, "");
+ static_assert(kFixedInputDepth || kAllowStrided, "");
+ assert(stride == 1 || kAllowStrided);
+ if (kFixedInputDepth)
+ {
+ assert(input_depth == kFixedInputDepth);
+ }
+ if (kFixedDepthMultiplier)
+ {
+ assert(depth_multiplier == kFixedDepthMultiplier);
+ }
+ assert(output_depth == input_depth * depth_multiplier);
+ const int input_ptr_increment = stride * input_depth;
+ const float *filter_base_ptr = filter_data;
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ // For the current (filter_x, filter_y) point in the filter,
+ // compute the boundaries of the corresponding output row segment.
+ int out_x_loop_start_unclamped = 0;
+ int out_x_loop_end_unclamped = 0;
+ if (kAllowStrided)
+ {
+ if (stride == 2)
+ {
+ out_x_loop_start_unclamped = (pad_width - dilation_factor * filter_x + 1) / 2;
+ out_x_loop_end_unclamped = (pad_width + input_width - dilation_factor * filter_x + 1) / 2;
+ }
+ else if (stride == 4)
+ {
+ out_x_loop_start_unclamped = (pad_width - dilation_factor * filter_x + 3) / 4;
+ out_x_loop_end_unclamped = (pad_width + input_width - dilation_factor * filter_x + 3) / 4;
+ }
+ else
+ {
+ out_x_loop_start_unclamped = (pad_width - dilation_factor * filter_x + stride - 1) / stride;
+ out_x_loop_end_unclamped =
+ (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride;
+ }
+ }
+ else
+ {
+ out_x_loop_start_unclamped = pad_width - dilation_factor * filter_x;
+ out_x_loop_end_unclamped = pad_width + input_width - dilation_factor * filter_x;
+ }
+ // The kernel will have to iterate on the segment of the
+ // output row that starts at out_x_loop_start and ends at out_x_loop_end.
+ const int out_x_loop_start = std::max(out_x_buffer_start, out_x_loop_start_unclamped);
+ const int out_x_loop_end = std::min(out_x_buffer_end, out_x_loop_end_unclamped);
+
+ float *acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
+ const int in_x_origin = (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x;
+ const float *input_ptr = input_data + in_x_origin * input_depth;
+ const int num_output_pixels = out_x_loop_end - out_x_loop_start;
+ FloatDepthwiseConvKernel<kAllowStrided, kFixedInputDepth, kFixedDepthMultiplier>::Run(
+ num_output_pixels, input_depth, depth_multiplier, input_ptr, input_ptr_increment,
+ filter_base_ptr, acc_buffer_ptr);
+ filter_base_ptr += output_depth;
+ }
+}
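A minimal standalone sketch (illustration only, not part of the patch) of the boundary arithmetic used above: for one filter tap at filter_x, the valid out_x range is exactly the set of output columns whose corresponding input column lands inside the image, which the rounded-up integer divisions compute.

#include <cassert>

int main()
{
  const int stride = 2, dilation_factor = 1, pad_width = 1;
  const int input_width = 5, filter_x = 0;
  // ceil((pad_width - dilation_factor * filter_x) / stride)
  const int out_x_start = (pad_width - dilation_factor * filter_x + stride - 1) / stride;
  // ceil((pad_width + input_width - dilation_factor * filter_x) / stride)
  const int out_x_end = (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride;
  // Every out_x in [out_x_start, out_x_end) maps to an in-bounds input column.
  for (int out_x = out_x_start; out_x < out_x_end; ++out_x)
  {
    const int in_x = out_x * stride - pad_width + dilation_factor * filter_x;
    assert(in_x >= 0 && in_x < input_width);
  }
  // The neighbours just outside the range map out of bounds.
  assert((out_x_start - 1) * stride - pad_width + dilation_factor * filter_x < 0);
  assert(out_x_end * stride - pad_width + dilation_factor * filter_x >= input_width);
  return 0;
}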
+
+// Generic fallback of FloatDepthwiseConvAccumRow: portable, non-templatized.
+inline void FloatDepthwiseConvAccumRowGeneric(int stride, int dilation_factor, int input_depth,
+ int input_width, const float *input_data,
+ int pad_width, int depth_multiplier, int filter_width,
+ const float *filter_data, int out_x_buffer_start,
+ int out_x_buffer_end, int output_depth,
+ float *acc_buffer)
+{
+ const float *filter_base_ptr = filter_data;
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int out_x_loop_start =
+ std::max(out_x_buffer_start, (pad_width - dilation_factor * filter_x + stride - 1) / stride);
+ const int out_x_loop_end =
+ std::min(out_x_buffer_end,
+ (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride);
+
+ float *acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
+ const int in_x_origin = (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x;
+ const float *input_ptr = input_data + in_x_origin * input_depth;
+ const int input_ptr_increment = (stride - 1) * input_depth;
+ for (int out_x = out_x_loop_start; out_x < out_x_loop_end; out_x++)
+ {
+ const float *filter_ptr = filter_base_ptr;
+ for (int ic = 0; ic < input_depth; ++ic)
+ {
+ const float input_val = *input_ptr++;
+ for (int m = 0; m < depth_multiplier; m++)
+ {
+ const float filter_val = *filter_ptr++;
+ *acc_buffer_ptr++ += filter_val * input_val;
+ }
+ }
+ input_ptr += input_ptr_increment;
+ }
+ filter_base_ptr += output_depth;
+ }
+}
+
+// Initializes the accumulator buffer with bias values.
+inline void DepthwiseConvInitAccBuffer(int num_output_pixels, int output_depth,
+ const float *bias_data, float *acc_buffer)
+{
+ // TODO(benoitjacob): This might need optimized specializations
+ // for small output_depth values, if that ever becomes an important
+ // case (like it was for some quantized DepthwiseConv cases).
+ for (int i = 0; i < num_output_pixels; i++)
+ {
+ memcpy(acc_buffer + i * output_depth, bias_data, sizeof(acc_buffer[0]) * output_depth);
+ }
+}
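A minimal sketch (illustration only, not part of the patch) of the acc_buffer layout this helper assumes: pixel-major, depth-minor, with every pixel's slice starting out as a copy of the bias vector.

#include <cassert>
#include <cstring>

int main()
{
  const int num_output_pixels = 3;
  const int output_depth = 2;
  const float bias_data[2] = {0.5f, -1.0f};
  float acc_buffer[3 * 2];
  for (int i = 0; i < num_output_pixels; i++)
  {
    std::memcpy(acc_buffer + i * output_depth, bias_data, sizeof(acc_buffer[0]) * output_depth);
  }
  // acc_buffer is now { 0.5, -1.0, 0.5, -1.0, 0.5, -1.0 }.
  for (int i = 0; i < num_output_pixels; i++)
  {
    assert(acc_buffer[i * output_depth + 0] == 0.5f);
    assert(acc_buffer[i * output_depth + 1] == -1.0f);
  }
  return 0;
}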
+
+// DepthwiseConv can run with multiple threads on the dimension specified by thread_dim.
+// Each thread processes output elements on dimension thread_dim in the range
+// [thread_start, thread_end).
+// For example, with thread_start = 2, thread_end = 6, and thread_dim = 1, this call
+// computes DepthwiseConv for output rows 2 through 5, i.e. output_data[:, 2:6, :, :]
+// (a caller-side sketch follows this function).
+inline void DepthwiseConvImpl(const DepthwiseConvParams ¶ms, const Shape &input_shape,
+ const float *input_data, const Shape &filter_shape,
+ const float *filter_data, const Shape &bias_shape,
+ const float *bias_data, const Shape &output_shape, float *output_data,
+ int thread_start, int thread_end, int thread_dim)
+{
+ UNUSED_RELEASE(bias_shape);
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ const int depth_multiplier = params.depth_multiplier;
+ const float output_activation_min = params.float_activation_min;
+ const float output_activation_max = params.float_activation_max;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ assert(thread_dim == 0 || thread_dim == 1);
+
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int input_depth = input_shape.Dims(3);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ assert(output_depth == input_depth * depth_multiplier);
+ assert(bias_shape.FlatSize() == output_depth);
+
+ static const int kAccBufferMaxSize = 4832;
+ float acc_buffer[kAccBufferMaxSize];
+ assert(kAccBufferMaxSize >= output_depth);
+ const int kOutputPixelsInAccBuffer = kAccBufferMaxSize / output_depth;
+ const int kAccBufferActualSize = kOutputPixelsInAccBuffer * output_depth;
+ assert(kOutputPixelsInAccBuffer * output_depth <= kAccBufferActualSize);
+ assert(kAccBufferActualSize <= kAccBufferMaxSize);
+ assert(kOutputPixelsInAccBuffer >= 1);
+
+ UNUSED_RELEASE(kAccBufferActualSize);
+
+ // row_accum_func will point to the core accumulation function to be used
+ // for this DepthwiseConv op.
+ using row_accum_func_t = decltype(&FloatDepthwiseConvAccumRowGeneric);
+ row_accum_func_t row_accum_func = nullptr;
+
+#define TFMINI_USE_DEPTHWISECONV_KERNEL(ALLOW_STRIDED, FIXED_INPUT_DEPTH, FIXED_DEPTH_MULTIPLIER) \
+ if (!row_accum_func && (stride_width == 1 || ALLOW_STRIDED) && \
+ (input_depth == FIXED_INPUT_DEPTH || FIXED_INPUT_DEPTH == 0) && \
+ depth_multiplier == FIXED_DEPTH_MULTIPLIER) \
+ { \
+ row_accum_func = \
+ FloatDepthwiseConvAccumRow<ALLOW_STRIDED, FIXED_INPUT_DEPTH, FIXED_DEPTH_MULTIPLIER>; \
+ }
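// For illustration (hypothetical expansion, derived from the macro above):
// TFMINI_USE_DEPTHWISECONV_KERNEL(true, 8, 1) expands to
//   if (!row_accum_func && (stride_width == 1 || true) &&
//       (input_depth == 8 || 8 == 0) && depth_multiplier == 1)
//   {
//     row_accum_func = FloatDepthwiseConvAccumRow<true, 8, 1>;
//   }
// i.e. each invocation below registers one specialized kernel, and the first
// matching one wins.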
+
+#ifdef USE_NEON
+ // We go over our list of kernels in decreasing order of preference
+ // for the cases where multiple kernels could apply.
+
+ // Start with the fastest kernels: AllowStrided=false, fixed input depth.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 8, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(false, 2, 1)
+
+ // Next come the strided kernels: AllowStrided=true, fixed input depth.
+ // They are a bit less efficient, but allow stride!=1.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 8, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 8)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 20)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 32)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 2, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 3, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 3, 4)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 4, 1)
+
+ // Finally, the kernels allowing a variable input depth.
+ // These are the least efficient but most general kernels.
+
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 1)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 2)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 8)
+ TFMINI_USE_DEPTHWISECONV_KERNEL(true, 0, 16)
+
+#endif // USE_NEON
+
+#undef TFMINI_USE_DEPTHWISECONV_KERNEL
+
+ // No matching fast kernel found, use slow fallback.
+ if (!row_accum_func)
+ {
+ row_accum_func = FloatDepthwiseConvAccumRowGeneric;
+ }
+
+ const int input_height_stride = input_shape.Dims(3) * input_shape.Dims(2);
+ const int input_batch_stride = input_height_stride * input_shape.Dims(1);
+ const int filter_height_stride = filter_shape.Dims(3) * filter_shape.Dims(2);
+
+ // Now that we have determined row_accum_func, we can start work.
+ int batch_start = 0;
+ int batch_end = batches;
+ int row_start = 0;
+ int row_end = output_height;
+ int output_ptr_offset = 0;
+
+ switch (thread_dim)
+ {
+ case 0:
+ // Multithread along the batch axis
+ assert(thread_start >= 0);
+ assert(thread_end <= batches);
+ batch_start = thread_start;
+ batch_end = thread_end;
+ output_ptr_offset = batch_start * FlatSizeSkipDim(output_shape, 0);
+ break;
+ case 1:
+ // Multithread along the row axis
+ assert(thread_start >= 0);
+ assert(thread_end <= output_height);
+ row_start = thread_start;
+ row_end = thread_end;
+ output_ptr_offset = row_start * output_width * output_depth;
+ break;
+ }
+
+ float *output_ptr = output_data + output_ptr_offset;
+ // Advance output_ptr past the rows of this batch that this thread does not
+ // process, so it lands on row_start of the next batch after the inner loops.
+ int batch_step = (output_height + row_start - row_end) * output_width * output_depth;
+
+ for (int b = batch_start; b < batch_end; ++b)
+ {
+ for (int out_y = row_start; out_y < row_end; ++out_y)
+ {
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ const int filter_y_start =
+ std::max(0, (-in_y_origin + dilation_height_factor - 1) / dilation_height_factor);
+ const int filter_y_end =
+ std::min(filter_height, (input_height - in_y_origin + dilation_height_factor - 1) /
+ dilation_height_factor);
+ for (int out_x_buffer_start = 0; out_x_buffer_start < output_width;
+ out_x_buffer_start += kOutputPixelsInAccBuffer)
+ {
+ const int out_x_buffer_end =
+ std::min(output_width, out_x_buffer_start + kOutputPixelsInAccBuffer);
+ // We call a 'pixel' a group of activations that share all but the
+ // 'depth'/'channel' coordinate. num_output_pixels is the number of
+ // output pixels that we will accumulate in this loop iteration.
+ const int num_output_pixels = out_x_buffer_end - out_x_buffer_start;
+ // Initialize our local accumulator with the bias values, so we don't
+ // have to add them later.
+ DepthwiseConvInitAccBuffer(num_output_pixels, output_depth, bias_data, acc_buffer);
+ // Accumulation loop. Most of the time should be spent in here.
+ for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
+ {
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ row_accum_func(stride_width, dilation_width_factor, input_depth, input_width,
+ input_data + in_y * input_height_stride + b * input_batch_stride,
+ pad_width, depth_multiplier, filter_width,
+ filter_data + filter_y * filter_height_stride, out_x_buffer_start,
+ out_x_buffer_end, output_depth, acc_buffer);
+ }
+ // Finished accumulating. Now store to destination.
+ const int num_output_values = output_depth * num_output_pixels;
+ int i = 0;
+// TODO(benoitjacob) optimized code goes here
+#ifdef USE_NEON
+ // Handle 16 values at a time
+ for (; i <= num_output_values - 16; i += 16)
+ {
+ float32x4_t acc[4];
+ for (int k = 0; k < 4; k++)
+ {
+ acc[k] = vld1q_f32(acc_buffer + i + 4 * k);
+ }
+ for (int k = 0; k < 4; k++)
+ {
+ acc[k] = vmaxq_f32(vdupq_n_f32(output_activation_min),
+ vminq_f32(vdupq_n_f32(output_activation_max), acc[k]));
+ }
+ for (int k = 0; k < 4; k++)
+ {
+ vst1q_f32(output_ptr + 4 * k, acc[k]);
+ }
+ output_ptr += 16;
+ }
+ // Handle 4 values at a time
+ for (; i <= num_output_values - 4; i += 4)
+ {
+ float32x4_t acc = vld1q_f32(acc_buffer + i);
+
+ acc = vmaxq_f32(vdupq_n_f32(output_activation_min),
+ vminq_f32(vdupq_n_f32(output_activation_max), acc));
+
+ vst1q_f32(output_ptr, acc);
+ output_ptr += 4;
+ }
+#endif
+ // Handle leftover values, one by one. This is very slow.
+ for (; i < num_output_values; i++)
+ {
+ float acc = acc_buffer[i];
+ acc = std::max(output_activation_min, std::min(output_activation_max, acc));
+
+ *output_ptr++ = acc;
+ }
+ }
+ }
+ output_ptr += batch_step;
+ }
+}
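A hypothetical caller-side sketch of the thread_start/thread_end/thread_dim contract documented above, using the types and the DepthwiseConvImpl overload defined in this header; the output rows are split between two workers, and in a real integration each call would run on its own thread.

void RunDepthwiseConvOnTwoWorkers(const DepthwiseConvParams &params, const Shape &input_shape,
                                  const float *input_data, const Shape &filter_shape,
                                  const float *filter_data, const Shape &bias_shape,
                                  const float *bias_data, const Shape &output_shape,
                                  float *output_data)
{
  const int output_height = output_shape.Dims(1);
  const int mid = output_height / 2;
  // Worker 0: output rows [0, mid) along thread_dim = 1 (the row axis).
  DepthwiseConvImpl(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
                    bias_data, output_shape, output_data, /*thread_start=*/0,
                    /*thread_end=*/mid, /*thread_dim=*/1);
  // Worker 1: output rows [mid, output_height).
  DepthwiseConvImpl(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
                    bias_data, output_shape, output_data, /*thread_start=*/mid,
                    /*thread_end=*/output_height, /*thread_dim=*/1);
}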
+
+} // namespace optimized
+} // namespace cker
+} // namespace nnfw
+
+#endif
{
namespace optimized
{
+namespace depthwise_conv
+{
// Implementation of quantized DepthwiseConv
template <> struct QuantizedDepthwiseConvKernel<true, 8, 2>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
for (int i = 0; i < 2; i++)
{
filter[i] =
- vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(filter_u8.val[i])), vdupq_n_s16(filter_offset));
+ vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(filter_u8.val[i])), vdupq_n_s16(filter_offset));
}
// Handle one output pixel at a time.
for (int outp = 0; outp < num_output_pixels; outp++)
for (int i = 0; i < 2; i++)
{
acc[0].val[i] =
- vmlal_s16(acc[0].val[i], vget_low_s16(filter[i]), vget_low_s16(input_dup2.val[i]));
+ vmlal_s16(acc[0].val[i], vget_low_s16(filter[i]), vget_low_s16(input_dup2.val[i]));
acc[1].val[i] =
- vmlal_s16(acc[1].val[i], vget_high_s16(filter[i]), vget_high_s16(input_dup2.val[i]));
+ vmlal_s16(acc[1].val[i], vget_high_s16(filter[i]), vget_high_s16(input_dup2.val[i]));
}
// Store the accumulators back to acc_buffer
for (int i = 0; i < 2; i++)
template <> struct QuantizedDepthwiseConvKernel<false, 8, 1>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
template <> struct QuantizedDepthwiseConvKernel<false, 4, 2>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
for (int i = 0; i < 2; i++)
{
acc[2 * i + 0] =
- vmlal_s16(acc[2 * i + 0], vget_low_s16(filter), vget_low_s16(input_dup2.val[i]));
+ vmlal_s16(acc[2 * i + 0], vget_low_s16(filter), vget_low_s16(input_dup2.val[i]));
acc[2 * i + 1] =
- vmlal_s16(acc[2 * i + 1], vget_high_s16(filter), vget_high_s16(input_dup2.val[i]));
+ vmlal_s16(acc[2 * i + 1], vget_high_s16(filter), vget_high_s16(input_dup2.val[i]));
}
// Store the accumulators back to acc_buffer
for (int i = 0; i < 4; i++)
template <> struct QuantizedDepthwiseConvKernel<false, 2, 8>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
template <> struct QuantizedDepthwiseConvKernel<false, 2, 2>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
template <> struct QuantizedDepthwiseConvKernel<false, 2, 1>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
template <> struct QuantizedDepthwiseConvKernel<false, 1, 2>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
template <> struct QuantizedDepthwiseConvKernel<false, 1, 4>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
template <> struct QuantizedDepthwiseConvKernel<false, 4, 1>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
template <> struct QuantizedDepthwiseConvKernel<false, 4, 4>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
template <> struct QuantizedDepthwiseConvKernel<true, 0, 3>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
// We will do that by register-level table-look-up using VTBL instructions.
// Here we prepare the registers containing the table-lookup indices.
static const uint8_t dup3_indices_array[3][8] = {
- {0, 0, 0, 1, 1, 1, 2, 2}, {2, 3, 3, 3, 4, 4, 4, 5}, {5, 5, 6, 6, 6, 7, 7, 7}};
+ {0, 0, 0, 1, 1, 1, 2, 2}, {2, 3, 3, 3, 4, 4, 4, 5}, {5, 5, 6, 6, 6, 7, 7, 7}};
uint8x8_t dup3_indices[3];
for (int i = 0; i < 3; i++)
{
for (int j = 0; j < 3; j++)
{
acc[0].val[j] =
- vmlal_s16(acc[0].val[j], vget_low_s16(input_dup3[j]), vget_low_s16(filter[j]));
+ vmlal_s16(acc[0].val[j], vget_low_s16(input_dup3[j]), vget_low_s16(filter[j]));
acc[1].val[j] =
- vmlal_s16(acc[1].val[j], vget_high_s16(input_dup3[j]), vget_high_s16(filter[j]));
+ vmlal_s16(acc[1].val[j], vget_high_s16(input_dup3[j]), vget_high_s16(filter[j]));
}
// Store the accumulators back to acc_buffer
for (int i = 0; i < 2; i++)
// Handle one input channel at a time.
for (; ic < input_depth; ic++)
{
- const uint16_t input_val = *local_input_ptr++ + input_offset;
+ const int16_t input_val = *local_input_ptr++ + input_offset;
for (int i = 0; i < 3; i++)
{
- const uint16_t filter_val = local_filter_ptr[i] + filter_offset;
+ const int16_t filter_val = local_filter_ptr[i] + filter_offset;
*acc_buffer_ptr++ += static_cast<int32_t>(filter_val) * input_val;
}
local_filter_ptr += 3;
template <> struct QuantizedDepthwiseConvKernel<true, 0, 2>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
for (int j = 0; j < 2; j++)
{
acc[0].val[j] =
- vmlal_s16(acc[0].val[j], vget_low_s16(filter[j]), vget_low_s16(input_dup2.val[j]));
+ vmlal_s16(acc[0].val[j], vget_low_s16(filter[j]), vget_low_s16(input_dup2.val[j]));
acc[1].val[j] =
- vmlal_s16(acc[1].val[j], vget_high_s16(filter[j]), vget_high_s16(input_dup2.val[j]));
+ vmlal_s16(acc[1].val[j], vget_high_s16(filter[j]), vget_high_s16(input_dup2.val[j]));
}
// Store the accumulators back to acc_buffer.
for (int i = 0; i < 2; i++)
for (; ic < input_depth; ic++)
{
// Load the inputs.
- const uint16_t input_val = *local_input_ptr++ + input_offset;
+ const int16_t input_val = *local_input_ptr++ + input_offset;
for (int i = 0; i < 2; i++)
{
- const uint16_t filter_val = local_filter_ptr[i] + filter_offset;
+ const int16_t filter_val = local_filter_ptr[i] + filter_offset;
*acc_buffer_ptr++ += static_cast<int32_t>(filter_val) * input_val;
}
local_filter_ptr += 2;
template <> struct QuantizedDepthwiseConvKernel<true, 0, 1>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
// Handle one input channel at a time.
for (; ic < input_depth; ic++)
{
- const uint16_t input_val = *local_input_ptr++ + input_offset;
- const uint16_t filter_val = *local_filter_ptr++ + filter_offset;
+ const int16_t input_val = *local_input_ptr++ + input_offset;
+ const int16_t filter_val = *local_filter_ptr++ + filter_offset;
*acc_buffer_ptr++ += static_cast<int32_t>(filter_val) * input_val;
}
input_ptr += input_ptr_increment;
template <> struct QuantizedDepthwiseConvKernel<true, 16, 1>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
{
acc[2 * i + 0] = vmlal_s16(acc[2 * i + 0], vget_low_s16(input[i]), vget_low_s16(filter[i]));
acc[2 * i + 1] =
- vmlal_s16(acc[2 * i + 1], vget_high_s16(input[i]), vget_high_s16(filter[i]));
+ vmlal_s16(acc[2 * i + 1], vget_high_s16(input[i]), vget_high_s16(filter[i]));
}
// Store the accumulators back to acc_buffer
for (int i = 0; i < 4; i++)
template <> struct QuantizedDepthwiseConvKernel<true, 8, 1>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
template <> struct QuantizedDepthwiseConvKernel<true, 1, 16>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
{
uint8_t input_u8 = *input_ptr;
input_ptr += input_ptr_increment;
- uint16_t input = static_cast<int16_t>(input_u8 + input_offset);
+ int16_t input = static_cast<int16_t>(input_u8) + input_offset;
// Load the accumulators from acc_buffer
int32x4_t acc[4];
for (int i = 0; i < 4; i++)
template <> struct QuantizedDepthwiseConvKernel<true, 1, 32>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
{
uint8_t input_u8 = *input_ptr;
input_ptr += input_ptr_increment;
- uint16_t input = static_cast<int16_t>(input_u8 + input_offset);
+ int16_t input = static_cast<int16_t>(input_u8) + input_offset;
// Load the accumulators from acc_buffer
int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0);
int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1);
template <> struct QuantizedDepthwiseConvKernel<true, 1, 20>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
{
uint8_t input_u8 = *input_ptr;
input_ptr += input_ptr_increment;
- uint16_t input = static_cast<int16_t>(input_u8 + input_offset);
+ int16_t input = static_cast<int16_t>(input_u8) + input_offset;
// Load the accumulators from acc_buffer
int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0);
int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1);
template <> struct QuantizedDepthwiseConvKernel<true, 1, 8>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
// Load the filters, add filter_offset.
const uint8x8_t filter_u8 = vld1_u8(filter_ptr);
const int16x8_t filter =
- vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(filter_u8)), vdupq_n_s16(filter_offset));
+ vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(filter_u8)), vdupq_n_s16(filter_offset));
// Handle one output pixel at a time.
for (int outp = 0; outp < num_output_pixels; outp++)
{
uint8_t input_u8 = *input_ptr;
input_ptr += input_ptr_increment;
- uint16_t input = static_cast<int16_t>(input_u8 + input_offset);
+ int16_t input = static_cast<int16_t>(input_u8) + input_offset;
// Load the accumulators from acc_buffer
int32x4_t acc[2];
for (int i = 0; i < 2; i++)
template <> struct QuantizedDepthwiseConvKernel<true, 2, 1>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
input_u16 = vset_lane_u16((reinterpret_cast<const uint16_t *>(input_ptr))[0], input_u16, 1);
input_ptr += input_ptr_increment;
const int16x4_t input_s16 =
- vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u16(input_u16))));
+ vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u16(input_u16))));
const int16x4_t input = vadd_s16(input_s16, vdup_n_s16(input_offset));
// Multiply-accumulate.
template <> struct QuantizedDepthwiseConvKernel<true, 4, 1>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
template <> struct QuantizedDepthwiseConvKernel<false, 12, 1>
{
static void Run(int num_output_pixels, int input_depth, int depth_multiplier,
- const uint8_t *input_ptr, uint16_t input_offset, int input_ptr_increment,
- const uint8_t *filter_ptr, uint16_t filter_offset, int32_t *acc_buffer_ptr)
+ const uint8_t *input_ptr, int16_t input_offset, int input_ptr_increment,
+ const uint8_t *filter_ptr, int16_t filter_offset, int32_t *acc_buffer_ptr)
{
(void)input_depth;
(void)depth_multiplier;
else
{
out_x_loop_start_unclampled =
- (pad_width - dilation_factor * filter_x + stride - 1) / stride;
+ (pad_width - dilation_factor * filter_x + stride - 1) / stride;
out_x_loop_end_unclampled =
- (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride;
+ (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride;
}
}
else
const uint8_t *input_ptr = input_data + in_x_origin * input_depth;
const int num_output_pixels = out_x_loop_end - out_x_loop_start;
QuantizedDepthwiseConvKernel<kAllowStrided, kFixedInputDepth, kFixedDepthMultiplier>::Run(
- num_output_pixels, input_depth, depth_multiplier, input_ptr, input_offset,
- input_ptr_increment, filter_base_ptr, filter_offset, acc_buffer_ptr);
+ num_output_pixels, input_depth, depth_multiplier, input_ptr, input_offset,
+ input_ptr_increment, filter_base_ptr, filter_offset, acc_buffer_ptr);
filter_base_ptr += output_depth;
}
}
const uint8_t *filter_base_ptr = filter_data;
for (int filter_x = 0; filter_x < filter_width; ++filter_x)
{
- const int out_x_loop_start = std::max(
- out_x_buffer_start, (pad_width - dilation_factor * filter_x + stride - 1) / stride);
+ const int out_x_loop_start =
+ std::max(out_x_buffer_start, (pad_width - dilation_factor * filter_x + stride - 1) / stride);
const int out_x_loop_end =
- std::min(out_x_buffer_end,
- (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride);
+ std::min(out_x_buffer_end,
+ (pad_width + input_width - dilation_factor * filter_x + stride - 1) / stride);
int32_t *acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
const int in_x_origin = (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x;
const uint8_t *input_data, const Shape &filter_shape,
const uint8_t *filter_data, const Shape &bias_shape,
const int32_t *bias_data, const Shape &output_shape,
- uint8_t *output_data)
+ uint8_t *output_data, int thread_start, int thread_end,
+ int thread_dim)
{
(void)bias_shape;
const int stride_width = params.stride_width;
assert(kOutputPixelsInAccBuffer * output_depth <= kAccBufferActualSize);
assert(kAccBufferActualSize <= kAccBufferMaxSize);
assert(kOutputPixelsInAccBuffer >= 1);
+ assert(thread_dim == 0 || thread_dim == 1);
+
UNUSED_RELEASE(kAccBufferActualSize);
// row_accum_func will point to the core accumulation function to be used
depth_multiplier == FIXED_DEPTH_MULTIPLIER) \
{ \
row_accum_func = \
- QuantizedDepthwiseConvAccumRow<ALLOW_STRIDED, FIXED_INPUT_DEPTH, FIXED_DEPTH_MULTIPLIER>; \
+ QuantizedDepthwiseConvAccumRow<ALLOW_STRIDED, FIXED_INPUT_DEPTH, FIXED_DEPTH_MULTIPLIER>; \
}
#ifdef USE_NEON
const int filter_height_stride = filter_shape.Dims(3) * filter_shape.Dims(2);
// Now that we have determined row_accum_func, we can start work.
- uint8_t *output_ptr = output_data;
- for (int b = 0; b < batches; ++b)
+ int batch_start = 0;
+ int batch_end = batches;
+ int row_start = 0;
+ int row_end = output_height;
+ int output_ptr_offset = 0;
+
+ switch (thread_dim)
+ {
+ case 0:
+ // Multithread along the batch axis
+ assert(thread_start >= 0);
+ assert(thread_end <= batches);
+ batch_start = thread_start;
+ batch_end = thread_end;
+ output_ptr_offset = batch_start * FlatSizeSkipDim(output_shape, 0);
+ break;
+ case 1:
+ // Multithread along the row axis
+ assert(thread_start >= 0);
+ assert(thread_end <= output_height);
+ row_start = thread_start;
+ row_end = thread_end;
+ output_ptr_offset = row_start * output_width * output_depth;
+ break;
+ }
+
+ uint8_t *output_ptr = output_data + output_ptr_offset;
+ int batch_step = (output_height + row_start - row_end) * output_width * output_depth;
+ for (int b = batch_start; b < batch_end; ++b)
{
- for (int out_y = 0; out_y < output_height; ++out_y)
+ for (int out_y = row_start; out_y < row_end; ++out_y)
{
const int in_y_origin = (out_y * stride_height) - pad_height;
const int filter_y_start =
- std::max(0, (-in_y_origin + dilation_height_factor - 1) / dilation_height_factor);
+ std::max(0, (-in_y_origin + dilation_height_factor - 1) / dilation_height_factor);
const int filter_y_end =
- std::min(filter_height, (input_height - in_y_origin + dilation_height_factor - 1) /
- dilation_height_factor);
+ std::min(filter_height, (input_height - in_y_origin + dilation_height_factor - 1) /
+ dilation_height_factor);
for (int out_x_buffer_start = 0; out_x_buffer_start < output_width;
out_x_buffer_start += kOutputPixelsInAccBuffer)
{
const int out_x_buffer_end =
- std::min(output_width, out_x_buffer_start + kOutputPixelsInAccBuffer);
+ std::min(output_width, out_x_buffer_start + kOutputPixelsInAccBuffer);
// We call a 'pixel' a group of activation that share all but the
// 'depth'/'channel' coordinate. num_output_pixels is the number of
// output pixels that we will accumulate in this loop iteration.
filter_data + filter_y * filter_height_stride, filter_offset,
out_x_buffer_start, out_x_buffer_end, output_depth, acc_buffer);
}
- // Finished accumulating int32 values. Now need to convert them to
+ // Finished accumulating int32_t values. Now need to convert them to
// the final 8bit form and store them.
const int num_output_values = output_depth * num_output_pixels;
int i = 0;
}
}
}
+ output_ptr += batch_step;
}
}
+} // namespace depthwise_conv
+
+// template <DepthwiseConvOutputRounding kOutputRounding>
+inline void DepthwiseConvWithRounding(const DepthwiseConvParams ¶ms, const Shape &input_shape,
+ const uint8_t *input_data, const Shape &filter_shape,
+ const uint8_t *filter_data, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape,
+ uint8_t *output_data, int thread_start, int thread_end,
+ int thread_dim)
+{
+ const int depth_multiplier = params.depth_multiplier;
+ const int32_t output_activation_min = params.quantized_activation_min;
+ const int32_t output_activation_max = params.quantized_activation_max;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ assert(dilation_width_factor >= 1);
+ assert(dilation_height_factor >= 1);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ assert(output_activation_min <= output_activation_max);
+ const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+ const int input_depth = input_shape.Dims(3);
+ assert(output_depth == input_depth * depth_multiplier);
+ assert(bias_shape.FlatSize() == output_depth);
+
+ UNUSED_RELEASE(depth_multiplier);
+ UNUSED_RELEASE(output_activation_min);
+ UNUSED_RELEASE(output_activation_max);
+ UNUSED_RELEASE(dilation_width_factor);
+ UNUSED_RELEASE(dilation_height_factor);
+ UNUSED_RELEASE(output_depth);
+ UNUSED_RELEASE(input_depth);
+
+// Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on
+// Jetson TX-2. This compiler does not support the offsetof() macro.
+#if defined(__aarch64__) && !defined(GOOGLE_L4T)
+// TODO Use the code below
+// // Dispatch to dot-product 3x3 kernels when supported.
+//
+// ruy::Context *ruy_context = cpu_backend_context->ruy_context();
+// const bool has_dot_product_instructions =
+// ruy_context != nullptr &&
+// (ruy_context->GetRuntimeEnabledPaths() & ruy::Path::kNeonDotprod) != ruy::Path::kNone;
+// if (has_dot_product_instructions)
+// {
+// using optimized_ops::depthwise_conv::DotProduct3x3KernelType;
+// DotProduct3x3KernelType kernel_type =
+// optimized_ops::depthwise_conv::CategorizeDotProductKernel(
+// input_shape, filter_shape, params);
+// if (kernel_type != DotProduct3x3KernelType::kNone)
+// {
+// optimized_ops::depthwise_conv::DepthwiseConvDotProduct3x3<
+// DepthwiseConvImplementation::kUseNeon3x3DotProduct>(params, input_shape, input_data,
+// filter_shape, filter_data,
+// bias_shape,
+// bias_data, output_shape,
+// output_data);
+// return;
+// }
+// }
+//
+// // Dispatch to non-dot-product 3x3 kernels when supported.
+//
+// const int stride_width = params.stride_width;
+// const int stride_height = params.stride_height;
+// const int pad_width = params.padding_values.width;
+// const int pad_height = params.padding_values.height;
+// const int output_shift = params.output_shift;
+//
+// // Call kernel optimized for depthwise convolutions using 3x3 filters if
+// // parameters are supported.
+// if (depthwise_conv::Fast3x3FilterKernelSupported(input_shape, filter_shape, stride_width,
+// stride_height, dilation_width_factor,
+// dilation_height_factor, pad_width, pad_height,
+// depth_multiplier, output_shape, output_shift))
+// {
+// depthwise_conv::DepthwiseConv3x3Filter<kOutputRounding>(
+// params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data,
+// output_shape, output_data, thread_start, thread_end, thread_dim);
+// return;
+// }
+#endif
+
+ depthwise_conv::DepthwiseConvGeneral(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data,
+ thread_start, thread_end, thread_dim);
+}
+
+inline void DepthwiseConvImpl(const DepthwiseConvParams ¶ms, const Shape &input_shape,
+ const uint8_t *input_data, const Shape &filter_shape,
+ const uint8_t *filter_data, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape,
+ uint8_t *output_data, int thread_start, int thread_end,
+ int thread_dim)
+{
+ return DepthwiseConvWithRounding(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, thread_start,
+ thread_end, thread_dim);
+}
+
} // namespace optimized
} // namespace cker
} // namespace nnfw
{
const int bottom_row_elements = (bottom_padding * kwidth * in_depth);
const int bottom_start =
- output_row_offset + ((top_padding + (ih_end - ih_start)) * kwidth * in_depth);
+ output_row_offset + ((top_padding + (ih_end - ih_start)) * kwidth * in_depth);
memset(conv_buffer_data + bottom_start, zero_byte, (bottom_row_elements * sizeof(T)));
}
}
for (int batch = 0; batch < batches; ++batch)
{
const T zero_byte =
- zero_bytes_len > 1 ? static_cast<T>(zero_bytes[batch]) : static_cast<T>(zero_bytes[0]);
+ zero_bytes_len > 1 ? static_cast<T>(zero_bytes[batch]) : static_cast<T>(zero_bytes[0]);
for (int out_y = 0; out_y < output_height; ++out_y)
{
for (int out_x = 0; out_x < output_width; ++out_x)
{
const float *lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2;
const float *rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2;
- float *out_ptr =
- output_data +
- ((b0 * batch_dim1 * batch_dim2) + b1 * batch_dim2 + b2) * lhs_rows * rhs_cols;
+ float *out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) + b1 * batch_dim2 + b2) *
+ lhs_rows * rhs_cols;
for (int j = 0; j < rhs_cols; ++j)
{
for (int i = 0; i < lhs_rows; ++i)
const int size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < size; i++)
{
- output_data[i] =
- ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]),
- params.float_activation_min, params.float_activation_max);
+ output_data[i] = ActivationFunctionWithMinMax(
+ fn(input1_data[i], input2_data[i]), params.float_activation_min, params.float_activation_max);
}
}
template <typename T>
inline void BroadcastBinaryArithmeticOpSlowQuant8(
- const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, const T *input1_data,
- const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data,
- const std::function<T(const BinaryArithmeticOpParam ¶ms, const T &, const T &)> &fn)
+ const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, const T *input1_data,
+ const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data,
+ const std::function<T(const BinaryArithmeticOpParam ¶ms, const T &, const T &)> &fn)
{
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
for (int c = 0; c < extended_output_shape.Dims(3); ++c)
{
output_data[Offset(extended_output_shape, b, y, x, c)] =
- ActivationFunctionWithMinMax<uint8_t>(
- fn(params, input1_data[SubscriptToIndex(desc1, b, y, x, c)],
- input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
- params.quantized_activation_min, params.quantized_activation_max);
+ ActivationFunctionWithMinMax<uint8_t>(
+ fn(params, input1_data[SubscriptToIndex(desc1, b, y, x, c)],
+ input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
+ params.quantized_activation_min, params.quantized_activation_max);
}
}
}
for (int c = 0; c < extended_output_shape.Dims(3); ++c)
{
output_data[Offset(extended_output_shape, b, y, x, c)] = ActivationFunctionWithMinMax<T>(
- fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
- input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
- params.quantized_activation_min, params.quantized_activation_max);
+ fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
+ input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
+ params.quantized_activation_min, params.quantized_activation_max);
}
}
}
template <>
inline void BroadcastBinaryArithmeticOpSlow(
- const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, const float *input1_data,
- const Shape &input2_shape, const float *input2_data, const Shape &output_shape,
- float *output_data, const std::function<float(const float &, const float &)> &fn)
+ const BinaryArithmeticOpParam ¶ms, const Shape &input1_shape, const float *input1_data,
+ const Shape &input2_shape, const float *input2_data, const Shape &output_shape,
+ float *output_data, const std::function<float(const float &, const float &)> &fn)
{
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
{
for (int c = 0; c < extended_output_shape.Dims(3); ++c)
{
- output_data[Offset(extended_output_shape, b, y, x, c)] = ActivationFunctionWithMinMax(
- fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
- input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
- params.float_activation_min, params.float_activation_max);
+ output_data[Offset(extended_output_shape, b, y, x, c)] =
+ ActivationFunctionWithMinMax(fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
+ input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
+ params.float_activation_min, params.float_activation_max);
}
}
}
bias_value = bias_data[out_channel];
}
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
- ActivationFunctionWithMinMax(total + bias_value, output_activation_min,
- output_activation_max);
+ ActivationFunctionWithMinMax(total + bias_value, output_activation_min,
+ output_activation_max);
}
}
}
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
- static_cast<uint8_t>(acc);
+ static_cast<uint8_t>(acc);
}
}
}
ruy::Matrix<Scalar> *dst, bool use_caching = false)
{
ruy::Order ruy_order =
- params.order == Order::kColMajor ? ruy::Order::kColMajor : ruy::Order::kRowMajor;
+ params.order == Order::kColMajor ? ruy::Order::kColMajor : ruy::Order::kRowMajor;
ruy::MakeSimpleLayout(params.rows, params.cols, ruy_order, dst->mutable_layout());
// Note that ruy::Matrix::data is a ConstCheckingPtr, not a plain pointer.
// It does care whether we assign to it a Scalar* or a const Scalar*.
--- /dev/null
+nnfw_find_package(Ruy REQUIRED)
+
+add_library(nnfw_lib_ruy INTERFACE)
+target_link_libraries(nnfw_lib_ruy INTERFACE ruy)
+target_link_libraries(nnfw_lib_ruy INTERFACE ruy_instrumentation)
+target_compile_definitions(nnfw_lib_ruy INTERFACE USE_RUY_GEMV)
+if(PROFILE_RUY)
+ target_link_libraries(nnfw_lib_ruy INTERFACE ruy_profiler)
+endif(PROFILE_RUY)
+
+target_include_directories(nnfw_lib_ruy INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include)
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_RUY_NEON_TENSOR_UTILS_H__
+#define __NNFW_RUY_NEON_TENSOR_UTILS_H__
+
+#include "ruy/neon/neon_check.h"
+
+#ifdef USE_NEON
+
+#define kFloatWeightsPerNeonLane 4
+
+namespace nnfw
+{
+namespace ruy
+{
+
+inline bool NeonIsZeroVector(const float *vector, int v_size)
+{
+ // If v_size is not divisible by kFloatWeightsPerNeonLane, we cannot
+ // use the main vectorized loop, and we need to process sequentially.
+ // postamble_start shows the start index where this should happen.
+ const int postamble_start = v_size - (v_size & (kFloatWeightsPerNeonLane - 1));
+
+ const float32x4_t zero_x4_float = vmovq_n_f32(0.0f);
+ for (int v = 0; v < postamble_start; v += kFloatWeightsPerNeonLane)
+ {
+ const float32x4_t i_x4_float = vld1q_f32(vector + v);
+ uint32x4_t cmp_result = vceqq_f32(i_x4_float, zero_x4_float);
+ if (vgetq_lane_u32(cmp_result, 0) == 0)
+ return false;
+ if (vgetq_lane_u32(cmp_result, 1) == 0)
+ return false;
+ if (vgetq_lane_u32(cmp_result, 2) == 0)
+ return false;
+ if (vgetq_lane_u32(cmp_result, 3) == 0)
+ return false;
+ }
+
+ // Postamble loop
+ for (int v = postamble_start; v < v_size; ++v)
+ {
+ if (vector[v] != 0.0)
+ return false;
+ }
+ return true;
+}
+
+} // namespace ruy
+} // namespace nnfw
+
+#endif // USE_NEON
+
+#endif // __NNFW_RUY_NEON_TENSOR_UTILS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_RUY_PORTABLE_TENSOR_UTILS_H__
+#define __NNFW_RUY_PORTABLE_TENSOR_UTILS_H__
+
+namespace nnfw
+{
+namespace ruy
+{
+
+inline bool PortableIsZeroVector(const float *vector, int v_size)
+{
+ for (int i = 0; i < v_size; ++i)
+ {
+ if (*vector++ != 0.0f)
+ return false;
+ }
+ return true;
+}
+
+} // namespace ruy
+} // namespace nnfw
+
+#endif // __NNFW_RUY_PORTABLE_TENSOR_UTILS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_RUY_RUY_SUPPORT_H__
+#define __NNFW_RUY_RUY_SUPPORT_H__
+
+#include <util/ConfigSource.h>
+#include <ruy/matrix.h>
+#include <ruy/ruy.h>
+#include <cassert>
+#include "Types.h"
+
+namespace nnfw
+{
+namespace ruy
+{
+namespace ruy_support
+{
+
+inline ::ruy::CachePolicy ToRuyCachePolicy(CachePolicy cache_policy)
+{
+ switch (cache_policy)
+ {
+ case CachePolicy::kNeverCache:
+ return ::ruy::CachePolicy::kNeverCache;
+ case CachePolicy::kCacheIfLargeSpeedup:
+ return ::ruy::CachePolicy::kCacheIfLargeSpeedup;
+ case CachePolicy::kAlwaysCache:
+ return ::ruy::CachePolicy::kAlwaysCache;
+ default:
+ assert(false);
+ return ::ruy::CachePolicy::kNeverCache;
+ }
+}
+
+template <typename Scalar, typename DataPointer>
+void MakeRuyMatrix(const MatrixParams<Scalar> &params, DataPointer data_ptr,
+ ::ruy::Matrix<Scalar> *dst, bool use_caching = false)
+{
+ ::ruy::Order ruy_order =
+ params.order == Order::kColMajor ? ::ruy::Order::kColMajor : ::ruy::Order::kRowMajor;
+ ::ruy::MakeSimpleLayout(params.rows, params.cols, ruy_order, dst->mutable_layout());
+ // Note that ruy::Matrix::data is a ConstCheckingPtr, not a plain pointer.
+ // It does care whether we assign to it a Scalar* or a const Scalar*.
+ dst->set_data(data_ptr);
+ dst->set_zero_point(params.zero_point);
+ if (use_caching)
+ {
+ dst->set_cache_policy(ToRuyCachePolicy(params.cache_policy));
+ }
+}
+
+template <typename GemmParamsType, typename RuySpecType>
+void MakeRuyMulParams(const GemmParamsType &params, RuySpecType *ruy_mul_params)
+{
+ // This validation has already been performed by the Gemm API entry point,
+ // but it doesn't hurt to test specifically this again here, where it's
+ // being used.
+ ValidateGemmParams(params);
+
+ ruy_mul_params->set_multiplier_fixedpoint(params.multiplier_fixedpoint);
+ ruy_mul_params->set_multiplier_exponent(params.multiplier_exponent);
+ ruy_mul_params->set_multiplier_fixedpoint_perchannel(params.multiplier_fixedpoint_perchannel);
+ ruy_mul_params->set_multiplier_exponent_perchannel(params.multiplier_exponent_perchannel);
+ ruy_mul_params->set_bias(params.bias);
+ ruy_mul_params->set_clamp_min(params.clamp_min);
+ ruy_mul_params->set_clamp_max(params.clamp_max);
+}
+
+} // namespace ruy_support
+} // namespace ruy
+} // namespace nnfw
+
+#endif // __NNFW_RUY_RUY_SUPPORT_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_RUY_SHAPE_H__
+#define __NNFW_RUY_SHAPE_H__
+
+#include <algorithm>
+#include <cstring>
+#include <cassert>
+#include <vector>
+
+#define UNUSED_RELEASE(a) (void)(a)
+
+namespace nnfw
+{
+namespace ruy
+{
+
+class Shape
+{
+public:
+ // Shapes with dimensions up to 5 are stored directly in the structure, while
+ // larger shapes are separately allocated.
+ static constexpr int kMaxSmallSize = 5;
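+ // For example, a 4-D shape such as {1, 224, 224, 3} fits in the inline _dims array
+ // below, whereas a 6-D shape falls back to the heap-allocated _dims_pointer.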
+
+ Shape &operator=(Shape const &) = delete;
+
+ Shape() : _size(0) {}
+
+ explicit Shape(int dimensions_count) : _size(dimensions_count)
+ {
+ if (dimensions_count > kMaxSmallSize)
+ {
+ _dims_pointer = new int32_t[dimensions_count];
+ }
+ }
+
+ Shape(int shape_size, int32_t value) : _size(0)
+ {
+ Resize(shape_size);
+ for (int i = 0; i < shape_size; ++i)
+ {
+ SetDim(i, value);
+ }
+ }
+
+ Shape(int dimensions_count, const int32_t *dims_data) : _size(0)
+ {
+ ReplaceWith(dimensions_count, dims_data);
+ }
+
+ Shape(const std::initializer_list<int> init_list) : _size(0) { BuildFrom(init_list); }
+
+ // Avoid using this constructor. We should be able to delete it when C++17
+ // rolls out.
+ Shape(Shape const &other) : _size(other.DimensionsCount())
+ {
+ if (_size > kMaxSmallSize)
+ {
+ _dims_pointer = new int32_t[_size];
+ }
+ std::memcpy(DimsData(), other.DimsData(), sizeof(int32_t) * _size);
+ }
+
+ bool operator==(const Shape &comp) const
+ {
+ return this->_size == comp._size &&
+ std::memcmp(DimsData(), comp.DimsData(), _size * sizeof(int32_t)) == 0;
+ }
+
+ ~Shape()
+ {
+ if (_size > kMaxSmallSize)
+ {
+ delete[] _dims_pointer;
+ }
+ }
+
+ inline int32_t DimensionsCount() const { return _size; }
+ inline int32_t Dims(int i) const
+ {
+ assert(i >= 0);
+ assert(i < _size);
+ return _size > kMaxSmallSize ? _dims_pointer[i] : _dims[i];
+ }
+ inline void SetDim(int i, int32_t val)
+ {
+ assert(i >= 0);
+ assert(i < _size);
+ if (_size > kMaxSmallSize)
+ {
+ _dims_pointer[i] = val;
+ }
+ else
+ {
+ _dims[i] = val;
+ }
+ }
+
+ inline int32_t *DimsData() { return _size > kMaxSmallSize ? _dims_pointer : _dims; }
+ inline const int32_t *DimsData() const { return _size > kMaxSmallSize ? _dims_pointer : _dims; }
+ // The caller must ensure that the shape is no bigger than 4-D.
+ inline const int32_t *DimsDataUpTo4D() const { return _dims; }
+
+ inline void Resize(int dimensions_count)
+ {
+ if (_size > kMaxSmallSize)
+ {
+ delete[] _dims_pointer;
+ }
+ _size = dimensions_count;
+ if (dimensions_count > kMaxSmallSize)
+ {
+ _dims_pointer = new int32_t[dimensions_count];
+ }
+ }
+
+ inline void ReplaceWith(int dimensions_count, const int32_t *dims_data)
+ {
+ Resize(dimensions_count);
+ int32_t *dst_dims = DimsData();
+ std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32_t));
+ }
+
+ inline void ReplaceWith(const Shape &other)
+ {
+ ReplaceWith(other.DimensionsCount(), other.DimsData());
+ }
+
+ inline void ReplaceWith(Shape &&other)
+ {
+ Resize(0);
+ std::swap(_size, other._size);
+ if (_size <= kMaxSmallSize)
+ std::copy(other._dims, other._dims + kMaxSmallSize, _dims);
+ else
+ _dims_pointer = other._dims_pointer;
+ }
+
+ template <typename T> inline void BuildFrom(const T &src_iterable)
+ {
+ const int dimensions_count = std::distance(src_iterable.begin(), src_iterable.end());
+ Resize(dimensions_count);
+ int32_t *data = DimsData();
+ for (auto it : src_iterable)
+ {
+ *data = it;
+ ++data;
+ }
+ }
+
+ // This will probably be factored out. Old code made substantial use of 4-D
+ // shapes, and so this function is used to extend smaller shapes. Note that
+ // (a) as Dims<4>-dependent code is eliminated, the reliance on this should be
+ // reduced, and (b) some kernels are strictly 4-D, but then the shapes of their
+ // inputs should already be 4-D, so this function should not be needed.
+ inline static Shape ExtendedShape(int new_shape_size, const Shape &shape)
+ {
+ return Shape(new_shape_size, shape, 1);
+ }
+
+ inline void BuildFrom(const std::initializer_list<int> init_list)
+ {
+ BuildFrom<const std::initializer_list<int>>(init_list);
+ }
+
+ // Returns the total count of elements, that is the size when flattened into a
+ // vector.
+ inline int FlatSize() const
+ {
+ int buffer_size = 1;
+ const int *dims_data = DimsData();
+ for (int i = 0; i < _size; i++)
+ {
+ const int dim = dims_data[i];
+ assert(dim >= 1);
+ buffer_size *= dim;
+ }
+ return buffer_size;
+ }
+
+ bool operator!=(const Shape &comp) const { return !((*this) == comp); }
+
+private:
+ // For use only by ExtendedShape(), written to guarantee (return-value) copy
+ // elision in C++17.
+ // This creates a shape padded to the desired size with the specified value.
+ Shape(int new_shape_size, const Shape &shape, int pad_value) : _size(0)
+ {
+ assert(new_shape_size >= shape.DimensionsCount());
+ assert(new_shape_size <= kMaxSmallSize);
+ Resize(new_shape_size);
+ const int size_increase = new_shape_size - shape.DimensionsCount();
+ for (int i = 0; i < size_increase; ++i)
+ {
+ SetDim(i, pad_value);
+ }
+ std::memcpy(DimsData() + size_increase, shape.DimsData(),
+ sizeof(int32_t) * shape.DimensionsCount());
+ }
+
+ int32_t _size;
+ union {
+ int32_t _dims[kMaxSmallSize];
+ int32_t *_dims_pointer{nullptr};
+ };
+};
+
+inline int MatchingDim(const Shape &shape1, int index1, const Shape &shape2, int index2)
+{
+ UNUSED_RELEASE(shape2);
+ UNUSED_RELEASE(index2);
+ assert(shape1.Dims(index1) == shape2.Dims(index2));
+ return shape1.Dims(index1);
+}
+
+template <typename... Args>
+int MatchingDim(const Shape &shape1, int index1, const Shape &shape2, int index2, Args... args)
+{
+ assert(shape1.Dims(index1) == shape2.Dims(index2));
+ UNUSED_RELEASE(shape2);
+ UNUSED_RELEASE(index2);
+ return MatchingDim(shape1, index1, args...);
+}
+
+inline Shape GetShape(const std::vector<int32_t> &data) { return Shape(data.size(), data.data()); }
+
+inline int Offset(const Shape &shape, int i0, int i1, int i2, int i3)
+{
+ assert(shape.DimensionsCount() == 4);
+ const int *dims_data = shape.DimsDataUpTo4D();
+ assert(i0 >= 0 && i0 < dims_data[0]);
+ assert(i1 >= 0 && i1 < dims_data[1]);
+ assert(i2 >= 0 && i2 < dims_data[2]);
+ assert(i3 >= 0 && i3 < dims_data[3]);
+ return ((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3;
+}
+
+inline int Offset(const Shape &shape, int *index)
+{
+ return Offset(shape, index[0], index[1], index[2], index[3]);
+}
+
+inline int FlatSizeSkipDim(const Shape &shape, int skip_dim)
+{
+ const int dims_count = shape.DimensionsCount();
+ assert(skip_dim >= 0 && skip_dim < dims_count);
+ const auto *dims_data = shape.DimsData();
+ int flat_size = 1;
+ for (int i = 0; i < dims_count; ++i)
+ {
+ flat_size *= (i == skip_dim) ? 1 : dims_data[i];
+ }
+ return flat_size;
+}
+
+// Flat size calculation, checking that dimensions match with one or more other
+// arrays.
+template <typename... Ts> inline bool checkMatching(const Shape &shape, Ts... check_shapes)
+{
+ const Shape check_shapes_array[sizeof...(Ts)] = {std::forward<Ts>(check_shapes)...};
+ for (const auto &check_shape : check_shapes_array)
+ {
+ // Check that the shapes match, except for the case where both shapes are scalars
+ if (shape.DimensionsCount() > 1 || check_shape.DimensionsCount() > 1 || shape.FlatSize() != 1 ||
+ check_shape.FlatSize() != 1)
+ {
+ if (shape.DimensionsCount() != check_shape.DimensionsCount())
+ {
+ return false;
+ }
+ for (int i = 0; i < shape.DimensionsCount(); ++i)
+ {
+ if (shape.Dims(i) != check_shape.Dims(i))
+ {
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+}
+
+struct UNUSED_ALL
+{
+ template <typename... Args> UNUSED_ALL(Args const &...) {}
+};
+template <typename... Ts> inline int MatchingFlatSize(const Shape &shape, Ts... check_shapes)
+{
+ UNUSED_ALL{check_shapes...};
+ assert(checkMatching(shape, std::forward<Ts>(check_shapes)...));
+ return shape.FlatSize();
+}
+
+inline int MatchingFlatSizeSkipDim(const Shape &shape, int skip_dim, const Shape &check_shape_0)
+{
+ UNUSED_RELEASE(check_shape_0);
+ const int dims_count = shape.DimensionsCount();
+ for (int i = 0; i < dims_count; ++i)
+ {
+ if (i != skip_dim)
+ {
+ assert(shape.Dims(i) == check_shape_0.Dims(i));
+ }
+ }
+ return FlatSizeSkipDim(shape, skip_dim);
+}
+
+inline int MatchingFlatSizeSkipDim(const Shape &shape, int skip_dim, const Shape &check_shape_0,
+ const Shape &check_shape_1)
+{
+ UNUSED_RELEASE(check_shape_0);
+ const int dims_count = shape.DimensionsCount();
+ for (int i = 0; i < dims_count; ++i)
+ {
+ if (i != skip_dim)
+ {
+ assert(shape.Dims(i) == check_shape_0.Dims(i));
+ }
+ }
+ return MatchingFlatSizeSkipDim(shape, skip_dim, check_shape_1);
+}
+
+inline int MatchingElementsSize(const Shape &shape, const Shape &check_shape_0,
+ const Shape &check_shape_1)
+{
+ const int size_1 = shape.FlatSize();
+ const int size_2 = check_shape_0.FlatSize();
+ const int size_3 = check_shape_1.FlatSize();
+ assert(size_1 == size_2);
+ assert(size_2 == size_3);
+ UNUSED_RELEASE(size_2);
+ UNUSED_RELEASE(size_3);
+ return size_1;
+}
+
+} // namespace ruy
+} // namespace nnfw
+
+#endif // __NNFW_RUY_SHAPE_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_RUY_TENSOR_UTILS_H__
+#define __NNFW_RUY_TENSOR_UTILS_H__
+
+#include "ruy/PortableTensorUtils.h"
+#include "ruy/NeonTensorUtils.h"
+
+namespace nnfw
+{
+namespace ruy
+{
+
+inline bool IsZeroVector(const float *vector, int v_size)
+{
+ return NEON_OR_PORTABLE(IsZeroVector, vector, v_size);
+}
+
+} // namespace ruy
+} // namespace nnfw
+
+#endif // __NNFW_RUY_TENSOR_UTILS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_RUY_TYPES_H__
+#define __NNFW_RUY_TYPES_H__
+
+#include <cassert>
+#include <cstdint>
+#include <type_traits>
+#include <limits>
+#include <string>
+#include "Shape.h"
+
+namespace nnfw
+{
+namespace ruy
+{
+
+enum class FusedActivationFunctionType
+{
+ kNone = 0,
+ kRelu6 = 1,
+ kRelu1 = 2,
+ kRelu = 3,
+ kTanh = 4,
+ kSigmoid = 6,
+};
+
+enum class PaddingType
+{
+ kNone = 0,
+ kSame = 1,
+ kValid = 2,
+};
+
+struct PaddingValues
+{
+ int16_t width;
+ int16_t height;
+};
+
+struct ConvParams
+{
+ PaddingType padding_type;
+ PaddingValues padding_values;
+ // TODO(starka): This was just "stride", so check that width+height is OK.
+ int16_t stride_width;
+ int16_t stride_height;
+ int16_t dilation_width_factor;
+ int16_t dilation_height_factor;
+ // uint8_t inference params.
+ // TODO(b/65838351): Use smaller types if appropriate.
+ int32_t input_offset;
+ int32_t weights_offset;
+ int32_t output_offset;
+ int32_t output_multiplier;
+ int output_shift;
+ // uint8_t, etc, activation params.
+ int32_t quantized_activation_min;
+ int32_t quantized_activation_max;
+ // float activation params.
+ float float_activation_min;
+ float float_activation_max;
+ bool is_replaced_weights{false};
+};
+
+struct FullyConnectedParams
+{
+ FusedActivationFunctionType activation{FusedActivationFunctionType::kNone};
+ // uint8 inference params.
+ // TODO(b/65838351): Use smaller types if appropriate.
+ int32_t input_offset;
+ int32_t weights_offset;
+ float weights_scale;
+ int32_t output_offset;
+ int32_t output_multiplier;
+ int output_shift;
+ // uint8, etc, activation params.
+ int32_t quantized_activation_min;
+ int32_t quantized_activation_max;
+ // float activation params - nothing uses these params yet, but ruy might use them later.
+ float float_activation_min;
+ float float_activation_max;
+ // Mark the operands as cacheable if they are unchanging, e.g. weights.
+ bool lhs_cacheable;
+ bool rhs_cacheable;
+ // FullyConnectedWeightsFormat weights_format;
+};
+
+enum class Order
+{
+ kColMajor,
+ kRowMajor
+};
+
+enum class CachePolicy : std::uint8_t
+{
+ kNeverCache,
+ kCacheIfLargeSpeedup,
+ kAlwaysCache,
+};
+
+// MatrixParams encapsulates the parameters that Gemm needs about each
+// matrix, besides the buffer data pointer.
+// Compare to ruy::Matrix, which also encapsulates the data pointer.
+// Rationale for leaving the data pointer out of here: doing so
+// requires complicated const-correctness mechanics. See
+// ruy::ConstCheckingPtr.
+template <typename Scalar> struct MatrixParams
+{
+ // Storage layout order. For now we only do plain linear non-strided
+ // layout. It would be easy to support a stride if needed.
+ Order order = Order::kColMajor;
+ // Number of rows of the matrix.
+ int rows = 0;
+ // Number of columns of the matrix.
+ int cols = 0;
+ // The zero_point, i.e. which Scalar value is to be interpreted as zero.
+ // When Scalar is floating-point, this must be 0.
+ Scalar zero_point = 0;
+ // When the data pointed to by this matrix is constant data, so that it is
+ // valid to assume that equality of pointers implies equality of data,
+ // a CachePolicy may be used instead of the default kNeverCache,
+ // which will enable ruy to take advantage of this constancy of the data to
+ // cache the packing work, which can be a large speedup in matrix*vector
+ // and other narrow shapes.
+ CachePolicy cache_policy = CachePolicy::kNeverCache;
+};
+
+// Enumeration of broad categories of Gemm.
+//
+// The primary reason for this to exist is to allow Gemm to compile
+// only uniform-quantized or only per-channel-quantized code paths.
+// This is unneeded with ruy as the back-end, as this is only a runtime
+// difference in ruy, but with gemmlowp these really are separate code
+// paths and templatizing in a QuantizationFlavor is necessary to avoid
+// compiling unused gemmlowp code. Indeed, TFLite currently uses
+// uint8 with uniform quantization and int8 with per-channel quantization,
+// and does not use uint8 with per-channel. We want to avoid compiling
+// the gemmlowp uint8 per-channel path when gemmlowp is the back-end.
+//
+// It's possible to drop this in the future if gemmlowp goes away and no
+// other then-relevant backend library handles quantized paths in a way that
+// requires knowing this at compile-time.
+enum class QuantizationFlavor
+{
+ // Floating-point Gemm: the accumulators are not multiplied by any
+ // 'multiplier'.
+ kFloatingPoint,
+ // Quantized Gemm using a single multiplier for all accumulators.
+ kIntegerWithUniformMultiplier,
+ // Quantized Gemm using separate multipliers for the accumulators of each
+ // row of the destination matrix. This is what is called 'per-channel'
+ // in GemmParams. Here we use the more specific 'per-row' terminology
+ // to allow for the possibility of 'per-column' in the future, and to
+ // allow for that to be a separate code path in some back-end such as
+ // gemmlowp.
+ kIntegerWithPerRowMultiplier
+};
+
+// Additional parameters that Gemm needs, beyond what falls into
+// the MatrixParams that it takes. Compare to ruy::Spec.
+//
+// Decoupling AccumScalar from DstScalar (rather than deducing it from that)
+// is useful future-proofing. Think of a float16 path using float32 accum.
+//
+// QuantizationFlavor is passed here even though it's technically not used
+// in this class. This is so that we retain the ability in the future to
+// specialize this class for quantization flavor, and this allows for
+// Gemm to be templatized in quantization_flavor via the GemmParams that it
+// takes, allowing for automatic template parameter deduction to take place,
+// so that most call sites don't need to specify a QuantizationFlavor
+// (only those that need perchannel quantization do).
+template <typename AccumScalar, typename DstScalar,
+ QuantizationFlavor quantization_flavor =
+ std::is_floating_point<AccumScalar>::value
+ ? QuantizationFlavor::kFloatingPoint
+ : QuantizationFlavor::kIntegerWithUniformMultiplier>
+struct GemmParams
+{
+ // Only for non-floating-point cases. The fixed-point part (i.e. the mantissa)
+ // of the multiplier by which accumulators are multiplied before being casted
+ // to the destination type.
+ AccumScalar multiplier_fixedpoint = 0;
+ // Only for non-floating-point cases. The exponent part of the aforementioned
+ // multiplier.
+ int multiplier_exponent = 0;
+ // Per-channel variant of multiplier_fixedpoint. If not nullptr, this must
+ // point to a buffer of as many values as there are rows in the destination
+ // matrix. Each row of the destination matrix will use the corresponding
+ // buffer element instead of multiplier_fixedpoint.
+ const AccumScalar *multiplier_fixedpoint_perchannel = nullptr;
+ // Per-channel variant of multiplier_exponent. If not nullptr, this must
+ // point to a buffer of as many values as there are rows in the destination
+ // matrix. Each row of the destination matrix will use the corresponding
+ // buffer element instead of multiplier_exponent.
+ //
+ // Either none or both of multiplier_exponent_perchannel and
+ // multiplier_fixedpoint_perchannel must be nullptr.
+ const int *multiplier_exponent_perchannel = nullptr;
+ // The bias vector data, if not null.
+ const AccumScalar *bias = nullptr;
+ // min clamp bound of destination values.
+ DstScalar clamp_min = std::is_floating_point<DstScalar>::value
+ ? -std::numeric_limits<DstScalar>::infinity()
+ : std::numeric_limits<DstScalar>::lowest();
+ // max clamp bound of destination values.
+ DstScalar clamp_max = std::is_floating_point<DstScalar>::value
+ ? std::numeric_limits<DstScalar>::infinity()
+ : std::numeric_limits<DstScalar>::max();
+};
+
+// Validates self-consistency of GemmParams.
+template <typename AccumScalar, typename DstScalar, QuantizationFlavor quantization_flavor>
+void ValidateGemmParams(const GemmParams<AccumScalar, DstScalar, quantization_flavor> &params)
+{
+ // Guard consistency of the quantized multiplier fields.
+ if (quantization_flavor == QuantizationFlavor::kFloatingPoint)
+ {
+ assert(!params.multiplier_fixedpoint);
+ assert(!params.multiplier_exponent);
+ assert(!params.multiplier_fixedpoint_perchannel);
+ assert(!params.multiplier_exponent_perchannel);
+ }
+ else if (quantization_flavor == QuantizationFlavor::kIntegerWithUniformMultiplier &&
+ !std::is_same<DstScalar, int32_t>::value)
+ {
+ assert(params.multiplier_fixedpoint);
+ // Nothing to check about multiplier_exponent
+ assert(!params.multiplier_fixedpoint_perchannel);
+ assert(!params.multiplier_exponent_perchannel);
+ }
+ else if (quantization_flavor == QuantizationFlavor::kIntegerWithPerRowMultiplier &&
+ !std::is_same<DstScalar, int32_t>::value)
+ {
+ assert(!params.multiplier_fixedpoint);
+ assert(!params.multiplier_exponent);
+ assert(params.multiplier_fixedpoint_perchannel);
+ assert(params.multiplier_exponent_perchannel);
+ }
+ else
+ {
+ // For the get raw accumulator case, we should make sure none of the
+ // quantization params are set.
+ assert(!params.multiplier_fixedpoint);
+ assert(!params.multiplier_exponent);
+ assert(!params.multiplier_fixedpoint_perchannel);
+ assert(!params.multiplier_exponent_perchannel);
+ }
+ UNUSED_RELEASE(params);
+}
+
+inline CachePolicy DefaultCachePolicy(bool is_constant_data)
+{
+ return is_constant_data ? CachePolicy::kCacheIfLargeSpeedup : CachePolicy::kNeverCache;
+}
+
+} // namespace ruy
+} // namespace nnfw
+
+#endif // __NNFW_RUY_TYPES_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_RUY_UTILS_H__
+#define __NNFW_RUY_UTILS_H__
+
+#include "Types.h"
+#include "Shape.h"
+
+#include <stdexcept>
+
+namespace nnfw
+{
+namespace ruy
+{
+template <typename T>
+inline void ExtractPatchIntoBufferColumn(const Shape &input_shape, int w, int h, int b, int kheight,
+ int kwidth, int stride_width, int stride_height,
+ int pad_width, int pad_height, int in_width, int in_height,
+ int in_depth, int single_buffer_length, int buffer_id,
+ const T *in_data, T *conv_buffer_data, uint8_t zero_byte)
+{
+ assert(input_shape.DimensionsCount() == 4);
+ // This chunk of code reshapes all the inputs corresponding to
+ // output (b, h, w) to a column vector in conv_buffer(:, buffer_id).
+ const int kwidth_times_indepth = kwidth * in_depth;
+ const int inwidth_times_indepth = in_width * in_depth;
+ const int ih_ungated_start = h * stride_height - pad_height;
+ const int ih_ungated_end = (ih_ungated_start + kheight);
+ const int ih_end = std::min(ih_ungated_end, in_height);
+ const int iw_ungated_start = w * stride_width - pad_width;
+ const int iw_ungated_end = (iw_ungated_start + kwidth);
+ const int iw_end = std::min(iw_ungated_end, in_width);
+ // If the patch is off the edge of the input image, skip writing those rows
+ // and columns from the patch into the output array.
+ const int h_offset = std::max(0, -ih_ungated_start);
+ const int w_offset = std::max(0, -iw_ungated_start);
+ const int ih_start = std::max(0, ih_ungated_start);
+ const int iw_start = std::max(0, iw_ungated_start);
+ const int single_row_num = std::min(kwidth - w_offset, in_width - iw_start) * in_depth;
+ const int output_row_offset = (buffer_id * single_buffer_length);
+ int out_offset = output_row_offset + (h_offset * kwidth + w_offset) * in_depth;
+ int in_offset = Offset(input_shape, b, ih_start, iw_start, 0);
+
+ // Express all of the calculations as padding around the input patch.
+ const int top_padding = h_offset;
+ const int bottom_padding = (ih_ungated_end - ih_end);
+ const int left_padding = w_offset;
+ const int right_padding = (iw_ungated_end - iw_end);
+ assert(single_row_num == ((kwidth - (left_padding + right_padding)) * in_depth));
+
+ // Write out zeroes to the elements representing the top rows of the input
+ // patch that are off the edge of the input image.
+ if (top_padding > 0)
+ {
+ const int top_row_elements = (top_padding * kwidth * in_depth);
+ memset(conv_buffer_data + output_row_offset, zero_byte, (top_row_elements * sizeof(T)));
+ }
+
+ // If the patch is on the interior of the input image horizontally, just copy
+ // over the rows sequentially, otherwise add zero padding at the start or end.
+ if ((left_padding == 0) && (right_padding == 0))
+ {
+ for (int ih = ih_start; ih < ih_end; ++ih)
+ {
+ memcpy(conv_buffer_data + out_offset, in_data + in_offset, single_row_num * sizeof(T));
+ out_offset += kwidth_times_indepth;
+ in_offset += inwidth_times_indepth;
+ }
+ }
+ else
+ {
+ for (int ih = ih_start; ih < ih_end; ++ih)
+ {
+ if (left_padding > 0)
+ {
+ const int left_start = (out_offset - (left_padding * in_depth));
+ memset(conv_buffer_data + left_start, zero_byte, (left_padding * in_depth * sizeof(T)));
+ }
+ memcpy(conv_buffer_data + out_offset, in_data + in_offset, single_row_num * sizeof(T));
+ if (right_padding > 0)
+ {
+ const int right_start = (out_offset + single_row_num);
+ memset(conv_buffer_data + right_start, zero_byte, (right_padding * in_depth * sizeof(T)));
+ }
+ out_offset += kwidth_times_indepth;
+ in_offset += inwidth_times_indepth;
+ }
+ }
+
+ // If the bottom of the patch falls off the input image, pad the values
+ // representing those input rows with zeroes.
+ if (bottom_padding > 0)
+ {
+ const int bottom_row_elements = (bottom_padding * kwidth * in_depth);
+ const int bottom_start =
+ output_row_offset + ((top_padding + (ih_end - ih_start)) * kwidth * in_depth);
+ memset(conv_buffer_data + bottom_start, zero_byte, (bottom_row_elements * sizeof(T)));
+ }
+}
+
+// Supports per-batch zero_byte for per-batch asymmetric quantized inputs.
+template <typename T>
+void DilatedIm2col(const ConvParams &params, const Shape &input_shape, const T *input_data,
+ const Shape &filter_shape, const Shape &output_shape, T *im2col_data,
+ const int32_t *zero_bytes, const int zero_bytes_len)
+{
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ // For dilated convolution, the input pixels are not contiguous therefore we
+ // can't use the same optimizations as Im2Col(). Though note this code would
+ // work fine for the non-dilated case too (though likely a bit slower).
+ assert(dilation_width_factor != 1 || dilation_height_factor != 1);
+ assert(im2col_data);
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ MatchingDim(output_shape, 3, filter_shape, 0);
+
+ // Construct the MxN sized im2col matrix.
+ // The rows M, are sub-ordered B x H x W
+ const Shape row_shape({1, batches, output_height, output_width});
+ // The columns, N, are sub-ordered Kh x Kw x Din
+ const Shape col_shape({1, filter_height, filter_width, input_depth});
+ // Use dimensions M and N to construct dims for indexing directly into im2col
+ const Shape im2col_shape({1, 1, row_shape.FlatSize(), col_shape.FlatSize()});
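+ // For instance, with a 1x5x5x3 input, a 3x3 filter and a 3x3 output, the im2col
+ // matrix is (1*3*3) x (3*3*3) = 9 x 27: one row per output pixel, one column per
+ // filter-patch element.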
+
+ // Loop through the output rows (B x H x W)
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ const T zero_byte =
+ zero_bytes_len > 1 ? static_cast<T>(zero_bytes[batch]) : static_cast<T>(zero_bytes[0]);
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ // Each im2col row is an output pixel. Arrange the input data in this
+ // row in an order we can conveniently multiply with the filter data.
+ int row_offset = Offset(row_shape, 0, batch, out_y, out_x);
+ const int in_x_origin = (out_x * stride_width) - pad_width;
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ // Loop through all the pixels of the filter (Kh x Kw)
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ if ((in_y >= 0) && (in_y < input_height))
+ {
+ // Filter row is within the input data.
+ // Loop through all the filter pixels in this row.
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+ int col_offset = Offset(col_shape, 0, filter_y, filter_x, 0);
+ T *dst = im2col_data + Offset(im2col_shape, 0, 0, row_offset, col_offset);
+ if ((in_x >= 0) && (in_x < input_width))
+ {
+ // Filter pixel is within the input, copy the input data.
+ T const *src = input_data + Offset(input_shape, batch, in_y, in_x, 0);
+ memcpy(dst, src, input_depth * sizeof(T));
+ }
+ else
+ {
+ // Filter pixel is outside the input, zero it out.
+ memset(dst, zero_byte, input_depth * sizeof(T));
+ }
+ }
+ }
+ else
+ {
+ // Filter row is outside the input, zero out the entire filter row.
+ int col_offset = Offset(col_shape, 0, filter_y, 0, 0);
+ T *dst = im2col_data + Offset(im2col_shape, 0, 0, row_offset, col_offset);
+ memset(dst, zero_byte, filter_width * input_depth * sizeof(T));
+ }
+ }
+ }
+ }
+ }
+}
+
+template <typename T>
+void DilatedIm2col(const ConvParams &params, uint8_t zero_byte, const Shape &input_shape,
+ const T *input_data, const Shape &filter_shape, const Shape &output_shape,
+ T *im2col_data)
+{
+ const int32_t zero_point = static_cast<int32_t>(zero_byte);
+ DilatedIm2col<T>(params, input_shape, input_data, filter_shape, output_shape, im2col_data,
+ &zero_point, 1);
+}
+
+template <typename T>
+void Im2col(const ConvParams &params, int kheight, int kwidth, uint8_t zero_byte,
+ const Shape &input_shape, const T *input_data, const Shape &output_shape,
+ T *output_data)
+{
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ assert(input_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int input_depth = input_shape.Dims(3);
+ const int input_width = input_shape.Dims(2);
+ const int input_height = input_shape.Dims(1);
+ const int output_depth = output_shape.Dims(3);
+ const int output_width = output_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+
+ int buffer_id = 0;
+ // Loop over the output nodes.
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int h = 0; h < output_height; ++h)
+ {
+ for (int w = 0; w < output_width; ++w)
+ {
+ ExtractPatchIntoBufferColumn(input_shape, w, h, b, kheight, kwidth, stride_width,
+ stride_height, pad_width, pad_height, input_width,
+ input_height, input_depth, output_depth, buffer_id, input_data,
+ output_data, zero_byte);
+ ++buffer_id;
+ }
+ }
+ }
+}
+
+} // namespace ruy
+} // namespace nnfw
+
+#endif // __NNFW_RUY_UTILS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_RUY_NEON_CHECK_H__
+#define __NNFW_RUY_NEON_CHECK_H__
+
+#if defined(__ARM_NEON__) || defined(__ARM_NEON)
+#define USE_NEON
+#include <arm_neon.h>
+#endif
+
+// Disable X86_NEON
+// #if defined __GNUC__ && defined __SSE4_1__ && !defined TF_LITE_DISABLE_X86_NEON
+#if 0
+#define USE_NEON
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#pragma GCC diagnostic ignored "-Wattributes"
+#pragma GCC diagnostic ignored "-Wnarrowing"
+#pragma GCC diagnostic ignored "-Wsequence-point"
+#include "NEON_2_SSE.h"
+#pragma GCC diagnostic pop
+#endif
+
+// NEON_OR_PORTABLE(SomeFunc, args) calls NeonSomeFunc(args) if USE_NEON is
+// defined, PortableSomeFunc(args) otherwise.
+#ifdef USE_NEON
+// Always use Neon code
+#define NEON_OR_PORTABLE(funcname, ...) Neon##funcname(__VA_ARGS__)
+
+#else
+// No NEON available: Use Portable code
+#define NEON_OR_PORTABLE(funcname, ...) Portable##funcname(__VA_ARGS__)
+
+#endif // defined(USE_NEON)
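+// For example, NEON_OR_PORTABLE(IsZeroVector, vector, v_size) expands to
+// NeonIsZeroVector(vector, v_size) when USE_NEON is defined and to
+// PortableIsZeroVector(vector, v_size) otherwise.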
+
+#endif // __NNFW_RUY_NEON_CHECK_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_RUY_CONV_H__
+#define __NNFW_RUY_CONV_H__
+
+#include "ruy/Types.h"
+#include "ruy/Shape.h"
+#include "ruy/Utils.h"
+#include "ruy/RuySupport.h"
+
+#include <ruy/ruy.h>
+#include <ruy/context.h>
+#include <iostream>
+#include <memory>
+#include <vector>
+
+namespace nnfw
+{
+namespace ruy
+{
+
+class Conv
+{
+public:
+ Conv() : _im2col_shape(4), _need_im2col(false), _prepared(false) {}
+
+ void prepare(const Shape &input_shape, const Shape &kernel_shape, const Shape &output_shape,
+ uint32_t stride_width, uint32_t stride_height, uint32_t dilation_width_factor,
+ uint32_t dilation_height_factor)
+ {
+ if (!_prepared)
+ {
+ IsRequiredIm2col(input_shape, kernel_shape, output_shape, stride_width, stride_height,
+ dilation_width_factor, dilation_height_factor);
+ _prepared = true;
+ }
+ }
+
+ void operator()(const ConvParams &params, const Shape &input_shape, const float *input_data,
+ const Shape &filter_shape, const float *filter_data, const Shape &bias_shape,
+ const float *bias_data, const Shape &output_shape, float *output_data,
+ ::ruy::Context *ruy_context)
+ {
+ if (!_prepared)
+ {
+ // This means that input or output are dynamic or filter is not constant
+ IsRequiredIm2col(input_shape, filter_shape, output_shape, params.stride_width,
+ params.stride_height, params.dilation_width_factor,
+ params.dilation_height_factor);
+ _prepared = true;
+ }
+
+ int im2col_size = _need_im2col ? _im2col_shape.FlatSize() : 0;
+
+ // Use the heap if the im2col buffer exceeds 8 MB (im2col_size counts float elements,
+ // so the threshold is 2M elements * 4 bytes).
+ if (im2col_size > 2 * 1024 * 1024)
+ {
+ std::unique_ptr<float[]> im2col_data = std::make_unique<float[]>(im2col_size);
+ ConvFloat(params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data, _im2col_shape, im2col_data.get(), ruy_context);
+ }
+ else if (im2col_size > 0)
+ {
+ float im2col_data[im2col_size];
+ ConvFloat(params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data, _im2col_shape, im2col_data, ruy_context);
+ }
+ else
+ {
+ ConvFloat(params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data, _im2col_shape, nullptr, ruy_context);
+ }
+ }
+
+private:
+ void ConvFloat(const ConvParams &params, const Shape &input_shape, const float *input_data,
+ const Shape &filter_shape, const float *filter_data, const Shape &bias_shape,
+ const float *bias_data, const Shape &output_shape, float *output_data,
+ const Shape &im2col_shape, float *im2col_data, ::ruy::Context *ruy_context)
+ {
+ UNUSED_RELEASE(bias_shape);
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const float output_activation_min = params.float_activation_min;
+ const float output_activation_max = params.float_activation_max;
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ // NB: the float 0.0f value is represented by all zero bytes.
+ const uint8_t float_zero_byte = 0x00;
+ const float *gemm_input_data = nullptr;
+ const Shape *gemm_input_shape = nullptr;
+ const int filter_width = filter_shape.Dims(2);
+ const int filter_height = filter_shape.Dims(1);
+ const bool need_dilated_im2col = dilation_width_factor != 1 || dilation_height_factor != 1;
+ const bool need_im2col =
+ stride_width != 1 || stride_height != 1 || filter_width != 1 || filter_height != 1;
+ if (need_dilated_im2col)
+ {
+ DilatedIm2col(params, float_zero_byte, input_shape, input_data, filter_shape, output_shape,
+ im2col_data);
+ gemm_input_data = im2col_data;
+ gemm_input_shape = &im2col_shape;
+ }
+ else if (need_im2col)
+ {
+ assert(im2col_data);
+ Im2col(params, filter_height, filter_width, float_zero_byte, input_shape, input_data,
+ im2col_shape, im2col_data);
+ gemm_input_data = im2col_data;
+ gemm_input_shape = &im2col_shape;
+ }
+ else
+ {
+ // TODO(aselle): We need to make sure to not send im2col if it is not
+ // needed.
+ assert(!im2col_data);
+ gemm_input_data = input_data;
+ gemm_input_shape = &input_shape;
+ }
+
+ const int gemm_input_dims = gemm_input_shape->DimensionsCount();
+ int m = FlatSizeSkipDim(*gemm_input_shape, gemm_input_dims - 1);
+ int n = output_shape.Dims(3);
+ int k = gemm_input_shape->Dims(gemm_input_dims - 1);
+
+ // When an optimized CBLAS implementation is not available, fall back
+ // to using cpu_backend_gemm.
+ MatrixParams<float> lhs_params;
+ lhs_params.order = Order::kRowMajor;
+ lhs_params.rows = n;
+ lhs_params.cols = k;
+ MatrixParams<float> rhs_params;
+ rhs_params.order = Order::kColMajor;
+ rhs_params.rows = k;
+ rhs_params.cols = m;
+ MatrixParams<float> dst_params;
+ dst_params.order = Order::kColMajor;
+ dst_params.rows = n;
+ dst_params.cols = m;
+ GemmParams<float, float> gemm_params;
+ gemm_params.bias = bias_data;
+ gemm_params.clamp_min = output_activation_min;
+ gemm_params.clamp_max = output_activation_max;
+
+ // Below code is from tflite::cpu_backend_gemm::detail::GemmImplUsingRuy
+ ::ruy::Matrix<float> ruy_lhs;
+ ::ruy::Matrix<float> ruy_rhs;
+ ::ruy::Matrix<float> ruy_dst;
+ // Note that cache is always enabled for input and weight tensors
+ ruy_support::MakeRuyMatrix(lhs_params, filter_data, &ruy_lhs, true);
+ ruy_support::MakeRuyMatrix(rhs_params, gemm_input_data, &ruy_rhs, true);
+ ruy_support::MakeRuyMatrix(dst_params, output_data, &ruy_dst);
+
+ ::ruy::BasicSpec<float, float> ruy_mul_params;
+ ruy_support::MakeRuyMulParams(gemm_params, &ruy_mul_params);
+
+ ::ruy::Mul(ruy_lhs, ruy_rhs, ruy_mul_params, ruy_context, &ruy_dst);
+ }
+
+ void IsRequiredIm2col(const Shape &input_shape, const Shape &kernel_shape,
+ const Shape &output_shape, uint32_t stride_width, uint32_t stride_height,
+ uint32_t dilation_width_factor, uint32_t dilation_height_factor)
+ {
+ const bool need_dilated_im2col = dilation_width_factor != 1 || dilation_height_factor != 1;
+ const bool need_non_dilated_im2col = stride_width != 1 || stride_height != 1 ||
+ kernel_shape.Dims(1) != 1 || kernel_shape.Dims(2) != 1;
+
+ _need_im2col = need_dilated_im2col || need_non_dilated_im2col;
+
+ if (_need_im2col)
+ {
+ _im2col_shape.SetDim(0, output_shape.Dims(0));
+ _im2col_shape.SetDim(1, output_shape.Dims(1));
+ _im2col_shape.SetDim(2, output_shape.Dims(2));
+ _im2col_shape.SetDim(3, input_shape.Dims(3) * kernel_shape.Dims(1) * kernel_shape.Dims(2));
+ }
+ }
+
+private:
+ Shape _im2col_shape;
+ bool _need_im2col;
+ bool _prepared;
+};
+} // namespace ruy
+} // namespace nnfw
+
+#endif // __NNFW_RUY_CONV_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_RUY_FULLY_CONNECTED_H__
+#define __NNFW_RUY_FULLY_CONNECTED_H__
+
+#include "ruy/Shape.h"
+#include "ruy/Types.h"
+#include "ruy/Utils.h"
+#include "ruy/RuySupport.h"
+
+#include <ruy/ruy.h>
+#include <ruy/context.h>
+
+namespace nnfw
+{
+namespace ruy
+{
+
+inline void FullyConnected(const FullyConnectedParams &params, const Shape &input_shape,
+ const float *input_data, const Shape &weights_shape,
+ const float *weights_data, const Shape &,
+ const float *optional_bias_data, const Shape &output_shape,
+ float *output_data, ::ruy::Context *ruy_context)
+{
+ const int dims_count = weights_shape.DimensionsCount();
+ const int input_rows = weights_shape.Dims(dims_count - 1);
+ MatrixParams<float> rhs_params;
+ rhs_params.order = Order::kColMajor;
+ rhs_params.rows = input_rows;
+ rhs_params.cols = input_shape.FlatSize() / input_rows;
+ rhs_params.cache_policy = DefaultCachePolicy(params.rhs_cacheable);
+ assert(input_shape.FlatSize() == (rhs_params.rows * rhs_params.cols));
+ MatrixParams<float> lhs_params;
+ lhs_params.order = Order::kRowMajor;
+ lhs_params.cols = weights_shape.Dims(dims_count - 1);
+ lhs_params.rows = FlatSizeSkipDim(weights_shape, dims_count - 1);
+ lhs_params.cache_policy = DefaultCachePolicy(params.lhs_cacheable);
+ MatrixParams<float> dst_params;
+ dst_params.order = Order::kColMajor;
+ dst_params.rows = output_shape.Dims(output_shape.DimensionsCount() - 1);
+ dst_params.cols = FlatSizeSkipDim(output_shape, output_shape.DimensionsCount() - 1);
+ GemmParams<float, float> gemm_params;
+ gemm_params.bias = optional_bias_data;
+ gemm_params.clamp_min = params.float_activation_min;
+ gemm_params.clamp_max = params.float_activation_max;
+
+ // Below code was copied from tflite::cpu_backend_gemm::detail::GemmImplUsingRuy
+ ::ruy::Matrix<float> ruy_lhs;
+ ::ruy::Matrix<float> ruy_rhs;
+ ::ruy::Matrix<float> ruy_dst;
+ // Note that cache is always enabled for input and weight tensors
+ ruy_support::MakeRuyMatrix(lhs_params, weights_data, &ruy_lhs, true);
+ ruy_support::MakeRuyMatrix(rhs_params, input_data, &ruy_rhs, true);
+ ruy_support::MakeRuyMatrix(dst_params, output_data, &ruy_dst);
+
+ ::ruy::BasicSpec<float, float> ruy_mul_params;
+ ruy_support::MakeRuyMulParams(gemm_params, &ruy_mul_params);
+
+ ::ruy::Mul(ruy_lhs, ruy_rhs, ruy_mul_params, ruy_context, &ruy_dst);
+}
+
+} // namespace ruy
+} // namespace nnfw
+
+#endif // __NNFW_RUY_FULLY_CONNECTED_H__
const float start = 3;
const float limit = 1;
const float delta = -0.5;
- std::vector<float> expected = {
- 3, 2.5, 2, 1.5,
- };
+ std::vector<float> expected = {3, 2.5, 2, 1.5};
std::vector<float> actual(expected.size());
nnfw::cker::Range<float>(&start, &limit, &delta, actual.data());
author = 'Samsung Research & contributors'
# The full version, including alpha/beta/rc tags
-release = '1.11.1'
+release = '1.12.0'
# -- General configuration ---------------------------------------------------
## Runtime
-- [How to introduce a new operatoin into runtime](how-to-introduce-a-new-operation-into-runtime.md)
+- [How to introduce a new operation into runtime](how-to-introduce-a-new-operation-into-runtime.md)
- [acl_cl](#acl_cl-1)
- [acl_neon](#acl_neon-1)
- [cpu](#cpu-1)
- - [TensorRegister (in some cases)](#tensorregister-in-some-cases)
- [ConstantInitializer (in some cases)](#constantinitializer-in-some-cases)
- [cpu](#cpu-2)
- [Samples (to be updated)](#samples-to-be-updated)
```cpp
void KernelGenerator::visit(const ir::operation::Select &node)
{
- const auto output_index{node.getOutputs().at(ir::operation::Select::Output::OUTPUT)};
- const auto cond_index{node.getInputs().at(ir::operation::Select::Input::COND)};
- const auto input1_index{node.getInputs().at(ir::operation::Select::Input::INPUT1)};
- const auto input2_index{node.getInputs().at(ir::operation::Select::Input::INPUT2)};
-
- const auto output_backend_descr = ::onert::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(output_index), _current_op_seq_layout);
- const auto cond_backend_descr = ::onert::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(cond_index), _current_op_seq_layout);
- const auto input1_backend_descr = ::onert::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(input1_index), _current_op_seq_layout);
- const auto input2_backend_descr = ::onert::backend::cpu::kernel::getTensorDescriptor(
- _ctx.at(input2_index), _current_op_seq_layout);
+ const auto output_index{node.getOutputs().at(0)};
+ const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
+ const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
+ const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto cond_alloc = _tensor_builder->at(cond_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
- auto input2_alloc = _tensor_builder->at(input2_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto condition_tensor = _tensor_reg->getPortableTensor(condition_index);
+ auto true_tensor = _tensor_reg->getPortableTensor(true_index);
+ auto false_tensor = _tensor_reg->getPortableTensor(false_index);
- auto fn = std::make_unique<::onert::backend::cpu::kernel::SelectLayer>();
+ auto fn = std::make_unique<ops::SelectLayer>();
- fn->configure(cond_alloc->buffer(), cond_backend_descr, input1_alloc->buffer(),
- input1_backend_descr, input2_alloc->buffer(), input2_backend_descr,
- output_alloc->buffer(), output_backend_descr);
+ fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);
- _execution_builder->append(std::move(fn));
+ _return_fn = std::move(fn);
}
```
-### TensorRegister (in some cases)
-
-This component registers tensors. Most tensors will be automatically registered internally. There
-are some exceptions, however, where additional implementations are required. It is the case when a
-tensor is treated unusually in its backend.
-
-The kernel of some operation has weights in `HWIO` as layout(data format) in case of that input's
-layout is `NHWC`. And, for `NCHW`, weights is `OIHW`. But TFLite model has weigths, `OHWI` for
-`NHWC` and `OIHW` for `NCHW`. Therefore, to register the appropriate tensor on the backend, you have
-to implement it additionally.
-
### ConstantInitializer (in some cases)
This component registers function initializing constant tensors and initialize constant tensor
-layer. This is similar to TensorRegister. Most tensors will be automatically registered internally.
-And there are some exceptions.
+layer. Most tensors are registered automatically, but there are some exceptions.
#### cpu
--- /dev/null
+# How to Use Specific Backend during Inference
+
+ONE runtime provides several ways to use a specific backend during inference.
+
+## Using NNFW API
+
+### [nnfw_set_available_backends](https://github.com/Samsung/ONE/blob/c46ddc04abdb58323fbd38389e6927f003bfaea1/runtime/onert/api/include/nnfw.h#L458)
+- Multiple backends can be set and they must be separated by a semicolon (ex: "acl_cl;cpu").
+- For each backend string, `libbackend_{backend}.so` will be dynamically loaded during `nnfw_prepare`.
+- Among the multiple backends, the 1st element is used as the default backend.
+
+### [nnfw_set_op_backend](https://github.com/Samsung/ONE/blob/c46ddc04abdb58323fbd38389e6927f003bfaea1/runtime/onert/api/include/nnfw.h#L476)
+- The backend set for an operation has higher priority than the available backends specified by `nnfw_set_available_backends`, as sketched below.
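+
+The following is a minimal sketch of how these two calls fit into the usual NNFW API session flow; the nnpackage path and the `Conv2D` operation name are placeholders, and error handling of the returned `NNFW_STATUS` values is omitted:
+
+```cpp
+#include <nnfw.h>
+
+nnfw_session *session = nullptr;
+nnfw_create_session(&session);
+nnfw_load_model_from_file(session, "path/to/nnpackage");
+
+// The first backend in the list ("cpu") acts as the default backend.
+nnfw_set_available_backends(session, "cpu;ruy");
+// Run Conv2D on the ruy backend; all other operations use the default backend.
+nnfw_set_op_backend(session, "Conv2D", "ruy");
+
+// libbackend_cpu.so and libbackend_ruy.so are dynamically loaded here.
+nnfw_prepare(session);
+```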
+
+## Using Environment Variable
+
+### 1. BACKENDS
+- Same as `nnfw_set_available_backends`
+- Example
+```bash
+BACKENDS=cpu ./Product/out/bin/nnpackage_run ...
+```
+
+### 2. OP_BACKEND_[OP_TYPE]
+- Same as `nnfw_set_op_backend`
+- Set the backend for a specific operator type
+- Example
+ - Execute `Conv2D` operator on ruy backend and others on cpu backend
+```bash
+OP_BACKEND_Conv2D=ruy BACKENDS="cpu;ruy" ./Product/out/bin/nnpackage_run ...
+```
+
+### 3. OP_BACKEND_MAP
+- Set the backend for a specific operator by its index
+- Format : `<op_id>=<backend>;<op_id>=<backend>...`
+- Example
+ - Execute `operator 10` on `acl_cl` backend and others on `acl_neon` backend
+```bash
+OP_BACKEND_MAP="10=acl_cl" BACKENDS="acl_neon;acl_cl" ./Product/out/bin/nnpackage_run ...
+```
:maxdepth: 2
:caption: Contents:
- ./how-to-add-a-new-operation.md
./how-to-build-compiler.md
./how-to-build-package.md
./how-to-build-runtime.md
./how-to-build-runtime-tizen-gbs-rpi4.md
./how-to-build-runtime-using-prebuilt-docker-image.md
- ./how-to-cross-build-runtime-for-arm.md
./how-to-cross-build-runtime-for-aarch64.md
./how-to-cross-build-runtime-for-android.md
- ./how-to-contribute.md
- ./how-to-make-an-application-with-runtime.md
- ./how-to-remote-debugging-with-visual-studio-code.md
+ ./how-to-cross-build-runtime-for-arm.md
./how-to-run-package.md
+ ./how-to-make-an-application-with-runtime.md
./how-to-use-api.md
- ./how-to-use-nnfw-api.md
./how-to-use-nnapi-binding.md
+ ./how-to-use-nnfw-api.md
+ ./how-to-use-specific-backend.md
+ ./how-to-contribute.md
+ ./how-to-remote-debugging-with-visual-studio-code.md
+ ./how-to-add-a-new-operation.md
+ ./how-to-introduce-a-new-operation-into-compiler.md
+ ./how-to-introduce-a-new-operation-into-runtime.md
--- /dev/null
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu May 14 18:13:12 2020.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.0
+===
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.10.0.md
--- /dev/null
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu May 14 18:13:12 2020.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.0
+===
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.11.0.md
+++ /dev/null
-# Release Note 1.11.1
-
-## ONE Runtime
-
-### Hot Fixes
-
-- Fix segfault due to the wrong BCQGather DynamicShapeInferer's behavior
--- /dev/null
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu May 14 18:13:12 2020.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.0
+===
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.12.0.md
--- /dev/null
+# Release Note 1.12.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- Add optimization passes: ReplaceMulAddWithDepthwiseConvPass, SubstitutePackToReshape, RemoveRedundantTranspose, ShuffleWeightTo16x1Float32Pass
+- Add quantization for InstanceNorm.
+- Fix a bug in the `one-import-bcq` command for the `--v1` and `--v2` arguments.
+- Fix FuseBCQPass to work with inter-subgraphs in the model file, along with minor BCQ-related optimizations.
+
+## ONE Runtime
+
+### Runtime backends support more operations and types
+
+- CPU backend
+ - Concat: int8
+ - DepthToSpace: float, uint8, int8
+ - LeakyRelu: float
+- ACL-CL backend
+ - ArgMin: float, uint8, int8
+- ACL-NEON backend
+ - ArgMax: int8
+ - ArgMin: float, uint8, int8
+
+### nnpackage defines configuration file
+
+- Allow users to set configuration variables via a config file. For more information, see [nnpackage spec](../../../nnpackage/spec)
--- /dev/null
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu May 14 18:13:12 2020.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.0
+===
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.5.0.md
--- /dev/null
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu May 14 18:13:12 2020.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.0
+===
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.6.0.md
--- /dev/null
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu May 14 18:13:12 2020.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.0
+===
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.7.0.md
--- /dev/null
+## Feature Highlights
+
+- **ONE** Compiler
+ - Compiler supports more operations
+  - New command line interface for user interface consistency
+- **ONE** Runtime
+ - Runtime CPU backend supports more operations
+ - Runtime CPU backend supports more quant8 operations
+ - API changes
+ - New optimization
+
+## ONE Compiler
+
+### Compiler supports more operations
+
+- MatrixDiag, MatrixSetDiag, ReverseSequence, ReverseV2, SegmentSum, SelectV2, SparseToDense, Where
+
+### New command line interface for user interface consistency
+
+- one-import: imports conventional model files to circle
+  - one-import-tf: imports TensorFlow model to circle
+  - one-import-tflite: imports TensorFlow Lite model to circle
+- one-optimize: circle optimize command
+- one-quantize: circle quantize command
+  - supports float32 to uint8, layer-wise (for Conv series)
+- one-pack: package command
+- one-prepare-venv: prepares the Python virtual environment for importing TensorFlow models
+- one-codegen: backend (if available) code generator
+
+## ONE Runtime
+
+### Runtime CPU backend supports more operations
+
+- LogSoftmax, SpaceToBatchND
+
+### Runtime CPU backend supports more quant8 operations
+
+- Logistic, Mul, Tanh, SpaceToBatchND, Transpose, Sub, Max, Min, Less, Greater, GreaterEqual, LessEqual, Equal, NotEqual
+
+### API changes
+
+- Introduce basic asynchronous execution API
+
+### New optimization
+
+- Remove dynamic tensor overhead from static models
--- /dev/null
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu May 14 18:13:12 2020.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.0
+===
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.8.0.md
--- /dev/null
+.. ONE documentation master file, created by
+ sphinx-quickstart on Thu May 14 18:13:12 2020.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.0
+===
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.9.0.md
+ ./release-note-1.9.1.md
./1.2/index
./1.3/index
./1.4/index
+ ./1.5/index
+ ./1.6/index
+ ./1.7/index
+ ./1.8/index
+ ./1.9/index
+ ./1.10/index
+ ./1.11/index
+ ./1.12/index
./api.md
./core.md
- ./compute.md
+ ./controlflow-operations.md
./executors.md
- ./backend-api.md
./heterogeneous-execution.md
- ./controlflow-operations.md
+ ./backend-api.md
+ ./compute.md
+ ./supported-operations-backend.md
# Supported Operations and backend
-As of 2020-11-10
+As of 2020-12-07
### Raw-data format (float32, int32, boolean, etc)
Add | O | O | O
AddN | O | |
ArgMax | O | O | O
-ArgMin | O | |
+ArgMin | O | O | O
AvgPool2D | O | O | O
BatchMatmul | O | |
BatchToSpaceND | O | O | O
Conv2D | O | O | O
Cos | O | |
Custom | O | |
-DepthToSpace | | O | O
+DepthToSpace | O | O | O
DepthwiseConv2D | O | O | O
Div | O | O | O
EmbeddingLookup | | O | O
InstanceNormalize | | O | O
L2Normalization | O | O | O
L2Pool | | O | O
-LeakyRelu | | O | O
+LeakyRelu | O | O | O
Less | O | O | O
LessEqual | O | O | O
LocalResponseNormalize | | O | O
Split | O | O | O
SplitV | O | |
Sqrt | O | O | O
+Square | O | |
SquaredDifference | O | O | O
Squeeze | O | O | O
StridedSlice | O | O | O
-- | -- | -- | --
Add | O | O | O
ArgMax | O | O | O
-ArgMin | O | |
+ArgMin | O | O | O
AvgPool2D | O | O | O
BatchToSpaceND | O | O | O
Cast | O | O |
Concat | O | O | O
Conv2D | O | O | O
Custom | O | |
-DepthToSpace | | O | O
+DepthToSpace | O | O | O
DepthwiseConv2D | O | O | O
Dequantize | O | O | O
EmbeddingLookup | | O | O
### Quantization format (int8)
+Operation | CPU | ACL-CL | ACL-NEON
+-- | -- | -- | --
+ArgMax | O | O | O
+ArgMin | O | O | O
+Concat | O | |
+DepthToSpace | O | |
Dequantize | O | |
Rank | O | |
Shape | O | |
--- /dev/null
+function(_Fp16Source_import)
+ if(NOT ${DOWNLOAD_FP16})
+ set(Fp16Source_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT ${DOWNLOAD_FP16})
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ # fp16 commit in xnnpack 8b283aa30a31
+ envoption(FP16_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/FP16/archive/3c54eacb74f6f5e39077300c5564156c424d77ba.tar.gz)
+ ExternalSource_Download(FP16
+ DIRNAME FP16
+ URL ${FP16_URL})
+
+ set(Fp16Source_DIR ${FP16_SOURCE_DIR} PARENT_SCOPE)
+ set(Fp16Source_FOUND TRUE PARENT_SCOPE)
+endfunction(_Fp16Source_import)
+
+_Fp16Source_import()
--- /dev/null
+function(_FxdivSource_import)
+ if(NOT ${DOWNLOAD_FXDIV})
+ set(FxdivSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT ${DOWNLOAD_FXDIV})
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ # fxdiv commit in xnnpack 8b283aa30a31
+ envoption(FXDIV_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/FXdiv/archive/f8c5354679ec2597792bc70a9e06eff50c508b9a.tar.gz)
+ ExternalSource_Download(FXDIV
+ DIRNAME FXDIV
+ URL ${FXDIV_URL})
+
+ set(FxdivSource_DIR ${FXDIV_SOURCE_DIR} PARENT_SCOPE)
+ set(FxdivSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_FxdivSource_import)
+
+_FxdivSource_import()
--- /dev/null
+function(_PsimdSource_import)
+ if(NOT ${DOWNLOAD_PSIMD})
+ set(PsimdSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT ${DOWNLOAD_PSIMD})
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ # psimd commit in xnnpack 8b283aa30a31
+ envoption(PSIMD_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.tar.gz)
+ ExternalSource_Download(PSIMD
+ DIRNAME PSIMD
+ URL ${PSIMD_URL})
+
+ set(PsimdSource_DIR ${PSIMD_SOURCE_DIR} PARENT_SCOPE)
+ set(PsimdSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_PsimdSource_import)
+
+_PsimdSource_import()
--- /dev/null
+function(_PthreadpoolSource_import)
+ if(NOT ${DOWNLOAD_PTHREADPOOL})
+ set(PthreadpoolSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT ${DOWNLOAD_PTHREADPOOL})
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ # pthreadpool commit in xnnpack 8b283aa30a31
+ envoption(PTHREADPOOL_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/pthreadpool/archive/029c88620802e1361ccf41d1970bd5b07fd6b7bb.tar.gz)
+ ExternalSource_Download(PTHREADPOOL
+ DIRNAME PTHREADPOOL
+ URL ${PTHREADPOOL_URL})
+
+ set(PthreadpoolSource_DIR ${PTHREADPOOL_SOURCE_DIR} PARENT_SCOPE)
+ set(PthreadpoolSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_PthreadpoolSource_import)
+
+_PthreadpoolSource_import()
--- /dev/null
+function(_XnnpackSource_import)
+ if(NOT ${DOWNLOAD_XNNPACK})
+ set(XnnpackSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT ${DOWNLOAD_XNNPACK})
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ # xnnpack commit in tflite v2.3
+ envoption(XNNPACK_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/XNNPACK/archive/8b283aa30a3186c6e640aed520543e9c067132d.tar.gz)
+ ExternalSource_Download(XNNPACK
+ DIRNAME XNNPACK
+ URL ${XNNPACK_URL})
+
+ set(XnnpackSource_DIR ${XNNPACK_SOURCE_DIR} PARENT_SCOPE)
+ set(XnnpackSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_XnnpackSource_import)
+
+_XnnpackSource_import()
fi
CLANG_FORMAT_CANDIDATES+=("clang-format-3.9")
- CLANG_FORMAT_CANDIDATES+=("clang-format")
-
for CLANG_FORMAT_CANDIDATE in ${CLANG_FORMAT_CANDIDATES[@]}; do
if command_exists ${CLANG_FORMAT_CANDIDATE} ; then
CLANG_FORMAT="${CLANG_FORMAT_CANDIDATE}"
done
if [[ -z ${CLANG_FORMAT} ]]; then
- echo "[ERROR] clang-format is unavailable"
+ echo "[ERROR] clang-format-3.9 is unavailable"
echo
- echo "Please install clang-format before running format check"
+ echo " Please install clang-format-3.9 before running format check"
exit 1
fi
+ # Migration to clang-format-8
+ # TODO Remove this after migration to clang-format-8
+ CLANG_FORMAT_8="clang-format-8"
+  if ! command_exists ${CLANG_FORMAT_8}; then
+ echo "[ERROR] clang-format-8 is unavailable"
+ echo
+ echo " Please install clang-format-8 before running format check"
+ echo " (or use latest docker image if you are using docker for format check)"
+ exit 1
+ fi
+ for DIR_CLANG_FORMAT_8 in $(git ls-files -co --exclude-standard '*/.clang-format'); do
+ DIRECTORIES_USE_CLANG_FORMAT_8+=($(dirname "${DIR_CLANG_FORMAT_8}"))
+ done
+
# Check c++ files
FILES_TO_CHECK_CPP=()
+ FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8=()
for f in ${FILES_TO_CHECK[@]}; do
# Manually ignore style checking
if [[ ${f} == +(*/NeuralNetworks.h|*/NeuralNetworksExtensions.h) ]]; then
# File extension to check
if [[ ${f} == +(*.h|*.hpp|*.cpp|*.cc|*.c|*.cl) ]]; then
- FILES_TO_CHECK_CPP+=("${f}")
+
+ # Check clang-format-8 target files first
+ # TODO Remove this after migration to clang-format-8
+ FOUND_CLANG_8=0
+ for USE_CLANG_FORMAT_8 in ${DIRECTORIES_USE_CLANG_FORMAT_8[@]}; do
+ if [[ $f = $USE_CLANG_FORMAT_8* ]]; then
+ FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8+=("$f")
+ FOUND_CLANG_8=1
+ break
+ fi
+ done
+
+ if [[ $FOUND_CLANG_8 -ne 1 ]]; then
+ FILES_TO_CHECK_CPP+=("${f}")
+ fi
fi
done
# Skip by '.FORMATDENY' file
for s in ${DIRECTORIES_NOT_TO_BE_TESTED[@]}; do
FILES_TO_CHECK_CPP=(${FILES_TO_CHECK_CPP[*]/$s*/})
+ FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8=(${FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8[*]/$s*/})
done
if [[ ${#FILES_TO_CHECK_CPP} -ne 0 ]]; then
INVALID_EXIT=${EXIT_CODE}
fi
fi
+
+ # Check by clang-format-8
+ # TODO Remove this after migration to clang-format-8
+ if [[ ${#FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8} -ne 0 ]]; then
+ ${CLANG_FORMAT_8} -i ${FILES_TO_CHECK_CPP_BY_CLANG_FORMAT_8[@]}
+ EXIT_CODE=$?
+ if [[ ${EXIT_CODE} -ne 0 ]]; then
+ INVALID_EXIT=${EXIT_CODE}
+ fi
+ fi
}
function check_python_files() {
RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
# Additional tools
-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qqy install doxygen graphviz wget zip unzip clang-format-3.9 python3 python3-pip python3-venv hdf5-tools pylint
+RUN apt-get update && \
+ DEBIAN_FRONTEND=noninteractive \
+ apt-get -qqy install doxygen graphviz wget zip unzip clang-format-3.9 clang-format-8 python3 python3-pip python3-venv hdf5-tools pylint curl
RUN pip3 install --upgrade pip
RUN pip3 install yapf==0.22.0 numpy
--slave /usr/bin/aarch64-linux-gnu-g++ aarch64-linux-gnu-g++ /usr/bin/aarch64-linux-gnu-g++-8 \
--slave /usr/bin/aarch64-linux-gnu-gcov aarch64-linux-gnu-gcov /usr/bin/aarch64-linux-gnu-gcov-8
-# Install lcov 1.13-4 for gcc-8 support (installed lcov 1.13-3 can't support gcc-8)
-RUN wget http://launchpadlibrarian.net/370213541/lcov_1.13-4_all.deb
-RUN dpkg -i lcov_1.13-4_all.deb
+# Install lcov 1.14-2 for gcc-8 support
+# Default version lcov 1.13-3 can't support gcc-8
+# lcov 1.13-4 with gcc-8 has a bug: it reports no coverage for class declarations
+WORKDIR /root/lcov
+RUN wget http://archive.ubuntu.com/ubuntu/pool/universe/l/lcov/lcov_1.14-2_all.deb
+RUN apt-get update && apt-get -qqy install libperlio-gzip-perl libjson-perl
+RUN dpkg -i lcov_1.14-2_all.deb
+WORKDIR /root
+RUN rm -rf /root/lcov
# Build and install google test static libraries
WORKDIR /root/gtest
RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
# Additional tools (except clang-format-3.9)
-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qqy install doxygen graphviz wget zip unzip python3 python3-pip python3-venv hdf5-tools pylint
+RUN apt-get update && \
+ DEBIAN_FRONTEND=noninteractive \
+ apt-get -qqy install doxygen graphviz wget zip unzip clang-format-8 python3 python3-pip python3-venv hdf5-tools pylint curl
RUN pip3 install --upgrade pip
RUN pip3 install yapf==0.22.0 numpy
RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
# Additional tools
-RUN apt-get update && apt-get -qqy install doxygen graphviz wget unzip clang-format-3.9 python3 python3-pip python3-venv hdf5-tools pylint
+RUN apt-get update && \
+ apt-get -qqy install doxygen graphviz wget unzip clang-format-3.9 clang-format-8 python3 python3-pip python3-venv hdf5-tools pylint curl
RUN pip3 install --upgrade pip
RUN pip3 install yapf==0.22.0 numpy
option(BUILD_EXT_MULTITHREAD "Build external build using multi thread" ON)
option(BUILD_ONERT "Build onert" ON)
option(BUILD_LOGGING "Build logging runtime" ON)
-CMAKE_DEPENDENT_OPTION(BUILD_RUNTIME_NNAPI_TEST "Build Runtime NN API Generated Test"
- # Set BUILD_RUNTIME_NNAPI_TEST as ON
- # if CMAKE_COMPILER_IS_GNUCC AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.2
- ON "CMAKE_COMPILER_IS_GNUCC;NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.2"
- # Otherwise set BUILD_RUNTIME_NNAPI_TEST as OFF
- OFF)
+option(BUILD_RUNTIME_NNAPI_TEST "Build Runtime NN API Generated Test" ON)
option(BUILD_RUNTIME_NNFW_API_TEST "Build Runtime NNFW API Tests" ON)
option(BUILD_TFLITE_RUN "Build tflite-run" ON)
option(BUILD_TFLITE_VANILLA_RUN "Build tflite-vanilla-run" OFF)
#
option(BUILD_KBENCHMARK "Build kernel benchmark tool" OFF)
option(BUILD_OPENCL_TOOL "Build OpenCL tool" OFF)
-option(BUILD_NNAPI_QUICKCHECK "Build NN API Quickcheck tools" OFF)
option(BUILD_TFLITE_ACCURACY "Build tflite accuracy tool" OFF)
#
# Default external libraries source download and build configuration
option(BUILD_RUY "Build ruy library from the downloaded source" ON)
option(BUILD_CPUINFO "Build cpuinfo library from the downloaded source" ON)
option(PROFILE_RUY "Enable ruy library profiling" OFF)
+option(DOWNLOAD_XNNPACK "Download xnnpack source" ON)
+option(BUILD_XNNPACK "Build xnnpack library from the downloaded source" ON)
+option(DOWNLOAD_PTHREADPOOL "Download pthreadpool source" ON)
+option(BUILD_PTHREADPOOL "Build pthreadpool library from the source" ON)
+option(DOWNLOAD_PSIMD "Download psimd source" ON)
+option(BUILD_PSIMD "Build psimd library from the source" ON)
+option(DOWNLOAD_FP16 "Download fp16 source" ON)
+option(BUILD_FP16 "Build fp16 library from the source" ON)
+option(DOWNLOAD_FXDIV "Download fxdiv source" ON)
+option(BUILD_FXDIV "Build fxdiv library from the source" ON)
+
#
## Default sample build configuration
# NOTE BUILD_ANDROID_TFLITE(JNI lib) is disabled due to BuiltinOpResolver issue.
# tensorflow-lite does not build BuiltinOpResolver but JNI lib need it
# Related Issue : #1403
-option(BUILD_ANDROID_TFLITE "Enable android support for TensorFlow Lite" ON)
+option(BUILD_ANDROID_TFLITE "Enable android support for TensorFlow Lite" OFF)
option(BUILD_ANDROID_BENCHMARK_APP "Enable Android Benchmark App" ON)
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
# Need boost library
option(DOWNLOAD_BOOST "Download boost source" ON)
option(BUILD_BOOST "Build boost source" ON)
-option(BUILD_RUNTIME_NNAPI_TEST "Build Runtime NN API Generated Test" OFF)
-option(BUILD_NNAPI_TEST "Build nnapi_test" OFF)
-option(BUILD_NNPACKAGE_RUN "Build nnpackge_run" ON)
-option(BUILD_TFLITE_RUN "Build tflite-run" ON)
-option(BUILD_TFLITE_LOADER_TEST_TOOL "Build tflite loader testing tool" OFF)
option(BUILD_LOGGING "Build logging runtime" OFF)
#
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
+option(BUILD_XNNPACK "Build XNNPACK" OFF)
#
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
+option(BUILD_XNNPACK "Build XNNPACK" OFF)
option(BUILD_LOGGING "Build logging runtime" OFF)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
+
+option(BUILD_XNNPACK "Build XNNPACK" OFF)
set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE BOOL "Build cpuinfo micro-benchmarks")
add_extdirectory("${CpuInfoSource_DIR}" cpuinfo EXCLUDE_FROM_ALL)
set_target_properties(cpuinfo PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ # Suppress warnings generated by clog
+ set_target_properties(clog PROPERTIES COMPILE_FLAGS "-Wno-unused-result")
set(CpuInfoSource_DIR ${CpuInfoSource_DIR} PARENT_SCOPE)
set(CpuInfo_FOUND TRUE PARENT_SCOPE)
endfunction(_CpuInfo_Build)
--- /dev/null
+function(_Fp16_Build)
+ nnas_find_package(Fp16Source QUIET)
+
+ # NOTE This line prevents multiple definitions of target
+ if(TARGET fp16)
+ set(Fp16Source_DIR ${Fp16Source_DIR} PARENT_SCOPE)
+ set(Fp16_FOUND TRUE PARENT_SCOPE)
+ return()
+ endif(TARGET fp16)
+
+ if(NOT Fp16Source_FOUND)
+ message(STATUS "FP16: Source not found")
+ set(Fp16_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT Fp16Source_FOUND)
+
+ set(FP16_BUILD_TESTS OFF CACHE BOOL "Build FP16 unit tests")
+ set(FP16_BUILD_BENCHMARKS OFF CACHE BOOL "Build FP16 micro-benchmarks")
+ nnas_find_package(PsimdSource)
+ set(PSIMD_SOURCE_DIR ${PsimdSource_DIR} CACHE STRING "String to disable download PSIMD on fp16")
+ add_extdirectory("${Fp16Source_DIR}" FP16 EXCLUDE_FROM_ALL)
+ set(Fp16Source_DIR ${Fp16Source_DIR} PARENT_SCOPE)
+ set(Fp16_FOUND TRUE PARENT_SCOPE)
+endfunction(_Fp16_Build)
+
+if(BUILD_FP16)
+ _Fp16_Build()
+else()
+ set(Fp16_FOUND FALSE)
+endif()
--- /dev/null
+function(_Fxdiv_Build)
+ nnas_find_package(FxdivSource QUIET)
+
+ # NOTE This line prevents multiple definitions of target
+ if(TARGET fxdiv)
+ set(FxdivSource_DIR ${FxdivSource_DIR} PARENT_SCOPE)
+ set(Fxdiv_FOUND TRUE PARENT_SCOPE)
+ return()
+ endif(TARGET fxdiv)
+
+ if(NOT FxdivSource_FOUND)
+ message(STATUS "FXDIV: Source not found")
+ set(Fxdiv_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT FxdivSource_FOUND)
+
+ set(FXDIV_BUILD_TESTS OFF CACHE BOOL "Build FXdiv unit tests")
+ set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "Build FXdiv micro-benchmarks")
+
+ add_extdirectory("${FxdivSource_DIR}" FXDIV EXCLUDE_FROM_ALL)
+ set(FxdivSource_DIR ${FxdivSource_DIR} PARENT_SCOPE)
+ set(Fxdiv_FOUND TRUE PARENT_SCOPE)
+endfunction(_Fxdiv_Build)
+
+if(BUILD_FXDIV)
+ _Fxdiv_Build()
+else()
+ set(Fxdiv_FOUND FALSE)
+endif()
--- /dev/null
+function(_Psimd_Build)
+ nnas_find_package(PsimdSource QUIET)
+
+ # NOTE This line prevents multiple definitions of target
+ if(TARGET psimd)
+ set(PsimdSource_DIR ${PsimdSource_DIR} PARENT_SCOPE)
+ set(Psimd_FOUND TRUE PARENT_SCOPE)
+ return()
+ endif(TARGET psimd)
+
+ if(NOT PsimdSource_FOUND)
+ message(STATUS "PSIMD: Source not found")
+ set(Psimd_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT PsimdSource_FOUND)
+
+ add_extdirectory("${PsimdSource_DIR}" PSIMD EXCLUDE_FROM_ALL)
+ set(PsimdSource_DIR ${PsimdSource_DIR} PARENT_SCOPE)
+ set(Psimd_FOUND TRUE PARENT_SCOPE)
+endfunction(_Psimd_Build)
+
+if(BUILD_PSIMD)
+ _Psimd_Build()
+else()
+ set(Psimd_FOUND FALSE)
+endif()
--- /dev/null
+function(_Pthreadpool_Build)
+ nnas_find_package(PthreadpoolSource QUIET)
+
+ # NOTE This line prevents multiple definitions of target
+ if(TARGET pthreadpool)
+ set(PthreadpoolSource_DIR ${PthreadpoolSource_DIR} PARENT_SCOPE)
+ set(Pthreadpool_FOUND TRUE PARENT_SCOPE)
+ return()
+ endif(TARGET pthreadpool)
+
+ if(NOT PthreadpoolSource_FOUND)
+ message(STATUS "PTHREADPOOL: Source not found")
+ set(Pthreadpool_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT PthreadpoolSource_FOUND)
+
+ SET(PTHREADPOOL_BUILD_TESTS OFF CACHE BOOL "Build pthreadpool unit tests")
+ SET(PTHREADPOOL_BUILD_BENCHMARKS OFF CACHE BOOL "Build pthreadpool micro-benchmarks")
+
+ nnas_find_package(FxdivSource)
+ set(FXDIV_SOURCE_DIR ${FxdivSource_DIR} CACHE STRING "String to disable download FXDIV")
+
+ add_extdirectory("${PthreadpoolSource_DIR}" PTHREADPOOL EXCLUDE_FROM_ALL)
+ set_target_properties(pthreadpool PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ # Suppress warnings generated by pthreadpool
+ set_target_properties(pthreadpool PROPERTIES COMPILE_FLAGS "-Wno-deprecated-declarations")
+ set(PthreadpoolSource_DIR ${PthreadpoolSource_DIR} PARENT_SCOPE)
+ set(Pthreadpool_FOUND TRUE PARENT_SCOPE)
+endfunction(_Pthreadpool_Build)
+
+if(BUILD_PTHREADPOOL)
+ _Pthreadpool_Build()
+else()
+ set(Pthreadpool_FOUND FALSE)
+endif()
--- /dev/null
+function(_Xnnpack_Build)
+ nnas_find_package(XnnpackSource QUIET)
+ nnfw_find_package(Fxdiv QUIET)
+ nnfw_find_package(CpuInfo QUIET)
+ nnfw_find_package(Pthreadpool QUIET)
+ nnfw_find_package(Psimd QUIET)
+ nnfw_find_package(Fp16 QUIET)
+
+ # NOTE This line prevents multiple definitions of cpuinfo target
+ if(TARGET XNNPACK)
+ set(XnnpackSource_DIR ${XnnpackSource_DIR} PARENT_SCOPE)
+ set(Xnnpack_FOUND TRUE PARENT_SCOPE)
+ return()
+ endif(TARGET XNNPACK)
+
+ if(NOT XnnpackSource_FOUND)
+ message(STATUS "XNNPACK: Source not found")
+ set(Xnnpack_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT XnnpackSource_FOUND)
+
+ set(XNNPACK_BUILD_TESTS OFF CACHE BOOL "Build XNNPACK unit tests")
+ set(XNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "Build XNNPACK benchmarks")
+ set(XNNPACK_USE_SYSTEM_LIBS ON CACHE BOOL "Use system-provided dependency libraries")
+
+ add_extdirectory("${XnnpackSource_DIR}" XNNPACK EXCLUDE_FROM_ALL)
+ set_target_properties(XNNPACK PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ # Suppress warnings generated by xnnpack
+ set_target_properties(XNNPACK PROPERTIES COMPILE_FLAGS "-Wno-deprecated-declarations")
+ set(XnnpackSource_DIR ${XnnpackSource_DIR} PARENT_SCOPE)
+ set(Xnnpack_FOUND TRUE PARENT_SCOPE)
+endfunction(_Xnnpack_Build)
+
+if(BUILD_XNNPACK)
+ _Xnnpack_Build()
+else(BUILD_XNNPACK)
+ set(Xnnpack_FOUND FALSE)
+endif(BUILD_XNNPACK)
fi
cd ${BUILD_PATH}
-make "$@"
+cmake --build . -- "$@"
export TARGET_OS=android
export CROSS_BUILD=1
-make -f Makefile.template
+export BUILD_TYPE=release
+make -f Makefile.template install
fi
# docker image name
+# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
+# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
# docker image name
+# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
+# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
# docker image name
+# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
+# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
# docker image name
+# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
+# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
# docker image name
+# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
+# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
# docker image name
+# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
+# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
# docker image name
+# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
+# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
fi
# docker image name
+# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
+# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
ROOT_PATH="$CURRENT_PATH/../../"
# docker image name
+# - for xenial, use DOCKER_IMAGE_NAME="nnfw/one-devtools:xenial"
+# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
if [[ -z $DOCKER_IMAGE_NAME ]]; then
echo "It will use default docker image name"
fi
export OP_BACKEND_MaxPool2D="acl_cl"
export OP_BACKEND_AvgPool2D="acl_neon"
export ACL_LAYOUT="NCHW"
+export RUY_THREADS=4
NNAPIGTest "acl_cl;acl_neon;cpu" "Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}.union" "report/mixed"
TFLiteModelVerification "acl_cl;acl_neon;cpu" "${TESTLIST_PREFIX}.intersect.txt" "report/mixed"
{
"major-version" : "1",
- "minor-version" : "0",
+ "minor-version" : "1",
"patch-version" : "0",
+ "configs" : [ "config.cfg" ],
"models" : [ "add.tflite" ],
"model-types" : [ "tflite" ]
}
--- /dev/null
+BACKENDS="cpu"
nnpackage
├── custom_op
├── metadata
-│  └── MANIFEST
+│  ├── MANIFEST
+│  └── config.cfg
└── mymodel.model
```
- `mymodel.model` is a model file that has computation graph and weights.
+- `config.cfg` is a configuration file that has parameters to configure onert.
- `metadata` is a directory that contains all metadata including `MANIFEST`.
- `MANIFEST` is a collection of attributes about this package.
- `custom_op` is a directory that contains implementation objects.
`patch-version` is the patch version of `nnpackage`.
+#### configs
+
+`configs` is an array of configuration file names placed in the `metadata` folder. It can be empty,
+or the attribute itself can be omitted. As of now, only one item is supported.
+
#### models
`models` is an array of paths to model files, relative to the top level directory of this package.
```
{
"major-version" : "1",
- "minor-version" : "0",
+ "minor-version" : "1",
"patch-version" : "0",
+ "configs" : [ "model.cfg" ],
"models" : [ "mymodel.model", "yourmodel.model" ],
"model-types" : [ "tflite", "circle" ]
}
```
+
+## 5. Configuration file
+
+The configuration file is a human-readable plain text file with one `key=value` pair per line.
+- `#` starts a comment; the rest of the line after it is ignored.
+- All leading and trailing whitespace is trimmed from both `key` and `value`.
+
+For example
+```
+BACKENDS=cpu
+# leading/trailing space is ignored
+ EXECUTOR=Linear # some comment
+```
+
+Refer to the `runtime/onert/core/include/util/Config.lst` file for the full list of supported `key`s.
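+
+As an illustration only (this is not onert's actual parser), a file following the rules above could
+be read with a short C++ helper; all names below are made up for the sketch:
+
+```cpp
+#include <fstream>
+#include <map>
+#include <string>
+
+// Trim leading and trailing whitespace from a token.
+static std::string trim(const std::string &s)
+{
+  const auto b = s.find_first_not_of(" \t\r\n");
+  if (b == std::string::npos)
+    return "";
+  const auto e = s.find_last_not_of(" \t\r\n");
+  return s.substr(b, e - b + 1);
+}
+
+// Read `key=value` pairs, dropping `#` comments and blank lines.
+std::map<std::string, std::string> read_config(const std::string &path)
+{
+  std::map<std::string, std::string> cfg;
+  std::ifstream in(path);
+  std::string line;
+  while (std::getline(in, line))
+  {
+    const auto hash = line.find('#');
+    if (hash != std::string::npos)
+      line = line.substr(0, hash);
+    const auto eq = line.find('=');
+    if (eq == std::string::npos)
+      continue;
+    cfg[trim(line.substr(0, eq))] = trim(line.substr(eq + 1));
+  }
+  return cfg;
+}
+```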
Name: nnfw
Summary: nnfw
-Version: 1.11.1
+Version: 1.12.0
Release: 1
Group: Development
License: Apache-2.0 and MIT and BSD-2-Clause
Source1004: gemmlowp.tar.gz
Source1005: ruy.tar.gz
Source1006: cpuinfo.tar.gz
+Source1007: XNNPACK.tar.gz
+Source1008: FXDIV.tar.gz
+Source1009: PTHREADPOOL.tar.gz
+Source1010: PSIMD.tar.gz
+Source1011: FP16.tar.gz
Source2001: nnfw.pc.in
Source2002: nnfw-plugin.pc.in
tar -xf %{SOURCE1004} -C ./externals
tar -xf %{SOURCE1005} -C ./externals
tar -xf %{SOURCE1006} -C ./externals
+tar -xf %{SOURCE1007} -C ./externals
+tar -xf %{SOURCE1008} -C ./externals
+tar -xf %{SOURCE1009} -C ./externals
+tar -xf %{SOURCE1010} -C ./externals
+tar -xf %{SOURCE1011} -C ./externals
%build
%ifarch arm armv7l aarch64 x86_64
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 4 }
+}
+operand {
+ name: "gamma"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "0.0123"
+ arg: "-0.3324"
+ arg: "0.2324"
+ arg: "-3.3360"
+ }
+}
+operand {
+ name: "beta"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "0.7023"
+ arg: "-0.3092"
+ arg: "0.7552"
+ arg: "0.2729"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 4 }
+}
+operation {
+ type: "InstanceNorm"
+ input: "ifm"
+ input: "gamma"
+ input: "beta"
+ output: "ofm"
+ instance_norm_options {
+ epsilon: 0.001
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 8 dim: 8 dim: 4 }
+ quant { min: -128 max: 127 scale: 1 zero_point: 128 }
+ shape_signature { dim: -1 dim: 8 dim: 8 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 2 }
+ filler { tag: "explicit" arg: "1" arg: "2" }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 1 dim: 1 dim: 4 }
+ quant { min: -256 max: 254 scale: 2 zero_point: 128 }
+ shape_signature { dim: -1 dim: 1 dim: 1 dim: 4 }
+}
+operation {
+ type: "Mean"
+ mean_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 4 }
+ shape_signature { dim: -1 dim: 8 dim: 8 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "-1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+ shape_signature { dim: -1 dim: 8 dim: 8 dim: 1 }
+}
+operation {
+ type: "Mean"
+ mean_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 4 }
+ shape_signature { dim: -1 dim: 3 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+ shape_signature { dim: -1 dim: 4 }
+}
+operation {
+ type: "Mean"
+ mean_options {
+ keep_dims: false
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ shape_signature { dim: -1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ shape_signature { dim: -1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU6"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ shape_signature { dim: -1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ shape_signature { dim: -1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLUN1To1"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ shape_signature { dim: -1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ shape_signature { dim: -1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: BOOL
+ shape { dim: 1 dim: 3 dim: 4 }
+ shape_signature { dim: -1 dim: 3 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "1" arg: "2"
+ }
+}
+operand {
+ name: "ofm"
+ type: BOOL
+ shape { }
+}
+operation {
+ type: "ReduceAny"
+ reduce_any_options {
+ keep_dims: false
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: BOOL
+ shape { dim: 1 dim: 3 dim: 4 }
+ shape_signature { dim: -1 dim: 3 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+}
+operand {
+ name: "ofm"
+ type: BOOL
+ shape { dim: 1 dim: 4 }
+ shape_signature { dim: -1 dim: 4 }
+}
+operation {
+ type: "ReduceAny"
+ reduce_any_options {
+ keep_dims: false
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: BOOL
+ shape { dim: 1 dim: 3 dim: 4 }
+ shape_signature { dim: -1 dim: 3 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "1" arg: "2"
+ }
+}
+operand {
+ name: "ofm"
+ type: BOOL
+ shape { dim: 1 dim: 1 dim: 1 }
+}
+operation {
+ type: "ReduceAny"
+ reduce_any_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: BOOL
+ shape { dim: 2 dim: 1 dim: 4 }
+ shape_signature { dim: 2 dim: -1 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+}
+operand {
+ name: "ofm"
+ type: BOOL
+ shape { dim: 2 dim: 1 dim: 4 }
+}
+operation {
+ type: "ReduceAny"
+ reduce_any_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 4 }
+ shape_signature { dim: -1 dim: 8 dim: 8 dim: 4 }
+}
+operand {
+ name: "axis"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "-1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+ shape_signature { dim: -1 dim: 8 dim: 8 dim: 1 }
+}
+operation {
+ type: "ReduceMax"
+ reduce_max_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "axis"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 4 }
+ shape_signature { dim: -1 dim: 8 dim: 8 dim: 4 }
+}
+operand {
+ name: "axis"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "-1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+ shape_signature { dim: -1 dim: 8 dim: 8 dim: 1 }
+}
+operation {
+ type: "ReduceMin"
+ reduce_min_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "axis"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 4 }
+ shape_signature { dim: -1 dim: 3 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "1" arg: "2"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { }
+}
+operation {
+ type: "ReduceProd"
+ reduce_prod_options {
+ keep_dims: false
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 4 }
+ shape_signature { dim: -1 dim: 3 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+ shape_signature { dim: -1 dim: 4 }
+}
+operation {
+ type: "ReduceProd"
+ reduce_prod_options {
+ keep_dims: false
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 4 }
+ shape_signature { dim: -1 dim: 3 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "1" arg: "2"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 }
+}
+operation {
+ type: "ReduceProd"
+ reduce_prod_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 2 dim: 1 dim: 4 }
+ shape_signature { dim: 2 dim: -1 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 2 dim: 1 dim: 4 }
+}
+operation {
+ type: "ReduceProd"
+ reduce_prod_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 4 }
+ shape_signature { dim: -1 dim: 8 dim: 8 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "-1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+ shape_signature { dim: -1 dim: 8 dim: 8 dim: 1 }
+}
+operation {
+ type: "Sum"
+ sum_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 4 }
+ shape_signature { dim: -1 dim: 3 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+ shape_signature { dim: -1 dim: 4 }
+}
+operation {
+ type: "Sum"
+ sum_options {
+ keep_dims: false
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+../../.clang-format.8
\ No newline at end of file
} \
} while (0)
#else // __TIZEN__
-#define LEVEL_TO_STR(level) \
- (((level) == ERROR) \
- ? "ERROR" \
- : ((level) == WARNING) \
- ? "WARNING" \
- : ((level) == INFO) ? "INFO" : ((level) == DEBUG) ? "DEBUG" : "DEFAULT")
+#define LEVEL_TO_STR(level) \
+ (((level) == ERROR) ? "ERROR" \
+ : ((level) == WARNING) \
+ ? "WARNING" \
+ : ((level) == INFO) ? "INFO" : ((level) == DEBUG) ? "DEBUG" : "DEFAULT")
#define TFLITE_NATIVE_LOG(log_level, format, args...) \
do \
{ \
extern "C" {
#endif /*__cplusplus*/
-typedef enum {
+typedef enum
+{
/** 32-bit signed integer. */
INT32 = 1,
$(ONERT_PREBUILT_LIB_DIR)/libtflite_loader.so
include $(PREBUILT_SHARED_LIBRARY)
-# libtensorflowlite_jni
-include $(CLEAR_VARS)
-LOCAL_MODULE := tensorflowlite_jni
-PREBUILT_LIB += tensorflowlite_jni
-LOCAL_SRC_FILES := \
- $(ONERT_PREBUILT_LIB_DIR)/libtensorflowlite_jni.so
-include $(PREBUILT_SHARED_LIBRARY)
-
# libnnfw
include $(CLEAR_VARS)
LOCAL_MODULE := nnfw-dev
minSdkVersion 26
targetSdkVersion 29
versionCode 1
- versionName "1.11.1"
+ versionName "1.12.0"
externalNativeBuild {
ndkBuild {
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeLoadModelFromFile(
- JNIEnv *env, jobject, jlong handle, jstring jnnpkg_path)
+ JNIEnv *env, jobject, jlong handle, jstring jnnpkg_path)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetInput(
- JNIEnv *env, jobject, jlong handle, jint jindex, jint jtype, jobject jbuf, jint jbufsize)
+ JNIEnv *env, jobject, jlong handle, jint jindex, jint jtype, jobject jbuf, jint jbufsize)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetOutput(
- JNIEnv *env, jobject, jlong handle, jint jindex, jint jtype, jobject jbuf, jint jbufsize)
+ JNIEnv *env, jobject, jlong handle, jint jindex, jint jtype, jobject jbuf, jint jbufsize)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetInputLayout(
- JNIEnv *, jobject, jlong handle, jint jindex, jint jlayout)
+ JNIEnv *, jobject, jlong handle, jint jindex, jint jlayout)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetOutputLayout(
- JNIEnv *, jobject, jlong handle, jint jindex, jint jlayout)
+ JNIEnv *, jobject, jlong handle, jint jindex, jint jlayout)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetAvailableBackends(
- JNIEnv *env, jobject, jlong handle, jstring jbackends)
+ JNIEnv *env, jobject, jlong handle, jstring jbackends)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetInputTensorInfo(
- JNIEnv *env, jobject, jlong handle, jint jindex, jobject jinfo)
+ JNIEnv *env, jobject, jlong handle, jint jindex, jobject jinfo)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
}
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetOutputTensorInfo(
- JNIEnv *env, jobject, jlong handle, jint jindex, jobject jinfo)
+ JNIEnv *env, jobject, jlong handle, jint jindex, jobject jinfo)
{
if (jni_helper::verifyHandle(handle) == JNI_FALSE)
return JNI_FALSE;
* Signature: (JLjava/lang/String;)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeLoadModelFromFile(
- JNIEnv *, jobject, jlong, jstring);
+ JNIEnv *, jobject, jlong, jstring);
/*
* Class: com_samsung_onert_NativeSessionWrapper
* Signature: (JIILjava/nio/ByteBuffer;I)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetInput(
- JNIEnv *, jobject, jlong, jint, jint, jobject, jint);
+ JNIEnv *, jobject, jlong, jint, jint, jobject, jint);
/*
* Class: com_samsung_onert_NativeSessionWrapper
* Signature: (JIILjava/nio/ByteBuffer;I)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetOutput(
- JNIEnv *, jobject, jlong, jint, jint, jobject, jint);
+ JNIEnv *, jobject, jlong, jint, jint, jobject, jint);
/*
* Class: com_samsung_onert_NativeSessionWrapper
* Signature: (JII)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetInputLayout(
- JNIEnv *, jobject, jlong, jint, jint);
+ JNIEnv *, jobject, jlong, jint, jint);
/*
* Class: com_samsung_onert_NativeSessionWrapper
* Signature: (JII)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetOutputLayout(
- JNIEnv *, jobject, jlong, jint, jint);
+ JNIEnv *, jobject, jlong, jint, jint);
/*
* Class: com_samsung_onert_NativeSessionWrapper
* Signature: (JILcom/samsung/onert/NativeSessionWrapper/InternalTensorInfo;)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetInputTensorInfo(
- JNIEnv *, jobject, jlong, jint, jobject);
+ JNIEnv *, jobject, jlong, jint, jobject);
/*
* Class: com_samsung_onert_NativeSessionWrapper
* Signature: (JILcom/samsung/onert/NativeSessionWrapper/InternalTensorInfo;)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeGetOutputTensorInfo(
- JNIEnv *, jobject, jlong, jint, jobject);
+ JNIEnv *, jobject, jlong, jint, jobject);
/*
* Class: com_samsung_onert_NativeSessionWrapper
* Signature: (JLjava/lang/String;)Z
*/
JNIEXPORT jboolean JNICALL Java_com_samsung_onert_NativeSessionWrapper_nativeSetAvailableBackends(
- JNIEnv *, jobject, jlong, jstring);
+ JNIEnv *, jobject, jlong, jstring);
#ifdef __cplusplus
}
}
JNIEXPORT void JNICALL Java_com_ndk_tflbench_MainActivity_runInterpreterBenchmark(
- JNIEnv *env, jobject thisObj, jobject model_buffer)
+ JNIEnv *env, jobject thisObj, jobject model_buffer)
{
setTitle(env, thisObj, "Running Interpreter Benchmark");
std::unique_ptr<OpResolver> CreateOpResolver()
{
return std::unique_ptr<::nnfw::tflite::BuiltinOpResolver>(
- new ::nnfw::tflite::BuiltinOpResolver());
+ new ::nnfw::tflite::BuiltinOpResolver());
}
} // namespace tflite
static auto isOriginalFunctionCallSuccessful = [](cl_mem result) -> bool { return result; };
static auto originalFunction =
- findFunctionByName<cl_mem, cl_context, cl_mem_flags, size_t, void *, cl_int *>(
- "clCreateBuffer");
+ findFunctionByName<cl_mem, cl_context, cl_mem_flags, size_t, void *, cl_int *>(
+ "clCreateBuffer");
cl_mem result = originalFunction(context, flags, size, host_ptr, errcode_ret);
if (isOriginalFunctionCallSuccessful(result) && !Trace::Guard{}.isActive())
{
{
uint8_t *ptr_to_the_free_space_after_allocation = _ptr_to_free_space_start + size;
size_t size_of_reserved_space_after_allocation =
- ptr_to_the_free_space_after_allocation - _buffer;
+ ptr_to_the_free_space_after_allocation - _buffer;
if (size_of_reserved_space_after_allocation >= MAX_SIZE)
{
return false;
if (found_memory_space_description == _memory_in_use_on_gpu.end())
{
_memory_in_use_on_gpu.insert(
- std::make_pair(memory_ptr, MemoryTraits(1, size_of_allocated_space_in_bytes)));
+ std::make_pair(memory_ptr, MemoryTraits(1, size_of_allocated_space_in_bytes)));
_total_allocated_bytes_on_gpu += size_of_allocated_space_in_bytes;
if (_peak_heap_usage_on_gpu < _total_allocated_bytes_on_gpu - _total_deallocated_bytes_on_gpu)
{
size_t size;
MemoryTraits(size_t init_counter_value, size_t size_of_allocated_memory)
- : ref_counter(init_counter_value), size(size_of_allocated_memory)
+ : ref_counter(init_counter_value), size(size_of_allocated_memory)
{
}
};
clReleaseMemObject(mem);
GlobalTrace.reset();
ASSERT_STREQ(
- getContentOfFile("./cl_release_mem_object_interception_test.log").c_str(),
- "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
- "GPU - Peak mem usage: 1024 B, Total allocated: 1024 B, Total deallocated: 1024 B\n");
+ getContentOfFile("./cl_release_mem_object_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 1024 B, Total allocated: 1024 B, Total deallocated: 1024 B\n");
}
TEST_F(ClReleaseMemObjectStub, must_not_log_deallocation_event_if_original_function_failed)
}
TEST_F(
- MallocStub,
- should_not_influence_on_trace_results_even_if_orignal_function_return_any_not_null_ptr_when_incoming_size_is_zero)
+ MallocStub,
+ should_not_influence_on_trace_results_even_if_orignal_function_return_any_not_null_ptr_when_incoming_size_is_zero)
{
void *p = malloc(0);
free(p);
ASSERT_TRUE(p);
ASSERT_STREQ(
- getContentOfFile("./realloc_interception_test.log").c_str(),
- "On CPU - Peak heap usage: 1024 B, Total allocated: 1024 B, Total deallocated: 0 B\nOn "
- "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+ getContentOfFile("./realloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 1024 B, Total allocated: 1024 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
free(p);
}
TEST_F(
- ReallocStub,
- should_not_influence_on_trace_results_even_if_orignal_function_return_any_not_null_ptr_when_incoming_size_is_zero_and_ptr_is_null)
+ ReallocStub,
+ should_not_influence_on_trace_results_even_if_orignal_function_return_any_not_null_ptr_when_incoming_size_is_zero_and_ptr_is_null)
{
void *p = realloc(nullptr, 0);
free(p);
fs::path pathToTestSample2 = exePath() / "libtest_sample2.so";
void *test_sample2_handle = dlopen(pathToTestSample2.c_str(), RTLD_NOW);
void *func_addr_in_test_sample2 =
- dlsym(test_sample2_handle, "funcWhichCallFuncDefinedInTestSample3");
+ dlsym(test_sample2_handle, "funcWhichCallFuncDefinedInTestSample3");
ASSERT_TRUE(test_sample2_handle);
ASSERT_TRUE((void *)funcDefinedInTestSample3_ButWrappedInTestSample1 !=
GlobalTrace.reset();
string thisShouldBeInLogFile =
- "Total allocated: " +
- to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) +
- " B, Total deallocated: " +
- to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + " B\n";
+ "Total allocated: " +
+ to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) +
+ " B, Total deallocated: " +
+ to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + " B\n";
string andThisToo =
- "Total allocated: " +
- to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) +
- " B, Total deallocated: " +
- to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + " B\n";
+ "Total allocated: " +
+ to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) +
+ " B, Total deallocated: " +
+ to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + " B\n";
ASSERT_TRUE(getContentOfFile("./trace_test.log").find(thisShouldBeInLogFile) != string::npos);
ASSERT_TRUE(getContentOfFile("./trace_test.log").find(andThisToo) != string::npos);
}
TargetHint target_hint = TargetHint::OPENCL;
bool autoinc = true;
- graph << target_hint << Tensor(TensorInfo(TensorShape(3U, 3U, 1U, 1U), 1, DataType::F32),
- std::unique_ptr<InputAccessor>(new InputAccessor(autoinc)))
+ graph << target_hint
+ << Tensor(TensorInfo(TensorShape(3U, 3U, 1U, 1U), 1, DataType::F32),
+ std::unique_ptr<InputAccessor>(new InputAccessor(autoinc)))
<< arm_compute::graph::ConvolutionLayer(
- 3U, 3U, 1U, std::unique_ptr<WeightAccessor>(new WeightAccessor(autoinc)),
- std::unique_ptr<BiasAccessor>(new BiasAccessor()),
- arm_compute::PadStrideInfo(1, 1, 0, 0))
+ 3U, 3U, 1U, std::unique_ptr<WeightAccessor>(new WeightAccessor(autoinc)),
+ std::unique_ptr<BiasAccessor>(new BiasAccessor()),
+ arm_compute::PadStrideInfo(1, 1, 0, 0))
<< Tensor(std::unique_ptr<OutputAccessor>(new OutputAccessor()));
graph.run();
try
{
auto kernel_functor = cl::KernelFunctor<cl_int, cl::Buffer, cl::Buffer, cl_int, cl_int>(
- gpu.program_, "memory_test"); // name should be same as cl function name
+ gpu.program_, "memory_test"); // name should be same as cl function name
// create a queue per device and queue a kernel job
std::cout << "opencl information: \n\n";
std::cout << "\t -h : help\n";
std::cout
- << "\t -g : print if memory map is shared among devices in GPU (in default platform)\n\n";
+ << "\t -g : print if memory map is shared among devices in GPU (in default platform)\n\n";
std::cout << "\t -s : test for synchronized work by two devices in a GPU\n\n";
}
int kernel_idx[MAX_DEVICE_NUM];
unsigned char kernel_completed = 0x00; // bit 0 = 1 means kernel by device[0] was completed.
unsigned char
- kernel_completed_flag; // if comparing kernel_completed with this var, all kernels are completed
+ kernel_completed_flag; // if comparing kernel_completed with this var, all kernels are completed
int device_num;
std::mutex kernel_complete_handler_mutex;
try
{
auto kernel_functor = cl::KernelFunctor<cl::Buffer, cl_int>(
- gpu.program_, "test"); // name should be same as cl function name
+ gpu.program_, "test"); // name should be same as cl function name
// variable init
cl::Event ev[MAX_DEVICE_NUM];
// Configure Filter
const uint32_t kernel_size = KER_N * KER_C * KER_H * KER_W;
float kernel_data[kernel_size] = {
- 0.0f,
+ 0.0f,
};
// Fill kernel data in NHWC order
}
interp.SetTensorParametersReadOnly(
- 2, kTfLiteFloat32 /* type */, "filter" /* name */, {KER_N, KER_H, KER_W, KER_C} /* dims */,
- quantization, reinterpret_cast<const char *>(kernel_data), sizeof(kernel_data));
+ 2, kTfLiteFloat32 /* type */, "filter" /* name */, {KER_N, KER_H, KER_W, KER_C} /* dims */,
+ quantization, reinterpret_cast<const char *>(kernel_data), sizeof(kernel_data));
// Configure Bias
const uint32_t bias_size = bias.size();
float bias_data[bias_size] = {
- 0.0f,
+ 0.0f,
};
// Fill bias data
int fileSize = fileHeaderSize + infoHeaderSize + (bytesPerPixel * width + paddingSize) * height;
static unsigned char fileHeader[] = {
- 0, 0, /// signature
- 0, 0, 0, 0, /// image file size in bytes
- 0, 0, 0, 0, /// reserved
- 0, 0, 0, 0, /// start of pixel array
+ 0, 0, /// signature
+ 0, 0, 0, 0, /// image file size in bytes
+ 0, 0, 0, 0, /// reserved
+ 0, 0, 0, 0, /// start of pixel array
};
fileHeader[0] = (unsigned char)('B');
unsigned char *BitmapHelper::createBitmapInfoHeader(int height, int width)
{
static unsigned char infoHeader[] = {
- 0, 0, 0, 0, /// header size
- 0, 0, 0, 0, /// image width
- 0, 0, 0, 0, /// image height
- 0, 0, /// number of color planes
- 0, 0, /// bits per pixel
- 0, 0, 0, 0, /// compression
- 0, 0, 0, 0, /// image size
- 0, 0, 0, 0, /// horizontal resolution
- 0, 0, 0, 0, /// vertical resolution
- 0, 0, 0, 0, /// colors in color table
- 0, 0, 0, 0, /// important color count
+ 0, 0, 0, 0, /// header size
+ 0, 0, 0, 0, /// image width
+ 0, 0, 0, 0, /// image height
+ 0, 0, /// number of color planes
+ 0, 0, /// bits per pixel
+ 0, 0, 0, 0, /// compression
+ 0, 0, 0, 0, /// image size
+ 0, 0, 0, 0, /// horizontal resolution
+ 0, 0, 0, 0, /// vertical resolution
+ 0, 0, 0, 0, /// colors in color table
+ 0, 0, 0, 0, /// important color count
};
// Minus height means top to bottom write
// Decode image, allocating tensor once the image size is known
const uint8_t *bmp_pixels = &img_bytes[header_size];
std::vector<uint8_t> bmp =
- decode_bmp(bmp_pixels, row_size, width, abs(height), channels, top_down);
+ decode_bmp(bmp_pixels, row_size, width, abs(height), channels, top_down);
for (uint32_t j = 0; j < bmp.size(); j++)
{
input.push_back(static_cast<float>(bmp[j]));
{
JpegHelper::JpegHelper(int bytes_per_pixel, J_COLOR_SPACE color_space)
- : _bytes_per_pixel(bytes_per_pixel), _color_space(color_space)
+ : _bytes_per_pixel(bytes_per_pixel), _color_space(color_space)
{
// DO NOTHING
}
NNFW_STATUS resolve_op_backend(nnfw_session *session)
{
static std::unordered_map<std::string, std::string> operation_map = {
- {"TRANSPOSE_CONV", "OP_BACKEND_TransposeConv"}, {"CONV_2D", "OP_BACKEND_Conv2D"},
- {"DEPTHWISE_CONV_2D", "OP_BACKEND_DepthwiseConv2D"}, {"MEAN", "OP_BACKEND_Mean"},
- {"AVERAGE_POOL_2D", "OP_BACKEND_AvgPool2D"}, {"MAX_POOL_2D", "OP_BACKEND_MaxPool2D"},
- {"INSTANCE_NORM", "OP_BACKEND_InstanceNorm"}, {"ADD", "OP_BACKEND_Add"}};
+ {"TRANSPOSE_CONV", "OP_BACKEND_TransposeConv"}, {"CONV_2D", "OP_BACKEND_Conv2D"},
+ {"DEPTHWISE_CONV_2D", "OP_BACKEND_DepthwiseConv2D"}, {"MEAN", "OP_BACKEND_Mean"},
+ {"AVERAGE_POOL_2D", "OP_BACKEND_AvgPool2D"}, {"MAX_POOL_2D", "OP_BACKEND_MaxPool2D"},
+ {"INSTANCE_NORM", "OP_BACKEND_InstanceNorm"}, {"ADD", "OP_BACKEND_Add"}};
for (auto i : operation_map)
{
const int input_size, const int image_mean, const int image_std,
const std::string &input_name, const std::string &output_name,
const bool use_nnapi)
- : _inference(new InferenceInterface(model_file, use_nnapi)), _input_size(input_size),
- _image_mean(image_mean), _image_std(image_std), _input_name(input_name),
- _output_name(output_name)
+ : _inference(new InferenceInterface(model_file, use_nnapi)), _input_size(input_size),
+ _image_mean(image_mean), _image_std(image_std), _input_name(input_name),
+ _output_name(output_name)
{
// Load label
std::ifstream label_stream(label_file.c_str());
using namespace tflite::ops::builtin;
InferenceInterface::InferenceInterface(const std::string &model_file, const bool use_nnapi)
- : _interpreter(nullptr), _model(nullptr), _sess(nullptr)
+ : _interpreter(nullptr), _model(nullptr), _sess(nullptr)
{
// Load model
StderrReporter error_reporter;
}
// Create ImageClassifier
- std::unique_ptr<ImageClassifier> classifier(
- new ImageClassifier(MODEL_FILE, LABEL_FILE, INPUT_SIZE, IMAGE_MEAN, IMAGE_STD, INPUT_NAME,
- OUTPUT_NAME, use_nnapi));
+ std::unique_ptr<ImageClassifier> classifier(new ImageClassifier(
+ MODEL_FILE, LABEL_FILE, INPUT_SIZE, IMAGE_MEAN, IMAGE_STD, INPUT_NAME, OUTPUT_NAME, use_nnapi));
// Cam setting
cv::VideoCapture cap(0);
--- /dev/null
+../../.clang-format.8
\ No newline at end of file
}
CsvWriter::CsvWriter(const std::string &csv_filename, const std::vector<std::string> &header)
- : _ofs(csv_filename), _header_size(header.size()), _col_idx(0), _row_idx(0)
+ : _ofs(csv_filename), _header_size(header.size()), _col_idx(0), _row_idx(0)
{
assert(csv_filename.empty() == false);
assert(header.size() != 0);
{
MemoryPoller::MemoryPoller(std::chrono::milliseconds duration, bool gpu_poll)
- : _duration(duration), _run(false), _term(false), _gpu_poll(gpu_poll)
+ : _duration(duration), _run(false), _term(false), _gpu_poll(gpu_poll)
{
if (prepareMemoryPolling() == false)
throw std::runtime_error("failed to prepare memory pooling");
return average<uint32_t, uint32_t>(phase.memory[type]);
}
-uint32_t peakMemory(const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE]
- [benchmark::MemoryType::END_OF_MEM_TYPE],
- int type)
+uint32_t peakMemory(
+ const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE][benchmark::MemoryType::END_OF_MEM_TYPE],
+ int type)
{
using namespace benchmark;
// tricky. handle WARMUP as EXECUTE
}
void printResultTime(
- const double time[benchmark::PhaseEnum::END_OF_PHASE][benchmark::FigureType::END_OF_FIG_TYPE])
+ const double time[benchmark::PhaseEnum::END_OF_PHASE][benchmark::FigureType::END_OF_FIG_TYPE])
{
using namespace benchmark;
std::cout << "===================================" << std::endl;
}
-void printResultMemory(const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE]
- [benchmark::MemoryType::END_OF_MEM_TYPE])
+void printResultMemory(
+ const uint32_t memory[benchmark::PhaseEnum::END_OF_PHASE][benchmark::MemoryType::END_OF_MEM_TYPE])
{
using namespace benchmark;
* @param[in] col The width index
*/
Index(int32_t batch, int32_t ch, int32_t row, int32_t col)
- : _batch{batch}, _ch{ch}, _row{row}, _col{col}
+ : _batch{batch}, _ch{ch}, _row{row}, _col{col}
{
// DO NOTHING
}
* @param[in] width The width value
*/
Shape(int32_t batch, int32_t depth, int32_t height, int32_t width)
- : N{batch}, C{depth}, H{height}, W{width}
+ : N{batch}, C{depth}, H{height}, W{width}
{
// DO NOTHING
}
* @param[in] width The width index
*/
Shape(int32_t count, int32_t depth, int32_t height, int32_t width)
- : N{count}, C{depth}, H{height}, W{width}
+ : N{count}, C{depth}, H{height}, W{width}
{
// DO NOTHING
}
_values.resize(_shape.dim(0) * _stride.at(0));
// Set 'value'
- iterate(_shape) << [this, &fn](const Index &index) {
- _values.at(_stride.offset(index)) = fn(_shape, index);
- };
+ iterate(_shape) <<
+ [this, &fn](const Index &index) { _values.at(_stride.offset(index)) = fn(_shape, index); };
}
}
* @param[in] rhs @c Reader object of a tensor
*/
Zipper(const Shape &shape, const Reader<T> &lhs, const Reader<T> &rhs)
- : _shape{shape}, _lhs{lhs}, _rhs{rhs}
+ : _shape{shape}, _lhs{lhs}, _rhs{rhs}
{
// DO NOTHING
}
template <typename Callable> void zip(Callable cb) const
{
iterate(_shape) <<
- [this, &cb](const Index &index) { cb(index, _lhs.at(index), _rhs.at(index)); };
+ [this, &cb](const Index &index) { cb(index, _lhs.at(index), _rhs.at(index)); };
}
private:
std::vector<Diff<float>> res;
zip(shape, expected, obtained) <<
- [&](const Index &index, float expected_value, float obtained_value) {
- if (!_compare_fn(expected_value, obtained_value))
- {
- res.emplace_back(index, expected_value, obtained_value);
- }
-
- // Update max_diff_index, if necessary
- if (observer != nullptr)
- {
- observer->notify(index, expected_value, obtained_value);
- }
- };
+ [&](const Index &index, float expected_value, float obtained_value) {
+ if (!_compare_fn(expected_value, obtained_value))
+ {
+ res.emplace_back(index, expected_value, obtained_value);
+ }
+
+ // Update max_diff_index, if necessary
+ if (observer != nullptr)
+ {
+ observer->notify(index, expected_value, obtained_value);
+ }
+ };
return res;
}
* @return ANEURALNETWORKS_NO_ERROR if successful.
*/
inline int ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
- ANeuralNetworksModel *model, int32_t index,
- const ANeuralNetworksSymmPerChannelQuantParams *channelQuant)
+ ANeuralNetworksModel *model, int32_t index,
+ const ANeuralNetworksSymmPerChannelQuantParams *channelQuant)
{
LOAD_FUNCTION(ANeuralNetworksModel_setOperandSymmPerChannelQuantParams);
EXECUTE_FUNCTION_RETURN(model, index, channelQuant);
LOAD_FUNCTION(ANeuralNetworksModel_setOperandExtensionData);
EXECUTE_FUNCTION_RETURN(model, index, data, length);
}
-
+#if __ANDROID_API__ >= 30
/**
* Create a {@link ANeuralNetworksMemoryDesc} with no properties.
*
LOAD_FUNCTION(ANeuralNetworksMemory_copy);
EXECUTE_FUNCTION_RETURN(src, dst);
}
-
+#endif // __ANDROID_API__ >= 30
/**/
#endif // __NEURAL_NETWORKS_SHIM_H__
const void *buffer, size_t length);
typedef int (*ANeuralNetworksModel_setOperandSymmPerChannelQuantParams_fn)(
- ANeuralNetworksModel *model, int32_t index,
- const ANeuralNetworksSymmPerChannelQuantParams *channelQuant);
+ ANeuralNetworksModel *model, int32_t index,
+ const ANeuralNetworksSymmPerChannelQuantParams *channelQuant);
typedef int (*ANeuralNetworksModel_setOperandValueFromMemory_fn)(
- ANeuralNetworksModel *model, int32_t index, const ANeuralNetworksMemory *memory, size_t offset,
- size_t length);
+ ANeuralNetworksModel *model, int32_t index, const ANeuralNetworksMemory *memory, size_t offset,
+ size_t length);
typedef int (*ANeuralNetworksModel_addOperation_fn)(ANeuralNetworksModel *model,
ANeuralNetworksOperationType type,
const void *buffer, size_t length);
typedef int (*ANeuralNetworksExecution_setInputFromMemory_fn)(
- ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
- const ANeuralNetworksMemory *memory, size_t offset, size_t length);
+ ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
+ const ANeuralNetworksMemory *memory, size_t offset, size_t length);
typedef int (*ANeuralNetworksExecution_setOutput_fn)(ANeuralNetworksExecution *execution,
int32_t index,
void *buffer, size_t length);
typedef int (*ANeuralNetworksExecution_setOutputFromMemory_fn)(
- ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
- const ANeuralNetworksMemory *memory, size_t offset, size_t length);
+ ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
+ const ANeuralNetworksMemory *memory, size_t offset, size_t length);
typedef int (*ANeuralNetworksExecution_startCompute_fn)(ANeuralNetworksExecution *execution,
ANeuralNetworksEvent **event);
int64_t *featureLevel);
typedef int (*ANeuralNetworksModel_getSupportedOperationsForDevices_fn)(
- const ANeuralNetworksModel *model, const ANeuralNetworksDevice *const *devices,
- uint32_t numDevices, bool *supportedOps);
+ const ANeuralNetworksModel *model, const ANeuralNetworksDevice *const *devices,
+ uint32_t numDevices, bool *supportedOps);
typedef int (*ANeuralNetworksCompilation_createForDevices_fn)(
- ANeuralNetworksModel *model, const ANeuralNetworksDevice *const *devices, uint32_t numDevices,
- ANeuralNetworksCompilation **compilation);
+ ANeuralNetworksModel *model, const ANeuralNetworksDevice *const *devices, uint32_t numDevices,
+ ANeuralNetworksCompilation **compilation);
typedef int (*ANeuralNetworksCompilation_setCaching_fn)(ANeuralNetworksCompilation *compilation,
const char *cacheDir, const uint8_t *token);
+#if __ANDROID_API__ >= 30
typedef int (*ANeuralNetworksCompilation_setTimeout_fn)(ANeuralNetworksCompilation *compilation,
uint64_t duration);
typedef int (*ANeuralNetworksCompilation_setPriority_fn)(ANeuralNetworksCompilation *compilation,
int priority);
+#endif // __ANDROID_API__ >= 30
typedef int (*ANeuralNetworksExecution_compute_fn)(ANeuralNetworksExecution *execution);
+#if __ANDROID_API__ >= 30
typedef int (*ANeuralNetworksExecution_setTimeout_fn)(ANeuralNetworksExecution *execution,
uint64_t duration);
typedef int (*ANeuralNetworksExecution_setLoopTimeout_fn)(ANeuralNetworksExecution *execution,
uint64_t duration);
+#endif // __ANDROID_API__ >= 30
typedef int (*ANeuralNetworksExecution_getOutputOperandRank_fn)(ANeuralNetworksExecution *execution,
int32_t index, uint32_t *rank);
typedef int (*ANeuralNetworksExecution_getOutputOperandDimensions_fn)(
- ANeuralNetworksExecution *execution, int32_t index, uint32_t *dimensions);
+ ANeuralNetworksExecution *execution, int32_t index, uint32_t *dimensions);
typedef int (*ANeuralNetworksBurst_create_fn)(ANeuralNetworksCompilation *compilation,
ANeuralNetworksBurst **burst);
int32_t *type);
typedef int (*ANeuralNetworksModel_getExtensionOperationType_fn)(
- ANeuralNetworksModel *model, const char *extensionName, uint16_t operationCodeWithinExtension,
- ANeuralNetworksOperationType *type);
+ ANeuralNetworksModel *model, const char *extensionName, uint16_t operationCodeWithinExtension,
+ ANeuralNetworksOperationType *type);
typedef int (*ANeuralNetworksModel_setOperandExtensionData_fn)(ANeuralNetworksModel *model,
int32_t index, const void *data,
size_t length);
+#if __ANDROID_API__ >= 30
typedef int (*ANeuralNetworksMemoryDesc_create_fn)(ANeuralNetworksMemoryDesc **desc);
typedef void (*ANeuralNetworksMemoryDesc_free_fn)(ANeuralNetworksMemoryDesc *desc);
typedef int (*ANeuralNetworksMemoryDesc_addInputRole_fn)(
- ANeuralNetworksMemoryDesc *desc, const ANeuralNetworksCompilation *compilation, int32_t index,
- float frequency);
+ ANeuralNetworksMemoryDesc *desc, const ANeuralNetworksCompilation *compilation, int32_t index,
+ float frequency);
typedef int (*ANeuralNetworksMemoryDesc_addOutputRole_fn)(
- ANeuralNetworksMemoryDesc *desc, const ANeuralNetworksCompilation *compilation, uint32_t index,
- float frequency);
+ ANeuralNetworksMemoryDesc *desc, const ANeuralNetworksCompilation *compilation, uint32_t index,
+ float frequency);
typedef int (*ANeuralNetworksMemoryDesc_setDimensions_fn)(ANeuralNetworksMemoryDesc *desc,
uint32_t rank,
typedef int (*ANeuralNetworksMemory_copy_fn)(const ANeuralNetworksMemory *src,
const ANeuralNetworksMemory *dst);
-
+#endif // __ANDROID_API__ >= 30
#endif // __NEURAL_NETWORKS_TYPES_H__
const void *buffer, size_t length);
typedef int (*ANeuralNetworksModel_setOperandValueFromMemory_fn)(
- ANeuralNetworksModel *model, int32_t index, const ANeuralNetworksMemory *memory, size_t offset,
- size_t length);
+ ANeuralNetworksModel *model, int32_t index, const ANeuralNetworksMemory *memory, size_t offset,
+ size_t length);
typedef int (*ANeuralNetworksModel_addOperation_fn)(ANeuralNetworksModel *model,
ANeuralNetworksOperationType type,
const void *buffer, size_t length);
typedef int (*ANeuralNetworksExecution_setInputFromMemory_fn)(
- ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
- const ANeuralNetworksMemory *memory, size_t offset, size_t length);
+ ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
+ const ANeuralNetworksMemory *memory, size_t offset, size_t length);
typedef int (*ANeuralNetworksExecution_setOutput_fn)(ANeuralNetworksExecution *execution,
int32_t index,
void *buffer, size_t length);
typedef int (*ANeuralNetworksExecution_setOutputFromMemory_fn)(
- ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
- const ANeuralNetworksMemory *memory, size_t offset, size_t length);
+ ANeuralNetworksExecution *execution, int32_t index, const ANeuralNetworksOperandType *type,
+ const ANeuralNetworksMemory *memory, size_t offset, size_t length);
typedef int (*ANeuralNetworksExecution_startCompute_fn)(ANeuralNetworksExecution *execution,
ANeuralNetworksEvent **event);
 * @param[in] comparator Comparator object for tensor comparison
*/
TfLiteInterpMatchApp(const nnfw::misc::tensor::Comparator &comparator)
- : _verbose{false}, _comparator(comparator)
+ : _verbose{false}, _comparator(comparator)
{
// DO NOTHING
}
* @param[in] quantization TfLiteQuantizationParams type to represent quantization value
*/
RandomTestRunner(uint32_t seed, const RandomTestParam ¶m)
- : _randgen{seed, 0.0f, 2.0f}, _param{param}
+ : _randgen{seed, 0.0f, 2.0f}, _param{param}
{
// DO NOTHING
}
const TfLiteTensor *tensor = interp.tensor(id);
_outfile << "# tensor name: " << tensor->name << std::endl;
- _outfile << "# tflite::interpreter.tensor(" << id << ") -> "
- "tensor_value_gen["
- << log_index << "]" << std::endl;
+ _outfile << "# tflite::interpreter.tensor(" << id << ") -> tensor_value_gen[" << log_index
+ << "]" << std::endl;
if (tensor->type == kTfLiteInt32)
{
{
public:
DiffSummary()
- : max_abs_diff_index(0), max_abs_diff_expected{0.0f}, max_abs_diff_obtained{0.0f},
- max_abs_diff_value{0.0f}, max_rel_diff_index(0), max_rel_diff_expected{0.0f},
- max_rel_diff_obtained{0.0f}, max_rel_diff_value{0.0f}
+ : max_abs_diff_index(0), max_abs_diff_expected{0.0f}, max_abs_diff_obtained{0.0f},
+ max_abs_diff_value{0.0f}, max_rel_diff_index(0), max_rel_diff_expected{0.0f},
+ max_rel_diff_obtained{0.0f}, max_rel_diff_value{0.0f}
{
// DO NOTHING
}
using nnfw::misc::tensor::zip;
zip(expected.shape(), expected, obtained)
- << [&](const Index &index, T expected_value, T obtained_value) {
- if (expected_value != obtained_value)
- {
- diffs.emplace_back(index, expected_value, obtained_value);
- }
- };
+ << [&](const Index &index, T expected_value, T obtained_value) {
+ if (expected_value != obtained_value)
+ {
+ diffs.emplace_back(index, expected_value, obtained_value);
+ }
+ };
// TODO Unify summary generation code
if (diffs.size() == 0)
template <>
bool TfLiteInterpMatchApp::compareSingleTensorView<float>(
- const nnfw::tflite::TensorView<float> &expected,
- const nnfw::tflite::TensorView<float> &obtained, int id) const
+ const nnfw::tflite::TensorView<float> &expected, const nnfw::tflite::TensorView<float> &obtained,
+ int id) const
{
DiffSummary summary;
int32_t value = 0;
nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- ++value;
- };
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ // TODO Generate random values
+ tfl_interp_view.at(ind) = value;
+ nnapi_view.at(ind) = value;
+ ++value;
+ };
};
// Generate signed 32-bit integer (s32) input
int32_t value = 0;
nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ // TODO Generate random values
+ tfl_interp_view.at(ind) = value;
+ nnapi_view.at(ind) = value;
+ };
};
initializers[kTfLiteUInt8] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
assert(tfl_interp_view.shape() == nnapi_view.shape());
auto fp = static_cast<uint8_t (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<uint8_t>);
+ const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
+ &nnfw::misc::RandomGenerator::generate<uint8_t>);
const nnfw::misc::tensor::Object<uint8_t> data(tfl_interp_view.shape(),
std::bind(fp, _randgen, _1, _2));
assert(tfl_interp_view.shape() == data.shape());
nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ const auto value = data.at(ind);
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
+ tfl_interp_view.at(ind) = value;
+ nnapi_view.at(ind) = value;
+ };
};
reseters[kTfLiteUInt8] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
assert(tfl_interp_view.shape() == nnapi_view.shape());
auto fp = static_cast<uint8_t (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<uint8_t>);
+ const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
+ &nnfw::misc::RandomGenerator::generate<uint8_t>);
const nnfw::misc::tensor::Object<uint8_t> data(tfl_interp_view.shape(),
std::bind(fp, _randgen, _1, _2));
assert(tfl_interp_view.shape() == data.shape());
uint8_t value = 0;
nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ tfl_interp_view.at(ind) = value;
+ nnapi_view.at(ind) = value;
+ };
};
initializers[kTfLiteFloat32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
assert(tfl_interp_view.shape() == nnapi_view.shape());
auto fp = static_cast<float (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<float>);
+ const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
+ &nnfw::misc::RandomGenerator::generate<float>);
const nnfw::misc::tensor::Object<float> data(tfl_interp_view.shape(),
std::bind(fp, _randgen, _1, _2));
assert(tfl_interp_view.shape() == data.shape());
nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ const auto value = data.at(ind);
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
+ tfl_interp_view.at(ind) = value;
+ nnapi_view.at(ind) = value;
+ };
};
reseters[kTfLiteFloat32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
assert(tfl_interp_view.shape() == nnapi_view.shape());
auto fp = static_cast<float (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<float>);
+ const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
+ &nnfw::misc::RandomGenerator::generate<float>);
const nnfw::misc::tensor::Object<float> data(tfl_interp_view.shape(),
std::bind(fp, _randgen, _1, _2));
float value = 0;
nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ tfl_interp_view.at(ind) = value;
+ nnapi_view.at(ind) = value;
+ };
};
initializers[kTfLiteBool] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
assert(tfl_interp_view.shape() == nnapi_view.shape());
auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<bool>);
+ const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
+ &nnfw::misc::RandomGenerator::generate<bool>);
const nnfw::misc::tensor::Object<bool> data(tfl_interp_view.shape(),
std::bind(fp, _randgen, _1, _2));
assert(tfl_interp_view.shape() == data.shape());
nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ const auto value = data.at(ind);
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
+ tfl_interp_view.at(ind) = value;
+ nnapi_view.at(ind) = value;
+ };
};
reseters[kTfLiteBool] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
assert(tfl_interp_view.shape() == nnapi_view.shape());
auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<bool>);
+ const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
+ &nnfw::misc::RandomGenerator::generate<bool>);
const nnfw::misc::tensor::Object<bool> data(tfl_interp_view.shape(),
std::bind(fp, _randgen, _1, _2));
bool value = false;
nnfw::misc::tensor::iterate(tfl_interp_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- tfl_interp_view.at(ind) = value;
- nnapi_view.at(ind) = value;
- };
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ tfl_interp_view.at(ind) = value;
+ nnapi_view.at(ind) = value;
+ };
};
// Fill IFM with random numbers
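The initializers and reseters above lean on one C++ idiom worth spelling out: RandomGenerator::generate is templated, so taking its address is ambiguous, and the static_cast to an explicit member-function-pointer type pins down the wanted instantiation before std::bind attaches the generator instance. A standalone toy sketch of the same idiom (hypothetical Gen type, not part of nnfw):

#include <functional>
#include <iostream>

struct Gen
{
  // Two overloads: a bare &Gen::value would be ambiguous without a cast.
  int value(int seed) { return seed * 2; }
  double value(double seed) { return seed * 0.5; }
};

int main()
{
  Gen g;
  // The static_cast selects the int overload, just as the generate<T> casts above do.
  auto fp = static_cast<int (Gen::*)(int)>(&Gen::value);
  auto bound = std::bind(fp, &g, std::placeholders::_1);
  std::cout << bound(21) << std::endl; // prints 42
  return 0;
}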
--- /dev/null
+../../../.clang-format.8
\ No newline at end of file
*
* The type of tensor represented in {@link nnfw_tensorinfo}
*/
-typedef enum {
+typedef enum
+{
/** A tensor of 32 bit floating point */
NNFW_TYPE_TENSOR_FLOAT32 = 0,
/** A tensor of 32 bit signed integer */
/**
* @brief Result values returned from a call to an API function
*/
-typedef enum {
+typedef enum
+{
/** Successful */
NNFW_STATUS_NO_ERROR = 0,
/**
/**
* @brief Data format of a tensor
*/
-typedef enum {
+typedef enum
+{
/** Don't care layout */
NNFW_LAYOUT_NONE = 0,
/**
/**
* @brief Information ID for retrieving information on nnfw (e.g. version)
*/
-typedef enum {
+typedef enum
+{
/** nnfw runtime version
* Its value is uint32 in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch.
*/
*/
NNFW_STATUS nnfw_load_circle_from_buffer(nnfw_session *session, uint8_t *buffer, size_t size);
+/**
+ * @brief Load a tflite/circle model from file.
+ *
+ * @param[in] session session
+ * @param[in] session session
+ * @param[in] file_path Path to model file. Model type (tflite/circle) is decided by the file extension
+ * @return NNFW_STATUS
+ */
+NNFW_STATUS nnfw_load_model_from_modelfile(nnfw_session *session, const char *file_path);
+
#endif // __NNFW_INTERNAL_H__
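A minimal caller sketch for the new internal entry point above; the session lifecycle calls come from the public nnfw.h API, error handling is abbreviated, and input/output buffers would still need to be bound (nnfw_set_input / nnfw_set_output) before nnfw_run:

#include "nnfw.h"
#include "nnfw_internal.h"

/* Hypothetical smoke check: load a bare .tflite/.circle file and prepare it. */
int check_model_loads(const char *model_path)
{
  nnfw_session *session = NULL;
  if (nnfw_create_session(&session) != NNFW_STATUS_NO_ERROR)
    return -1;

  /* The file extension decides which loader runs (.tflite vs .circle). */
  NNFW_STATUS status = nnfw_load_model_from_modelfile(session, model_path);
  if (status == NNFW_STATUS_NO_ERROR)
    status = nnfw_prepare(session);

  nnfw_close_session(session);
  return status == NNFW_STATUS_NO_ERROR ? 0 : -1;
}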
* NNFW_VERSION is a uint32 value representing nnfw runtime version
* in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
*/
-#define NNFW_VERSION 0x01000b01
+#define NNFW_VERSION 0x01000c00
#endif // __NNFW_VERSION_H__
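As a worked example of the 0xMMmmmmPP packing described above, the bumped value decodes like this (throwaway snippet, not part of the header):

uint32_t v = NNFW_VERSION;          /* 0x01000c00 */
uint32_t major = (v >> 24) & 0xff;  /* 0x01   -> 1  */
uint32_t minor = (v >> 8) & 0xffff; /* 0x000c -> 12 */
uint32_t patch = v & 0xff;          /* 0x00   -> 0  */
/* i.e. the bump goes from 1.11.1 (0x01000b01) to 1.12.0. */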
};
Kernel::Kernel(const nnfw_custom_eval evalFunction)
- : _in_params(), _userdata(nullptr), _userdata_size(0), _evalFunction(evalFunction)
+ : _in_params(), _userdata(nullptr), _userdata_size(0), _evalFunction(evalFunction)
{
}
NNFW_STATUS nnfw_load_model_from_file(nnfw_session *session, const char *pacakge_file_path)
{
NNFW_RETURN_ERROR_IF_NULL(session);
- return session->load_model_from_file(pacakge_file_path);
+ return session->load_model_from_nnpackage(pacakge_file_path);
}
/*
return session->load_circle_from_buffer(buffer, size);
}
+NNFW_STATUS nnfw_load_model_from_modelfile(nnfw_session *session, const char *file_path)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->load_model_from_modelfile(file_path);
+}
+
NNFW_STATUS nnfw_input_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index)
{
NNFW_RETURN_ERROR_IF_NULL(session);
#include "compiler/Compiler.h"
#include "util/ConfigSource.h"
#include "util/Exceptions.h"
+#include "util/logging.h"
#include "exec/Execution.h"
#include "circle_loader.h"
#include "tflite_loader.h"
#include "json/json.h"
#include "ir/OpCode.h"
+#include "util/TracingCtx.h"
+
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <dirent.h>
-#include <util/ConfigSource.h>
#include <misc/string_helpers.h>
/*
#define MAX_PATH_LENGTH 1024
#define MAX_TENSOR_NAME_LENGTH 64
+namespace
+{
+
// Is null-terminating in length ?
-static bool null_terminating(const char *str, uint32_t length)
+bool null_terminating(const char *str, uint32_t length)
{
for (uint32_t i = 0; i < length; i++)
{
return false;
}
-static onert::ir::Layout convertLayout(NNFW_LAYOUT layout)
+onert::ir::Layout convertLayout(NNFW_LAYOUT layout)
{
if (layout == NNFW_LAYOUT_CHANNELS_LAST)
{
}
}
+std::string trim(const std::string &value)
+{
+ std::string whitespace = " \t";
+ auto begin = value.find_first_not_of(whitespace);
+ if (begin == std::string::npos)
+ return ""; // no content
+
+ auto end = value.find_last_not_of(whitespace);
+ auto range = end - begin + 1;
+ return value.substr(begin, range);
+}
+
+using CfgKeyValues = std::unordered_map<std::string, std::string>;
+
+bool loadConfigure(const std::string cfgfile, CfgKeyValues &keyValues)
+{
+ std::ifstream ifs(cfgfile);
+ if (ifs.is_open())
+ {
+ std::string line;
+ while (std::getline(ifs, line))
+ {
+ auto cmtpos = line.find('#');
+ if (cmtpos != std::string::npos)
+ {
+ line = line.substr(0, cmtpos);
+ }
+ std::istringstream isline(line);
+ std::string key;
+ if (std::getline(isline, key, '='))
+ {
+ std::string value;
+ if (std::getline(isline, value))
+ {
+ key = trim(key);
+ keyValues[key] = trim(value);
+ }
+ }
+ }
+ ifs.close();
+ return true;
+ }
+ return false;
+}
+
+void setConfigKeyValues(const CfgKeyValues &keyValues)
+{
+ auto configsrc = std::make_unique<onert::util::GeneralConfigSource>();
+
+ for (auto it = keyValues.begin(); it != keyValues.end(); ++it)
+ {
+ VERBOSE(NNPKG_CONFIGS) << "(" << it->first << ") = (" << it->second << ")" << std::endl;
+ configsrc->set(it->first, it->second);
+ }
+
+ onert::util::config_source_ext(std::move(configsrc));
+}
+
+} // namespace
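For reference, loadConfigure() above reads a plain key=value file: '#' starts a comment anywhere on a line, the first '=' splits key from value, and both sides are whitespace-trimmed before setConfigKeyValues() hands them to GeneralConfigSource. A hypothetical metadata config file it would accept (key names are illustrative, not prescribed by this change):

# config.cfg shipped under the nnpackage metadata/ directory
BACKENDS=cpu;acl_cl        # everything after '#' is stripped, even mid-line
EXECUTOR=Linear
  TRACE_FILEPATH = trace.json    # whitespace around key and value is trimmed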
+
nnfw_session::nnfw_session()
- : _subgraphs{nullptr}, _execution{nullptr},
- _kernel_registry{std::make_shared<onert::frontend::custom::KernelRegistry>()}
+ : _subgraphs{nullptr}, _execution{nullptr},
+ _kernel_registry{std::make_shared<onert::frontend::custom::KernelRegistry>()}, _tracing_ctx{
+ nullptr}
{
// DO NOTHING
}
return NNFW_STATUS_ERROR;
}
- _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs);
+ _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
+
+ _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
+
+ _state = State::MODEL_LOADED;
+ return NNFW_STATUS_NO_ERROR;
+}
+
+NNFW_STATUS nnfw_session::load_model_from_modelfile(const char *model_file_path)
+{
+ if (!isStateInitialized())
+ return NNFW_STATUS_INVALID_STATE;
+
+ if (!model_file_path)
+ {
+ std::cerr << "Model file path is null." << std::endl;
+ return NNFW_STATUS_UNEXPECTED_NULL;
+ }
+
+ std::string filename{model_file_path};
+ if (filename.size() < 8) // .tflite or .circle
+ {
+ std::cerr << "Invalid model file path." << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ std::string model_type = filename.substr(filename.size() - 7, 7);
+
+ try
+ {
+ if (model_type == ".tflite")
+ {
+ _subgraphs = onert::tflite_loader::loadModel(filename.c_str());
+ }
+ else if (model_type == ".circle")
+ {
+ _subgraphs = onert::circle_loader::loadModel(filename.c_str());
+ }
+ else
+ {
+ std::cerr << "Unsupported model type" << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "Error during model loading : " << e.what() << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
+
+ _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
_state = State::MODEL_LOADED;
return NNFW_STATUS_NO_ERROR;
}
-NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir)
+NNFW_STATUS nnfw_session::load_model_from_nnpackage(const char *package_dir)
{
if (!isStateInitialized())
return NNFW_STATUS_INVALID_STATE;
mfs >> root;
const Json::Value &models = root["models"];
const Json::Value &model_types = root["model-types"];
+ const Json::Value &configs = root["configs"];
+
+ if (!configs.empty() && !configs[0].empty())
+ {
+ auto filepath = package_dir + std::string("/metadata/") + configs[0].asCString();
+
+ CfgKeyValues keyValues;
+ if (loadConfigure(filepath, keyValues))
+ {
+ setConfigKeyValues(keyValues);
+ }
+ }
auto model_file_path = package_dir + std::string("/") + models[0].asString(); // first model
auto model_type = model_types[0].asString(); // first model's type
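The hunk above adds an optional "configs" entry to the nnpackage MANIFEST, naming a key=value file under metadata/ that is applied before compilation. A hypothetical manifest restricted to the keys this code actually reads:

{
  "models" : [ "model.tflite" ],
  "model-types" : [ "tflite" ],
  "configs" : [ "config.cfg" ]
}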
return NNFW_STATUS_ERROR;
}
- _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs);
+ _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
+
+ _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
_state = State::MODEL_LOADED;
return NNFW_STATUS_NO_ERROR;
{
_subgraphs.reset();
std::shared_ptr<onert::exec::ExecutorMap> executors = _compiler->compile();
- _execution = std::make_shared<onert::exec::Execution>(executors);
+ _execution = std::make_unique<onert::exec::Execution>(executors);
}
catch (const std::exception &e)
{
if (!buffer && length != 0)
{
std::cerr
- << "Error during nnfw_session::set_input : given buffer is NULL but the length is not 0"
- << std::endl;
+ << "Error during nnfw_session::set_input : given buffer is NULL but the length is not 0"
+ << std::endl;
return NNFW_STATUS_ERROR;
}
if (!buffer && length != 0)
{
std::cerr
- << "Error during nnfw_session::set_output : given buffer is NULL but the length is not 0"
- << std::endl;
+ << "Error during nnfw_session::set_output : given buffer is NULL but the length is not 0"
+ << std::endl;
return NNFW_STATUS_ERROR;
}
#include "nnfw_experimental.h"
#include <util/GeneralConfigSource.h>
+#include <util/TracingCtx.h>
#include <string>
#include <memory>
nnfw_session();
~nnfw_session();
- NNFW_STATUS load_model_from_file(const char *package_file_path);
+ NNFW_STATUS load_model_from_nnpackage(const char *package_file_path);
NNFW_STATUS prepare();
NNFW_STATUS run();
NNFW_STATUS set_config(const char *key, const char *value);
NNFW_STATUS get_config(const char *key, char *value, size_t value_size);
NNFW_STATUS load_circle_from_buffer(uint8_t *buffer, size_t size);
+ NNFW_STATUS load_model_from_modelfile(const char *file_path);
//
// Experimental API
State _state{State::INITIALIZED};
std::shared_ptr<onert::ir::Subgraphs> _subgraphs;
std::unique_ptr<onert::compiler::Compiler> _compiler;
- std::shared_ptr<onert::exec::Execution> _execution;
+ std::unique_ptr<onert::exec::Execution> _execution;
std::shared_ptr<onert::frontend::custom::KernelRegistry> _kernel_registry;
+
+ std::unique_ptr<onert::util::TracingCtx> _tracing_ctx;
};
#endif // __API_NNFW_API_INTERNAL_H__
add_subdirectory(acl_cl)
add_subdirectory(acl_neon)
add_subdirectory(acl_common)
+add_subdirectory(ruy)
+add_subdirectory(xnnpack)
#include <memory>
#include <backend/Backend.h>
+#include "BackendContext.h"
#include "Config.h"
#include "ConstantInitializer.h"
#include "KernelGenerator.h"
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<BackendContext> newContext(const ir::Graph &graph,
- const std::shared_ptr<custom::IKernelBuilder> &,
- bool is_linear_executor) const override
+ std::unique_ptr<backend::BackendContext>
+ newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &,
+ bool is_linear_executor) const override
{
const auto &operands = graph.operands();
const auto &operations = graph.operations();
- auto context = std::make_unique<BackendContext>(this, &graph);
+ auto context = std::make_unique<acl_cl::BackendContext>(this, &graph);
auto tm = createTensorManager(is_linear_executor);
auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
- auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+ auto tb = std::make_shared<TensorBuilder>(operands, tm);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
- context->tensor_register = nullptr;
context->optimizer = std::make_shared<Optimizer>(context.get());
return context;
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "Optimizer.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+void BackendContext::initConsts()
+{
+ for (auto &op : operation_list())
+ {
+ constant_initializer->setLayout(op.layout);
+ graph()->operations().at(op.index).accept(*constant_initializer);
+ }
+
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerDefaultInitializer(ind, obj);
+ }
+ }
+
+ constant_initializer->run();
+}
+
+void BackendContext::planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info)
+{
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexMap<uint32_t> def_map;
+ ir::OperandIndexSequence constants;
+
+ // Prepare scanning
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ const auto &li = lower_info.operand.at(ind);
+ if (li->def_factors().getOnlyElement().backend() != backend())
+ continue;
+
+ // Ignore unused tensor
+ if (li->def_factors().size() == 0 && li->use_factors().size() == 0)
+ {
+ VERBOSE(planTensors) << "Operand #" << ind.value() << " will not be used. no more process."
+ << std::endl;
+ return;
+ }
+
+ uses_map[ind] = obj.getUses().size();
+ def_map[ind] = obj.getDef().valid() ? 1 : 0;
+
+ if (obj.isConstant())
+ constants.append(ind);
+
+ auto factor = li->def_factors().getOnlyElement();
+ if (!tensor_builder->isRegistered(ind))
+ {
+ // These tensors do not exist in any op_seq (No use and def)
+ const auto info = obj.info();
+ const auto backend_layout = factor.layout();
+ // TODO Change tensor info to have permuted shape
+ tensor_builder->registerTensorInfo(ind, info, backend_layout);
+ }
+ }
+
+ // Start scanning to do notify{First|Last}Use for each tensor
+
+ // If a tensor is a constant, increase the use of the tensor and allocate it first.
+ // Increasing the use count here keeps the tensor from being deallocated early, i.e. it will be
+ // deallocated last.
+ VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl;
+ for (const auto &ind : constants)
+ {
+ uses_map[ind]++;
+ tensor_builder->notifyFirstUse(ind);
+ }
+
+ // At each operation,
+ // 1. Scan DEF of outputs. If the DEF, allocate it
+ // 2. Scan DEF of inputs. If variable tensor, allocate it
+ // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
+ for (const auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ for (const auto &op_idx : op_seq.operations())
+ {
+ auto &op = graph()->operations().at(op_idx);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+ // Define outputs
+ for (const auto &ind : op_outputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(def_map.find(ind) != def_map.end());
+ if (def_map[ind])
+ {
+ def_map[ind] = 0;
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ // Scan variable tensors
+ // These tensors behave like constants, but OperandInfo and LowerInfo treat them as
+ // non-constant so that the memory planner here can keep memory usage lower
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ const auto &operand = graph()->operands().at(ind);
+ if (operand.info().isVariable())
+ {
+ // The variable tensor with buffer is not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+ assert(lower_info.operand.at(ind)->def_factors().size() == 1 &&
+ lower_info.operand.at(ind)->use_factors().size() == 1);
+ assert(uses_map[ind] == 1 && def_map[ind] == 0);
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0)
+ {
+ // plan for deallocation of the static tensor node
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+ }
+ }
+
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+
+ assert(
+ std::all_of(def_map.begin(), def_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+}
+
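planTensors() above is reference-count liveness planning: each registered operand starts with its use count, is reserved at its definition, released when its count drains to zero, and constants receive one extra use so they outlive the whole schedule. A toy trace for a single hypothetical intermediate operand (names invented for illustration):

// Hypothetical chain: OpA produces %t, OpB is its only consumer.
//   prepare:           uses_map[%t] = 1, def_map[%t] = 1
//   scan OpA outputs:  def_map[%t] -> 0,  tensor_builder->notifyFirstUse(%t)   // reserve
//   scan OpB inputs:   uses_map[%t] -> 0, tensor_builder->notifyLastUse(%t)    // release
// A constant %w instead starts at its use count + 1, so it only reaches zero in the
// final "Dispose and validate" loop and stays allocated for the entire schedule.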
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info)
+{
+ optimizer->optimize();
+
+ for (const auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+ ir::Remove::DUPLICATED;
+ for (const auto op_ind : op_seq)
+ {
+ bool op_assigned = [&]() {
+ for (auto &op_info : operation_list())
+ if (op_info.index == op_ind)
+ return true;
+ return false;
+ }();
+ if (!op_assigned)
+ continue;
+
+ const auto &op = graph()->operations().at(op_ind);
+ for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED)
+ {
+ if (!tensor_builder->isRegistered(index) && !model_io.contains(index) &&
+ find(operand_list().begin(), operand_list().end(), index) != operand_list().end())
+ {
+ const auto &operand_lower_info =
+ lower_info.operand.at(index)->def_factors().getOnlyElement();
+
+ // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl)
+ // op.getOutputs() of permute (CPU) returns tensor A
+ // but tensor A belongs to the backend of acl_cl.
+ // So, we have to make this tensor NOT registered for CPU.
+ if (operand_lower_info.backend() != backend())
+ continue;
+
+ const auto &obj = graph()->operands().at(index);
+ const auto frontend_layout = op_seq.getLayout();
+ const auto backend_layout = operand_lower_info.layout();
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+ }
+ }
+ }
+ }
+
+ // TODO Get compiler options from compiler, and use it rather than getting it from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ planTensors(order, op_seqs, lower_info);
+ }
+ else
+ {
+ // For executors that do not have a fixed linear execution order:
+ // as a workaround, use the static memory planner so that tensors are never deallocated
+ for (auto ind : operand_list())
+ {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ tensor_builder->prepare();
+
+ return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs)
+{
+ FunctionMap ret;
+
+ for (auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ bool assigned = [&]() {
+ for (auto op_info : operation_list())
+ if (op_seq.exist(op_info.index))
+ return true;
+ return false;
+ }();
+ if (!assigned)
+ continue;
+ auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+ ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ }
+
+ tensor_builder->allocate();
+ initConsts();
+
+ // NOTE For memory optimization, we want to free some operand data
+ for (auto ind : operand_list())
+ {
+ // TODO Remove const_cast
+ auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+ obj.releaseData();
+ }
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) {
+ ifunc.prepare();
+ tensor_builder->postFunctionPrepare();
+ });
+ }
+
+ return ret;
+}
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+class Optimizer;
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}
+ {
+ }
+
+ ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info) override;
+ FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs) override;
+
+private:
+ void initConsts();
+ void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<ConstantInitializer> constant_initializer;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+ std::shared_ptr<Optimizer> optimizer;
+};
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
const auto &axis_obj = _operands.at(axis_index);
const auto ifm_rank = input_obj.shape().rank();
- const auto frontend_layout = this->_current_op_seq_layout;
+ const auto frontend_layout = this->_current_layout;
auto output_tensor = this->_tensor_reg->getITensor(output_index);
const auto backend_layout = output_tensor->layout();
* limitations under the License.
*/
-#ifndef __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
-#define __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
+#ifndef __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__
#include "AclConstantInitializer.h"
} // namespace backend
} // namespace onert
-#endif // __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
+#endif // __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__
const std::shared_ptr<TensorBuilder> &tensor_builder,
const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
: _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
- _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
+ _tensor_reg(tensor_reg), _current_layout(ir::Layout::UNKNOWN)
{
// DO NOTHING
}
_return_fn_seq = std::make_unique<exec::FunctionSequence>();
_return_fn_seq->enableDynamicShapeInferer(false);
- _current_op_seq_layout = op_seq.getLayout();
+ _current_layout = op_seq.getLayout();
for (const auto &operation_idx : op_seq.operations())
{
const auto &node = _operations_ctx.at(operation_idx);
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+ const auto NNApiInputs = 2;
+ if (node.getInputs().size() != NNApiInputs)
+ {
+ const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
+ if (!_ctx.at(crops_index).isConstant())
+ {
+ throw std::runtime_error("Non-constant crops NYI for acl_cl backend BatchToSpaceND");
+ }
+
+ auto crops = _ctx.at(crops_index).asVector<int32_t>();
+ for (auto crop : crops)
+ {
+ if (crop != 0)
+ {
+ throw std::runtime_error("Non-zero crops NYI for acl_cl backend BatchToSpaceND");
+ }
+ }
+ }
+
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [1, kernel_height, kernel_width, depth_out].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
else
{
const auto rank = _ctx.at(ofm_index).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor,
::arm_compute::CLFullyConnectedReshapingLayer>(
- node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
+ node, _ctx, _tensor_builder, _tensor_reg, _current_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
const auto input_rank = _ctx.at(input_index).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = input_tensor->layout();
std::unique_ptr<arm_compute::IFunction> fn;
// NOTE This operation must not be changed the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
frontend_layout == backend_layout);
auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = ifm_tensor->layout();
const auto &perms = _ctx.at(perm_idx);
auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx);
const size_t output_rank = _ctx.at(output_idx).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis;
axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
for (const auto &input_index : input_indexes)
inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = _tensor_reg->getAclTensor(output_index)->layout();
if (axis < 0)
void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
- node, _ctx, _tensor_reg, _current_op_seq_layout,
- acl_common::convertPoolType(node.param().op_type));
+ node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type));
const auto ofm_index{node.getOutputs().at(0)};
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_layout);
const auto stride = node.param().stride;
UNUSED_RELEASE(backend_layout);
assert(backend_layout == ifm_tensor->layout());
assert(backend_layout == indices_tensor->layout());
- assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
+ assert(ifm_rank < 4 || _current_layout == backend_layout);
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
_return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::ArgMax &node)
+void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ArgMax::Input::AXIS)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
auto ifm_shape = _ctx.at(ifm_index).shape();
auto ofm_shape = _ctx.at(ofm_index).shape();
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
- auto frontend_layout = _current_op_seq_layout;
+ auto frontend_layout = _current_layout;
auto backend_layout = ifm_tensor->layout();
int axis_value = _ctx.at(axis_index).asScalar<int32_t>();
auto acl_axis =
acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
-
+ auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX
+ : ::arm_compute::ReductionOperation::ARG_IDX_MIN;
auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayerEx>(
- ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
- ::arm_compute::ReductionOperation::ARG_IDX_MAX);
+ ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), reduce_type);
_return_fn = asAclFunction(std::move(fn));
}
for (const auto &ofm_ind : output_indexes)
output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = ifm_tensor->layout();
auto axis = _ctx.at(axis_index).asScalar<int32_t>();
if (axis < 0)
{
int32_t split_dim = split_dim_op.asScalar<int32_t>();
uint32_t split_dim_revised = (split_dim < 0) ? (split_dim + ifm_rank) : split_dim;
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = ifm_tensor->layout();
if (ifm_tensor->num_dimensions() != ifm_tensor->info()->num_dimensions())
for (const auto &output_index : output_indexes)
outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
if (axis < 0)
axis += input_rank;
auto input = _tensor_reg->getAclTensor(input_index)->handle();
auto output = _tensor_reg->getAclTensor(output_index)->handle();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
::arm_compute::PaddingList padding_list;
#ifndef __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
-#include <backend/IKernelGenerator.h>
+#include <backend/cpu_common/KernelGeneratorBase.h>
#include "ir/Operands.h"
#include "TensorBuilder.h"
namespace acl_cl
{
-class KernelGenerator : public IKernelGenerator
+class KernelGenerator : public cpu_common::KernelGeneratorBase
{
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
void visit(const ir::OpSequence &) override;
+
+ void visit(const ir::operation::ArgMinMax &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
void visit(const ir::operation::BinaryArithmetic &) override;
+ void visit(const ir::operation::Comparison &) override;
+ void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::ConvertFp16ToFp32 &) override;
+ void visit(const ir::operation::ConvertFp32ToFp16 &) override;
+ void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::Concat &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Reduce &) override;
- void visit(const ir::operation::Reshape &) override;
- void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Softmax &) override;
- void visit(const ir::operation::Slice &) override;
- void visit(const ir::operation::StridedSlice &) override;
- void visit(const ir::operation::Transpose &) override;
void visit(const ir::operation::ElementwiseActivation &) override;
void visit(const ir::operation::ElementwiseBinary &) override;
void visit(const ir::operation::ElementwiseUnary &) override;
+ void visit(const ir::operation::EmbeddingLookup &) override;
void visit(const ir::operation::ExpandDims &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+ void visit(const ir::operation::Gather &) override;
+ void visit(const ir::operation::HashtableLookup &) override;
void visit(const ir::operation::InstanceNorm &) override;
- void visit(const ir::operation::Comparison &) override;
+ void visit(const ir::operation::L2Normalization &) override;
+ void visit(const ir::operation::LocalResponseNormalization &) override;
void visit(const ir::operation::LSTM &) override;
void visit(const ir::operation::OneHot &) override;
void visit(const ir::operation::Pack &) override;
- void visit(const ir::operation::Pool2D &) override;
+ void visit(const ir::operation::Pad &) override;
void visit(const ir::operation::Permute &) override;
+ void visit(const ir::operation::Pool2D &) override;
+ void visit(const ir::operation::PReLU &) override;
+ void visit(const ir::operation::Reduce &) override;
+ void visit(const ir::operation::Reshape &) override;
void visit(const ir::operation::ResizeBilinear &) override;
void visit(const ir::operation::ResizeNearestNeighbor &) override;
+ void visit(const ir::operation::Reverse &) override;
void visit(const ir::operation::RNN &) override;
+ void visit(const ir::operation::Slice &) override;
+ void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
- void visit(const ir::operation::EmbeddingLookup &) override;
- void visit(const ir::operation::L2Normalization &) override;
- void visit(const ir::operation::HashtableLookup &) override;
- void visit(const ir::operation::PReLU &) override;
- void visit(const ir::operation::TransposeConv &) override;
- void visit(const ir::operation::SquaredDifference &) override;
- void visit(const ir::operation::TopKV2 &) override;
- void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::ArgMax &) override;
- void visit(const ir::operation::LocalResponseNormalization &) override;
- void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::Split &) override;
void visit(const ir::operation::SplitV &) override;
+ void visit(const ir::operation::SquaredDifference &) override;
+ void visit(const ir::operation::Squeeze &) override;
+ void visit(const ir::operation::StridedSlice &) override;
+ void visit(const ir::operation::TopKV2 &) override;
+ void visit(const ir::operation::Transpose &) override;
+ void visit(const ir::operation::TransposeConv &) override;
void visit(const ir::operation::Unpack &) override;
- void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::ConvertFp32ToFp16 &) override;
- void visit(const ir::operation::ConvertFp16ToFp32 &) override;
- void visit(const ir::operation::Reverse &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
- ir::Layout _current_op_seq_layout;
+ ir::Layout _current_layout;
};
} // namespace acl_cl
#ifndef __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__
#define __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__
-#include <backend/IOptimizer.h>
-#include <backend/BackendContext.h>
+#include "BackendContext.h"
#include "TensorBuilder.h"
namespace onert
namespace acl_cl
{
-class Optimizer : public IOptimizer
+class Optimizer
{
public:
Optimizer(BackendContext *context);
- void optimize() override;
+ void optimize();
private:
BackendContext *_context;
* limitations under the License.
*/
-#include <util/logging.h>
-
#include "Backend.h"
extern "C" {
-onert::backend::Backend *onert_backend_create()
-{
- VERBOSE(onert_backend_create) << "'acl_cl' loaded\n";
- return new onert::backend::acl_cl::Backend;
-}
-void onert_backend_destroy(onert::backend::Backend *backend)
-{
- VERBOSE(onert_backend_create) << "'acl_cl' unloaded\n";
- delete backend;
-}
+onert::backend::Backend *onert_backend_create() { return new onert::backend::acl_cl::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
}
AclConstantInitializer::AclConstantInitializer(const ir::Operands &operands,
const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
+ : cpu_common::ConstantInitializerBase{operands}, _tensor_reg{tensor_reg}
{
// DO NOTHING
}
#ifndef __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
-#include <backend/IConstantInitializer.h>
+#include <backend/cpu_common/ConstantInitializerBase.h>
#include <ir/Operands.h>
#include "AclTensorRegistry.h"
namespace acl_common
{
-class AclConstantInitializer : public IConstantInitializer
+class AclConstantInitializer : public cpu_common::ConstantInitializerBase
{
public:
AclConstantInitializer(const ir::Operands &operands,
#include <queue>
#include <arm_compute/core/Types.h>
-#include <backend/ITensorBuilder.h>
#include "ir/OperandIndexMap.h"
#include <ir/Operands.h>
#include "AclTensorManager.h"
LAST
};
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-class AclTensorBuilder : public ITensorBuilder
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> class AclTensorBuilder
{
public:
using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>;
- AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
- const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg);
+ AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr);
/**
* @brief Register tensor information to allocate on ACL-CL backend
* @param[in] layout Tensor data layout
*/
void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout) override;
+ ir::Layout backend_layout);
- void notifyFirstUse(const ir::OperandIndex &) override;
- void notifyLastUse(const ir::OperandIndex &) override;
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
- bool isRegistered(const ir::OperandIndex &) const override;
+ bool isRegistered(const ir::OperandIndex &) const;
- void prepare(void) override;
- void allocate() override;
- void postFunctionPrepare() override;
+ void prepare(void);
+ void allocate();
+ void postFunctionPrepare();
T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); }
ir::OperandIndexMap<size_t> _uses_count_map;
std::unique_ptr<T_AclTensorManager> _tensor_mgr;
- std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> _tensor_reg;
// for linear executor
std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq;
{
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(
- const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
- const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg)
- : _operands{operands}, _tensor_mgr{tensor_mgr}, _tensor_reg{tensor_reg}
+AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands,
+ T_AclTensorManager *tensor_mgr)
+ : _operands{operands}, _tensor_mgr{tensor_mgr}
{
assert(_tensor_mgr);
}
case ir::DataType::UINT8:
return ::arm_compute::DataType::U8;
case ir::DataType::QUANT_INT8_SYMM:
- return ::arm_compute::DataType::S8;
+ return ::arm_compute::DataType::QSYMM8;
+ case ir::DataType::QUANT_INT8_ASYMM:
+ return ::arm_compute::DataType::QASYMM8_SIGNED;
case ir::DataType::FLOAT16:
return ::arm_compute::DataType::F16;
case ir::DataType::INT64:
return ::arm_compute::DataType::S64;
+ case ir::DataType::QUANT_INT16_ASYMM:
+ return ::arm_compute::DataType::QASYMM16;
+ case ir::DataType::QUANT_INT8_SYMM_PER_CHANNEL:
+ return ::arm_compute::DataType::QSYMM8_PER_CHANNEL;
default:
- throw std::runtime_error("Not supported, yet");
+ throw std::runtime_error("Not supported internal data type, yet");
break;
}
}
return ::arm_compute::ActivationLayerInfo{
::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f};
default:
- throw std::runtime_error{"Not supported, yet"};
+ throw std::runtime_error{"Not supported internal activation, yet"};
break;
}
}
return ::arm_compute::ActivationLayerInfo{
::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha};
default:
- throw std::runtime_error{"Not supported, yet"};
+ throw std::runtime_error{"Not supported internal elementwise activation, yet"};
break;
}
}
return ir::DataType::UINT32;
case ::arm_compute::DataType::QASYMM8:
return ir::DataType::QUANT_UINT8_ASYMM;
+ case ::arm_compute::DataType::QASYMM8_SIGNED:
+ return ir::DataType::QUANT_INT8_ASYMM;
case ::arm_compute::DataType::U8:
return ir::DataType::UINT8;
case ::arm_compute::DataType::QSYMM8:
case ::arm_compute::DataType::S64:
return ir::DataType::INT64;
default:
- throw std::runtime_error{"Not supported, yet"};
+ throw std::runtime_error{"Not supported acl data type, yet"};
break;
}
}
#include <backend/Backend.h>
#include <ir/Operands.h>
+#include "BackendContext.h"
#include "Config.h"
#include "ConstantInitializer.h"
#include "KernelGenerator.h"
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<BackendContext> newContext(const ir::Graph &graph,
- const std::shared_ptr<custom::IKernelBuilder> &,
- bool is_linear_executor) const override
+ std::unique_ptr<backend::BackendContext>
+ newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &,
+ bool is_linear_executor) const override
{
const auto &operands = graph.operands();
const auto &operations = graph.operations();
- auto context = std::make_unique<BackendContext>(this, &graph);
+ auto context = std::make_unique<acl_neon::BackendContext>(this, &graph);
auto tm = createTensorManager(is_linear_executor);
auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
- auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+ auto tb = std::make_shared<TensorBuilder>(operands, tm);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
- context->tensor_register = nullptr;
context->optimizer = std::make_shared<Optimizer>(context.get());
return context;
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "Optimizer.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+void BackendContext::initConsts()
+{
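+  // Visit each operation assigned to this backend so that initializers for its constant inputs are registered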
+ for (auto &op : operation_list())
+ {
+ constant_initializer->setLayout(op.layout);
+ graph()->operations().at(op.index).accept(*constant_initializer);
+ }
+
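+  // Fall back to a default initializer for constant operands that were not covered above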
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerDefaultInitializer(ind, obj);
+ }
+ }
+
+ constant_initializer->run();
+}
+
+void BackendContext::planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info)
+{
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexMap<uint32_t> def_map;
+ ir::OperandIndexSequence constants;
+
+ // Prepare scanning
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ const auto &li = lower_info.operand.at(ind);
+ if (li->def_factors().getOnlyElement().backend() != backend())
+ continue;
+
+ // Ignore unused tensor
+ if (li->def_factors().size() == 0 && li->use_factors().size() == 0)
+ {
+ VERBOSE(planTensors) << "Operand #" << ind.value() << " will not be used. no more process."
+ << std::endl;
+ return;
+ }
+
+ uses_map[ind] = obj.getUses().size();
+ def_map[ind] = obj.getDef().valid() ? 1 : 0;
+
+ if (obj.isConstant())
+ constants.append(ind);
+
+ auto factor = li->def_factors().getOnlyElement();
+ if (!tensor_builder->isRegistered(ind))
+ {
+ // These tensors do not exist in any op_seq (No use and def)
+ const auto info = obj.info();
+ const auto backend_layout = factor.layout();
+ // TODO Change tensor info to have permuted shape
+ tensor_builder->registerTensorInfo(ind, info, backend_layout);
+ }
+ }
+
+ // Start scanning to do notify{First|Last}Use for each tensor
+
+  // If a tensor is a constant, increase the use of the tensor and allocate it first.
+  // Increasing the use count here keeps the tensor from being deallocated until the very end,
+  // i.e., constants are deallocated last.
+ VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl;
+ for (const auto &ind : constants)
+ {
+ uses_map[ind]++;
+ tensor_builder->notifyFirstUse(ind);
+ }
+
+ // At each operation,
+  // 1. Scan DEF of outputs. If the tensor is defined here, allocate it
+  // 2. Scan DEF of inputs. If it is a variable tensor, allocate it
+  // 3. Scan USE of inputs. Decrease the USE count and deallocate the tensor once it reaches 0
+ for (const auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ for (const auto &op_idx : op_seq.operations())
+ {
+ auto &op = graph()->operations().at(op_idx);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+ // Define outputs
+ for (const auto &ind : op_outputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(def_map.find(ind) != def_map.end());
+ if (def_map[ind])
+ {
+ def_map[ind] = 0;
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ // Scan variable tensors
+      // These tensors behave like constants, but OperandInfo and LowerInfo treat them as
+      // non-constant so that memory planning here can reduce memory usage
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ const auto &operand = graph()->operands().at(ind);
+ if (operand.info().isVariable())
+ {
+ // The variable tensor with buffer is not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+ assert(lower_info.operand.at(ind)->def_factors().size() == 1 &&
+ lower_info.operand.at(ind)->use_factors().size() == 1);
+ assert(uses_map[ind] == 1 && def_map[ind] == 0);
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ for (const auto &ind : op_inputs)
+ {
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0)
+ {
+          // plan for deallocation of static tensor node
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+ }
+ }
+
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+
+ assert(
+ std::all_of(def_map.begin(), def_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info)
+{
+ optimizer->optimize();
+
+ for (const auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+ ir::Remove::DUPLICATED;
+ for (const auto op_ind : op_seq)
+ {
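+      // Skip operations that are not assigned to this backend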
+ bool op_assigned = [&]() {
+ for (auto &op_info : operation_list())
+ if (op_info.index == op_ind)
+ return true;
+ return false;
+ }();
+ if (!op_assigned)
+ continue;
+
+ const auto &op = graph()->operations().at(op_ind);
+ for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED)
+ {
+ if (!tensor_builder->isRegistered(index) && !model_io.contains(index) &&
+ find(operand_list().begin(), operand_list().end(), index) != operand_list().end())
+ {
+ const auto &operand_lower_info =
+ lower_info.operand.at(index)->def_factors().getOnlyElement();
+
+ // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl)
+ // op.getOutputs() of permute (CPU) returns tensor A
+ // but tensor A belongs to the backend of acl_cl.
+ // So, we have to make this tensor NOT registered for CPU.
+ if (operand_lower_info.backend() != backend())
+ continue;
+
+ const auto &obj = graph()->operands().at(index);
+ const auto frontend_layout = op_seq.getLayout();
+ const auto backend_layout = operand_lower_info.layout();
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+ }
+ }
+ }
+ }
+
+  // TODO Get compiler options from the compiler and use them rather than reading them from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ planTensors(order, op_seqs, lower_info);
+ }
+ else
+ {
+    // For executors that do not have a fixed linear execution order:
+    // as a workaround, use the static memory planner so that tensors are never deallocated
+ for (auto ind : operand_list())
+ {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ tensor_builder->prepare();
+
+ return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs)
+{
+ FunctionMap ret;
+
+ for (auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
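+    // Skip op sequences that contain no operation assigned to this backend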
+ bool assigned = [&]() {
+ for (auto op_info : operation_list())
+ if (op_seq.exist(op_info.index))
+ return true;
+ return false;
+ }();
+ if (!assigned)
+ continue;
+ auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+ ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ }
+
+ tensor_builder->allocate();
+ initConsts();
+
+ // NOTE For memory optimization, we want to free some operand data
+ for (auto ind : operand_list())
+ {
+ // TODO Remove const_cast
+ auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+ obj.releaseData();
+ }
+
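+  // Prepare every generated function, running the tensor builder's post-preparation step after each one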
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) {
+ ifunc.prepare();
+ tensor_builder->postFunctionPrepare();
+ });
+ }
+
+ return ret;
+}
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_neon
+{
+
+class Optimizer;
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}
+ {
+ }
+
+ ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info) override;
+ FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs) override;
+
+private:
+ void initConsts();
+ void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<ConstantInitializer> constant_initializer;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+ std::shared_ptr<Optimizer> optimizer;
+};
+
+} // namespace acl_neon
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__
* limitations under the License.
*/
-#ifndef __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
-#define __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
+#ifndef __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__
#include "AclConstantInitializer.h"
} // namespace backend
} // namespace onert
-#endif // __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
+#endif // __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__
const std::shared_ptr<TensorBuilder> &tensor_builder,
const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
: _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
- _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
+ _tensor_reg(tensor_reg), _current_layout(ir::Layout::UNKNOWN)
{
// DO NOTHING
}
_return_fn_seq = std::make_unique<exec::FunctionSequence>();
_return_fn_seq->enableDynamicShapeInferer(false);
- _current_op_seq_layout = op_seq.getLayout();
+ _current_layout = op_seq.getLayout();
for (const auto &operation_idx : op_seq.operations())
{
const auto &node = _operations_ctx.at(operation_idx);
}
}
-void KernelGenerator::visit(const ir::operation::ArgMax &node)
+void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ArgMax::Input::AXIS)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
- auto frontend_layout = _current_op_seq_layout;
+ auto frontend_layout = _current_layout;
auto backend_layout = ifm_tensor->layout();
int axis_value = _ctx.at(axis_index).asScalar<int32_t>();
assert(axis_value >= 0 && axis_value < ifm_rank);
const auto fixed_axis =
acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
+ auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX
+ : ::arm_compute::ReductionOperation::ARG_IDX_MIN;
auto fn = acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>(
- ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(),
- arm_compute::ReductionOperation::ARG_IDX_MAX);
+ ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), reduce_type);
_return_fn = asAclFunction(std::move(fn));
}
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+ const auto NNApiInputs = 2;
+ if (node.getInputs().size() != NNApiInputs)
+ {
+ const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
+ if (!_ctx.at(crops_index).isConstant())
+ {
+ throw std::runtime_error("Non-constant crops NYI for acl_neon backend BatchToSpaceND");
+ }
+
+ auto crops = _ctx.at(crops_index).asVector<int32_t>();
+ for (auto crop : crops)
+ {
+ if (crop != 0)
+ {
+ throw std::runtime_error("Non-zero crops NYI for acl_neon backend BatchToSpaceND");
+ }
+ }
+ }
+
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
// Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [1, kernel_height, kernel_width, depth_out].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
else
{
const auto rank = _ctx.at(ofm_index).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
::arm_compute::NEFullyConnectedReshapingLayer>(
- node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
+ node, _ctx, _tensor_builder, _tensor_reg, _current_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
// and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
assert(backend_layout == ifm_tensor->layout());
assert(backend_layout == indices_tensor->layout());
- assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
+ assert(ifm_rank < 4 || _current_layout == backend_layout);
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
for (const auto &input_index : input_indexes)
inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = _tensor_reg->getAclTensor(output_index)->layout();
if (axis < 0)
{
const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
const auto axis =
acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
- node, _ctx, _tensor_reg, _current_op_seq_layout,
- acl_common::convertPoolType(node.param().op_type));
+ node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type));
const auto ofm_index{node.getOutputs().at(0)};
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
const auto input_rank = _ctx.at(input_index).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = input_tensor->layout();
const auto reduce_axes =
acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
// NOTE This operation must not change the layout from frontend to backend,
// so PermutationOperationPass makes the frontend and backend layouts the same.
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
frontend_layout == backend_layout);
for (const auto &ofm_ind : output_indexes)
output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = ifm_tensor->layout();
auto axis = _ctx.at(axis_index).asScalar<int32_t>();
if (axis < 0)
auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_layout);
const auto stride = node.param().stride;
auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = ifm_tensor->layout();
const auto rank = _ctx.at(ifm_idx).shape().rank();
for (const auto &output_index : output_indexes)
outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
if (axis < 0)
axis += input_rank;
auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx);
const size_t output_rank = _ctx.at(out_idx).shape().rank();
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto backend_layout = output_tensor->layout();
int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis;
axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
#ifndef __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__
-#include <backend/IKernelGenerator.h>
+#include <backend/cpu_common/KernelGeneratorBase.h>
#include "ir/Operands.h"
#include "TensorBuilder.h"
namespace acl_neon
{
-class KernelGenerator : public IKernelGenerator
+class KernelGenerator : public cpu_common::KernelGeneratorBase
{
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
void visit(const ir::OpSequence &) override;
- void visit(const ir::operation::ArgMax &) override;
+
+ void visit(const ir::operation::ArgMinMax &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
void visit(const ir::operation::BinaryArithmetic &) override;
+ void visit(const ir::operation::Comparison &) override;
+ void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::ElementwiseActivation &) override;
void visit(const ir::operation::ElementwiseBinary &) override;
void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::EmbeddingLookup &) override;
+ void visit(const ir::operation::ExpandDims &) override;
void visit(const ir::operation::FullyConnected &) override;
void visit(const ir::operation::Gather &) override;
void visit(const ir::operation::HashtableLookup &) override;
void visit(const ir::operation::L2Normalization &) override;
void visit(const ir::operation::LocalResponseNormalization &) override;
void visit(const ir::operation::LSTM &) override;
+ void visit(const ir::operation::OneHot &) override;
void visit(const ir::operation::Pack &) override;
void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Permute &) override;
+ void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::PReLU &) override;
void visit(const ir::operation::Reduce &) override;
void visit(const ir::operation::Reshape &) override;
void visit(const ir::operation::ResizeBilinear &) override;
void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::Squeeze &) override;
+ void visit(const ir::operation::Slice &) override;
void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
void visit(const ir::operation::Split &) override;
void visit(const ir::operation::SquaredDifference &) override;
- void visit(const ir::operation::Slice &) override;
+ void visit(const ir::operation::Squeeze &) override;
void visit(const ir::operation::StridedSlice &) override;
- void visit(const ir::operation::TransposeConv &) override;
void visit(const ir::operation::Transpose &) override;
+ void visit(const ir::operation::TransposeConv &) override;
void visit(const ir::operation::Unpack &) override;
- void visit(const ir::operation::ExpandDims &) override;
- void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::OneHot &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
- ir::Layout _current_op_seq_layout;
+ ir::Layout _current_layout;
};
} // namespace acl_neon
#ifndef __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__
#define __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__
-#include <backend/IOptimizer.h>
-#include <backend/BackendContext.h>
+#include "BackendContext.h"
#include "TensorBuilder.h"
namespace onert
namespace acl_neon
{
-class Optimizer : public IOptimizer
+class Optimizer
{
public:
Optimizer(BackendContext *context);
- void optimize() override;
+ void optimize();
private:
BackendContext *_context;
* limitations under the License.
*/
-#include <util/logging.h>
-
#include "Backend.h"
extern "C" {
-onert::backend::Backend *onert_backend_create()
-{
- VERBOSE(onert_backend_create) << "'acl_neon' loaded\n";
- return new onert::backend::acl_neon::Backend;
-}
-void onert_backend_destroy(onert::backend::Backend *backend)
-{
- VERBOSE(onert_backend_create) << "'acl_neon' unloaded\n";
- delete backend;
-}
+onert::backend::Backend *onert_backend_create() { return new onert::backend::acl_neon::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
}
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
context->external_context());
- context->tensor_register = nullptr;
- context->optimizer = nullptr;
return context;
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include "backend/cpu_common/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+void BackendContext::initConsts()
+{
+ for (auto &op : operation_list())
+ {
+ constant_initializer->setLayout(op.layout);
+ graph()->operations().at(op.index).accept(*constant_initializer);
+ }
+
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerDefaultInitializer(ind, obj);
+ }
+ }
+
+ constant_initializer->run();
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info)
+{
+ auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+ ir::Remove::DUPLICATED;
+ for (auto index : operand_list())
+ {
+ if (model_io.contains(index))
+ continue;
+ const auto &obj = graph()->operands().at(index);
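+    // Derive the frontend layout from the first operation that uses this operand, if any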
+ const auto frontend_layout = [&]() {
+ if (obj.getUses().size() == 0)
+ return ir::Layout::UNKNOWN;
+ auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
+ for (auto &operation_info : operation_list())
+ {
+ if (operation_info.index == use_op_ind)
+ return operation_info.layout;
+ }
+ return ir::Layout::UNKNOWN;
+ }();
+ const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
+ if (permute_factor.backend() != backend())
+ continue;
+ const auto backend_layout = permute_factor.layout();
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+ }
+
+  // TODO Get compiler options from the compiler and use them rather than reading them from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ cpu_common::planTensors(*this, order, op_seqs, lower_info);
+ }
+ else
+ {
+    // For executors that do not have a fixed linear execution order:
+    // as a workaround, use the static memory planner so that tensors are never deallocated
+ for (auto ind : operand_list())
+ {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ tensor_builder->prepare();
+
+ return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs)
+{
+ FunctionMap ret;
+
+ for (auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ bool assigned = [&]() {
+ for (auto op_info : operation_list())
+ if (op_seq.exist(op_info.index))
+ return true;
+ return false;
+ }();
+ if (!assigned)
+ continue;
+ auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+ ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ }
+
+ initConsts();
+
+ // NOTE For memory optimization, we want to free some operand data
+ for (auto ind : operand_list())
+ {
+ // TODO Remove const_cast
+ auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+ obj.releaseData();
+ }
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
#define __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__
#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
#include "ExternalContext.h"
namespace onert
public:
BackendContext(const Backend *backend, const ir::Graph *graph,
std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
- std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
- std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
- std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
- std::shared_ptr<ITensorRegister> tensor_register = nullptr,
- std::shared_ptr<IOptimizer> optimizer = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder,
- constant_initializer, kernel_gen, tensor_register,
- optimizer),
- _external_context(new ExternalContext)
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}, _external_context(new ExternalContext)
{
}
+ ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info) override;
+ FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs) override;
+
std::shared_ptr<ExternalContext> external_context() { return _external_context; }
private:
+ void initConsts();
+ void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<ConstantInitializer> constant_initializer;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
// NOTE A ruy context owns a thread pool, so creating multiple ruy contexts also creates
// duplicate thread pools
// TODO Create one ruy context per session
* limitations under the License.
*/
-#ifndef __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
-#define __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
+#ifndef __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__
-#include "backend/cpu_common/TensorRegistry.h"
-
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
+#include <backend/cpu_common/ConstantInitializer.h>
namespace onert
{
namespace cpu
{
-class ConstantInitializer : public IConstantInitializer
-{
-public:
- ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<ITensorRegistry> &tensor_reg);
-
-public:
- void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
-
- // TODO: For now the only cpu backend supports constant tensor to use data from external
- // If the other backend supports (to do this,
- // ExternalTensor should be abstract such as IExternal, maybe),
- // this can be an interface of IConstantInitializer
- void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &);
-
-public:
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::FullyConnected &) override;
-
-private:
- std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
-
-private:
- std::shared_ptr<ITensorRegistry> _tensor_reg;
-};
+using ConstantInitializer = cpu_common::ConstantInitializer;
} // namespace cpu
} // namespace backend
} // namespace onert
-#endif // __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
+#endif // __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__
#ifndef __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__
#define __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__
-#include <backend/IExternalContext.h>
#include <util/ConfigSource.h>
#include <ruy/context.h>
namespace cpu
{
-class ExternalContext : public IExternalContext
+class ExternalContext
{
public:
ExternalContext() : _ruy_context(new ruy::Context)
#include "ops/CompareLayer.h"
#include "ops/ConcatLayer.h"
#include "ops/ConvolutionLayer.h"
+#include "ops/DepthToSpaceLayer.h"
#include "ops/DepthwiseConvolutionLayer.h"
#include "ops/EinsumLayer.h"
#include "ops/ElementwiseActivationLayer.h"
{
switch (type_ir)
{
+ case ir::operation::ElementwiseActivation::Type::ELU:
+ return ops::ElementwiseActivationType::kElu;
case ir::operation::ElementwiseActivation::Type::LOGISTIC:
return ops::ElementwiseActivationType::kLogistic;
case ir::operation::ElementwiseActivation::Type::RELU:
return ops::ElementwiseActivationType::kReLU;
case ir::operation::ElementwiseActivation::Type::TANH:
return ops::ElementwiseActivationType::kTanh;
+ case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
+ return ops::ElementwiseActivationType::kLeakyReLU;
default:
throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
}
{
switch (type_ir)
{
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
+ return ops::ElementwiseBinaryType::kLogicalAnd;
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
return ops::ElementwiseBinaryType::kLogicalOr;
case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
return ops::ElementwiseUnaryType::kRSqrt;
case ir::operation::ElementwiseUnary::Type::SIN:
return ops::ElementwiseUnaryType::kSin;
+ case ir::operation::ElementwiseUnary::Type::SQRT:
+ return ops::ElementwiseUnaryType::kSqrt;
+ case ir::operation::ElementwiseUnary::Type::SQUARE:
+ return ops::ElementwiseUnaryType::kSquare;
case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
return ops::ElementwiseUnaryType::kZerosLike;
default:
const std::shared_ptr<ExternalContext> &external_context)
: _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
_tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
- _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context)
+ _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
{
// DO NOTHING
}
_return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
}
- _current_op_seq_layout = op_seq.getLayout();
+ _current_layout = op_seq.getLayout();
for (const auto &operation_idx : op_seq.operations())
{
const auto &node = _operations_ctx.at(operation_idx);
_return_fn = std::move(fn);
return;
}
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
// Kernel format is [1, kernel_height, kernel_width, depth_out].
const auto &ker_shape = _ctx.at(ker_index).shape();
const auto ker_height = ker_shape.dim(1);
fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width,
- dilation_height, activation, ofm_tensor);
+ dilation_height, activation, ofm_tensor, _external_context);
_return_fn = std::move(fn);
}
const auto ofm_index{node.getOutputs().at(0)};
const auto rank = _ctx.at(ofm_index).shape().rank();
- const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
+ const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
void KernelGenerator::visit(const ir::operation::Fill &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
+ // SHAPE input is used for shape inference
const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
auto output_tensor = _tensor_reg->getPortableTensor(output_index);
- auto input_tensor = _tensor_reg->getPortableTensor(input_index);
auto value_tensor = _tensor_reg->getPortableTensor(value_index);
auto fn = std::make_unique<ops::FillLayer>();
- fn->configure(input_tensor, value_tensor, output_tensor);
+ fn->configure(value_tensor, output_tensor);
_return_fn = std::move(fn);
}
assert(backend_layout == indices_tensor->layout());
const auto &input_shape = _ctx.at(input_index).shape();
UNUSED_RELEASE(input_shape);
- assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout);
+ assert(input_shape.rank() < 4 || _current_layout == backend_layout);
const auto axis_raw = node.param().axis;
const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);
for (auto &idx : opSeq)
{
const auto &operand = _ctx.at(idx);
- // TODO make sure using `_current_op_seq_layout` is correct for custom operations
+ // TODO make sure using `_current_layout` is correct for custom operations
types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
auto in_tensor = _tensor_reg->getPortableTensor(idx);
tensors.emplace_back(in_tensor);
{
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
+ // AXIS input is used for output shape inference
auto output_tensor = _tensor_reg->getPortableTensor(output_index);
auto input_tensor = _tensor_reg->getPortableTensor(input_index);
- auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
auto fn = std::make_unique<ops::ExpandDimsLayer>();
- fn->configure(input_tensor, axis_tensor, output_tensor);
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto ofm_index{node.getOutputs().at(0)};
const auto rank = _ctx.at(ofm_index).shape().rank();
- const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
+ const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
assert(-rank <= axis && axis < rank);
const auto input_index{node.getInputs().at(0)};
const auto rank = _ctx.at(input_index).shape().rank();
- const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
+ const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
assert(rank == 0 || (-rank <= axis && axis < rank));
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::ArgMax &node)
+void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ArgMax::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ArgMax::AXIS)};
+ const auto input_index{node.getInputs().at(ir::operation::ArgMinMax::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::AXIS)};
auto output_tensor = _tensor_reg->getPortableTensor(output_index);
auto input_tensor = _tensor_reg->getPortableTensor(input_index);
auto fn = std::make_unique<ops::ArgMinMaxLayer>();
- fn->configure(input_tensor, output_tensor, axis_tensor, /* is_arg_max */ true);
+ fn->configure(input_tensor, output_tensor, axis_tensor, node.param().is_arg_max);
_return_fn = std::move(fn);
}
const auto kh = node.param().kh;
const auto kw = node.param().kw;
const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
const auto padding =
ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
const auto activation = node.param().activation;
_return_fn = std::move(fn);
}
+void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
+{
+ const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+ auto block_size = node.param().block_size;
+
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+
+ auto fn = std::make_unique<ops::DepthToSpaceLayer>();
+
+ fn->configure(input_tensor, block_size, output_tensor);
+ _return_fn = std::move(fn);
+}
+
void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
{
const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
#include "Tensor.h"
#include <backend/CustomKernelBuilder.h>
-#include <backend/IKernelGenerator.h>
+#include <backend/cpu_common/KernelGeneratorBase.h>
#include <ir/Operands.h>
#include <ir/Operations.h>
namespace cpu
{
-class KernelGenerator : public IKernelGenerator
+class KernelGenerator : public cpu_common::KernelGeneratorBase
{
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
const std::shared_ptr<ExternalContext> &external_context);
- using IKernelGenerator::visit;
+ void visit(const ir::OpSequence &) override;
void visit(const ir::operation::AddN &) override;
- void visit(const ir::OpSequence &) override;
+ void visit(const ir::operation::ArgMinMax &) override;
+ void visit(const ir::operation::BatchMatMul &) override;
+ void visit(const ir::operation::BatchToSpaceND &) override;
+ void visit(const ir::operation::BinaryArithmetic &) override;
+ void visit(const ir::operation::BroadcastTo &) override;
+ void visit(const ir::operation::Comparison &) override;
+ void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::Custom &node) override;
+ void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::Concat &) override;
- void visit(const ir::operation::Fill &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Reshape &) override;
- void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Softmax &) override;
- void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::BinaryArithmetic &) override;
void visit(const ir::operation::Einsum &) override;
- void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::Custom &node) override;
void visit(const ir::operation::ElementwiseActivation &) override;
void visit(const ir::operation::ElementwiseBinary &) override;
void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::ExpandDims &) override;
+ void visit(const ir::operation::Fill &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+ void visit(const ir::operation::FusedBatchNorm &) override;
+ void visit(const ir::operation::Gather &) override;
+ void visit(const ir::operation::L2Normalization &) override;
+ void visit(const ir::operation::LogSoftmax &) override;
void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Pack &) override;
- void visit(const ir::operation::Unpack &) override;
+ void visit(const ir::operation::MatrixBandPart &) override;
void visit(const ir::operation::OneHot &) override;
- void visit(const ir::operation::Transpose &) override;
- void visit(const ir::operation::Reduce &) override;
- void visit(const ir::operation::Select &) override;
- void visit(const ir::operation::Slice &) override;
- void visit(const ir::operation::StridedSlice &) override;
- void visit(const ir::operation::Split &) override;
- void visit(const ir::operation::Shape &) override;
- void visit(const ir::operation::ResizeBilinear &node) override;
- void visit(const ir::operation::Reverse &) override;
- void visit(const ir::operation::ArgMax &) override;
+ void visit(const ir::operation::Pack &) override;
+ void visit(const ir::operation::Pad &) override;
void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Pow &) override;
- void visit(const ir::operation::SquaredDifference &) override;
- void visit(const ir::operation::Tile &) override;
- void visit(const ir::operation::L2Normalization &) override;
void visit(const ir::operation::Range &) override;
void visit(const ir::operation::Rank &) override;
- void visit(const ir::operation::MatrixBandPart &) override;
- void visit(const ir::operation::BatchMatMul &) override;
- void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::BroadcastTo &) override;
- void visit(const ir::operation::FusedBatchNorm &) override;
- void visit(const ir::operation::LogSoftmax &) override;
+ void visit(const ir::operation::Reduce &) override;
+ void visit(const ir::operation::Reshape &) override;
+ void visit(const ir::operation::ResizeBilinear &node) override;
+ void visit(const ir::operation::Reverse &) override;
+ void visit(const ir::operation::Select &) override;
+ void visit(const ir::operation::Shape &) override;
+ void visit(const ir::operation::Slice &) override;
+ void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
- void visit(const ir::operation::StatelessRandomUniform &) override;
+ void visit(const ir::operation::Split &) override;
void visit(const ir::operation::SplitV &) override;
+ void visit(const ir::operation::SquaredDifference &) override;
+ void visit(const ir::operation::Squeeze &) override;
+ void visit(const ir::operation::StatelessRandomUniform &) override;
+ void visit(const ir::operation::StridedSlice &) override;
+ void visit(const ir::operation::Tile &) override;
+ void visit(const ir::operation::Transpose &) override;
+ void visit(const ir::operation::Unpack &) override;
private:
const ir::Operands &_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
- ir::Layout _current_op_seq_layout;
+ ir::Layout _current_layout;
const std::shared_ptr<ExternalContext> _external_context;
};
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "StaticTensorManager.h"
-#include "Tensor.h"
-
-#include <util/logging.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-
-StaticTensorManager::StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> ®,
- cpu_common::DynamicTensorManager *dynamic_tensor_manager)
- : _nonconst_mgr{new cpu_common::MemoryManager()}, _tensors{reg},
- _dynamic_tensor_manager{dynamic_tensor_manager}
-{
- // DO NOTHING
-}
-
-void StaticTensorManager::allocateNonconsts(void)
-{
- _nonconst_mgr->allocate();
-
- for (auto &pair : _tensors->native_tensors())
- {
- const auto &ind = pair.first;
- auto tensor = pair.second.get();
- if (!_as_constants[ind] && !tensor->is_dynamic())
- {
- auto *buffer = _nonconst_mgr->getBuffer(ind);
- tensor->setBuffer(buffer);
-
- VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value()
- << "): " << static_cast<void *>(buffer) << std::endl;
- }
- }
-}
-
-void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
-
-void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
- const ir::OperandInfo &tensor_info, ir::Layout backend_layout,
- bool as_const)
-{
- assert(!_tensors->getITensor(ind));
- if (as_const)
- {
- auto tensor = std::make_unique<ExternalTensor>(tensor_info, backend_layout);
- _tensors->setNativeTensor(ind, std::move(tensor));
- }
- else
- {
- auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout,
- _dynamic_tensor_manager->dynamic_mem_mgr().get());
- _tensors->setNativeTensor(ind, std::move(tensor));
- }
- _as_constants[ind] = as_const;
-}
-
-void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
-{
- assert(_tensors->getITensor(ind));
-
- // This method is called only when a tensor has proper shape
- assert(!_tensors->getITensor(ind)->is_dynamic());
-
- if (!_as_constants[ind])
- _nonconst_mgr->claimPlan(ind, size);
-}
-
-void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
-{
- assert(_tensors->getITensor(ind));
-
- // This method is called only when a tensor has proper shape
- assert(!_tensors->getITensor(ind)->is_dynamic());
-
- if (!_as_constants[ind])
- _nonconst_mgr->releasePlan(ind);
-}
-
-void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
-{
- for (const auto &it : _tensors->native_tensors())
- fn(it.first);
-}
-
-} // namespace cpu
-} // namespace backend
-} // namespace onert
#ifndef __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
#define __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
-#include "backend/IStaticTensorManager.h"
-#include "backend/cpu_common/DynamicTensorManager.h"
-#include "backend/cpu_common/MemoryManager.h"
-#include "backend/cpu_common/TensorRegistry.h"
-#include "backend/ITensorManager.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/OperandInfo.h"
+#include "backend/cpu_common/StaticTensorManager.h"
namespace onert
{
namespace cpu
{
-class StaticTensorManager : public backend::IStaticTensorManager
-{
-public:
- StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> ®,
- cpu_common::DynamicTensorManager *dynamic_tensor_manager);
- virtual ~StaticTensorManager() = default;
-
- void allocateNonconsts(void);
- void deallocateNonconsts(void);
-
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
- ir::Layout backend_layout, bool as_const);
-
- void claimPlan(const ir::OperandIndex &ind, uint32_t size);
- void releasePlan(const ir::OperandIndex &ind);
-
- void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
-
-private:
- std::unique_ptr<cpu_common::MemoryManager> _nonconst_mgr;
- const std::shared_ptr<cpu_common::TensorRegistry> _tensors;
- ir::OperandIndexMap<bool> _as_constants;
- cpu_common::DynamicTensorManager *_dynamic_tensor_manager;
-};
+using StaticTensorManager = cpu_common::StaticTensorManager;
} // namespace cpu
} // namespace backend
{
using Tensor = cpu_common::Tensor;
-
-/**
- * @brief Class that uses data from external memory that is not managed by a backend
- * instead of allocating and copying the data. ExternalTensor's data pointer points to
- * an address of memory such as where memory is already allocated, or mmapped area.
- * This is meaning that ExternalTensor can take all of types' ir::Data.
- * To support this, assume below things no padding, always NHWC layout,
- * constant tensor and not dynamic.
- */
-class ExternalTensor : public Tensor
-{
-public:
- ExternalTensor() = delete;
- virtual ~ExternalTensor();
-
-public:
- ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout)
- : Tensor(info, layout, nullptr)
- {
- assert(_layout == ir::Layout::NHWC);
- assert(_info.isConstant());
- assert(_info.isDynamic() == false);
- }
-
-public:
- /**
- * @brief set Data to be shared from external so that this ExternalTensor will not be
- * allocated on CPU backend
- * @param[in] data data of Operand to be set
- */
- void setData(const std::shared_ptr<ir::Data> data)
- {
- assert(data != nullptr);
- _data = data;
- // Note. Some op such as cker::Conv could take buffer as nullptr.
- // That's why _buffer also would be used
- _buffer = const_cast<uint8_t *>(_data->base());
- }
-
-public:
- uint8_t *buffer() const override { return _buffer; }
-
- bool is_constant() const override { return true; }
- bool is_dynamic() const override { return false; }
- void set_dynamic() override
- {
- throw std::runtime_error("This tensor does not support changing dynamic");
- }
-
- void setShape(const ir::Shape &) override
- {
- throw std::runtime_error("This tensor does not support changing shape");
- }
-
- void increase_ref() override { ++_num_references; }
-
- void decrease_ref() override
- {
- assert(_data != nullptr);
- assert(_num_references > 0);
- --_num_references;
- if (_num_references == 0)
- {
- _data.reset();
- _buffer = nullptr;
- }
- }
-
- /**
- * @brief Reset reference count to zero and release data
- */
- void reset_ref() override
- {
- assert(_data != nullptr);
- assert(_num_references > 0);
- _num_references = 0;
-
- _data.reset();
- _buffer = nullptr;
- }
-
- int32_t num_references() override { return _num_references; }
-
-private:
- std::shared_ptr<const ir::Data> _data;
-};
+using ExternalTensor = cpu_common::ExternalTensor;
} // namespace cpu
} // namespace backend
#include <backend/cpu_common/DynamicTensorManager.h>
#include <backend/cpu_common/TensorRegistry.h>
-#include <backend/ITensorBuilder.h>
#include <ir/OperandIndexMap.h>
#include "StaticTensorManager.h"
namespace cpu
{
-class TensorBuilder : public ITensorBuilder
+class TensorBuilder
{
public:
TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
* @param[in] layout Operand data layout
*/
void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout) override;
+ ir::Layout backend_layout);
- void notifyFirstUse(const ir::OperandIndex &) override;
- void notifyLastUse(const ir::OperandIndex &) override;
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
- bool isRegistered(const ir::OperandIndex &) const override;
+ bool isRegistered(const ir::OperandIndex &) const;
- void prepare(void) override;
- void allocate() override;
- void postFunctionPrepare() override { /* DO NOTHING */}
+ void prepare(void);
+ void allocate();
+ void postFunctionPrepare() { /* DO NOTHING */}
- IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); }
+ IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
private:
const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
#include "Backend.h"
-#include <util/logging.h>
-
extern "C" {
-onert::backend::Backend *onert_backend_create()
-{
- VERBOSE(onert_backend_create) << "'cpu' loaded\n";
- return new onert::backend::cpu::Backend;
-}
-void onert_backend_destroy(onert::backend::Backend *backend)
-{
- VERBOSE(onert_backend_create) << "'cpu' unloaded\n";
- delete backend;
-}
+onert::backend::Backend *onert_backend_create() { return new onert::backend::cpu::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
}
case ir::DataType::UINT8:
TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t);
break;
+ case ir::DataType::QUANT_INT8_ASYMM:
+ TF_LITE_ARG_MIN_MAX(int8_t, int32_t, int32_t);
+ break;
case ir::DataType::INT32:
TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int32_t);
break;
case ir::DataType::UINT8:
TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t);
break;
+ case ir::DataType::QUANT_INT8_ASYMM:
+ TF_LITE_ARG_MIN_MAX(int8_t, int32_t, int64_t);
+ break;
case ir::DataType::INT32:
TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int64_t);
break;
void BatchMatMulLayer::run()
{
- if (_lhs->data_type() == OperandType::FLOAT32)
+ if ((_lhs->data_type() == OperandType::FLOAT32) && (_rhs->data_type() == OperandType::FLOAT32))
{
batchMatMulFloat32();
}
void ConcatLayer::run()
{
- if (_output->data_type() == OperandType::FLOAT32)
+ switch (_output->data_type())
{
- concatenationGeneral<float>();
+ case OperandType::FLOAT32:
+ concatenationGeneral<float>();
+ break;
+ case OperandType::QUANT_UINT8_ASYMM:
+ concatenationQuant8();
+ break;
+ case OperandType::QUANT_INT8_ASYMM:
+ concatenationGeneral<int8_t>();
+ break;
+ case OperandType::INT32:
+ concatenationGeneral<int32_t>();
+ break;
+ case OperandType::INT64:
+ concatenationGeneral<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Concat: unsupported data type");
}
- else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- concatenationQuant8();
- }
- else if (_output->data_type() == OperandType::INT32)
- {
- concatenationGeneral<int32_t>();
- }
- else if (_output->data_type() == OperandType::INT64)
- {
- concatenationGeneral<int64_t>();
- }
- else
- throw std::runtime_error("Concat: unsupported data type");
}
} // namespace ops
_prepare = true;
}
-#undef ANDROID_NN_CONV_PARAMETERS
-
} // namespace ops
} // namespace cpu
} // namespace backend
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthToSpaceLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/DepthToSpace.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+DepthToSpaceLayer::DepthToSpaceLayer() : _input(nullptr), _block_size(0), _output(nullptr)
+{
+ // DO NOTHING
+}
+
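+// Rearranges data from the channel dimension into spatial blocks of size _block_size,
+// delegating the actual computation to the cker DepthToSpace kernel.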
+template <typename T> void DepthToSpaceLayer::depthToSpace()
+{
+ nnfw::cker::DepthToSpace(getTensorShape(_input), reinterpret_cast<const T *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()),
+ _block_size);
+}
+
+void DepthToSpaceLayer::configure(const IPortableTensor *input, const int32_t block_size,
+ IPortableTensor *output)
+{
+ _input = input;
+ _block_size = block_size;
+ _output = output;
+}
+
+void DepthToSpaceLayer::run()
+{
+ switch (_input->data_type())
+ {
+ case OperandType::FLOAT32:
+ depthToSpace<float>();
+ break;
+ case OperandType::INT32:
+ depthToSpace<int32_t>();
+ break;
+ case OperandType::INT64:
+ depthToSpace<int64_t>();
+ break;
+ case OperandType::QUANT_UINT8_ASYMM:
+ depthToSpace<uint8_t>();
+ break;
+ case OperandType::QUANT_INT8_ASYMM:
+ depthToSpace<int8_t>();
+ break;
+ default:
+ throw std::runtime_error{"DepthToSpace: unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+class DepthToSpaceLayer : public ::onert::exec::IFunction
+{
+public:
+ DepthToSpaceLayer();
+
+ void configure(const IPortableTensor *input, const int32_t block_size, IPortableTensor *output);
+
+ void run() override;
+
+private:
+ template <typename T> void depthToSpace();
+
+ const IPortableTensor *_input;
+ int32_t _block_size;
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
- nnfw::cker::DepthwiseConv(
+ nnfw::cker::DepthwiseConv<float, float>(
op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
+ _external_context->ruy_context());
}
void DepthwiseConvolutionLayer::convQuant8()
op_params.quantized_activation_min = output_activation_min;
op_params.quantized_activation_max = output_activation_max;
- nnfw::cker::DepthwiseConv(
+ nnfw::cker::DepthwiseConv<uint8_t, int32_t>(
op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
getTensorShape(_kernel), reinterpret_cast<const uint8_t *>(_kernel->buffer()),
getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()),
+ _external_context->ruy_context());
}
void DepthwiseConvolutionLayer::configure(
const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight,
const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight,
- const ir::Activation activation, IPortableTensor *output)
+ const ir::Activation activation, IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context)
{
_input = input;
_kernel = kernel;
_dilationHeight = dilationHeight;
_activation = activation;
_output = output;
+ _external_context = external_context;
}
void DepthwiseConvolutionLayer::run()
#include <backend/IPortableTensor.h>
#include "OperationUtils.h"
+#include "../ExternalContext.h"
#include <exec/IFunction.h>
const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
const uint32_t multiplier, const uint32_t dilationWidth,
const uint32_t dilationHeight, const ir::Activation activation,
- IPortableTensor *output);
+ IPortableTensor *output, const std::shared_ptr<ExternalContext> &external_context);
void run() override;
uint32_t _dilationHeight{1};
ir::Activation _activation{ir::Activation::NONE};
+
+ std::shared_ptr<ExternalContext> _external_context;
};
} // namespace ops
#include "OperationUtils.h"
+#include <cker/operation/ELU.h>
+#include <cker/operation/LeakyReLU.h>
#include <cker/operation/Logistic.h>
#include <cker/operation/ReLU.h>
#include <cker/operation/ReLU6.h>
switch (op_type)
{
+ case ElementwiseActivationType::kElu:
+ if (input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::ELU(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(Elu): unsupported data type"};
+ }
+ break;
case ElementwiseActivationType::kLogistic:
if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"};
}
break;
+ case ElementwiseActivationType::kLeakyReLU:
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = [alpha](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::LeakyReLU(nnfw::cker::LeakyReluParams{alpha}, getTensorShape(input),
+ reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output),
+ reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(LeakyReLU): unsupported data type"};
+ }
+ break;
default:
throw std::runtime_error("ElementwiseActivationLayer: unsupported op type");
}
enum class ElementwiseActivationType
{
+ kElu,
kLogistic,
kReLU,
- kTanh
+ kTanh,
+ kLeakyReLU
};
class ElementwiseActivationLayer : public ::onert::exec::IFunction
#include "OperationUtils.h"
+#include <cker/operation/LogicalAnd.h>
#include <cker/operation/LogicalOr.h>
#include <cker/operation/MaxMin.h>
namespace
{
template <typename T>
+void logicalAndGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output)
+{
+ if (!HaveSameShapes(lhs, rhs))
+ {
+ nnfw::cker::LogicalAndBroadcast<T>(
+ getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs),
+ reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+ }
+ else
+ {
+ nnfw::cker::LogicalAndElementwise<T>(
+ getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer()));
+ }
+}
+
+template <typename T>
void logicalOrGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
IPortableTensor *output)
{
switch (op_type)
{
+ case ElementwiseBinaryType::kLogicalAnd:
+ if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
+ {
+ _kernel = logicalAndGeneric<bool>;
+ }
+ else
+ {
+ throw std::runtime_error{"LogicalOr: Unsupported data type"};
+ }
+ break;
case ElementwiseBinaryType::kLogicalOr:
if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
{
getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
}
+void sqrtFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Sqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void squareFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Square(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
template <typename T> void zerosLikeFloat32(const IPortableTensor *input, IPortableTensor *output)
{
if (!HaveSameShapes(input, output))
throw std::runtime_error{"Sin: Unsupported data type"};
}
break;
+ case ElementwiseUnaryType::kSqrt:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = sqrtFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Sqrt: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kSquare:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = squareFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Square: Unsupported data type"};
+ }
+ break;
case ElementwiseUnaryType::kZerosLike:
if (input->data_type() == OperandType::FLOAT32)
{
kRound,
kRSqrt,
kSin,
+ kSqrt,
+ kSquare,
kZerosLike
};
namespace ops
{
-ExpandDimsLayer::ExpandDimsLayer() : _input(nullptr), _axis(nullptr), _output(nullptr)
+ExpandDimsLayer::ExpandDimsLayer() : _input(nullptr), _output(nullptr)
{
// DO NOTHING
}
-void ExpandDimsLayer::configure(const IPortableTensor *input, const IPortableTensor *axis,
- IPortableTensor *output)
+void ExpandDimsLayer::configure(const IPortableTensor *input, IPortableTensor *output)
{
_input = input;
- _axis = axis;
_output = output;
}
void ExpandDimsLayer::run()
{
- // TODO use _axis to calculate shape of output when _axis is not constant
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
}
ExpandDimsLayer();
public:
- void configure(const IPortableTensor *input, const IPortableTensor *axis,
- IPortableTensor *output);
+ void configure(const IPortableTensor *input, IPortableTensor *output);
void run() override;
private:
const IPortableTensor *_input;
- const IPortableTensor *_axis;
IPortableTensor *_output;
};
namespace ops
{
-FillLayer::FillLayer() : _input(nullptr), _value(nullptr), _output(nullptr)
+FillLayer::FillLayer() : _value(nullptr), _output(nullptr)
{
// DO NOTHING
}
-void FillLayer::configure(const IPortableTensor *input, const IPortableTensor *value,
- IPortableTensor *output)
+void FillLayer::configure(const IPortableTensor *value, IPortableTensor *output)
{
- _input = input;
_value = value;
_output = output;
}
switch (_output->data_type())
{
case OperandType::FLOAT32:
- nnfw::cker::Fill<float *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()),
- reinterpret_cast<float *>(_value->buffer()),
+ nnfw::cker::Fill<float *>(reinterpret_cast<float *>(_value->buffer()),
getTensorShape(_output),
reinterpret_cast<float *>(_output->buffer()));
break;
case OperandType::INT32:
- nnfw::cker::Fill<int32_t *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()),
- reinterpret_cast<int32_t *>(_value->buffer()),
+ nnfw::cker::Fill<int32_t *>(reinterpret_cast<int32_t *>(_value->buffer()),
getTensorShape(_output),
reinterpret_cast<int32_t *>(_output->buffer()));
break;
case OperandType::INT64:
- nnfw::cker::Fill<int64_t *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()),
- reinterpret_cast<int64_t *>(_value->buffer()),
+ nnfw::cker::Fill<int64_t *>(reinterpret_cast<int64_t *>(_value->buffer()),
getTensorShape(_output),
reinterpret_cast<int64_t *>(_output->buffer()));
break;
case OperandType::UINT32:
- nnfw::cker::Fill<uint32_t *>(
- getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()),
- reinterpret_cast<uint32_t *>(_value->buffer()), getTensorShape(_output),
- reinterpret_cast<uint32_t *>(_output->buffer()));
+ nnfw::cker::Fill<uint32_t *>(reinterpret_cast<uint32_t *>(_value->buffer()),
+ getTensorShape(_output),
+ reinterpret_cast<uint32_t *>(_output->buffer()));
break;
default:
throw std::runtime_error{"Fill: unsupported data type"};
public:
FillLayer();
- void configure(const IPortableTensor *input, const IPortableTensor *value,
- IPortableTensor *output);
+ void configure(const IPortableTensor *value, IPortableTensor *output);
void run() override;
private:
- const IPortableTensor *_input;
const IPortableTensor *_value;
IPortableTensor *_output;
};
void MeanLayer::MeanFloat32()
{
- nnfw::cker::Mean(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
- getReducerAxes(_axes));
+ const auto inputShape = getTensorShape(_input);
+ const auto axisVec = getReducerAxes(_axes);
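+  // Use cker's specialized MeanAxis1And2 kernel when reducing over the H and W axes (1 and 2)
+  // of a 4-D NHWC input with keep_dims; otherwise fall back to the generic Mean kernel.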
+ bool axis_is_1_and_2 =
+ _keep_dims && inputShape.DimensionsCount() == 4 && axisVec.size() == 2 &&
+ ((axisVec[0] == 1 && axisVec[1] == 2) || (axisVec[0] == 2 && axisVec[1] == 1));
+
+ if (axis_is_1_and_2)
+ {
+ nnfw::cker::MeanAxis1And2(inputShape, reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_output),
+ reinterpret_cast<float *>(_output->buffer()));
+ }
+ else
+ {
+ nnfw::cker::Mean(inputShape, reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
+ axisVec);
+ }
}
void MeanLayer::MeanQuant8()
_axes = axes;
_output = output;
_keep_dims = keep_dims;
+
+ if (_input->data_type() != OperandType::FLOAT32 &&
+ _input->data_type() != OperandType::QUANT_UINT8_ASYMM)
+ throw std::runtime_error{"Mean: unsupported data type"};
}
void MeanLayer::run()
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_BACKEND_H__
+#define __ONERT_BACKEND_RUY_BACKEND_H__
+
+#include "BackendContext.h"
+#include "Config.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+
+#include <backend/Backend.h>
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class Backend : public ::onert::backend::Backend
+{
+public:
+ Backend() : _config{std::make_shared<Config>()} {}
+
+ std::shared_ptr<IConfig> config() const override { return _config; }
+
+ std::unique_ptr<onert::backend::BackendContext>
+ newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb,
+ bool) const override
+ {
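+    // The tensor registry is shared by the tensor builder, constant initializer and kernel
+    // generator created below.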
+ const auto &operands = graph.operands();
+ const auto &operations = graph.operations();
+ auto context = std::make_unique<BackendContext>(this, &graph);
+ auto tr = std::make_shared<cpu_common::TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr);
+ context->tensor_registry = tr;
+ context->tensor_builder = tb;
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
+ context->external_context());
+ return context;
+ }
+
+private:
+ std::shared_ptr<IConfig> _config;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_BACKEND_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include "backend/cpu_common/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+void BackendContext::initConsts()
+{
+ for (auto &op : operation_list())
+ {
+ constant_initializer->setLayout(op.layout);
+ graph()->operations().at(op.index).accept(*constant_initializer);
+ }
+
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerDefaultInitializer(ind, obj);
+ }
+ }
+
+ constant_initializer->run();
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info)
+{
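+  // Model inputs/outputs are skipped below; this backend does not register tensors for them.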
+ auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+ ir::Remove::DUPLICATED;
+ for (auto index : operand_list())
+ {
+ if (model_io.contains(index))
+ continue;
+ const auto &obj = graph()->operands().at(index);
+ const auto frontend_layout = [&]() {
+ if (obj.getUses().size() == 0)
+ return ir::Layout::UNKNOWN;
+ auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
+ for (auto &operation_info : operation_list())
+ {
+ if (operation_info.index == use_op_ind)
+ return operation_info.layout;
+ }
+ return ir::Layout::UNKNOWN;
+ }();
+ const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
+ if (permute_factor.backend() != backend())
+ continue;
+ const auto backend_layout = permute_factor.layout();
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+ }
+
+  // TODO Get compiler options from the compiler and use them rather than getting them from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ cpu_common::planTensors(*this, order, op_seqs, lower_info);
+ }
+ else
+ {
+    // For executors that do not have a fixed linear execution order:
+    // as a workaround, use the static memory planner so that tensors are never deallocated
+ for (auto ind : operand_list())
+ {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ tensor_builder->prepare();
+
+ return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs)
+{
+ FunctionMap ret;
+
+ for (auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
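+    // Generate kernels only for op sequences that contain at least one operation assigned to
+    // this backend.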
+ bool assigned = [&]() {
+ for (auto op_info : operation_list())
+ if (op_seq.exist(op_info.index))
+ return true;
+ return false;
+ }();
+ if (!assigned)
+ continue;
+ auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+ ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ }
+
+ initConsts();
+
+ // NOTE For memory optimization, we want to free some operand data
+ for (auto ind : operand_list())
+ {
+ // TODO Remove const_cast
+ auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+ obj.releaseData();
+ }
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+#include "ExternalContext.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}, _external_context(new ExternalContext)
+ {
+ }
+
+ ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info) override;
+
+ FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs) override;
+
+ std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+private:
+ void initConsts();
+ void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<ConstantInitializer> constant_initializer;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
+  // NOTE The ruy context owns a thread pool, so creating multiple ruy contexts also duplicates
+  // the thread pool.
+  // TODO Create one ruy context per session
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__
--- /dev/null
+set(LIB_ONERT_BACKEND_RUY onert_backend_ruy)
+
+nnfw_find_package(Ruy REQUIRED)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_library(${LIB_ONERT_BACKEND_RUY} SHARED ${SOURCES})
+
+target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_lib_ruy)
+target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE onert_core)
+target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_coverage)
+target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE ruy)
+
+set_target_properties(${LIB_ONERT_BACKEND_RUY} PROPERTIES OUTPUT_NAME backend_ruy)
+
+if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
+ add_custom_command(TARGET ${LIB_ONERT_BACKEND_RUY} POST_BUILD
+ COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_RUY}>)
+endif()
+
+install(TARGETS ${LIB_ONERT_BACKEND_RUY} DESTINATION lib)
* limitations under the License.
*/
-#include "Tensor.h"
+#include "Config.h"
namespace onert
{
namespace backend
{
-namespace cpu
+namespace ruy
{
-// `dynamic_cast` not working across library boundaries on NDK
-// With this as a key function, `dynamic_cast` works across dl
-ExternalTensor::~ExternalTensor() {}
+bool Config::initialize() { return true; }
+
+ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; }
-} // namespace cpu
+} // namespace ruy
} // namespace backend
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_CONFIG_H__
+#define __ONERT_BACKEND_RUY_CONFIG_H__
+
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/ITimer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class Config : public IConfig
+{
+public:
+ std::string id() override { return "ruy"; }
+ bool initialize() override;
+ ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override;
+ bool supportPermutation() override { return true; }
+ bool supportDynamicTensor() override { return true; }
+ bool supportFP16() override { return false; }
+
+ std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_CONFIG_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__
+
+#include <backend/cpu_common/ConstantInitializer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+using ConstantInitializer = cpu_common::ConstantInitializer;
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__
+
+#include <util/ConfigSource.h>
+#include <ruy/context.h>
+
+namespace
+{
+const int kDefaultNumThreadpoolThreads = 4;
+}
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class ExternalContext
+{
+public:
+ ExternalContext() : _ruy_context(new ::ruy::Context)
+ {
+ setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS));
+ }
+
+ void setMaxNumThreads(int max_num_threads)
+ {
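+    // A negative value (e.g. when the RUY_THREADS config is not set) selects the default
+    // thread-pool size.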
+ const int target_num_threads =
+ max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
+ _ruy_context->set_max_num_threads(target_num_threads);
+ }
+
+ ::ruy::Context *ruy_context() const { return _ruy_context.get(); }
+
+private:
+ const std::unique_ptr<::ruy::Context> _ruy_context;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "ops/ConvolutionLayer.h"
+#include "ops/FullyConnectedLayer.h"
+
+#include <backend/Backend.h>
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/Utils.h>
+#include <util/logging.h>
+#include <exec/DynamicShapeInferer.h>
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+KernelGenerator::KernelGenerator(
+ const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
+ _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+ _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
+{
+ // DO NOTHING
+}
+
+void KernelGenerator::visit(const ir::OpSequence &op_seq)
+{
+ assert(!_return_fn_seq);
+ assert(_tensor_builder->dynamicTensorManager());
+ assert(_tensor_reg);
+
+ auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
+
+ _return_fn_seq = std::make_unique<exec::FunctionSequence>();
+
+ // Prepare to handle dynamic tensors later
+ auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
+ {
+ dyn_ctx->op_seq = &op_seq;
+ dyn_ctx->operations = &_operations_ctx;
+ dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
+ dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
+
+ _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
+ }
+
+ _current_layout = op_seq.getLayout();
+ for (const auto &operation_idx : op_seq.operations())
+ {
+ const auto &node = _operations_ctx.at(operation_idx);
+ node.accept(*this);
+ _return_fn_seq->append(releaseFunction());
+
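+    // Every native input/output tensor of this operation gets its reference count increased;
+    // portable tensors are additionally checked to be in NHWC layout.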
+ for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
+ {
+ auto portable_tensor = _tensor_reg->getPortableTensor(ind);
+ if (portable_tensor)
+ {
+ assert(portable_tensor->layout() == ir::Layout::NHWC);
+ }
+
+ auto tensor = _tensor_reg->getNativeTensor(ind);
+ if (tensor)
+ {
+ tensor->increase_ref();
+ }
+ }
+ }
+}
+
+void KernelGenerator::visit(const ir::operation::Conv2D &node)
+{
+ using ir::operation::Conv2D;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
+
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
+
+ const auto stride = node.param().stride;
+ const auto activation = node.param().activation;
+ const auto param_padding = node.param().padding;
+ const auto dilation = node.param().dilation;
+ auto fn = std::make_unique<ops::ConvolutionLayer>();
+
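+  // When the input or kernel shape is dynamic, padding cannot be computed here, so the explicit
+  // padding parameters are forwarded as-is and resolution is deferred to the kernel.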
+ if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
+ {
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
+ param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
+ stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
+ activation, ofm_tensor, _external_context);
+
+ _return_fn = std::move(fn);
+ return;
+ }
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto &ker_shape = _ctx.at(ker_index).shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+
+ const auto padding =
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
+
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
+ padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
+ dilation.width_factor, dilation.height_factor, activation, ofm_tensor,
+ _external_context);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::FullyConnected &node)
+{
+ using ir::operation::FullyConnected;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+ const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
+ const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
+ const auto activation = node.param().activation;
+ const auto weights_format = node.param().weights_format;
+
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
+ auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
+
+ auto fn = std::make_unique<ops::FullyConnectedLayer>();
+
+ fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor,
+ _external_context);
+
+ _return_fn = std::move(fn);
+}
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__
+
+#include "ExternalContext.h"
+#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"
+#include "Tensor.h"
+
+#include <backend/CustomKernelBuilder.h>
+#include <backend/cpu_common/KernelGeneratorBase.h>
+#include <ir/Operands.h>
+#include <ir/Operations.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class KernelGenerator : public cpu_common::KernelGeneratorBase
+{
+public:
+ KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ void visit(const ir::OpSequence &) override;
+ void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+
+private:
+ const ir::Operands &_ctx;
+ const ir::Operations &_operations_ctx;
+ std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+ std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
+ ir::Layout _current_layout;
+ const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__
+
+#include "backend/cpu_common/StaticTensorManager.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+using StaticTensorManager = cpu_common::StaticTensorManager;
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_TENSOR_H__
+#define __ONERT_BACKEND_RUY_TENSOR_H__
+
+#include <backend/cpu_common/Tensor.h>
+#include <ir/Data.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+using Tensor = cpu_common::Tensor;
+using ExternalTensor = cpu_common::ExternalTensor;
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_TENSOR_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorBuilder.h"
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
+ : _tensor_reg{tensor_reg},
+ _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
+ _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
+{
+ /* empty */
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout layout)
+{
+ _tensor_info_map.emplace(ind, info);
+
+  // The ruy backend supports only the NHWC layout
+ assert(layout == ir::Layout::NHWC);
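+  // Dynamic tensors are built through the dynamic tensor manager; static ones are planned and
+  // allocated by the static tensor manager.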
+ if (info.isDynamic())
+ {
+ _dynamic_tensor_mgr->buildTensor(ind, info, layout);
+ }
+ else
+ {
+ _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
+ }
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+ assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
+ const auto tensor_info = _tensor_info_map.at(ind);
+
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+ {
+ const auto size = tensor_info.total_size();
+ _static_tensor_mgr->claimPlan(ind, size);
+ }
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+ {
+ _static_tensor_mgr->releasePlan(ind);
+ }
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+ return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
+
+void TensorBuilder::allocate()
+{
+  // NOTE Nothing to do for now. Allocation is done in the prepare stage, which is not ideal,
+  // because CPU kernels require `ITensor`s to be allocated before kernel generation.
+}
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
+
+#include <backend/cpu_common/DynamicTensorManager.h>
+#include <backend/cpu_common/TensorRegistry.h>
+
+#include <ir/OperandIndexMap.h>
+
+#include "StaticTensorManager.h"
+#include "Tensor.h"
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class TensorBuilder
+{
+public:
+ TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
+
+ /**
+ * @brief Register tensor information to allocate on CPU backend
+ * @param[in] ind Operand index
+ * @param[in] info Operand information
+ * @param[in] layout Operand data layout
+ */
+ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout);
+
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
+
+ bool isRegistered(const ir::OperandIndex &) const;
+
+ void prepare(void);
+ void allocate();
+  void postFunctionPrepare() { /* DO NOTHING */ }
+
+ IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
+
+private:
+ const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+ std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
+ std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
+ ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvolutionLayer.h"
+
+#include "../Tensor.h"
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+ConvolutionLayer::ConvolutionLayer()
+ : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
+ _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
+ _dilationHeightFactor(1), _activation(ir::Activation::NONE),
+ _conv_kernel(new nnfw::ruy::Conv()), _prepare(false)
+{
+ // DO NOTHING
+}
+
+ConvolutionLayer::~ConvolutionLayer() = default;
+
+void ConvolutionLayer::convFloat32()
+{
+ float output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
+
+ nnfw::ruy::ConvParams op_params;
+ op_params.padding_type = getPaddingType(_paddingType);
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = _dilationWidthFactor;
+ op_params.dilation_height_factor = _dilationHeightFactor;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ nnfw::ruy::Conv &kernel = *_conv_kernel;
+ kernel(op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
+ getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
+ _external_context->ruy_context());
+}
+
+void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel,
+ const IPortableTensor *bias, const ir::PaddingType paddingType,
+ const uint32_t paddingLeft, const uint32_t paddingRight,
+ const uint32_t paddingTop, const uint32_t paddingBottom,
+ const uint32_t strideWidth, const uint32_t strideHeight,
+ const uint32_t dilationWidthFactor,
+ const uint32_t dilationHeightFactor,
+ const ir::Activation activation, IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context)
+{
+ _input = input;
+ _kernel = kernel;
+ _bias = bias;
+ _paddingType = paddingType;
+ _paddingLeft = paddingLeft;
+ _paddingRight = paddingRight;
+ _paddingTop = paddingTop;
+ _paddingBottom = paddingBottom;
+ _strideWidth = strideWidth;
+ _strideHeight = strideHeight;
+ _dilationWidthFactor = dilationWidthFactor;
+ _dilationHeightFactor = dilationHeightFactor;
+ _activation = activation;
+ _output = output;
+ _external_context = external_context;
+}
+
+void ConvolutionLayer::run()
+{
+ prepare();
+
+ if (_input->is_dynamic() || _kernel->is_dynamic())
+ {
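+    // The input/kernel shapes may have changed at runtime, so recompute the explicit padding
+    // values from the current shapes before running the kernel.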
+ const auto ifm_shape = _input->getShape().asFeature(_input->layout());
+ const auto ofm_shape = _output->getShape().asFeature(_input->layout());
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto ker_shape = _kernel->getShape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+
+ ir::Stride stride;
+    stride.vertical = _strideHeight;
+    stride.horizontal = _strideWidth;
+
+ ir::Padding param_padding;
+ param_padding.type = _paddingType;
+ param_padding.param.left = _paddingLeft;
+ param_padding.param.right = _paddingRight;
+ param_padding.param.top = _paddingTop;
+ param_padding.param.bottom = _paddingBottom;
+
+ const auto padding =
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ _dilationWidthFactor, _dilationHeightFactor);
+
+ _paddingLeft = padding.left;
+ _paddingRight = padding.right;
+ _paddingTop = padding.top;
+ _paddingBottom = padding.bottom;
+ }
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ convFloat32();
+ }
+ else
+ {
+ throw std::runtime_error{"Conv: unsupported data type"};
+ }
+}
+
+void ConvolutionLayer::prepare()
+{
+ if (_prepare)
+ return;
+
+ nnfw::ruy::Conv &kernel = *_conv_kernel;
+ if (_input->data_type() == OperandType::FLOAT32 && _kernel->is_constant())
+ {
+ kernel.prepare(getTensorShape(_input), getTensorShape(_kernel), getTensorShape(_output),
+ _strideWidth, _strideHeight, _dilationWidthFactor, _dilationHeightFactor);
+ }
+ _prepare = true;
+}
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__
+#define __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include "../ExternalContext.h"
+#include "OperationUtils.h"
+
+#include <ruy/operation/Conv.h>
+#include <exec/IFunction.h>
+#include <functional>
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+
+class ConvolutionLayer : public ::onert::exec::IFunction
+{
+public:
+ ConvolutionLayer();
+ ~ConvolutionLayer();
+
+public:
+ void convFloat32();
+
+ void configure(const IPortableTensor *input, const IPortableTensor *kernel,
+                 const IPortableTensor *bias, const ir::PaddingType paddingType,
+ const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t dilationWidthFactor,
+ const uint32_t dilationHeightFactor, const ir::Activation activation,
+ IPortableTensor *output, const std::shared_ptr<ExternalContext> &external_context);
+
+ void run() override;
+
+ void prepare() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_kernel;
+ const IPortableTensor *_bias;
+ IPortableTensor *_output;
+
+ ir::PaddingType _paddingType;
+ uint32_t _paddingLeft;
+ uint32_t _paddingTop;
+ uint32_t _paddingRight;
+ uint32_t _paddingBottom;
+
+ uint32_t _strideWidth;
+ uint32_t _strideHeight;
+ uint32_t _dilationWidthFactor;
+ uint32_t _dilationHeightFactor;
+
+ ir::Activation _activation;
+
+ std::unique_ptr<nnfw::ruy::Conv> _conv_kernel;
+
+ bool _prepare;
+
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FullyConnectedLayer.h"
+
+#include "../Tensor.h"
+#include <ruy/operation/FullyConnected.h>
+#include <ruy/TensorUtils.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+
+FullyConnectedLayer::FullyConnectedLayer()
+ : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
+ _activation(ir::Activation::NONE), _external_context(nullptr)
+{
+ // DO NOTHING
+}
+
+FullyConnectedLayer::~FullyConnectedLayer() = default;
+
+void FullyConnectedLayer::fullyConnectedFloat32()
+{
+ float output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
+ nnfw::ruy::FullyConnectedParams op_params;
+
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+ op_params.activation = convertActivationType(_activation);
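+  // Constant operands are marked cacheable so ruy can reuse pre-packed data across runs.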
+ op_params.lhs_cacheable = _weights->is_constant();
+ op_params.rhs_cacheable = _input->is_constant();
+
+ nnfw::ruy::FullyConnected(
+ op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
+ getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
+ _external_context->ruy_context());
+}
+
+void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
+ const IPortableTensor *bias, ir::Activation activation,
+ ir::FullyConnectedWeightsFormat weights_format,
+ IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context)
+{
+ UNUSED_RELEASE(weights_format);
+ _input = input;
+ _weights = weights;
+ _bias = bias;
+ _activation = activation;
+ _output = output;
+ _external_context = external_context;
+}
+
+void FullyConnectedLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ fullyConnectedFloat32();
+ }
+ else
+ {
+ throw std::runtime_error{"FullyConnected: unsupported data type"};
+ }
+}
+
+void FullyConnectedLayer::prepare()
+{
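+  // A constant all-zero bias is dropped so that bias addition is skipped at run time.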
+ if (_bias && _bias->is_constant())
+ {
+ const int bias_size = getTensorShape(_bias).FlatSize();
+ if (nnfw::ruy::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size))
+ {
+ _bias = nullptr;
+ }
+ }
+}
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__
+#define __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include "../ExternalContext.h"
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+
+class FullyConnectedLayer : public ::onert::exec::IFunction
+{
+public:
+ FullyConnectedLayer();
+ ~FullyConnectedLayer();
+
+public:
+ void fullyConnectedFloat32();
+
+ void configure(const IPortableTensor *input, const IPortableTensor *weights,
+ const IPortableTensor *bias, ir::Activation activation,
+ ir::FullyConnectedWeightsFormat weights_format, IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ void run() override;
+
+ void prepare() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_weights;
+ const IPortableTensor *_bias;
+ IPortableTensor *_output;
+
+ ir::Activation _activation;
+
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationUtils.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+
+nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type)
+{
+ switch (ir_padding_type)
+ {
+ case ir::PaddingType::EXPLICIT:
+ return nnfw::ruy::PaddingType::kNone;
+ case ir::PaddingType::SAME:
+ return nnfw::ruy::PaddingType::kSame;
+ case ir::PaddingType::VALID:
+ return nnfw::ruy::PaddingType::kValid;
+ default:
+ throw std::runtime_error("Wrong padding type.");
+ }
+}
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__
+#define __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <ruy/Shape.h>
+#include <ruy/Types.h>
+#include <iostream>
+#include <ir/DataType.h>
+#include <ir/InternalType.h>
+#include <ir/Padding.h>
+
+#include <limits>
+
+using OperandType = onert::ir::DataType;
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+
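+// Converts an IPortableTensor shape into the nnfw::ruy::Shape used by the kernels.
+// A null tensor (e.g. an omitted bias) maps to an empty shape.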
+inline nnfw::ruy::Shape getTensorShape(const IPortableTensor *tensor)
+{
+ if (tensor == nullptr)
+ return nnfw::ruy::Shape();
+
+ const ir::Shape &shape = tensor->get_info().shape();
+
+ assert(tensor->layout() == ir::Layout::NHWC);
+
+ auto rank = shape.rank();
+ nnfw::ruy::Shape ret(rank);
+ auto data = ret.DimsData();
+ for (int i = 0; i < rank; ++i)
+ {
+ data[i] = shape.dim(i);
+ }
+ return ret;
+}
+
+inline nnfw::ruy::FusedActivationFunctionType convertActivationType(const ir::Activation activation)
+{
+ switch (activation)
+ {
+ case ir::Activation::NONE:
+ return nnfw::ruy::FusedActivationFunctionType::kNone;
+ case ir::Activation::RELU:
+ return nnfw::ruy::FusedActivationFunctionType::kRelu;
+ case ir::Activation::RELU1:
+ return nnfw::ruy::FusedActivationFunctionType::kRelu1;
+ case ir::Activation::RELU6:
+ return nnfw::ruy::FusedActivationFunctionType::kRelu6;
+ case ir::Activation::TANH:
+ return nnfw::ruy::FusedActivationFunctionType::kTanh;
+ case ir::Activation::SIGMOID:
+ return nnfw::ruy::FusedActivationFunctionType::kSigmoid;
+ default:
+ throw std::runtime_error{"RUY backend: Cannot convert activation type"};
+ }
+}
+
+template <typename T>
+void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
+{
+ if (activation == ir::Activation::RELU)
+ {
+ *activation_min = 0;
+ *activation_max = std::numeric_limits<T>::max();
+ }
+ else if (activation == ir::Activation::RELU6)
+ {
+ *activation_min = 0;
+ *activation_max = 6;
+ }
+ else if (activation == ir::Activation::RELU1)
+ {
+ *activation_min = -1;
+ *activation_max = 1;
+ }
+ else if (activation == ir::Activation::SIGMOID)
+ {
+ *activation_min = 0;
+ *activation_max = 1;
+ }
+ else if (activation == ir::Activation::NONE)
+ {
+ *activation_min = std::numeric_limits<T>::lowest();
+ *activation_max = std::numeric_limits<T>::max();
+ }
+ else
+ {
+ std::cout << "Unsupported fused activation function." << std::endl;
+ }
+}
+
+nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type);
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Backend.h"
+
+extern "C" {
+
+onert::backend::Backend *onert_backend_create() { return new onert::backend::ruy::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_BACKEND_H__
+#define __ONERT_BACKEND_XNNPACK_BACKEND_H__
+
+#include "BackendContext.h"
+#include "Config.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+
+#include <backend/Backend.h>
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class Backend : public ::onert::backend::Backend
+{
+public:
+ Backend() : _config{std::make_shared<Config>()} {}
+
+ std::shared_ptr<IConfig> config() const override { return _config; }
+
+ std::unique_ptr<onert::backend::BackendContext>
+ newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb,
+ bool) const override
+ {
+ const auto &operands = graph.operands();
+ const auto &operations = graph.operations();
+ auto context = std::make_unique<BackendContext>(this, &graph);
+ auto tr = std::make_shared<cpu_common::TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr);
+ context->tensor_registry = tr;
+ context->tensor_builder = tb;
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
+ context->external_context());
+ return context;
+ }
+
+private:
+ std::shared_ptr<IConfig> _config;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_BACKEND_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include "backend/cpu_common/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+void BackendContext::initConsts()
+{
+ for (auto &op : operation_list())
+ {
+ constant_initializer->setLayout(op.layout);
+ graph()->operations().at(op.index).accept(*constant_initializer);
+ }
+
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerDefaultInitializer(ind, obj);
+ }
+ }
+
+ constant_initializer->run();
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info)
+{
+ auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+ ir::Remove::DUPLICATED;
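+  // Model inputs/outputs are skipped; this backend does not register tensors for them.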
+ for (auto index : operand_list())
+ {
+ if (model_io.contains(index))
+ continue;
+ const auto &obj = graph()->operands().at(index);
+ const auto frontend_layout = [&]() {
+ if (obj.getUses().size() == 0)
+ return ir::Layout::UNKNOWN;
+ auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
+ for (auto &operation_info : operation_list())
+ {
+ if (operation_info.index == use_op_ind)
+ return operation_info.layout;
+ }
+ return ir::Layout::UNKNOWN;
+ }();
+ const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
+ if (permute_factor.backend() != backend())
+ continue;
+ const auto backend_layout = permute_factor.layout();
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+ }
+
+  // TODO Get compiler options from the compiler and use them instead of reading them from the environment
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ cpu_common::planTensors(*this, order, op_seqs, lower_info);
+ }
+ else
+ {
+    // For executors that do not have a fixed linear execution order, claim every registered
+    // tensor up front as a workaround so that the static memory planner never deallocates them.
+ for (auto ind : operand_list())
+ {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ tensor_builder->prepare();
+
+ return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs)
+{
+ FunctionMap ret;
+
+ for (auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ bool assigned = [&]() {
+ for (auto op_info : operation_list())
+ if (op_seq.exist(op_info.index))
+ return true;
+ return false;
+ }();
+ if (!assigned)
+ continue;
+ auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+ ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ }
+
+ initConsts();
+
+ // NOTE For memory optimization, we want to free some operand data
+ for (auto ind : operand_list())
+ {
+ // TODO Remove const_cast
+ auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+ obj.releaseData();
+ }
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include <util/ConfigSource.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+#include "ExternalContext.h"
+
+namespace
+{
+const int kDefaultNumThreadpoolThreads = 1;
+}
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}, _external_context(nullptr)
+ {
+ int num_threads = util::getConfigInt(util::config::XNNPACK_THREADS);
+ if (num_threads < 1)
+ num_threads = kDefaultNumThreadpoolThreads; // default num of threads
+ _external_context.reset(new ExternalContext(static_cast<size_t>(num_threads)));
+ }
+
+ ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info) override;
+
+ FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs) override;
+
+ std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+private:
+ void initConsts();
+ void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<ConstantInitializer> constant_initializer;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__
--- /dev/null
+set(LIB_ONERT_BACKEND_XNNPACK onert_backend_xnnpack)
+
+# Skip this backend when XNNPACK is not available (e.g. on unsupported architectures)
+nnfw_find_package(Xnnpack QUIET)
+if(NOT Xnnpack_FOUND)
+ return()
+endif(NOT Xnnpack_FOUND)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_library(${LIB_ONERT_BACKEND_XNNPACK} SHARED ${SOURCES})
+
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE onert_core)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE nnfw_coverage)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE pthreadpool)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE XNNPACK)
+
+set_target_properties(${LIB_ONERT_BACKEND_XNNPACK} PROPERTIES OUTPUT_NAME backend_xnnpack)
+
+if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
+ add_custom_command(TARGET ${LIB_ONERT_BACKEND_XNNPACK} POST_BUILD
+ COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_XNNPACK}>)
+endif()
+
+install(TARGETS ${LIB_ONERT_BACKEND_XNNPACK} DESTINATION lib)
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_I_OPTIMIZER_H__
-#define __ONERT_BACKEND_I_OPTIMIZER_H__
+#include "Config.h"
-namespace onert
-{
-namespace ir
-{
-class LoweredGraph;
-}
-} // namespace onert
+#include <xnnpack.h>
namespace onert
{
namespace backend
{
+namespace xnnpack
+{
-/**
- * @brief Class for backend optimizations. This is an optional class so not all backends must have
- * it.
- *
- */
-struct IOptimizer
+Config::~Config() { xnn_deinitialize(); }
+
+bool Config::initialize()
{
- virtual ~IOptimizer() = default;
- /**
- * @brief Run optimization
- *
- */
- virtual void optimize() = 0;
-};
+ xnn_status status = xnn_initialize(nullptr /* allocator */);
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to initialize XNNPACK"};
+ }
+ return true;
+}
+
+ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; }
+} // namespace xnnpack
} // namespace backend
} // namespace onert
-
-#endif // __ONERT_BACKEND_I_OPTIMIZER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_CONFIG_H__
+#define __ONERT_BACKEND_XNNPACK_CONFIG_H__
+
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/ITimer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class Config : public IConfig
+{
+public:
+ virtual ~Config();
+
+public:
+ std::string id() override { return "xnnpack"; }
+ bool initialize() override;
+ ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override;
+ bool supportPermutation() override { return true; }
+ bool supportDynamicTensor() override { return true; }
+ bool supportFP16() override { return false; }
+
+ std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_CONFIG_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
+
+#include <backend/cpu_common/ConstantInitializer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+using ConstantInitializer = cpu_common::ConstantInitializer;
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_IEXTERNAL_CONTEXT_H__
-#define __ONERT_BACKEND_IEXTERNAL_CONTEXT_H__
+#include "ExternalContext.h"
+
+#include <cassert>
namespace onert
{
namespace backend
{
+namespace xnnpack
+{
-struct IExternalContext
+ExternalContext::ExternalContext(size_t num_threads)
+ : _threadpool(pthreadpool_create(num_threads), pthreadpool_destroy)
{
- virtual ~IExternalContext() = default;
- virtual void setMaxNumThreads(int) = 0;
-};
+ assert(_threadpool);
+}
+} // namespace xnnpack
} // namespace backend
} // namespace onert
-
-#endif // __ONERT_BACKEND_IEXTERNAL_CONTEXT__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__
+
+#include <memory>
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class ExternalContext
+{
+public:
+ ExternalContext(size_t num_threads);
+
+public:
+ pthreadpool *getThreadPool() { return _threadpool.get(); }
+
+private:
+ std::unique_ptr<pthreadpool, decltype(&pthreadpool_destroy)> _threadpool;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "ops/ConvolutionLayer.h"
+#include "ops/DepthwiseConvolutionLayer.h"
+#include "ops/FullyConnectedLayer.h"
+
+#include <backend/Backend.h>
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/Utils.h>
+#include <util/logging.h>
+#include <exec/DynamicShapeInferer.h>
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+KernelGenerator::KernelGenerator(
+ const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
+ _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+ _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
+{
+ // DO NOTHING
+}
+
+void KernelGenerator::visit(const ir::OpSequence &op_seq)
+{
+ assert(!_return_fn_seq);
+ assert(_tensor_builder->dynamicTensorManager());
+ assert(_tensor_reg);
+
+ auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
+
+ _return_fn_seq = std::make_unique<exec::FunctionSequence>();
+
+ // Prepare to handle dynamic tensors later
+ auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
+ {
+ dyn_ctx->op_seq = &op_seq;
+ dyn_ctx->operations = &_operations_ctx;
+ dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
+ dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
+
+ _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
+ }
+
+ _current_layout = op_seq.getLayout();
+ for (const auto &operation_idx : op_seq.operations())
+ {
+ const auto &node = _operations_ctx.at(operation_idx);
+ node.accept(*this);
+ _return_fn_seq->append(releaseFunction());
+
+ for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
+ {
+ auto portable_tensor = _tensor_reg->getPortableTensor(ind);
+ if (portable_tensor)
+ {
+ assert(portable_tensor->layout() == ir::Layout::NHWC);
+ }
+
+ auto tensor = _tensor_reg->getNativeTensor(ind);
+ if (tensor)
+ {
+ tensor->increase_ref();
+ }
+ }
+ }
+}
+
+void KernelGenerator::visit(const ir::operation::Conv2D &node)
+{
+ using ir::operation::Conv2D;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
+
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
+
+ const auto stride = node.param().stride;
+ const auto activation = node.param().activation;
+ const auto param_padding = node.param().padding;
+ const auto dilation = node.param().dilation;
+ auto fn = std::make_unique<ops::ConvolutionLayer>(_external_context);
+
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto &ker_shape = _ctx.at(ker_index).shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+
+ const auto padding =
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
+
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
+ padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
+ dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
+{
+ using ir::operation::DepthwiseConv2D;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
+
+ const auto stride = node.param().stride;
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+ // Kernel format is [1, kernel_height, kernel_width, depth_out].
+ const auto &ker_shape = _ctx.at(ker_index).shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+ const auto dilation_width = node.param().dilation.width_factor;
+ const auto dilation_height = node.param().dilation.height_factor;
+ const auto param_padding = node.param().padding;
+ const auto padding = ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width,
+ ker_height, dilation_width, dilation_height);
+ const auto multiplier = node.param().multiplier;
+ const auto activation = node.param().activation;
+
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
+
+ auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>(_external_context);
+
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
+ padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
+ multiplier, dilation_width, dilation_height, activation, ofm_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::FullyConnected &node)
+{
+ using ir::operation::FullyConnected;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+ const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
+ const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
+ const auto activation = node.param().activation;
+
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+ auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
+ auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
+
+ auto fn = std::make_unique<ops::FullyConnectedLayer>(_external_context);
+
+ fn->configure(input_tensor, weight_tensor, bias_tensor, activation, output_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__
+
+#include "ExternalContext.h"
+#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"
+#include "Tensor.h"
+
+#include <backend/CustomKernelBuilder.h>
+#include <backend/cpu_common/KernelGeneratorBase.h>
+#include <ir/Operands.h>
+#include <ir/Operations.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class KernelGenerator : public cpu_common::KernelGeneratorBase
+{
+public:
+ KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+ const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ void visit(const ir::OpSequence &) override;
+ void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::DepthwiseConv2D &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+
+private:
+ const ir::Operands &_ctx;
+ const ir::Operations &_operations_ctx;
+ std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+ std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
+ ir::Layout _current_layout;
+ const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__
+
+#include "backend/cpu_common/StaticTensorManager.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+using StaticTensorManager = cpu_common::StaticTensorManager;
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_H__
+#define __ONERT_BACKEND_XNNPACK_TENSOR_H__
+
+#include <backend/cpu_common/Tensor.h>
+#include <ir/Data.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+using Tensor = cpu_common::Tensor;
+using ExternalTensor = cpu_common::ExternalTensor;
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_TENSOR_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorBuilder.h"
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
+ : _tensor_reg{tensor_reg},
+ _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
+ _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
+{
+ /* empty */
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout layout)
+{
+ _tensor_info_map.emplace(ind, info);
+
+  // The XNNPACK backend supports only one layout: NHWC
+ assert(layout == ir::Layout::NHWC);
+ if (info.isDynamic())
+ {
+ _dynamic_tensor_mgr->buildTensor(ind, info, layout);
+ }
+ else
+ {
+ _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
+ }
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+ assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
+ const auto tensor_info = _tensor_info_map.at(ind);
+
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+ {
+ const auto size = tensor_info.total_size();
+ _static_tensor_mgr->claimPlan(ind, size);
+ }
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+ {
+ _static_tensor_mgr->releasePlan(ind);
+ }
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+ return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
+
+void TensorBuilder::allocate()
+{
+  // NOTE Nothing to do for now. Allocation is done in the prepare stage, which is not ideal,
+  // because CPU kernels require `ITensor`s to be allocated before kernel generation.
+}
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__
+
+#include <backend/cpu_common/DynamicTensorManager.h>
+#include <backend/cpu_common/TensorRegistry.h>
+
+#include <ir/OperandIndexMap.h>
+
+#include "StaticTensorManager.h"
+#include "Tensor.h"
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class TensorBuilder
+{
+public:
+ TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
+
+ /**
+ * @brief Register tensor information to allocate on XNNPACK backend
+ * @param[in] ind Operand index
+ * @param[in] info Operand information
+ * @param[in] layout Operand data layout
+ */
+ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout backend_layout);
+
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
+
+ bool isRegistered(const ir::OperandIndex &) const;
+
+ void prepare(void);
+ void allocate();
+  void postFunctionPrepare() { /* DO NOTHING */ }
+
+ IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
+
+private:
+ const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+ std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
+ std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
+ ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvolutionLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+ConvolutionLayer::ConvolutionLayer(const std::shared_ptr<ExternalContext> external_context)
+ : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0),
+ _padding_right(0), _padding_bottom(0), _stride_width(0), _stride_height(0),
+ _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE)
+{
+ // DO NOTHING
+}
+
+void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel,
+ const IPortableTensor *bias, ir::PaddingType padding_type,
+ const uint32_t padding_left, const uint32_t padding_right,
+ const uint32_t padding_top, const uint32_t padding_bottom,
+ const uint32_t stride_width, const uint32_t stride_height,
+ const uint32_t dilation_width_factor,
+ const uint32_t dilation_height_factor,
+ const ir::Activation activation, IPortableTensor *output)
+{
+ _input = input;
+ _kernel = kernel;
+ _bias = bias;
+ _padding_type = padding_type;
+ _padding_left = padding_left;
+ _padding_right = padding_right;
+ _padding_top = padding_top;
+ _padding_bottom = padding_bottom;
+ _stride_width = stride_width;
+ _stride_height = stride_height;
+ _dilation_width_factor = dilation_width_factor;
+ _dilation_height_factor = dilation_height_factor;
+ _activation = activation;
+ _output = output;
+
+  // TODO Support layouts other than NHWC
+ assert(_input->layout() == ir::Layout::NHWC);
+
+ assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+ _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void ConvolutionLayer::run()
+{
+ assert(_external_context && _external_context->getThreadPool());
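+  // Setup is deferred to the first run because the model's input/output buffers may not be
+  // allocated yet when this layer is prepared.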
+ if (!_setup)
+ {
+ _setup = setup();
+ assert(_setup);
+ }
+
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to run FP32 Convolution operator"};
+ }
+ }
+ else
+ {
+ throw std::runtime_error{"XNNPACK Conv: unsupported data type"};
+ }
+}
+
+bool ConvolutionLayer::create()
+{
+ float output_activation_min = 0.f, output_activation_max = 0.f;
+ CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+ // NHWC
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto &kernel_shape = _kernel->getShape();
+ uint32_t kernel_height = kernel_shape.dim(1);
+ uint32_t kernel_width = kernel_shape.dim(2);
+ uint32_t output_channels = kernel_shape.dim(0);
+ uint32_t input_channels = kernel_shape.dim(3);
+ assert(static_cast<uint32_t>(_input->getShape().dim(3)) == input_channels);
+ assert(static_cast<uint32_t>(_output->getShape().dim(3)) == output_channels);
+
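+ // A standard convolution maps onto XNNPACK's grouped-convolution API with a single group:
+ // groups = 1, group_input_channels = input_channels, group_output_channels = output_channels.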
+ enum xnn_status status = xnn_create_convolution2d_nhwc_f32(
+ _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
+ _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor,
+ 1 /* groups */, input_channels /* group_input_channels */,
+ output_channels /* group_output_channels */, input_channels /* input_channel_stride */,
+ output_channels /* output_channel_stride */,
+ reinterpret_cast<const float *>(_kernel->buffer()),
+ reinterpret_cast<const float *>(_bias->buffer()), output_activation_min,
+ output_activation_max, 0, &_kernel_op);
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to create FP32 Convolution operator"};
+ }
+ assert(_kernel_op != nullptr);
+ return true;
+}
+
+bool ConvolutionLayer::setup()
+{
+ if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+ {
+ // it could be the model's input or output
+ return false;
+ }
+
+ uint32_t input_width = _input->getShape().dim(2);
+ uint32_t input_height = _input->getShape().dim(1);
+ uint32_t batch_size = _input->getShape().dim(0);
+ enum xnn_status status = xnn_setup_convolution2d_nhwc_f32(
+ _kernel_op, batch_size, input_height, input_width,
+ reinterpret_cast<const float *>(_input->buffer()),
+ reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to setup FP32 Convolution operator"};
+ }
+ return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__
+
+#include "Layer.h"
+
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+class ConvolutionLayer : public Layer
+{
+public:
+ ConvolutionLayer(const std::shared_ptr<ExternalContext> external_context);
+
+public:
+ void configure(const IPortableTensor *input, const IPortableTensor *kernel,
+ const IPortableTensor *bias, ir::PaddingType padding_type,
+ const uint32_t padding_left, const uint32_t padding_right,
+ const uint32_t padding_top, const uint32_t padding_bottom,
+ const uint32_t stride_width, const uint32_t stride_height,
+ const uint32_t dilation_width_factor, const uint32_t dilation_height_factor,
+ const ir::Activation activation, IPortableTensor *output);
+
+ void run() override;
+
+ bool create() override;
+ bool setup() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_kernel;
+ const IPortableTensor *_bias;
+ IPortableTensor *_output;
+
+ ir::PaddingType _padding_type;
+ uint32_t _padding_left;
+ uint32_t _padding_top;
+ uint32_t _padding_right;
+ uint32_t _padding_bottom;
+
+ uint32_t _stride_width;
+ uint32_t _stride_height;
+ uint32_t _dilation_width_factor;
+ uint32_t _dilation_height_factor;
+
+ ir::Activation _activation;
+};
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthwiseConvolutionLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+DepthwiseConvolutionLayer::DepthwiseConvolutionLayer(
+ const std::shared_ptr<ExternalContext> external_context)
+ : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0),
+ _padding_right(0), _padding_bottom(0), _stride_width(0), _stride_height(0), _multiplier(1),
+ _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE)
+{
+ // DO NOTHING
+}
+
+void DepthwiseConvolutionLayer::configure(
+ const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
+ ir::PaddingType padding_type, const uint32_t padding_left, const uint32_t padding_right,
+ const uint32_t padding_top, const uint32_t padding_bottom, const uint32_t stride_width,
+ const uint32_t stride_height, const uint32_t multiplier, const uint32_t dilation_width_factor,
+ const uint32_t dilation_height_factor, const ir::Activation activation, IPortableTensor *output)
+{
+ _input = input;
+ _kernel = kernel;
+ _bias = bias;
+ _padding_type = padding_type;
+ _padding_left = padding_left;
+ _padding_right = padding_right;
+ _padding_top = padding_top;
+ _padding_bottom = padding_bottom;
+ _stride_width = stride_width;
+ _stride_height = stride_height;
+ _multiplier = multiplier;
+ _dilation_width_factor = dilation_width_factor;
+ _dilation_height_factor = dilation_height_factor;
+ _activation = activation;
+ _output = output;
+
+ // TODO Support layouts other than NHWC
+ assert(_input->layout() == ir::Layout::NHWC);
+
+ assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+ _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void DepthwiseConvolutionLayer::run()
+{
+ assert(_external_context && _external_context->getThreadPool());
+ if (!_setup)
+ {
+ _setup = setup();
+ assert(_setup);
+ }
+
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to run FP32 DepthwiseConvolution operator"};
+ }
+ }
+ else
+ {
+ throw std::runtime_error{"XNNPACK DepthwiseConv: unsupported data type"};
+ }
+}
+
+bool DepthwiseConvolutionLayer::create()
+{
+ float output_activation_min = 0.f, output_activation_max = 0.f;
+ CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+ // NHWC
+ // Kernel format is [1, kernel_height, kernel_width, depth_out].
+ const auto &kernel_shape = _kernel->getShape();
+ uint32_t kernel_height = kernel_shape.dim(1);
+ uint32_t kernel_width = kernel_shape.dim(2);
+ uint32_t output_channels = kernel_shape.dim(3);
+ uint32_t input_channels = _input->getShape().dim(3);
+ assert(static_cast<uint32_t>(_output->getShape().dim(3)) == output_channels);
+ assert(output_channels == input_channels * _multiplier);
+
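+ // A depthwise convolution uses the same grouped-convolution API with one group per input
+ // channel: groups = input_channels, group_input_channels = 1, group_output_channels =
+ // _multiplier; XNN_FLAG_DEPTHWISE_CONVOLUTION is passed since the kernel layout is
+ // [1, kernel_height, kernel_width, depth_out].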
+ enum xnn_status status = xnn_create_convolution2d_nhwc_f32(
+ _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
+ _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor,
+ input_channels /* groups */, 1 /* group_input_channels */,
+ _multiplier /* group_output_channels */, input_channels /* input_channel_stride */,
+ output_channels /* output_channel_stride */,
+ reinterpret_cast<const float *>(_kernel->buffer()),
+ reinterpret_cast<const float *>(_bias->buffer()), output_activation_min,
+ output_activation_max, XNN_FLAG_DEPTHWISE_CONVOLUTION, &_kernel_op);
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to create FP32 DepthwiseConvolution operator"};
+ }
+ assert(_kernel_op != nullptr);
+ return true;
+}
+
+bool DepthwiseConvolutionLayer::setup()
+{
+ if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+ {
+ // it could be the model's input or output
+ return false;
+ }
+
+ uint32_t input_width = _input->getShape().dim(2);
+ uint32_t input_height = _input->getShape().dim(1);
+ uint32_t batch_size = _input->getShape().dim(0);
+ enum xnn_status status = xnn_setup_convolution2d_nhwc_f32(
+ _kernel_op, batch_size, input_height, input_width,
+ reinterpret_cast<const float *>(_input->buffer()),
+ reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to setup FP32 DepthwiseConvolution operator"};
+ }
+ return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__
+
+#include "Layer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+class DepthwiseConvolutionLayer : public Layer
+{
+public:
+ DepthwiseConvolutionLayer(const std::shared_ptr<ExternalContext> external_context);
+
+public:
+ void configure(const IPortableTensor *input, const IPortableTensor *kernel,
+ const IPortableTensor *bias, ir::PaddingType padding_type,
+ const uint32_t padding_left, const uint32_t padding_right,
+ const uint32_t padding_top, const uint32_t padding_bottom,
+ const uint32_t stride_width, const uint32_t stride_height,
+ const uint32_t multiplier, const uint32_t dilation_width_factor,
+ const uint32_t dilation_height_factor, const ir::Activation activation,
+ IPortableTensor *output);
+
+ void run() override;
+
+ bool create() override;
+ bool setup() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_kernel;
+ const IPortableTensor *_bias;
+ IPortableTensor *_output;
+
+ ir::PaddingType _padding_type;
+ uint32_t _padding_left;
+ uint32_t _padding_top;
+ uint32_t _padding_right;
+ uint32_t _padding_bottom;
+
+ uint32_t _stride_width;
+ uint32_t _stride_height;
+ uint32_t _multiplier;
+ uint32_t _dilation_width_factor;
+ uint32_t _dilation_height_factor;
+
+ ir::Activation _activation;
+};
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FullyConnectedLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+FullyConnectedLayer::FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context)
+ : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+ _activation(ir::Activation::NONE)
+{
+ // DO NOTHING
+}
+
+void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
+ const IPortableTensor *bias, ir::Activation activation,
+ IPortableTensor *output)
+{
+ _input = input;
+ _kernel = weights;
+ _bias = bias;
+ _activation = activation;
+ _output = output;
+
+ // TODO Support layouts other than NHWC
+ assert(_input->layout() == ir::Layout::NHWC);
+
+ assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+ _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void FullyConnectedLayer::run()
+{
+ assert(_external_context && _external_context->getThreadPool());
+ if (!_setup)
+ {
+ _setup = setup();
+ assert(_setup);
+ }
+
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to run FP32 FullyConnected operator"};
+ }
+ }
+ else
+ {
+ throw std::runtime_error{"XNNPACK FC: unsupported data type"};
+ }
+}
+
+bool FullyConnectedLayer::create()
+{
+ float output_activation_min = 0.f, output_activation_max = 0.f;
+ CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+ const auto &kernel_shape = _kernel->getShape();
+ assert(kernel_shape.rank() == 2);
+ uint32_t output_channels = kernel_shape.dim(0);
+ uint32_t input_channels = kernel_shape.dim(1);
+
+ const auto &input_shape = _input->getShape();
+ const auto &output_shape = _output->getShape();
+ uint32_t flag = 0;
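+ // When the input and output ranks differ, request a TensorFlow-style 2D reshape so the
+ // input is treated as a [batch, input_channels] matrix; setup() then computes the batch
+ // size as num_elements / input_channels.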
+ if (input_shape.rank() != output_shape.rank())
+ {
+ flag |= XNN_FLAG_TENSORFLOW_RESHAPE_2D;
+ assert(input_shape.num_elements() % input_channels == 0);
+ }
+ else
+ {
+ assert(static_cast<uint32_t>(input_shape.dim(input_shape.rank() - 1)) == input_channels);
+ }
+
+ assert(_kernel && _kernel->buffer());
+ const float *kernel_buffer = reinterpret_cast<const float *>(_kernel->buffer());
+ const float *bias_buffer = (_bias) ? reinterpret_cast<const float *>(_bias->buffer()) : nullptr;
+
+ enum xnn_status status = xnn_create_fully_connected_nc_f32(
+ input_channels, output_channels, input_channels /* input stride */,
+ output_channels /* output stride */, kernel_buffer, bias_buffer, output_activation_min,
+ output_activation_max, flag, &_kernel_op);
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to create FP32 FullyConnected operator"};
+ }
+ assert(_kernel_op != nullptr);
+ return true;
+}
+
+bool FullyConnectedLayer::setup()
+{
+ if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+ {
+ // it could be the model's input or output
+ return false;
+ }
+
+ uint32_t batch_size = _input->getShape().num_elements() / _kernel->getShape().dim(1);
+ enum xnn_status status = xnn_setup_fully_connected_nc_f32(
+ _kernel_op, batch_size, reinterpret_cast<const float *>(_input->buffer()),
+ reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+ if (status != xnn_status_success)
+ {
+ throw std::runtime_error{"failed to setup FP32 FullyConnected operator"};
+ }
+ return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
+
+#include "Layer.h"
+
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+class FullyConnectedLayer : public Layer
+{
+public:
+ FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context);
+
+public:
+ void configure(const IPortableTensor *input, const IPortableTensor *weights,
+ const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output);
+
+ void run() override;
+
+ bool create() override;
+ bool setup() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_kernel;
+ const IPortableTensor *_bias;
+ IPortableTensor *_output;
+
+ ir::Activation _activation;
+};
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__
+
+#include <exec/IFunction.h>
+#include <backend/IPortableTensor.h>
+#include "OperationUtils.h"
+#include "../ExternalContext.h"
+#include "../Tensor.h"
+
+#include <cassert>
+#include <memory>
+
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+class Layer : public ::onert::exec::IFunction
+{
+public:
+ Layer(const std::shared_ptr<ExternalContext> external_context)
+ : _kernel_op{nullptr}, _create{false}, _setup{false}, _external_context{external_context}
+ {
+ // DO NOTHING
+ }
+
+ ~Layer()
+ {
+ if (_kernel_op)
+ xnn_delete_operator(_kernel_op);
+ }
+
+public:
+ void prepare() override
+ {
+ if (_create)
+ return;
+
+ _create = create();
+ assert(_create);
+
+ _setup = setup();
+ }
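+
+ // create() builds the XNNPACK operator from parameters fixed at prepare time (weights, bias,
+ // strides, paddings, ...); setup() binds the current input/output buffers. Subclasses re-run
+ // setup() lazily from run() when buffers were not yet available at prepare time.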
+ virtual bool create() = 0;
+ virtual bool setup() = 0;
+
+protected:
+ xnn_operator_t _kernel_op;
+ bool _create;
+ bool _setup;
+ const std::shared_ptr<ExternalContext> _external_context;
+};
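+
+// Usage sketch (for a concrete subclass such as ConvolutionLayer):
+//   auto conv = std::make_unique<ConvolutionLayer>(external_context);
+//   conv->configure(input, kernel, bias, /* paddings, strides, dilations, ... */ activation, output);
+//   conv->prepare(); // create() the XNNPACK operator, then setup() if buffers are already bound
+//   conv->run();     // lazily setup() if needed, then xnn_run_operator() on the thread pool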
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
+
+// duplicated from cpu/ops/OperationUtils.h
+#include <ir/InternalType.h>
+#include <ir/Padding.h>
+#include <ir/DataType.h>
+
+#include <limits>    // std::numeric_limits
+#include <stdexcept> // std::runtime_error
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+using OperandType = ir::DataType;
+
+template <typename T>
+void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
+{
+ if (activation == ir::Activation::RELU)
+ {
+ *activation_min = 0;
+ *activation_max = std::numeric_limits<T>::max();
+ }
+ else if (activation == ir::Activation::RELU6)
+ {
+ *activation_min = 0;
+ *activation_max = 6;
+ }
+ else if (activation == ir::Activation::RELU1)
+ {
+ *activation_min = -1;
+ *activation_max = 1;
+ }
+ else if (activation == ir::Activation::SIGMOID)
+ {
+ *activation_min = 0;
+ *activation_max = 1;
+ }
+ else if (activation == ir::Activation::NONE)
+ {
+ *activation_min = std::numeric_limits<T>::lowest();
+ *activation_max = std::numeric_limits<T>::max();
+ }
+ else
+ {
+ throw std::runtime_error{"Unsupported fused activation function"};
+ }
+}
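+
+// Example: CalculateActivationRange<float>(ir::Activation::RELU6, &min, &max) yields
+// min = 0.0f and max = 6.0f; ir::Activation::NONE yields the full range of the type.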
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Backend.h"
+
+#include <util/logging.h>
+
+extern "C" {
+onert::backend::Backend *onert_backend_create()
+{
+ VERBOSE(onert_backend_create) << "'xnnpack' loaded\n";
+ return new onert::backend::xnnpack::Backend;
+}
+
+void onert_backend_destroy(onert::backend::Backend *backend)
+{
+ VERBOSE(onert_backend_destroy) << "'xnnpack' unloaded\n";
+ delete backend;
+}
+}
#include <memory>
#include "ir/Graph.h"
+#include "ir/LowerInfoMap.h"
+#include "exec/FunctionSequence.h"
namespace onert
{
{
class Backend;
-class IConstantInitializer;
-class IKernelGenerator;
-class ITensorRegister;
struct ITensorRegistry;
-struct ITensorBuilder;
-struct IOptimizer;
+
+using FunctionMap =
+ std::vector<std::pair<ir::OpSequenceIndex, std::unique_ptr<exec::FunctionSequence>>>;
class BackendContext
{
public:
BackendContext(const Backend *backend, const ir::Graph *graph,
- std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
- std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
- std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
- std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
- std::shared_ptr<ITensorRegister> tensor_register = nullptr,
- std::shared_ptr<IOptimizer> optimizer = nullptr)
- : _backend{backend}, _graph{graph}, tensor_registry{tensor_registry},
- tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
- kernel_gen{kernel_gen}, tensor_register{tensor_register}, optimizer{optimizer}
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr)
+ : _backend{backend}, _graph{graph}, tensor_registry{tensor_registry}
{
}
const Backend *backend() const { return _backend; }
const ir::Graph *graph() const { return _graph; }
- const std::vector<OperationInfo> &operation_list() { return _operation_list; }
- const std::vector<ir::OperandIndex> &operand_list() { return _operand_list; }
+ const std::vector<OperationInfo> &operation_list() const { return _operation_list; }
+ const std::vector<ir::OperandIndex> &operand_list() const { return _operand_list; }
+
+ virtual ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &,
+ const ir::OpSequences &, const ir::LowerInfoMap &)
+ {
+ return nullptr;
+ }
+ virtual FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &,
+ const ir::OpSequences &)
+ {
+ return {};
+ }
private:
const Backend *_backend{nullptr};
public:
std::shared_ptr<ITensorRegistry> tensor_registry;
- std::shared_ptr<ITensorBuilder> tensor_builder;
- std::shared_ptr<IConstantInitializer> constant_initializer;
- std::shared_ptr<IKernelGenerator> kernel_gen;
- std::shared_ptr<ITensorRegister> tensor_register;
- std::shared_ptr<IOptimizer> optimizer;
};
using BackendContexts = std::unordered_map<const Backend *, std::unique_ptr<BackendContext>>;
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_ITENSOR_BUILDER_H__
-#define __ONERT_BACKEND_ITENSOR_BUILDER_H__
-
-#include <map>
-
-#include "ir/Index.h"
-#include "ir/OperandInfo.h"
-#include "ir/Operation.h"
-#include "ir/Layout.h"
-#include "ITensor.h"
-#include "ITensorManager.h"
-#include "ITensorRegistry.h"
-#include "IDynamicTensorManager.h"
-
-namespace onert
-{
-namespace backend
-{
-
-struct ITensorBuilder
-{
- using IterateFunction = std::function<void(const ir::OperandIndex &)>;
-
- virtual ~ITensorBuilder(void) = default;
-
- /**
- * @brief Register tensor information to allocate on backend
- *
- * @param ind Index
- * @param info Info
- * @param backend_layout Backend layout
- * @param as_const Whether this tensor is constant
- */
- virtual void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout) = 0;
-
- /**
- * @brief Check if the tensor has been registered with @c registerTensorInfo
- *
- * @return true If the tensor has been registered
- * @return false Otherwise
- */
- virtual bool isRegistered(const ir::OperandIndex &) const = 0;
-
-public: // methods for static tensor allocation
- /**
- * @brief Let the tensor builder know first use(start of lifetime) of a tensor
- * Must be called before calling @c prepare
- * Must be run up to once for each tensor before calling @c notifyLastUse
- * NOTE: Useful only for static models
- */
- virtual void notifyFirstUse(const ir::OperandIndex &) = 0;
- /**
- * @brief Let the tensor builder know last use(end of lifetime) of a tensor
- * Must be run up to once for each tensor after calling @c notifyFirstUse
- * NOTE: Useful only for static models
- */
- virtual void notifyLastUse(const ir::OperandIndex &) = 0;
- /**
- * @brief Prepare the tensors
- * Before calling this, all the tensors must be registered
- */
- virtual void prepare(void) = 0;
- /**
- * @brief Allocate the tensors
- * Before calling this, @c prepare must be called
- */
- virtual void allocate() = 0;
- /**
- * @brief Some actions after functions' @c IFunction::prepare method.
- * This is called right after each function's @c IFunction::prepare function has been
- * called.
- */
- virtual void postFunctionPrepare() = 0;
-
-public: // methods for dynamic tensor allocation
- /**
- * @brief Get dynamicTensorManager. If a backend does not support dynamic tensor, exception
- * will be thrown.
- *
- * @return pointer of IDynamicTensorManager object
- *
- * @note Since it is a pointer, its life time is from the cration of TensorBuilder
- * to the end of execution
- */
- virtual IDynamicTensorManager *dynamicTensorManager(void) { return nullptr; }
-};
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ITENSOR_BUILDER_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_ITENSOR_REGISTER_H__
-#define __ONERT_BACKEND_ITENSOR_REGISTER_H__
-
-#include "ir/LowerInfoMap.h"
-#include "ITensorBuilder.h"
-#include "ir/Layout.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/OperandInfo.h"
-#include "ir/Operands.h"
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace backend
-{
-
-class ITensorRegister : public ir::OperationVisitor
-{
-public:
- virtual ~ITensorRegister() = default;
-
-public:
- void registerTensors(const ir::OpSequence &op_seq, const ir::LowerInfoMap *lower_info_map)
- {
- _current_op_seq_layout = op_seq.getLayout();
- _lower_info_map = lower_info_map;
- assert(_lower_info_map != nullptr);
- assert(tensor_builder().get() != nullptr);
- op_seq.accept(*this);
- }
-
-protected:
- virtual const ir::Operands &operands() const = 0;
- virtual std::shared_ptr<ITensorBuilder> tensor_builder() const = 0;
-
-protected:
-#define OP(InternalName) \
- void visit(const ir::operation::InternalName &node) override \
- { \
- for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs()) \
- { \
- defaultRegisterTensorInfo(ind); \
- } \
- }
-#include "ir/Operations.lst"
-#undef OP
-
-protected:
- void defaultRegisterTensorInfo(const ir::OperandIndex &index) const
- {
- if (tensor_builder()->isRegistered(index))
- {
- return;
- }
-
- const auto &obj = operands().at(index);
- const auto frontend_layout = frontendLayout();
- const auto backend_layout = backendLayout(index);
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
- tensor_builder()->registerTensorInfo(index, backend_info, backend_layout);
- }
-
-protected:
- ir::Layout frontendLayout() const { return _current_op_seq_layout; }
- ir::Layout backendLayout(const ir::OperandIndex &index) const
- {
- assert(_lower_info_map != nullptr);
- const auto lower_info = _lower_info_map->operand.at(index).get();
- return lower_info->def_factors().getOnlyElement().layout();
- }
-
-private:
- ir::Layout _current_op_seq_layout;
- const ir::LowerInfoMap *_lower_info_map{nullptr};
-};
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ITENSOR_REGISTER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__
+#define __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__
+
+#include <vector>
+
+#include "ir/Index.h"
+#include "ir/OpSequences.h"
+#include "ir/LowerInfoMap.h"
+#include "util/logging.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+// TODO Remove the template param BackendContext once unification of cpu backend context is done
+template <typename T_BackendContext>
+void planTensors(const T_BackendContext &ctx, const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info)
+{
+ auto graph = ctx.graph();
+ auto tensor_builder = ctx.tensor_builder;
+
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexMap<uint32_t> def_map;
+ ir::OperandIndexSequence constants;
+
+ auto model_io =
+ (graph->getInputs() + graph->getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+
+ // Prepare scanning
+ for (auto ind : ctx.operand_list())
+ {
+ if (model_io.contains(ind))
+ continue;
+ const auto &obj = graph->operands().at(ind);
+ const auto &li = lower_info.operand.at(ind);
+ if (li->def_factors().getOnlyElement().backend() != ctx.backend())
+ continue;
+
+ // Ignore unused tensor
+ if (li->def_factors().size() == 0 && li->use_factors().size() == 0)
+ {
+ VERBOSE_F() << "Operand #" << ind.value() << " will not be used. Skipping."
+ << std::endl;
+ continue;
+ }
+
+ uses_map[ind] = obj.getUses().size();
+ def_map[ind] = obj.getDef().valid() ? 1 : 0;
+
+ if (obj.isConstant())
+ constants.append(ind);
+
+ auto factor = li->def_factors().getOnlyElement();
+ if (!tensor_builder->isRegistered(ind))
+ {
+ // These tensors do not exist in any op_seq (No use and def)
+ const auto info = obj.info();
+ const auto backend_layout = factor.layout();
+ // TODO Change tensor info to have permuted shape
+ tensor_builder->registerTensorInfo(ind, info, backend_layout);
+ }
+ }
+
+ // Start scanning to do notify{First|Last}Use for each tensor
+
+ // If a tensor is a constant, increase the use of the tensor and allocate it first.
+ // Increasing the use count here keeps the tensor from being deallocated until the end,
+ // i.e. constants will be deallocated last.
+ for (const auto &ind : constants)
+ {
+ uses_map[ind]++;
+ tensor_builder->notifyFirstUse(ind);
+ }
+
+ // At each operation,
+ // 1. Scan DEF of outputs. If the DEF, allocate it
+ // 2. Scan DEF of inputs. If variable tensor, allocate it
+ // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
+ for (const auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ for (const auto &op_idx : op_seq.operations())
+ {
+ auto op_inputs = graph->operations().at(op_idx).getInputs() | ir::Remove::DUPLICATED |
+ ir::Remove::UNDEFINED;
+ auto op_outputs = graph->operations().at(op_idx).getOutputs() | ir::Remove::DUPLICATED |
+ ir::Remove::UNDEFINED;
+
+ // Define outputs
+ for (const auto &ind : op_outputs)
+ {
+ if (model_io.contains(ind))
+ continue;
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(def_map.find(ind) != def_map.end());
+ if (def_map[ind])
+ {
+ def_map[ind] = 0;
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ // Scan variable tensors
+ // These tensors have constant-like characteristics, but OperandInfo and LowerInfo treat
+ // them as non-constant so that memory planning here can reduce memory usage
+ for (const auto &ind : op_inputs)
+ {
+ if (model_io.contains(ind))
+ continue;
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ const auto &operand = graph->operands().at(ind);
+ if (operand.info().isVariable())
+ {
+ // The variable tensor with buffer is not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+ assert(lower_info.operand.at(ind)->def_factors().size() == 1 &&
+ lower_info.operand.at(ind)->use_factors().size() == 1);
+ assert(uses_map[ind] == 1 && def_map[ind] == 0);
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ for (const auto &ind : op_inputs)
+ {
+ if (model_io.contains(ind))
+ continue;
+ if (!tensor_builder->isRegistered(ind))
+ continue;
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0)
+ {
+ // plan for deallocation of static tensor
+ tensor_builder->notifyLastUse(ind);
+
+ // plan for deallocation of dynamic tensor
+ auto dyn_tensor_manager = tensor_builder->dynamicTensorManager();
+ auto *tensor = ctx.tensor_registry->getITensor(ind);
+ assert(tensor);
+ dyn_tensor_manager->planDealloc(op_idx, tensor);
+ }
+ }
+ }
+ }
+
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
+ {
+ tensor_builder->notifyLastUse(ind);
+ }
+ }
+
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+
+ assert(
+ std::all_of(def_map.begin(), def_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+}
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__
+
+#include "TensorRegistry.h"
+
+#include "ConstantInitializerBase.h"
+#include <ir/Operands.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+class ConstantInitializer : public ConstantInitializerBase
+{
+public:
+ ConstantInitializer(const ir::Operands &operands,
+ const std::shared_ptr<ITensorRegistry> &tensor_reg);
+
+public:
+ void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
+
+ // TODO: For now only the cpu backend supports constant tensors that use external data.
+ // If other backends add such support (ExternalTensor would then need to be abstracted,
+ // e.g. behind an IExternal interface), this could move into
+ // cpu_common::ConstantInitializerBase.
+ void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &);
+
+private:
+ std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
+
+private:
+ std::shared_ptr<ITensorRegistry> _tensor_reg;
+};
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__
-#define __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__
+#ifndef __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__
+#define __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__
#include <unordered_map>
#include <functional>
-#include "ITensorBuilder.h"
#include "ir/Coordinates.h"
#include "ir/Layout.h"
#include "ir/Operand.h"
#include "ir/Operands.h"
#include "ir/OperationVisitor.h"
#include "ir/OpSequence.h"
+#include "backend/ITensorRegistry.h"
#include "util/logging.h"
+#include "backend/ITensorRegistry.h"
namespace
{
{
namespace backend
{
+namespace cpu_common
+{
-class IConstantInitializer : public ir::OperationVisitor
+class ConstantInitializerBase : public ir::OperationVisitor
{
public:
- virtual ~IConstantInitializer() = default;
+ virtual ~ConstantInitializerBase() = default;
public:
void run()
}
public:
- IConstantInitializer(const ir::Operands &operands)
- : _operands{operands}, _current_op_seq_layout{ir::Layout::UNKNOWN}
+ ConstantInitializerBase(const ir::Operands &operands)
+ : _operands{operands}, _current_layout{ir::Layout::UNKNOWN}
{
}
public:
using Initializer = std::function<void(const ir::Operand &, backend::ITensor &)>;
- void setLayout(ir::Layout layout) { _current_op_seq_layout = layout; }
+ void setLayout(ir::Layout layout) { _current_layout = layout; }
protected:
virtual std::shared_ptr<ITensorRegistry> tensor_registry() const = 0;
protected:
const ir::Operands &_operands;
std::unordered_map<ir::OperandIndex, Initializer> _init_map;
- ir::Layout _current_op_seq_layout; // TODO Rename this to _current_layout
+ ir::Layout _current_layout;
};
+} // namespace cpu_common
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__
+#endif // __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_IKERNEL_GENERATOR_H__
-#define __ONERT_BACKEND_IKERNEL_GENERATOR_H__
+#ifndef __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__
+#define __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__
#include <assert.h>
#include <memory>
#include <functional>
-#include "ITensorBuilder.h"
#include "ir/OperationVisitor.h"
#include "ir/OpSequence.h"
#include <memory>
#include "exec/FunctionSequence.h"
+#include "backend/ITensorRegistry.h"
namespace onert
{
namespace backend
{
+namespace cpu_common
+{
-class IKernelGenerator : public ir::OperationVisitor
+class KernelGeneratorBase : public ir::OperationVisitor
{
public:
- virtual ~IKernelGenerator() = default;
+ virtual ~KernelGeneratorBase() = default;
std::unique_ptr<exec::IFunction> releaseFunction()
{
std::unique_ptr<exec::FunctionSequence> _return_fn_seq; // TODO Extract this out
};
+} // namespace cpu_common
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_IKERNEL_GENERATOR_H__
+#endif // __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__
#ifndef __ONERT_BACKEND_CPU_COMMON_STATICTENSOR_MANAGER_H__
#define __ONERT_BACKEND_CPU_COMMON_STATICTENSOR_MANAGER_H__
-#include "MemoryManager.h"
-
#include "backend/IStaticTensorManager.h"
+#include "backend/cpu_common/DynamicTensorManager.h"
+#include "backend/cpu_common/MemoryManager.h"
+#include "backend/cpu_common/TensorRegistry.h"
+#include "backend/ITensorManager.h"
#include "ir/OperandIndexMap.h"
#include "ir/OperandInfo.h"
#include "TensorRegistry.h"
{
public:
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
- DynamicMemoryManager *dynamic_mem_mgr);
+ DynamicTensorManager *dynamic_tensor_manager);
virtual ~StaticTensorManager() = default;
- void allocateConsts(void);
void allocateNonconsts(void);
- void deallocateConsts(void);
void deallocateNonconsts(void);
void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
private:
- std::unique_ptr<DynamicMemoryManager> _const_mgr;
std::unique_ptr<MemoryManager> _nonconst_mgr;
const std::shared_ptr<TensorRegistry> _tensors;
ir::OperandIndexMap<bool> _as_constants;
- DynamicMemoryManager *_dynamic_mem_mgr;
+ DynamicTensorManager *_dynamic_tensor_manager;
};
} // namespace cpu_common
#include <backend/IPortableTensor.h>
#include <ir/OperandInfo.h>
+#include <ir/Data.h>
namespace onert
{
std::shared_ptr<Allocator> _allocator;
};
+/**
+ * @brief Tensor that uses data from external memory not managed by the backend,
+ * instead of allocating and copying the data. Its data pointer refers to memory that
+ * is already allocated elsewhere, such as an mmapped area, which means ExternalTensor
+ * can take any kind of ir::Data. The following are assumed: no padding, NHWC layout,
+ * constant, and not dynamic.
+ */
+class ExternalTensor : public Tensor
+{
+public:
+ ExternalTensor() = delete;
+ virtual ~ExternalTensor();
+
+public:
+ ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout)
+ : Tensor(info, layout, nullptr)
+ {
+ assert(_layout == ir::Layout::NHWC);
+ assert(_info.isConstant());
+ assert(_info.isDynamic() == false);
+ }
+
+public:
+ /**
+ * @brief Set data shared from outside so that this ExternalTensor is not allocated
+ * by the CPU backend
+ * @param[in] data data of the Operand to be set
+ */
+ void setData(const std::shared_ptr<ir::Data> data)
+ {
+ assert(data != nullptr);
+ _data = data;
+ // Note: some ops such as cker::Conv may receive the buffer as nullptr,
+ // which is why _buffer is also set here
+ _buffer = const_cast<uint8_t *>(_data->base());
+ }
+
+public:
+ uint8_t *buffer() const override { return _buffer; }
+
+ bool is_constant() const override { return true; }
+ bool is_dynamic() const override { return false; }
+ void set_dynamic() override
+ {
+ throw std::runtime_error("This tensor does not support changing dynamic");
+ }
+
+ void setShape(const ir::Shape &) override
+ {
+ throw std::runtime_error("This tensor does not support changing shape");
+ }
+
+ void increase_ref() override { ++_num_references; }
+
+ void decrease_ref() override
+ {
+ assert(_data != nullptr);
+ assert(_num_references > 0);
+ --_num_references;
+ if (_num_references == 0)
+ {
+ _data.reset();
+ _buffer = nullptr;
+ }
+ }
+
+ /**
+ * @brief Reset reference count to zero and release data
+ */
+ void reset_ref() override
+ {
+ assert(_data != nullptr);
+ assert(_num_references > 0);
+ _num_references = 0;
+
+ _data.reset();
+ _buffer = nullptr;
+ }
+
+ int32_t num_references() override { return _num_references; }
+
+private:
+ std::shared_ptr<const ir::Data> _data;
+};
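+
+// Sketch of intended use: a constant operand's ir::Data (e.g. weights mmapped from the model
+// file) is passed to setData(), so the backend reads the data in place instead of copying it
+// into backend-managed memory.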
} // namespace cpu_common
} // namespace backend
} // namespace onert
public:
using backend_create_t = backend::Backend *(*)();
using backend_destroy_t = void (*)(backend::Backend *);
- using dlhandle_destroy_t = void (*)(void *);
+ using dlhandle_destroy_t = std::function<void(void *)>;
static BackendManager &get();
#include "ir/Graph.h"
#include "exec/IExecutor.h"
+#include "util/TracingCtx.h"
namespace onert
{
{
// GENERAL OPTIONS
std::vector<std::string> backend_list;
- bool is_primary_subgraph; // TODO Remove this out of this struct as it is not user-given option
// OPTIONS ONLY FOR DEBUGGING/PROFILING
std::string trace_filepath; //< File path to save trace records
bool he_profiling_mode; //< Whether HEScheduler profiling mode ON/OFF
bool disable_compile; //< Run with Interpreter if true, try compilation otherwise
bool fp16_enable; //< Whether fp16 mode ON/OFF
+
+ util::TracingCtx *tracing_ctx; //< Profiling information
};
CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs);
/**
* @brief Construct a new Compiler object
* @param[in] subgs All subgraphs of a model
+ * @param[in] tracing_ctx Profiling information
*/
- Compiler(const std::shared_ptr<ir::Subgraphs> &subgs);
+ Compiler(const std::shared_ptr<ir::Subgraphs> &subgs, util::TracingCtx *tracing_ctx);
public:
/**
const compiler::BackendResolver &backend_resolver);
void manipulateLowerInfo(
- ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
- bool is_primary);
+ ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info);
void dumpLowerInfo();
bool mergeable(const ir::OpSequenceIndex &op_seq_index, const ir::OperationIndex &node_index,
ir::Layout layout, const compiler::BackendResolver &backend_resolver);
private:
// TODO Define visitors for operations. List them in alphabetic order.
- void visit(const ir::operation::ArgMax &op) override;
+ void visit(const ir::operation::ArgMinMax &op) override;
void visit(const ir::operation::BatchMatMul &op) override;
void visit(const ir::operation::BCQFullyConnected &op) override;
void visit(const ir::operation::BCQGather &op) override;
public:
// TODO Define visitors for operations. List them in alphabetic order.
// Remove TODO when any op starting from the alphabet is added
- void visit(const ir::operation::ArgMax &op) override;
+ void visit(const ir::operation::ArgMinMax &op) override;
void visit(const ir::operation::BatchMatMul &op) override;
void visit(const ir::operation::BCQFullyConnected &op) override;
void visit(const ir::operation::BCQGather &op) override;
* @file IExecutor.h
* @brief This file defines interface of Executor
*/
-#ifndef __ONERT_EXEC_I_EXECUTOR_H_
-#define __ONERT_EXEC_I_EXECUTOR_H_
+#ifndef __ONERT_EXEC_I_EXECUTOR_H__
+#define __ONERT_EXEC_I_EXECUTOR_H__
#include "ir/Graph.h"
#include "IFunction.h"
#include "IODescription.h"
+#include "ir/Index.h"
#include "ir/OperationIndexMap.h"
-#include "backend/IDynamicTensorManager.h"
+
+#include <cstdint>
+#include <memory>
+#include <unordered_map>
namespace onert
{
+namespace backend
+{
+class IPortableTensor;
+namespace controlflow
+{
+class IOTensor;
+}
+}
+}
+namespace onert
+{
namespace exec
{
class IExecutionObserver;
virtual void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) = 0;
/**
- * @brief Start execution
+ * @brief Execute with user-given input/output description (for primary subgraph)
* @param[in] desc Input and output description
* @note This method should be thread-safe
*/
virtual void execute(const IODescription &desc) = 0;
+
+ /**
+ * @brief Execute with given input/output tensors
+ *
+ * For non-primary subgraphs, input and output tensors must be given.
+ *
+ * @param[in] inputs tensors that are passed as inputs
+ * @param[in] outputs tensors that are passed as outputs
+ */
+ virtual void execute(const std::vector<backend::IPortableTensor *> &inputs,
+ const std::vector<backend::IPortableTensor *> &outputs) = 0;
+
+ /**
+ * @brief Get output tensor objects
+ *
+ * @return Vector of @c IOTensor
+ */
+ virtual const std::vector<backend::controlflow::IOTensor *> &getOutputTensors() const = 0;
};
using ExecutorMap = std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>>;
} // namespace exec
} // namespace onert
-#endif // __ONERT_EXEC_I_EXECUTOR_H_
+#endif // __ONERT_EXEC_I_EXECUTOR_H__
INT64 = 8,
QUANT_INT8_ASYMM = 9,
QUANT_INT16_ASYMM = 10,
+ QUANT_INT8_SYMM_PER_CHANNEL = 11,
};
size_t sizeOfDataType(DataType data_type);
// This file has no ifdef guard intentionally
#include "ir/operation/AddN.h"
+#include "ir/operation/ArgMinMax.h"
+#include "ir/operation/BatchMatMul.h"
#include "ir/operation/BatchToSpaceND.h"
+#include "ir/operation/BCQFullyConnected.h"
+#include "ir/operation/BCQGather.h"
#include "ir/operation/BinaryArithmetic.h"
#include "ir/operation/BroadcastTo.h"
-#include "ir/operation/Conv2D.h"
-#include "ir/operation/Pool2D.h"
+#include "ir/operation/Comparison.h"
#include "ir/operation/Concat.h"
-#include "ir/operation/Reshape.h"
-#include "ir/operation/Fill.h"
-#include "ir/operation/FullyConnected.h"
-#include "ir/operation/Softmax.h"
-#include "ir/operation/Transpose.h"
-#include "ir/operation/Permute.h"
-#include "ir/operation/Reduce.h"
+#include "ir/operation/Conv2D.h"
+#include "ir/operation/ConvertFp16ToFp32.h"
+#include "ir/operation/ConvertFp32ToFp16.h"
+#include "ir/operation/Custom.h"
+#include "ir/operation/DepthToSpace.h"
#include "ir/operation/DepthwiseConv2D.h"
-#include "ir/operation/Slice.h"
-#include "ir/operation/StridedSlice.h"
-#include "ir/operation/Squeeze.h"
+#include "ir/operation/Einsum.h"
#include "ir/operation/ElementwiseActivation.h"
#include "ir/operation/ElementwiseBinary.h"
#include "ir/operation/ElementwiseUnary.h"
+#include "ir/operation/EmbeddingLookup.h"
#include "ir/operation/ExpandDims.h"
-#include "ir/operation/Comparison.h"
+#include "ir/operation/Fill.h"
+#include "ir/operation/FullyConnected.h"
+#include "ir/operation/FusedBatchNorm.h"
+#include "ir/operation/Gather.h"
+#include "ir/operation/HashtableLookup.h"
+#include "ir/operation/If.h"
+#include "ir/operation/InstanceNorm.h"
+#include "ir/operation/L2Normalization.h"
+#include "ir/operation/LocalResponseNormalization.h"
+#include "ir/operation/LogSoftmax.h"
#include "ir/operation/LSTM.h"
+#include "ir/operation/MatrixBandPart.h"
+#include "ir/operation/OneHot.h"
+#include "ir/operation/Pack.h"
+#include "ir/operation/Pad.h"
+#include "ir/operation/Permute.h"
+#include "ir/operation/Pool2D.h"
+#include "ir/operation/Pow.h"
+#include "ir/operation/PReLU.h"
+#include "ir/operation/Range.h"
+#include "ir/operation/Rank.h"
+#include "ir/operation/Reduce.h"
+#include "ir/operation/Reshape.h"
#include "ir/operation/ResizeBilinear.h"
#include "ir/operation/ResizeNearestNeighbor.h"
#include "ir/operation/Reverse.h"
#include "ir/operation/RNN.h"
+#include "ir/operation/Select.h"
+#include "ir/operation/Shape.h"
+#include "ir/operation/Slice.h"
+#include "ir/operation/Softmax.h"
#include "ir/operation/SpaceToBatchND.h"
#include "ir/operation/SpaceToDepth.h"
-#include "ir/operation/EmbeddingLookup.h"
-#include "ir/operation/L2Normalization.h"
-#include "ir/operation/HashtableLookup.h"
-#include "ir/operation/InstanceNorm.h"
-#include "ir/operation/PReLU.h"
-#include "ir/operation/TransposeConv.h"
-#include "ir/operation/SquaredDifference.h"
-#include "ir/operation/TopKV2.h"
-#include "ir/operation/Gather.h"
-#include "ir/operation/ArgMax.h"
-#include "ir/operation/LocalResponseNormalization.h"
-#include "ir/operation/DepthToSpace.h"
-#include "ir/operation/Pack.h"
-#include "ir/operation/Select.h"
#include "ir/operation/Split.h"
#include "ir/operation/SplitV.h"
+#include "ir/operation/SquaredDifference.h"
+#include "ir/operation/Squeeze.h"
+#include "ir/operation/StatelessRandomUniform.h"
+#include "ir/operation/StridedSlice.h"
+#include "ir/operation/Tile.h"
+#include "ir/operation/TopKV2.h"
+#include "ir/operation/Transpose.h"
+#include "ir/operation/TransposeConv.h"
#include "ir/operation/Unpack.h"
-#include "ir/operation/Pad.h"
-#include "ir/operation/Custom.h"
-#include "ir/operation/Einsum.h"
-#include "ir/operation/OneHot.h"
-#include "ir/operation/Shape.h"
-#include "ir/operation/ConvertFp32ToFp16.h"
-#include "ir/operation/ConvertFp16ToFp32.h"
-#include "ir/operation/If.h"
#include "ir/operation/While.h"
-#include "ir/operation/Pow.h"
-#include "ir/operation/Tile.h"
-#include "ir/operation/Range.h"
-#include "ir/operation/Rank.h"
-#include "ir/operation/BCQFullyConnected.h"
-#include "ir/operation/BCQGather.h"
-#include "ir/operation/MatrixBandPart.h"
-#include "ir/operation/BatchMatMul.h"
-#include "ir/operation/FusedBatchNorm.h"
-#include "ir/operation/LogSoftmax.h"
-#include "ir/operation/StatelessRandomUniform.h"
// Internal Name
OP(AddN)
+OP(ArgMinMax)
+OP(BatchMatMul)
OP(BatchToSpaceND)
+OP(BCQFullyConnected)
+OP(BCQGather)
OP(BinaryArithmetic)
OP(BroadcastTo)
+OP(Comparison)
+OP(Concat)
OP(Conv2D)
+OP(ConvertFp16ToFp32)
+OP(ConvertFp32ToFp16)
+OP(Custom)
+OP(DepthToSpace)
OP(DepthwiseConv2D)
-OP(Pool2D)
-OP(Concat)
-OP(Fill)
-OP(FullyConnected)
-OP(Reduce)
-OP(Reshape)
-OP(Softmax)
-OP(Squeeze)
-OP(Slice)
-OP(StridedSlice)
-OP(Transpose)
+OP(Einsum)
OP(ElementwiseActivation)
OP(ElementwiseBinary)
OP(ElementwiseUnary)
+OP(EmbeddingLookup)
OP(ExpandDims)
-OP(Comparison)
+OP(Fill)
+OP(FullyConnected)
+OP(FusedBatchNorm)
+OP(Gather)
+OP(HashtableLookup)
+OP(If)
+OP(InstanceNorm)
+OP(L2Normalization)
+OP(LocalResponseNormalization)
+OP(LogSoftmax)
OP(LSTM)
+OP(MatrixBandPart)
+OP(OneHot)
+OP(Pack)
+OP(Pad)
+OP(Permute)
+OP(Pool2D)
+OP(Pow)
+OP(PReLU)
+OP(Range)
+OP(Rank)
+OP(Reduce)
+OP(Reshape)
OP(ResizeBilinear)
OP(ResizeNearestNeighbor)
OP(Reverse)
OP(RNN)
+OP(Select)
+OP(Shape)
+OP(Slice)
+OP(Softmax)
OP(SpaceToBatchND)
OP(SpaceToDepth)
-OP(EmbeddingLookup)
-OP(L2Normalization)
-OP(HashtableLookup)
-OP(InstanceNorm)
-OP(PReLU)
-OP(TransposeConv)
-OP(SquaredDifference)
-OP(TopKV2)
-OP(Gather)
-OP(ArgMax)
-OP(Einsum)
-OP(LocalResponseNormalization)
-OP(DepthToSpace)
-OP(Pack)
-OP(Select)
OP(Split)
OP(SplitV)
+OP(SquaredDifference)
+OP(Squeeze)
+OP(StatelessRandomUniform)
+OP(StridedSlice)
+OP(Tile)
+OP(TopKV2)
+OP(Transpose)
+OP(TransposeConv)
OP(Unpack)
-OP(Pad)
-OP(Custom)
-OP(Permute)
-OP(OneHot)
-OP(Shape)
-OP(ConvertFp32ToFp16)
-OP(ConvertFp16ToFp32)
-OP(If)
OP(While)
-OP(Pow)
-OP(Tile)
-OP(Range)
-OP(Rank)
-OP(BCQFullyConnected)
-OP(BCQGather)
-OP(MatrixBandPart)
-OP(BatchMatMul)
-OP(FusedBatchNorm)
-OP(LogSoftmax)
-OP(StatelessRandomUniform)
*
* @return count of Subgraphs
*/
- size_t count() { return _subgraphs.size(); }
+ size_t count() const { return _subgraphs.size(); }
/**
* @brief Return the primary subgraph
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_ARG_MAX_H__
-#define __ONERT_IR_OPERATION_ARG_MAX_H__
+#ifndef __ONERT_IR_OPERATION_ARG_MIN_MAX_H__
+#define __ONERT_IR_OPERATION_ARG_MIN_MAX_H__
#include "ir/Operation.h"
namespace operation
{
-class ArgMax : public Operation
+class ArgMinMax : public Operation
{
public:
enum Input
struct Param
{
DataType output_type;
+ bool is_arg_max = true;
};
public:
- ArgMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
+ ArgMinMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ArgMax; }
+ OpCode opcode() const final { return OpCode::ArgMinMax; }
public:
const Param &param() const { return _param; }
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_ARG_MAX_H__
+#endif // __ONERT_IR_OPERATION_ARG_MIN_MAX_H__
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
-#define __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
+#ifndef __ONERT_IR_OPERATION_ELEMENTWISE_UNARY_H__
+#define __ONERT_IR_OPERATION_ELEMENTWISE_UNARY_H__
#include "ir/Operation.h"
RSQRT,
SIN,
SQRT,
- SQURE,
+ SQUARE,
ZEROS_LIKE
};
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
+#endif // __ONERT_IR_OPERATION_ELEMENTWISE_UNARY_H__
public:
enum Input
{
- INPUT = 0,
+ SHAPE = 0,
VALUE,
};
// Name | Type | Default
CONFIG(GRAPH_DOT_DUMP , int , "0")
-CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;bcq") // FIXME Remove bcq
+CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;ruy;xnnpack;bcq") // FIXME Remove bcq
CONFIG(OP_BACKEND_ALLOPS , std::string , "")
CONFIG(OP_BACKEND_MAP , std::string , "")
CONFIG(DISABLE_COMPILE , bool , "0")
CONFIG(TRACE_FILEPATH , std::string , "")
CONFIG(FP16_ENABLE , bool , "0")
CONFIG(RUY_THREADS , int , "-1")
+CONFIG(XNNPACK_THREADS , int , "-1")
CONFIG(USE_MMAPED_DATA , bool , "0")
// Auto-generate all operations
{
void config_source(std::unique_ptr<IConfigSource> &&source);
+void config_source_ext(std::unique_ptr<IConfigSource> &&source);
bool toBool(const std::string &val);
int toInt(const std::string &val);
// Define shape calculation for operations. List them in alphabetic order.
-ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank);
+ir::Shape inferArgMinMaxShape(const ir::Shape &input_shape, int axis, int rank);
ir::Shape inferBatchMatMulShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape,
const ir::operation::BatchMatMul::Param &param);
ir::Shape inferExpandDimsShape(const ir::Shape &in_shape, int32_t axis);
-ir::Shape inferFillShape(const ir::Shape &in_shape, const int32_t *in_buf);
+template <typename T> ir::Shape inferFillShape(const ir::Shape &fill_shape, const T *shape_buf);
ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape);
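A minimal sketch of calling the newly templated inferFillShape above, since a Fill op's shape input may hold either int32 or int64 data; it assumes the call is made from the same namespace as these declarations, that ir::Shape is list-initializable from its dimensions, and the operand contents are hypothetical:
// Sketch only: the shape operand is a rank-1 tensor; its element type picks the instantiation.
const int64_t shape_buf[] = {2, 3};               // hypothetical constant data of the shape input
ir::Shape shape_of_shape_input{2};                // the shape input itself holds 2 elements
ir::Shape out = inferFillShape(shape_of_shape_input, shape_buf); // T deduced as int64_t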
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_TRACING_CTX_H__
+#define __ONERT_UTIL_TRACING_CTX_H__
+
+#include "ir/Graph.h"
+#include "ir/Index.h"
+#include "ir/Subgraphs.h"
+
+#include <unordered_map>
+#include <mutex>
+
+namespace onert
+{
+namespace util
+{
+
+/**
+ * @brief Class to maintain information about profiling per session
+ */
+class TracingCtx
+{
+public:
+ /**
+ * @brief Create and store a unique session id managed by this class
+ * Note that this constructor may be called by multiple sessions running in parallel.
+ * Use this constructor only when there is only one subgraph in a model.
+ */
+ TracingCtx(const ir::Graph *primary_subgraph)
+ {
+ decideSessionID();
+ _subgraph_indices.emplace(primary_subgraph, 0);
+ }
+
+ /**
+ * @brief Create and store a unique session id managed by this class
+ * Note that this constructor may be called by multiple sessions running in parallel.
+ */
+ TracingCtx(const onert::ir::Subgraphs *subgraphs)
+ {
+ assert(subgraphs);
+
+ decideSessionID();
+
+ auto count = subgraphs->count();
+ for (size_t i = 0; i < count; i++)
+ _subgraph_indices.emplace(subgraphs->at(onert::ir::SubgraphIndex(i)).get(), i);
+ }
+
+ uint32_t getSessionId() const { return _session_id; }
+
+ /**
+ * @brief Set subgraph index of a graph
+ */
+ void setSubgraphIndex(const ir::Graph *g, uint32_t index) { _subgraph_indices.emplace(g, index); }
+
+ /**
+ * @brief Get subgraph index of a graph.
+ */
+ ir::SubgraphIndex getSubgraphIndex(const ir::Graph *g) const { return _subgraph_indices.at(g); }
+
+private:
+ void decideSessionID()
+ {
+ std::unique_lock<std::mutex> lock{_session_id_mutex};
+
+ static uint32_t next_session_id = 0;
+ _session_id = next_session_id++;
+ }
+
+private:
+ std::unordered_map<const ir::Graph *, ir::SubgraphIndex> _subgraph_indices;
+ uint32_t _session_id;
+ static std::mutex _session_id_mutex;
+};
+
+} // namespace util
+} // namespace onert
+
+#endif // __ONERT_UTIL_TRACING_CTX_H__
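A minimal usage sketch for TracingCtx, illustrative only; `subgraphs` stands for an already-populated std::shared_ptr<onert::ir::Subgraphs> owned by the caller:
// One TracingCtx per session; session ids stay unique even for concurrent sessions.
onert::util::TracingCtx tracing_ctx{subgraphs.get()};
uint32_t session_id = tracing_ctx.getSessionId();
auto primary = subgraphs->at(onert::ir::SubgraphIndex{0}).get();
auto subg_index = tracing_ctx.getSubgraphIndex(primary); // SubgraphIndex 0 for the primary graph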
if (::onert::util::logging::ctx.enabled()) \
std::cout << "[" << __func__ << "] "
+#define WHEN_LOG_ENABLED(METHOD) \
+ if (::onert::util::logging::ctx.enabled()) \
+ do \
+ { \
+ METHOD; \
+ } while (0)
+
#endif // __ONERT_UTIL_LOGGING_H__
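An illustrative use of the new WHEN_LOG_ENABLED macro; `dumpOperands` is a hypothetical helper that is only worth running when logging is switched on:
// The wrapped statement is evaluated only when onert::util::logging::ctx is enabled.
WHEN_LOG_ENABLED(dumpOperands());
This keeps potentially expensive dump or trace helpers out of the non-logging path.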
#include "backend/BackendContext.h"
#include "ir/Operation.h"
-#include "backend/IConstantInitializer.h"
namespace onert
{
_operand_list = operand_list;
}
-void BackendContext::initConsts()
-{
- for (auto &op : _operation_list)
- {
- constant_initializer->setLayout(op.layout);
- _graph->operations().at(op.index).accept(*constant_initializer);
- }
-
- for (auto ind : _operand_list)
- {
- const auto &obj = _graph->operands().at(ind);
- if (obj.isConstant() && !constant_initializer->exist(ind))
- {
- constant_initializer->registerDefaultInitializer(ind, obj);
- }
- }
-
- constant_initializer->run();
-}
-
} // namespace backend
} // namespace onert
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb->dynamicTensorManager(), tr,
context->external_context());
- context->tensor_register = nullptr;
- context->optimizer = nullptr;
return context;
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "KernelGenerator.h"
+#include "backend/cpu_common/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace controlflow
+{
+
+void BackendContext::initConsts()
+{
+ for (auto &op : operation_list())
+ {
+ constant_initializer->setLayout(op.layout);
+ graph()->operations().at(op.index).accept(*constant_initializer);
+ }
+
+ for (auto ind : operand_list())
+ {
+ const auto &obj = graph()->operands().at(ind);
+ if (obj.isConstant() && !constant_initializer->exist(ind))
+ {
+ constant_initializer->registerDefaultInitializer(ind, obj);
+ }
+ }
+
+ constant_initializer->run();
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info)
+{
+ auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+ ir::Remove::DUPLICATED;
+ for (auto index : operand_list())
+ {
+ if (model_io.contains(index))
+ continue;
+ const auto &obj = graph()->operands().at(index);
+ const auto frontend_layout = [&]() {
+ if (obj.getUses().size() == 0)
+ return ir::Layout::UNKNOWN;
+ auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
+ for (auto &operation_info : operation_list())
+ {
+ if (operation_info.index == use_op_ind)
+ return operation_info.layout;
+ }
+ return ir::Layout::UNKNOWN;
+ }();
+ const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
+ if (permute_factor.backend() != backend())
+ continue;
+ const auto backend_layout = permute_factor.layout();
+ ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+ obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+ tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+ }
+
+ // TODO Get compiler options from the compiler and use them rather than reading them from Env
+ if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+ {
+ cpu_common::planTensors(*this, order, op_seqs, lower_info);
+ }
+ else
+ {
+ // For executors that do not have a fixed linear execution order:
+ // as a workaround, use the static memory planner so that tensors are never deallocated
+ for (auto ind : operand_list())
+ {
+ if (tensor_builder->isRegistered(ind))
+ tensor_builder->notifyFirstUse(ind);
+ }
+ }
+
+ tensor_builder->prepare();
+
+ return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs)
+{
+ FunctionMap ret;
+
+ for (auto op_seq_ind : order)
+ {
+ const auto &op_seq = op_seqs.at(op_seq_ind);
+ bool assigned = [&]() {
+ for (auto op_info : operation_list())
+ if (op_seq.exist(op_info.index))
+ return true;
+ return false;
+ }();
+ if (!assigned)
+ continue;
+ auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+ ret.emplace_back(op_seq_ind, std::move(fn_seq));
+ }
+
+ initConsts();
+
+ // NOTE For memory optimization, we want to free some operand data
+ for (auto ind : operand_list())
+ {
+ // TODO Remove const_cast
+ auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+ obj.releaseData();
+ }
+
+ for (auto &it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace controlflow
+} // namespace backend
+} // namespace onert
#define __ONERT_BACKEND_CONTROLFLOW_BACKEND_CONTEXT_H__
#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
#include "ExternalContext.h"
namespace onert
public:
BackendContext(const Backend *backend, const ir::Graph *graph,
std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
- std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
- std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
- std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
- std::shared_ptr<ITensorRegister> tensor_register = nullptr,
- std::shared_ptr<IOptimizer> optimizer = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder,
- constant_initializer, kernel_gen, tensor_register,
- optimizer),
- _external_context(std::make_shared<ExternalContext>())
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_registry),
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}, _external_context(std::make_shared<ExternalContext>())
{
}
+ ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs,
+ const ir::LowerInfoMap &lower_info) override;
+
+ FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs) override;
+
std::shared_ptr<ExternalContext> external_context() { return _external_context; }
private:
+ void initConsts();
+ void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+ const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<ConstantInitializer> constant_initializer;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
// NOTE ruy context has a thread pool, and when multiple ruy contexts are created,
// the thread pool is also created in duplicate
// TODO Create one ruy context for session
#ifndef __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
-#include "TensorRegistry.h"
-
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
+#include <backend/cpu_common/ConstantInitializer.h>
namespace onert
{
namespace controlflow
{
-class ConstantInitializer : public IConstantInitializer
-{
-public:
- ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
- {
- }
-
-private:
- std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
-
-private:
- std::shared_ptr<ITensorRegistry> _tensor_reg;
-};
+using ConstantInitializer = cpu_common::ConstantInitializer;
} // namespace controlflow
} // namespace backend
#ifndef __ONERT_BACKEND_CONTROLFLOW_EXTERNAL_CONTEXT_H__
#define __ONERT_BACKEND_CONTROLFLOW_EXTERNAL_CONTEXT_H__
-#include <backend/IExternalContext.h>
#include <util/ConfigSource.h>
#include <ruy/context.h>
{
// TODO Unify this with cpu::ExternalContext
-class ExternalContext : public IExternalContext
+class ExternalContext
{
public:
ExternalContext() : _ruy_context(std::make_unique<ruy::Context>())
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IOTensor.h"
+
+#include <assert.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace controlflow
+{
+
+IOTensor::IOTensor(const ir::OperandInfo &info, ir::Layout layout)
+ : IPortableTensor{info}, _orig_info{info}, _orig_layout{layout}
+{
+ setUserTensor(nullptr, 0);
+}
+
+void IOTensor::setTensor(IPortableTensor *tensor)
+{
+ assert(tensor);
+ assert(tensor != this);
+ // TODO Handle when layout was changed
+ assert(tensor->layout() == _orig_layout); // Changing layout is not considered yet
+ _user_tensor.reset();
+ _tensor = tensor;
+}
+
+void IOTensor::setUserTensor(uint8_t *buffer, size_t size)
+{
+ _user_tensor = std::make_unique<UserTensor>(_orig_info, _orig_layout, buffer, size);
+ _tensor = _user_tensor.get();
+}
+
+} // namespace controlflow
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__
+#define __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__
+
+#include "backend/IPortableTensor.h"
+#include "UserTensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace controlflow
+{
+
+/**
+ * @brief Tensor object that indirects to the tensor it is pointing to.
+ *
+ * A model I/O tensor can be one of two types.
+ *
+ * 1. @c UserTensor, if it is the primary graph
+ * 2. Any other derivative of @c IPortableTensor from another backend, otherwise
+ *
+ * To support these, this object indirects everything to the actual tensor pointer.
+ * As an exception, if it is a @c UserTensor, this class creates and manages it.
+ */
+class IOTensor : public IPortableTensor
+{
+public:
+ IOTensor(const ir::OperandInfo &info, ir::Layout layout);
+
+public:
+ void setTensor(IPortableTensor *tensor);
+ void setUserTensor(uint8_t *buffer, size_t size);
+ ir::OperandInfo orig_info() const { return _orig_info; }
+ ir::Layout orig_layout() const { return _orig_layout; }
+
+public:
+ uint8_t *buffer() const override { return _tensor->buffer(); }
+ size_t total_size() const override { return _tensor->total_size(); }
+ size_t dimension(size_t index) const override { return _tensor->dimension(index); }
+ size_t num_dimensions() const override { return _tensor->num_dimensions(); }
+ size_t calcOffset(const ir::Coordinates &coords) const override
+ {
+ return _tensor->calcOffset(coords);
+ }
+ ir::Layout layout() const override { return _tensor->layout(); }
+ ir::DataType data_type() const override { return _tensor->data_type(); }
+ float data_scale() const override { return _tensor->data_scale(); }
+ int32_t data_offset() const override { return _tensor->data_offset(); }
+ bool is_dynamic() const override { return _is_dynamic || (_tensor && _tensor->is_dynamic()); }
+ void set_dynamic() override { _is_dynamic = true; }
+ ir::Shape getShape() const override { return _tensor->getShape(); }
+ void setShape(const ir::Shape &shape) override
+ {
+ // Workaround for IPortableTensor holds _info as its member
+ _info.shape(shape);
+ _tensor->setShape(shape);
+ }
+ bool is_constant() const override { return _tensor->is_constant(); }
+ bool applyShape(const ir::Shape &shape) override
+ {
+ // Workaround for IPortableTensor holds _info as its member
+ _info.shape(shape);
+ return _tensor->applyShape(shape);
+ }
+
+private:
+ const ir::OperandInfo _orig_info;
+ const ir::Layout _orig_layout;
+ bool _is_dynamic{false};
+ IPortableTensor *_tensor{nullptr}; //< The actual tensor that is indirected
+ std::unique_ptr<UserTensor> _user_tensor; //< If it is a user tensor, it is managed by this object
+};
+
+} // namespace controlflow
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__
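A sketch of the two binding modes described in the class comment above; `info`, `user_buffer`, `buffer_size` and `other_backend_tensor` are assumed to be supplied by the caller:
// Sketch only, not part of the change.
onert::backend::controlflow::IOTensor io{info, onert::ir::Layout::NHWC};
io.setUserTensor(user_buffer, buffer_size); // primary graph: wrap the user-given buffer
// io.setTensor(other_backend_tensor);      // non-primary graph: forward to another backend's tensor
uint8_t *raw = io.buffer();                 // every accessor indirects to whichever tensor is bound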
namespace controlflow
{
-KernelGenerator::KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager,
+KernelGenerator::KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager,
const std::shared_ptr<TensorRegistry> &tensor_reg,
const std::shared_ptr<ExternalContext> &external_context)
: _graph{graph}, _dyn_tensor_manager{dyn_tensor_manager}, _tensor_reg{tensor_reg},
const auto then_subg_index = node.param().then_subg_index;
const auto else_subg_index = node.param().else_subg_index;
- std::vector<backend::ITensor *> input_tensors;
+ std::vector<backend::IPortableTensor *> input_tensors;
for (const auto input_index : node.getInputs())
{
- auto input_tensor = getTensor(input_index);
-
+ auto input_tensor = getPortableTensor(input_index);
input_tensors.emplace_back(input_tensor);
}
- std::vector<backend::ITensor *> output_tensors;
+ std::vector<backend::IPortableTensor *> output_tensors;
for (const auto output_index : node.getOutputs())
{
- auto output_tensor = getTensor(output_index);
+ auto output_tensor = getPortableTensor(output_index);
output_tensors.emplace_back(output_tensor);
}
const auto cond_tensor = input_tensors.front();
input_tensors.erase(input_tensors.begin());
auto fn = std::make_unique<::onert::backend::controlflow::kernel::IfLayer>(
- cond_tensor, input_tensors, output_tensors, node.getOutputs(), _graph, then_subg_index,
- else_subg_index, _executor_map, _external_context);
+ cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executor_map,
+ _external_context);
_return_fn = std::move(fn);
}
// This op does not support input as a constant, because controlflow backend does not have
// TensorBuilder
- std::vector<backend::ITensor *> input_tensors;
+ std::vector<backend::IPortableTensor *> input_tensors;
for (const auto input_index : node.getInputs())
{
- auto input_tensor = getTensor(input_index);
-
+ auto input_tensor = getPortableTensor(input_index);
input_tensors.emplace_back(input_tensor);
}
- std::vector<backend::ITensor *> output_tensors;
+ std::vector<backend::IPortableTensor *> output_tensors;
for (const auto output_index : node.getOutputs())
{
- auto output_tensor = getTensor(output_index);
+ auto output_tensor = getPortableTensor(output_index);
output_tensors.emplace_back(output_tensor);
}
// WhileLayer just sets ExecutorMap instead of cond and body executors to avoid the complexity of
// creating executors recursively
auto fn = std::make_unique<::onert::backend::controlflow::kernel::WhileLayer>(
- input_tensors, output_tensors, node.getOutputs(), _graph, cond_subg_index, body_subg_index,
- _executor_map, _external_context);
+ input_tensors, output_tensors, cond_subg_index, body_subg_index, _executor_map,
+ _dyn_tensor_manager->dynamic_mem_mgr().get(), _external_context);
_return_fn = std::move(fn);
}
backend::ITensor *KernelGenerator::getTensor(const ir::OperandIndex &index)
{
- backend::ITensor *ret = _tensor_registries.getITensor(index);
+ // Get a tensor from all tensor registries (needed for the Permute op)
+ auto ret = _tensor_registries.getITensor(index);
+ assert(ret != nullptr);
+ return ret;
+}
+
+backend::IPortableTensor *KernelGenerator::getPortableTensor(const ir::OperandIndex &index)
+{
+ auto ret = _tensor_reg->getPortableTensor(index);
assert(ret != nullptr);
return ret;
}
#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__
-#include <backend/IKernelGenerator.h>
-#include <backend/ITensorBuilder.h>
#include <exec/IExecutor.h>
#include "ExternalContext.h"
#include <ir/Graph.h>
#include "TensorBuilder.h"
#include "compiler/TensorRegistries.h"
+#include "backend/cpu_common/KernelGeneratorBase.h"
#include "TensorRegistry.h"
namespace onert
namespace controlflow
{
-class KernelGenerator : public IKernelGenerator
+class KernelGenerator : public cpu_common::KernelGeneratorBase
{
public:
- KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager,
+ KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager,
const std::shared_ptr<TensorRegistry> &tensor_reg,
const std::shared_ptr<ExternalContext> &external_context);
_executor_map = executor_map.get();
}
- using IKernelGenerator::visit;
-
void visit(const ir::OpSequence &) override;
void visit(const ir::operation::If &) override;
void visit(const ir::operation::Permute &) override;
private:
backend::ITensor *getTensor(const ir::OperandIndex &index);
+ backend::IPortableTensor *getPortableTensor(const ir::OperandIndex &index);
private:
const ir::Graph &_graph;
- IDynamicTensorManager *_dyn_tensor_manager;
+ DynamicTensorManager *_dyn_tensor_manager;
std::shared_ptr<TensorRegistry> _tensor_reg;
compiler::TensorRegistries _tensor_registries;
exec::ExecutorMap *_executor_map;
{
using Tensor = cpu_common::Tensor;
+using ExternalTensor = cpu_common::ExternalTensor;
} // namespace controlflow
} // namespace backend
TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
: _tensor_reg{tensor_reg},
_dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg->base_reg())},
- _static_tensor_mgr{new cpu_common::StaticTensorManager(
- _tensor_reg->base_reg(), _dynamic_tensor_mgr->dynamic_mem_mgr().get())}
+ _static_tensor_mgr{
+ new cpu_common::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())}
{
/* empty */
}
return _tensor_info_map.find(ind) != _tensor_info_map.end();
}
-void TensorBuilder::prepare(void)
-{
- _static_tensor_mgr->allocateConsts();
- _static_tensor_mgr->allocateNonconsts();
-}
+void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
void TensorBuilder::allocate()
{
// This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
}
-IDynamicTensorManager *TensorBuilder::dynamicTensorManager(void)
+DynamicTensorManager *TensorBuilder::dynamicTensorManager(void)
{
return _dynamic_tensor_mgr.get();
}
#include <backend/cpu_common/TensorRegistry.h>
#include <backend/cpu_common/Tensor.h>
-#include <backend/ITensorBuilder.h>
#include <ir/OperandIndexMap.h>
#include <unordered_map>
namespace controlflow
{
-class TensorBuilder : public ITensorBuilder
+class TensorBuilder
{
public:
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
* @param[in] layout Operand data layout
*/
void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout) override;
+ ir::Layout backend_layout);
- void notifyFirstUse(const ir::OperandIndex &) override;
- void notifyLastUse(const ir::OperandIndex &) override;
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
- bool isRegistered(const ir::OperandIndex &) const override;
+ bool isRegistered(const ir::OperandIndex &) const;
- void prepare(void) override;
- void allocate() override;
- void postFunctionPrepare() override { /* DO NOTHING */}
+ void prepare(void);
+ void allocate();
+ void postFunctionPrepare() { /* DO NOTHING */}
- IDynamicTensorManager *dynamicTensorManager(void) override;
+ DynamicTensorManager *dynamicTensorManager(void);
/**
* @brief Get tensor with a specific OperandIndex.
#include "backend/cpu_common/TensorRegistry.h"
#include "backend/ITensorRegistry.h"
#include "Tensor.h"
-#include "UserTensor.h"
+#include "IOTensor.h"
#include <assert.h>
namespace onert
* This class contains three types of tensors. Two native tensors(tensors that are managed by this
* backend) and the other is migrant tensor.
*
- * - NativeUserTensor - @c UserTensor managed by this backend, buffer is user-given
- * - NativeOwnTensor - @c cpu_common::Tensor managed by this backend ( in @c _base_reg )
- * - MigrantTensor - @c IPortableTensor managed by other backends ( in @c _base_reg )
+ * - NativeIOTensor - @c IOTensor managed by this backend ( in @c _native_io_tensors )
+ * - NOTE The tensor it actually points to can be from another backend
+ * - NativeOwnTensor - @c cpu_common::Tensor managed by this backend ( in @c _base_reg )
+ * - MigrantTensor - @c IPortableTensor managed by other backends
*
* @note @c _base_reg is used in implementation to reuse @c cpu_common::StaticTensorManager
*
auto base_tensor = _base_reg->getITensor(ind);
if (base_tensor)
return base_tensor;
- return getNativeUserTensor(ind);
+ return getNativeIOTensor(ind);
}
ITensor *getNativeITensor(const ir::OperandIndex &ind) override
auto base_tensor = _base_reg->getNativeITensor(ind);
if (base_tensor)
return base_tensor;
- return getNativeUserTensor(ind);
+ return getNativeIOTensor(ind);
}
IPortableTensor *getPortableTensor(const ir::OperandIndex &ind)
auto base_tensor = _base_reg->getPortableTensor(ind);
if (base_tensor)
return base_tensor;
- return getNativeUserTensor(ind);
+ return getNativeIOTensor(ind);
}
IPortableTensor *getNativeTensor(const ir::OperandIndex &ind)
auto base_tensor = _base_reg->getNativeTensor(ind);
if (base_tensor)
return base_tensor;
- return getNativeUserTensor(ind);
+ return getNativeIOTensor(ind);
}
Tensor *getNativeOwnTensor(const ir::OperandIndex &ind)
return _base_reg->getNativeTensor(ind);
}
- UserTensor *getNativeUserTensor(const ir::OperandIndex &ind)
+ IOTensor *getNativeIOTensor(const ir::OperandIndex &ind)
{
- auto tensor = _native_user_tensors.find(ind);
- if (tensor != _native_user_tensors.end())
+ auto tensor = _native_io_tensors.find(ind);
+ if (tensor != _native_io_tensors.end())
return tensor->second.get();
return nullptr;
}
_base_reg->setNativeTensor(ind, std::move(tensor));
}
- void setNativeUserTensor(ir::OperandIndex ind, std::unique_ptr<UserTensor> &&tensor)
+ void setNativeIOTensor(ir::OperandIndex ind, std::unique_ptr<IOTensor> &&tensor)
{
assert(tensor);
assert(!getITensor(ind)); // For the ind, tensor is not registered yet
- _native_user_tensors[ind] = std::move(tensor);
+ _native_io_tensors[ind] = std::move(tensor);
}
- const ir::OperandIndexMap<std::unique_ptr<UserTensor>> &native_user_tensors()
+ const ir::OperandIndexMap<std::unique_ptr<IOTensor>> &native_io_tensors()
{
- return _native_user_tensors;
+ return _native_io_tensors;
}
std::shared_ptr<cpu_common::TensorRegistry> base_reg() { return _base_reg; }
private:
std::shared_ptr<cpu_common::TensorRegistry> _base_reg;
- ir::OperandIndexMap<std::unique_ptr<UserTensor>> _native_user_tensors;
+ ir::OperandIndexMap<std::unique_ptr<IOTensor>> _native_io_tensors;
};
} // namespace controlflow
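A short sketch of the lookup fallback described in the registry comment above; `reg`, `ind` and `io_tensor` are hypothetical:
// Model I/O slots live in _native_io_tensors; everything else stays in _base_reg.
reg.setNativeIOTensor(ind, std::move(io_tensor));
auto *generic = reg.getITensor(ind);         // checks _base_reg first, then falls back to the IO tensor
auto *portable = reg.getPortableTensor(ind); // same fallback, typed as IPortableTensor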
#include <backend/ITensor.h>
#include "exec/ExecutorBase.h"
-#include <misc/polymorphic_downcast.h>
#include "PermuteLayer.h"
namespace onert
namespace kernel
{
-IfLayer::IfLayer(backend::ITensor *cond_tensor, const std::vector<backend::ITensor *> input_tensors,
- const std::vector<backend::ITensor *> output_tensors,
- const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
+IfLayer::IfLayer(backend::IPortableTensor *cond_tensor,
+ const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
exec::ExecutorMap *executor_map,
const std::shared_ptr<ExternalContext> &external_context)
: _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors},
- _output_indices{output_indices}, _graph{graph}, _then_subg_index{then_subg_index},
- _else_subg_index{else_subg_index}, _executor_map{executor_map},
- _external_context{external_context}
+ _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index},
+ _executor_map{executor_map}, _external_context{external_context}
{
// At this point, executor_map may not have executors of then subg and else subg
}
{
// Check condition
// // If true
- // // // Copy _input_tensors -> then subg's inputs
- // // // Run then subg
- // // // Copy outputs of then subg -> _output_tensors
+ // // // Set _input_tensors -> then-subg's inputs
+ // // // Set outputs of then-subg -> _output_tensors
+ // // // Run then-subg
// // Else
- // // // Copy _input_tensors -> else subg's inputs if false
- // // // Run else subg
- // // // Copy outputs of else subg -> _output_tensors
- auto getResultCond = [](backend::ITensor *tensor) -> bool {
+ // // // Set _input_tensors -> else-subg's inputs
+ // // // Set outputs of else-subg -> _output_tensors
+ // // // Run else-subg
+
+ auto getResultCond = [](backend::IPortableTensor *tensor) -> bool {
bool ret = false;
tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
return ret;
};
- exec::ExecutorBase *subg_exec = nullptr;
+ exec::IExecutor *subg_exec = nullptr;
bool cond_result = getResultCond(_cond_tensor);
if (cond_result)
{
VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl;
- subg_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>(
- _executor_map->at(_then_subg_index).get());
+ subg_exec = _executor_map->at(_then_subg_index).get();
}
else
{
VERBOSE(If) << "Call to $" << _else_subg_index << " (else)" << std::endl;
- subg_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>(
- _executor_map->at(_else_subg_index).get());
- }
-
- const auto &subg_graph = subg_exec->graph();
-
- std::vector<backend::ITensor *> src_tensors;
- std::vector<backend::ITensor *> dst_tensors;
- // Add tensors used in subgraph or contained in outputs of subgraph
- assert(subg_graph.getInputs().size() == _input_tensors.size());
- assert(subg_graph.getInputs().size() == subg_exec->getInputTensors().size());
- for (uint32_t i = 0; i < subg_graph.getInputs().size(); ++i)
- {
- const auto &subg_input_index = subg_graph.getInputs().at(i);
- const auto &subg_input = subg_graph.operands().at(subg_input_index);
- if (subg_input.getUses().size() > 0 || subg_graph.getOutputs().contains(subg_input_index))
- {
- src_tensors.emplace_back(_input_tensors.at(i));
- dst_tensors.emplace_back(subg_exec->getInputTensors().at(i));
- }
+ subg_exec = _executor_map->at(_else_subg_index).get();
}
- const auto permute_op_input_to_subg_input =
- std::make_shared<PermuteLayer>(src_tensors, dst_tensors, _external_context);
-
- // Add tensors used as output of operation or contained in outputs of operation
- src_tensors.clear();
- dst_tensors.clear();
- assert(_output_indices.size() == subg_exec->getOutputTensors().size());
- assert(_output_indices.size() == _output_tensors.size());
- for (uint32_t i = 0; i < _output_indices.size(); ++i)
- {
- const auto &output_index = _output_indices.at(i);
- const auto &output = _graph.operands().at(output_index);
- if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index))
- {
- src_tensors.emplace_back(subg_exec->getOutputTensors().at(i));
- dst_tensors.emplace_back(_output_tensors.at(i));
- }
- }
- const auto permute_subg_output_to_op_output =
- std::make_shared<PermuteLayer>(src_tensors, dst_tensors, _external_context);
-
- // Remove copying of unused tensor
- permute_op_input_to_subg_input->prepare();
- permute_subg_output_to_op_output->prepare();
- // Copy & run
- subg_exec->execute(_input_tensors, permute_op_input_to_subg_input);
- permute_subg_output_to_op_output->run();
+ subg_exec->execute(_input_tensors, _output_tensors);
VERBOSE(If) << "Return from $" << (cond_result ? _then_subg_index : _else_subg_index)
<< std::endl;
}
#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__
#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__
-#include <backend/ITensor.h>
+#include <backend/IPortableTensor.h>
#include <exec/IExecutor.h>
#include "../ExternalContext.h"
class IfLayer : public ::onert::exec::IFunction
{
public:
- IfLayer(backend::ITensor *cond_tensor, const std::vector<backend::ITensor *> input_tensors,
- const std::vector<backend::ITensor *> output_tensors,
- const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
+ IfLayer(backend::IPortableTensor *cond_tensor,
+ const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
exec::ExecutorMap *executor_map,
const std::shared_ptr<ExternalContext> &external_context);
void run() override;
private:
- backend::ITensor *_cond_tensor;
- const std::vector<backend::ITensor *> _input_tensors;
- const std::vector<backend::ITensor *> _output_tensors;
- const ir::OperandIndexSequence &_output_indices;
- const ir::Graph &_graph;
+ backend::IPortableTensor *_cond_tensor;
+ const std::vector<backend::IPortableTensor *> _input_tensors;
+ const std::vector<backend::IPortableTensor *> _output_tensors;
const ir::SubgraphIndex _then_subg_index;
const ir::SubgraphIndex _else_subg_index;
exec::ExecutorMap *_executor_map;
#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__
#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__
-#include "backend/ITensorBuilder.h"
#include "exec/IPermuteFunction.h"
#include "exec/IExecutor.h"
#include "../ExternalContext.h"
#include "WhileLayer.h"
+#include <algorithm>
#include <backend/ITensor.h>
#include "exec/ExecutorBase.h"
#include <misc/polymorphic_downcast.h>
namespace kernel
{
-WhileLayer::WhileLayer(const std::vector<backend::ITensor *> input_tensors,
- const std::vector<backend::ITensor *> output_tensors,
- const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
+WhileLayer::WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &cond_subg_index,
const ir::SubgraphIndex &body_subg_index, exec::ExecutorMap *executor_map,
+ cpu_common::DynamicMemoryManager *dyn_memory_manager,
const std::shared_ptr<ExternalContext> &external_context)
: _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index},
- _output_indices{output_indices}, _graph{graph}, _input_tensors{input_tensors},
- _output_tensors{output_tensors}, _executor_map{executor_map},
- _external_context{external_context}
+ _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executor_map{executor_map},
+ _dyn_memory_manager{dyn_memory_manager}, _external_context{external_context}
{
// At this point, executor_map may not have executors of cond subg and body subg
}
// // Run cond subg
// If there is no loop, copy "_input_tensors" -> "_dst_tensors", else copy "cond subg inputs" ->
// "_dst_tensors"
- auto cond_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>(
- _executor_map->at(_cond_subg_index).get());
- auto body_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>(
- _executor_map->at(_body_subg_index).get());
-
- const auto &cond_graph = cond_exec->graph();
- const auto &body_graph = body_exec->graph();
-
- std::vector<backend::ITensor *> input_tensors;
- std::vector<backend::ITensor *> cond_input_tensors;
- std::vector<backend::ITensor *> body_input_tensors;
- std::vector<backend::ITensor *> body_output_tensors;
- std::vector<backend::ITensor *> output_tensors;
-
- // Add only used tensors in cond subgraph
- assert(cond_graph.getInputs().size() == _input_tensors.size());
- assert(cond_graph.getInputs().size() == cond_exec->getInputTensors().size());
- for (uint32_t i = 0; i < cond_graph.getInputs().size(); ++i)
- {
- const auto &cond_input = cond_graph.operands().at(cond_graph.getInputs().at(i));
- if (cond_input.getUses().size() > 0)
- {
- input_tensors.emplace_back(_input_tensors.at(i));
- cond_input_tensors.emplace_back(cond_exec->getInputTensors().at(i));
- }
- }
- const auto permute_op_input_to_cond_input =
- std::make_shared<PermuteLayer>(input_tensors, cond_input_tensors, _external_context);
-
- // Add only used tensors among outputs of while operation
- assert(_output_indices.size() == _input_tensors.size());
- assert(_output_indices.size() == _output_tensors.size());
- input_tensors.clear();
- output_tensors.clear();
- for (size_t i = 0; i < _output_indices.size(); ++i)
- {
- const auto &output_index = _output_indices.at(i);
- const auto &output = _graph.operands().at(output_index);
- if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index))
- {
- input_tensors.emplace_back(_input_tensors.at(i));
- output_tensors.emplace_back(_output_tensors.at(i));
- }
- }
- const auto permute_op_input_to_op_output =
- std::make_shared<PermuteLayer>(input_tensors, output_tensors, _external_context);
-
- // Add all tensors with unused tensors in body subgraph because unused input tensors will be
- // copied output tensors in body subgraph
- assert(_input_tensors.size() == body_exec->getInputTensors().size());
- input_tensors = _input_tensors;
- body_input_tensors = body_exec->getInputTensors();
- const auto permute_op_input_to_body_input =
- std::make_shared<PermuteLayer>(input_tensors, body_input_tensors, _external_context);
-
- // Add only used tensors in cond subgraph
- assert(cond_graph.getInputs().size() == body_exec->getOutputTensors().size());
- assert(cond_graph.getInputs().size() == cond_exec->getInputTensors().size());
- body_output_tensors.clear();
- cond_input_tensors.clear();
- for (uint32_t i = 0; i < cond_graph.getInputs().size(); ++i)
- {
- const auto &cond_input = cond_graph.operands().at(cond_graph.getInputs().at(i));
- if (cond_input.getUses().size() > 0)
- {
- body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i));
- cond_input_tensors.emplace_back(cond_exec->getInputTensors().at(i));
- }
- }
- const auto permute_body_output_to_cond_input =
- std::make_shared<PermuteLayer>(body_output_tensors, cond_input_tensors, _external_context);
-
- // Add only used tensors in body subgraph
- assert(body_graph.getInputs().size() == body_exec->getOutputTensors().size());
- assert(body_graph.getInputs().size() == body_exec->getInputTensors().size());
- body_output_tensors.clear();
- body_input_tensors.clear();
- for (uint32_t i = 0; i < body_graph.getInputs().size(); ++i)
- {
- const auto &body_input_index = body_graph.getInputs().at(i);
- const auto &body_input = body_graph.operands().at(body_input_index);
- if (body_input.getUses().size() > 0 &&
- !body_exec->graph().getOutputs().contains(body_input_index))
- {
- body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i));
- body_input_tensors.emplace_back(body_exec->getInputTensors().at(i));
- }
- }
- const auto permute_body_output_to_body_input =
- std::make_shared<PermuteLayer>(body_output_tensors, body_input_tensors, _external_context);
-
- // Add only used tensors among outputs of while operation
- assert(_output_indices.size() == body_exec->getOutputTensors().size());
- assert(_output_indices.size() == _output_tensors.size());
- body_output_tensors.clear();
- output_tensors.clear();
- for (size_t i = 0; i < _output_indices.size(); ++i)
- {
- const auto &output_index = _output_indices.at(i);
- const auto &output = _graph.operands().at(output_index);
- if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index))
- {
- body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i));
- output_tensors.emplace_back(_output_tensors.at(i));
- }
- }
- const auto permute_body_output_to_op_output =
- std::make_shared<PermuteLayer>(body_output_tensors, output_tensors, _external_context);
+ auto cond_exec = _executor_map->at(_cond_subg_index).get();
+ auto body_exec = _executor_map->at(_body_subg_index).get();
- // Remove copying of unused tensor
- permute_op_input_to_cond_input->prepare();
- permute_op_input_to_op_output->prepare();
- permute_op_input_to_body_input->prepare();
- permute_body_output_to_cond_input->prepare();
- permute_body_output_to_body_input->prepare();
- permute_body_output_to_op_output->prepare();
+ // Need a temp tensor to hold the cond subgraph output
+ assert(cond_exec->getOutputTensors().size() == 1);
+ auto cond_output_tensor = [&]() {
+ auto cond_output = cond_exec->getOutputTensors().at(0);
+ auto tensor = std::make_unique<Tensor>(cond_output->orig_info(), cond_output->orig_layout(),
+ _dyn_memory_manager);
+ tensor->set_dynamic();
+ tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size()));
+ return tensor;
+ }();
VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl;
- cond_exec->execute(_input_tensors, permute_op_input_to_cond_input);
+ cond_exec->execute(_input_tensors, {cond_output_tensor.get()});
VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl;
- assert(cond_exec->getOutputTensors().size() == 1);
- auto &cond_output_tensor = cond_exec->getOutputTensors().at(0);
auto getResultCond = [](backend::ITensor *tensor) -> bool {
bool ret = false;
tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
return ret;
};
+ std::vector<ITensor *> op_inputs(_input_tensors.begin(), _input_tensors.end());
+ std::vector<ITensor *> op_outputs(_output_tensors.begin(), _output_tensors.end());
+ // When the loop body never executes, copy the op inputs straight to the op outputs
+ if (!getResultCond(cond_output_tensor.get()))
+ {
+ PermuteLayer copy_body_inputs_to_op_outputs{op_inputs, op_outputs, _external_context};
+ copy_body_inputs_to_op_outputs.run();
+ return;
+ }
+
+ // Need some temp tensors to hold the body subgraph outputs
+ std::vector<std::unique_ptr<Tensor>> temp_outputs_o;
+ std::vector<IPortableTensor *> temp_outputs;
+ for (auto io_tensor : body_exec->getOutputTensors())
+ {
+ auto tensor = std::make_unique<Tensor>(io_tensor->orig_info(), io_tensor->orig_layout(),
+ _dyn_memory_manager);
+ tensor->set_dynamic();
+ tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size()));
+ temp_outputs.push_back(tensor.get());
+ temp_outputs_o.push_back(std::move(tensor));
+ }
+
+ std::vector<ITensor *> body_outputs(temp_outputs.begin(), temp_outputs.end());
+ PermuteLayer copy_body_outputs_to_op_outputs{body_outputs, op_outputs, _external_context};
+
const auto body_execute_with_op_inputs = [&]() {
VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl;
- body_exec->execute(_input_tensors, permute_op_input_to_body_input);
+ body_exec->execute(_input_tensors, temp_outputs);
VERBOSE(While) << "Return from $" << _body_subg_index << std::endl;
};
const auto body_execute_with_body_outputs = [&]() {
VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl;
- body_exec->execute(body_exec->getOutputTensors(), permute_body_output_to_body_input);
+ body_exec->execute(_output_tensors, temp_outputs);
VERBOSE(While) << "Return from $" << _body_subg_index << std::endl;
};
std::function<void()> body_execute = body_execute_with_op_inputs;
const auto cond_execute = [&]() {
VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl;
- cond_exec->execute(body_exec->getOutputTensors(), permute_body_output_to_cond_input);
+ cond_exec->execute(_output_tensors, {cond_output_tensor.get()});
VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl;
};
- auto permute_to_outputs_fn = permute_op_input_to_op_output;
// Loop while Cond subgraph's output is true
- while (getResultCond(cond_output_tensor))
+ while (getResultCond(cond_output_tensor.get()))
{
body_execute();
+ copy_body_outputs_to_op_outputs.run();
cond_execute();
body_execute = body_execute_with_body_outputs;
- permute_to_outputs_fn = permute_body_output_to_op_output;
}
- permute_to_outputs_fn->run();
+
+ // Clean up the temp tensors
+ _dyn_memory_manager->deallocate(cond_output_tensor.get());
+ for (auto tensor : temp_outputs)
+ {
+ _dyn_memory_manager->deallocate(tensor);
+ }
}
} // namespace kernel
#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__
#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__
-#include <backend/ITensor.h>
+#include <backend/IPortableTensor.h>
#include <exec/IExecutor.h>
#include <exec/IFunction.h>
#include <ir/OperandIndexSequence.h>
#include <ir/Graph.h>
#include "../ExternalContext.h"
+#include "backend/cpu_common/MemoryManager.h"
+
namespace onert
{
namespace backend
class WhileLayer : public ::onert::exec::IFunction
{
public:
- WhileLayer(const std::vector<backend::ITensor *> input_tensors,
- const std::vector<backend::ITensor *> output_tensors,
- const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
+ WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index,
- exec::ExecutorMap *executor_map,
+ exec::ExecutorMap *executor_map, cpu_common::DynamicMemoryManager *dyn_memory_manager,
const std::shared_ptr<ExternalContext> &external_context);
public:
private:
const ir::SubgraphIndex _cond_subg_index;
const ir::SubgraphIndex _body_subg_index;
- const ir::OperandIndexSequence &_output_indices;
- const ir::Graph &_graph;
- const std::vector<backend::ITensor *> _input_tensors;
- const std::vector<backend::ITensor *> _output_tensors;
+ const std::vector<backend::IPortableTensor *> _input_tensors;
+ const std::vector<backend::IPortableTensor *> _output_tensors;
exec::ExecutorMap *_executor_map;
+ cpu_common::DynamicMemoryManager *_dyn_memory_manager; // For generating temp tensors
const std::shared_ptr<ExternalContext> _external_context;
};
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/cpu_common/BackendContextHelpers.h"
* limitations under the License.
*/
-#include "ConstantInitializer.h"
-#include "Tensor.h"
+#include "backend/cpu_common/ConstantInitializer.h"
+#include "backend/cpu_common/Tensor.h"
namespace onert
{
namespace backend
{
-namespace cpu
+namespace cpu_common
{
ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
+ : ConstantInitializerBase{operands}, _tensor_reg{tensor_reg}
{
// DO NOTHING
}
};
}
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerExternalInitializer(kernel_index, kernel_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerExternalInitializer(bias_index, bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerExternalInitializer(kernel_index, kernel_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
- const auto &bias_obj = _operands.at(bias_index);
- registerExternalInitializer(bias_index, bias_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
-{
- const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
- const auto &weight_obj = _operands.at(weight_index);
- registerExternalInitializer(weight_index, weight_obj);
-
- const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
- if (!bias_index.undefined())
- {
- const auto &bias_obj = _operands.at(bias_index);
- registerExternalInitializer(bias_index, bias_obj);
- }
-}
-
-} // namespace cpu
+} // namespace cpu_common
} // namespace backend
} // namespace onert
* limitations under the License.
*/
-#include "backend/IConstantInitializer.h"
+#include "backend/cpu_common/ConstantInitializerBase.h"
#include <Half.h>
{
namespace backend
{
+namespace cpu_common
+{
-void IConstantInitializer::registerCopyInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
+void ConstantInitializerBase::registerCopyInitializer(const ir::OperandIndex &index,
+ const ir::Operand &obj)
{
// For only CONSTANTS
// TODO Add to check if tensor has been allocated
}
}
-void IConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
+void ConstantInitializerBase::registerPermuteInitializer(const ir::OperandIndex &index,
+ const ir::Operand &obj)
{
// For only CONSTANTS
// TODO Add to check if tensor has been allocated
switch (type)
{
case DataType::FLOAT32:
- _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_op_seq_layout);
+ _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_layout);
break;
case DataType::INT32:
- _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_op_seq_layout);
+ _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_layout);
break;
case DataType::UINT32:
- _init_map[index] = std::bind(permuteInit<uint32_t>, _1, _2, _current_op_seq_layout);
+ _init_map[index] = std::bind(permuteInit<uint32_t>, _1, _2, _current_layout);
break;
case DataType::BOOL8:
case DataType::QUANT_UINT8_ASYMM:
- _init_map[index] = std::bind(permuteInit<uint8_t>, _1, _2, _current_op_seq_layout);
+ _init_map[index] = std::bind(permuteInit<uint8_t>, _1, _2, _current_layout);
break;
case DataType::QUANT_INT8_SYMM:
case DataType::QUANT_INT8_ASYMM:
- _init_map[index] = std::bind(permuteInit<int8_t>, _1, _2, _current_op_seq_layout);
+ _init_map[index] = std::bind(permuteInit<int8_t>, _1, _2, _current_layout);
break;
case DataType::FLOAT16:
- _init_map[index] = std::bind(permuteInit<float16>, _1, _2, _current_op_seq_layout);
+ _init_map[index] = std::bind(permuteInit<float16>, _1, _2, _current_layout);
break;
case DataType::INT64:
- _init_map[index] = std::bind(permuteInit<int64_t>, _1, _2, _current_op_seq_layout);
+ _init_map[index] = std::bind(permuteInit<int64_t>, _1, _2, _current_layout);
break;
default:
throw std::runtime_error("Not supported, yet");
}
}
+} // namespace cpu_common
} // namespace backend
} // namespace onert
#include "backend/cpu_common/StaticTensorManager.h"
#include "backend/cpu_common/DynamicTensorManager.h"
+#include "backend/cpu_common/Tensor.h"
#include <util/logging.h>
namespace onert
{
StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
- DynamicMemoryManager *dynamic_mem_mgr)
- : _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg},
- _dynamic_mem_mgr{dynamic_mem_mgr}
+ DynamicTensorManager *dynamic_tensor_manager)
+ : _nonconst_mgr{new MemoryManager()}, _tensors{reg},
+ _dynamic_tensor_manager{dynamic_tensor_manager}
{
// DO NOTHING
}
-void StaticTensorManager::allocateConsts(void)
-{
- for (auto &pair : _tensors->native_tensors())
- {
- const auto &ind = pair.first;
- auto tensor = pair.second.get();
- if (_as_constants[ind])
- {
- auto mem_alloc = _const_mgr->allocate(_tensors->getITensor(ind), tensor->total_size());
- tensor->setBuffer(mem_alloc);
- auto buffer = mem_alloc->base();
- VERBOSE(CPU_COMMON_StaticTensorManager) << "CONSTANT TENSOR(#" << ind.value()
- << "): " << static_cast<void *>(buffer)
- << "size : " << tensor->total_size() << std::endl;
- }
- }
-}
-
void StaticTensorManager::allocateNonconsts(void)
{
_nonconst_mgr->allocate();
auto *buffer = _nonconst_mgr->getBuffer(ind);
tensor->setBuffer(buffer);
- VERBOSE(CPU_COMMON_StaticTensorManager) << "TENSOR(#" << ind.value()
- << "): " << static_cast<void *>(buffer) << std::endl;
+ VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value()
+ << "): " << static_cast<void *>(buffer) << std::endl;
}
}
}
-void StaticTensorManager::deallocateConsts(void) { _const_mgr->deallocate(); }
-
void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
bool as_const)
{
assert(!_tensors->getNativeTensor(ind));
- auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout, _dynamic_mem_mgr);
- _tensors->setNativeTensor(ind, std::move(tensor));
+ if (as_const)
+ {
+ auto tensor = std::make_unique<ExternalTensor>(tensor_info, backend_layout);
+ _tensors->setNativeTensor(ind, std::move(tensor));
+ }
+ else
+ {
+ auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout,
+ _dynamic_tensor_manager->dynamic_mem_mgr().get());
+ _tensors->setNativeTensor(ind, std::move(tensor));
+ }
_as_constants[ind] = as_const;
}
} // namespace cpu_common
} // namespace backend
} // namespace onert
+
+// ExternalTensor
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+// `dynamic_cast` does not work across shared library boundaries on NDK
+// With this as a key function, `dynamic_cast` works across dl boundaries
+ExternalTensor::~ExternalTensor() {}
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace onert
return;
}
- // TODO Remove indentation
+ const std::string backend_so = "libbackend_" + backend + SHARED_LIB_EXT;
+ void *handle = dlopen(backend_so.c_str(), RTLD_LAZY | RTLD_LOCAL);
+
+ if (handle == nullptr)
{
- const std::string backend_so = "libbackend_" + backend + SHARED_LIB_EXT;
- void *handle = dlopen(backend_so.c_str(), RTLD_LAZY | RTLD_LOCAL);
+ VERBOSE(BackendManager) << "Failed to load backend '" << backend << "' - " << dlerror() << "\n";
+ return;
+ }
- if (handle == nullptr)
+ VERBOSE(BackendManager) << "Successfully loaded '" << backend << "'(" << backend_so << ")\n";
+
+ {
+ // load object creator function
+ auto backend_create = (backend_create_t)dlsym(handle, "onert_backend_create");
+ if (backend_create == nullptr)
{
- VERBOSE_F() << "Failed to load backend '" << backend << "' - " << dlerror() << std::endl;
+ // TODO replace `fprintf` with `VERBOSE`
+ fprintf(stderr, "BackendManager: unable to find function `onert_backend_create` : %s\n",
+ dlerror());
+ dlclose(handle);
return;
}
- VERBOSE_F() << "Successfully loaded '" << backend << "' - " << backend_so << "\n";
-
+ // load object creator function
+ auto backend_destroy = (backend_destroy_t)dlsym(handle, "onert_backend_destroy");
+ if (backend_destroy == nullptr)
{
- // load object creator function
- auto backend_create = (backend_create_t)dlsym(handle, "onert_backend_create");
- if (backend_create == nullptr)
- {
- fprintf(stderr, "BackendManager: unable to open function onert_backend_create : %s\n",
- dlerror());
- abort();
- }
-
- // load object creator function
- auto backend_destroy = (backend_destroy_t)dlsym(handle, "onert_backend_destroy");
- if (backend_destroy == nullptr)
- {
- fprintf(stderr, "BackendManager: unable to open function onert_backend_destroy : %s\n",
- dlerror());
- abort();
- }
-
- auto backend_object =
- std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy);
- bool initialized = backend_object->config()->initialize(); // Call initialize here?
- if (!initialized)
- {
- VERBOSE_F() << backend.c_str() << " backend initialization failed. Don't use this backend"
- << std::endl;
- dlclose(handle);
- return;
- }
- _gen_map.emplace(backend_object->config()->id(), std::move(backend_object));
+ // TODO replace `fprintf` with `VERBOSE`
+      fprintf(stderr, "BackendManager: unable to find function `onert_backend_destroy` : %s\n",
+ dlerror());
+ dlclose(handle);
+ return;
}
- // Save backend handle (avoid warning by handle lost without dlclose())
- auto u_handle = std::unique_ptr<void, dlhandle_destroy_t>{handle, [](void *h) { dlclose(h); }};
- _handle_map.emplace(backend, std::move(u_handle));
+ auto backend_object =
+ std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy);
+ bool initialized = backend_object->config()->initialize(); // Call initialize here?
+ if (!initialized)
+ {
+ VERBOSE(BackendManager) << backend.c_str()
+ << " backend initialization failed. Don't use this backend"
+ << std::endl;
+ dlclose(handle);
+ return;
+ }
+ _gen_map.emplace(backend_object->config()->id(), std::move(backend_object));
}
+
+  // Keep the backend handle so that it is eventually released with dlclose()
+  // (avoids a warning about the handle being lost without dlclose())
+
+  // NOTE This is a workaround for clang-format 3.9 (it seems not to understand
+  //      "by-copy capture with an initializer")
+ // clang-format off
+ auto u_handle = std::unique_ptr<void, dlhandle_destroy_t>{
+ handle, [id = backend, filename = backend_so](void *h) {
+ if (dlclose(h) == 0)
+ {
+ VERBOSE(BackendManager) << "Successfully unloaded '" << id << "'(" << filename << ")\n";
+ }
+ else
+ {
+ VERBOSE(BackendManager)
+            << "Failed to unload backend '" << id << "' - " << dlerror() << "\n";
+ }
+ }};
+  // clang-format on
+  _handle_map.emplace(backend, std::move(u_handle));
}
backend::Backend *BackendManager::get(const std::string &key)
#include "ir/OperationDumper.h"
#include "misc/string_helpers.h"
+namespace
+{
+
+using namespace onert;
+
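+// Builds a human-readable summary of an opcode-to-backend map,
+// e.g. "If=controlflow, While=controlflow", for the VERBOSE logging below.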
+std::string getOpBackends(std::unordered_map<ir::OpCode, std::string> &opcode_to_backend)
+{
+ std::unordered_map<ir::OpCode, std::string>::iterator it;
+ std::string opbackends;
+
+ for (it = opcode_to_backend.begin(); it != opcode_to_backend.end(); ++it)
+ {
+ if (!opbackends.empty())
+ opbackends = opbackends + ", ";
+
+ auto opcode = it->first;
+ const std::string opname = ir::toString(opcode);
+ opbackends += opname + "=" + it->second;
+ }
+ return opbackends;
+}
+
+} // namespace
+
namespace onert
{
{
CompilerOptions options;
options.backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
- options.is_primary_subgraph = false;
options.trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
options.graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
options.op_seq_max_node = util::getConfigInt(util::config::OP_SEQ_MAX_NODE);
return options;
}
-Compiler::Compiler(const std::shared_ptr<ir::Subgraphs> &subgs)
+Compiler::Compiler(const std::shared_ptr<ir::Subgraphs> &subgs, util::TracingCtx *tracing_ctx)
: _subgraphs{subgs}, _state{State::CREATED}
{
// Set default values for CompilerOptions
// All these default values should not be fetched from Env, when we stop supporting Android NN
// API.
_options = fetchCompilerOptionsFromGlobalConfig(*subgs);
+
+ _options.tracing_ctx = tracing_ctx;
}
void Compiler::enableToFp16() { _options.fp16_enable = true; }
{
// Set control flow backend for control flow operators
{
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] =
- backend::controlflow::Config::ID;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] =
- backend::controlflow::Config::ID;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] =
- backend::controlflow::Config::ID;
+ auto &cfid = backend::controlflow::Config::ID;
+ _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = cfid;
+ _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = cfid;
+ _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = cfid;
}
// FIXME This is a workaround for bcq operations, should remove it
VERBOSE(Compiler) << "graph_dump_level : " << _options.graph_dump_level << std::endl;
VERBOSE(Compiler) << "op_seq_max_node : " << _options.op_seq_max_node << std::endl;
VERBOSE(Compiler) << "executor : " << _options.executor << std::endl;
- VERBOSE(Compiler) << "manual_scheduler_options : (Too many things to print)" << std::endl;
+ VERBOSE(Compiler) << "manual backend_for_all : "
+ << _options.manual_scheduler_options.backend_for_all << std::endl;
+ VERBOSE(Compiler) << "manual_scheduler_options : "
+ << getOpBackends(_options.manual_scheduler_options.opcode_to_backend)
+ << std::endl;
VERBOSE(Compiler) << "he_scheduler : " << _options.he_scheduler << std::endl;
VERBOSE(Compiler) << "he_profiling_mode : " << _options.he_profiling_mode << std::endl;
VERBOSE(Compiler) << "disable_compile : " << _options.disable_compile << std::endl;
// Lower: Assign backend
std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs;
_subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
- _options.is_primary_subgraph = (index == ir::SubgraphIndex{0});
onert::dumper::dot::DotDumper dot_dumper(subg, dump_level);
dot_dumper.dump(nnfw::misc::str("before_lower_subg-", index.value()));
_subgraphs.reset();
+ for (auto &pair : lowered_subgs)
+ {
+ const auto &subg_index = pair.first;
+ auto &lowered_subg = pair.second;
+ onert::dumper::dot::DotDumper dot_dumper_lowered(lowered_subg.get(), dump_level);
+ dot_dumper_lowered.dump("after_lower_subg-" + std::to_string(subg_index.value()));
+ }
+
// Shape inference.
{
const auto primary_subg_idx = ir::SubgraphIndex{0};
auto &lowered_subg = pair.second;
auto indexed_ranks = lowered_subg->indexed_ranks();
- _options.is_primary_subgraph = (subg_index == ir::SubgraphIndex{0});
-
- onert::dumper::dot::DotDumper dot_dumper_lowered(lowered_subg.get(), dump_level);
- dot_dumper_lowered.dump("after_lower_subg-" + std::to_string(subg_index.value()));
-
- ir::OperationDumper dumper("START SUBGRAPH " + std::to_string(subg_index.value()));
+ ir::OperationDumper dumper("Executor generation of Subgraph " +
+ std::to_string(subg_index.value()));
lowered_subg->graph().operations().iterate(
[&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
auto executor = std::unique_ptr<exec::IExecutor>{
#include "ExecutorFactory.h"
+#include <deque>
#include <functional>
#include "exec/ExecutionObservers.h"
#include "exec/LinearExecutor.h"
#include "compiler/ExecutionBuilder.h"
#include "exec/ExecTime.h"
#include "compiler/Linear.h"
-#include "compiler/TensorBuilders.h"
-#include "backend/IConstantInitializer.h"
-#include "backend/IKernelGenerator.h"
-#include "backend/IOptimizer.h"
#include "backend/IPortableTensor.h"
-#include "backend/ITensorRegister.h"
#include "backend/controlflow/Config.h"
#include "backend/controlflow/KernelGenerator.h"
#include "backend/controlflow/UserTensor.h"
#include "backend/controlflow/TensorBuilder.h"
+#include "util/TracingCtx.h"
+
#include <memory>
namespace onert
std::shared_ptr<backend::IConfig> _config;
};
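+// Register a controlflow IOTensor for every given model input/output operand; these act as
+// placeholders that are later bound to the user-provided I/O buffers at execution time.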
+void initializeSubgraphIOTensors(compiler::LoweredGraph &lowered_graph,
+ const ir::OperandIndexSequence &indices)
+{
+ // TODO Store controlflow backend in BackendContext
+ std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg;
+ for (const auto &e : lowered_graph.backend_contexts())
+ {
+ auto backend = e.first;
+ auto &context = e.second;
+ if (backend->config()->id() == backend::controlflow::Config::ID)
+ {
+ cf_tensor_reg =
+ std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry);
+ }
+ }
+ assert(cf_tensor_reg);
+
+ for (auto ind : indices)
+ {
+ const auto &operand = lowered_graph.graph().operands().at(ind);
+ auto tensor = std::make_unique<backend::controlflow::IOTensor>(
+ operand.info(),
+ ir::Layout::NHWC /* FIXME find op_seq for this operand and use frontend_layout */
+ );
+
+ // Add tensor to controlflow TensorRegistry.
+ cf_tensor_reg->setNativeIOTensor(ind, std::move(tensor));
+ }
+}
+
} // namespace
} // namespace onert
}
}
-void ExecutorFactory::runTensorRegistration(compiler::LoweredGraph *lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order)
-{
- for (const auto index : order)
- {
- const auto &op_seq = lowered_graph->op_seqs().at(index);
- const auto backend = lowered_graph->getLowerInfo(index)->backend();
- const auto tensor_register = lowered_graph->backend_contexts().at(backend)->tensor_register;
- auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
- auto model_io = lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs();
-
- if (tensor_register)
- {
- // Custom registration
- tensor_register->registerTensors(op_seq, lowered_graph->getLowerInfo());
- }
- else
- {
- // Default registration
- for (const auto op_idx : op_seq)
- {
- const auto &op = lowered_graph->graph().operations().at(op_idx);
- for (const auto &index :
- (op.getInputs() | ir::Remove::UNDEFINED) + (op.getOutputs() | ir::Remove::UNDEFINED))
- {
- if (!tensor_builder->isRegistered(index) && !model_io.contains(index))
- {
- const auto &operand_lower_info =
- lowered_graph->getLowerInfo(index)->def_factors().getOnlyElement();
-
- // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl)
- // op.getOutputs() of permute (CPU) returns tensor A
- // but tensor A belongs to the backend of acl_cl.
- // So, we have to make this tensor NOT registered for CPU.
- if (operand_lower_info.backend() != backend)
- continue;
-
- const auto &obj = lowered_graph->graph().operands().at(index);
- const auto frontend_layout = op_seq.getLayout();
- const auto backend_layout = operand_lower_info.layout();
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(),
- obj.isConstant()};
- tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
- }
- }
- }
- }
- }
-}
-
-std::vector<backend::ITensor *>
-ExecutorFactory::initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
- const ir::OperandIndexSequence &indices)
-{
- std::vector<backend::ITensor *> ret;
-
- // TODO Store controlflow backend in BackendContext
- std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder;
- std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg;
- for (const auto &e : lowered_graph.backend_contexts())
- {
- auto backend = e.first;
- auto &context = e.second;
- if (backend->config()->id() == backend::controlflow::Config::ID)
- {
- cf_tensor_builder =
- std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>(context->tensor_builder);
- cf_tensor_reg =
- std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry);
- }
- }
- assert(cf_tensor_builder);
- assert(cf_tensor_reg);
-
- for (auto ind : indices)
- {
- const auto &operand = lowered_graph.graph().operands().at(ind);
- auto tensor = std::make_unique<backend::controlflow::UserTensor>(
- operand.info(),
- ir::Layout::NHWC /* FIXME find op_seq for this operand and use frontend_layout */
- );
-
- // Add tensor to controlflow TensorRegistry.
- cf_tensor_reg->setNativeUserTensor(ind, std::move(tensor));
- auto *itensor = cf_tensor_reg->getITensor(ind);
- ret.push_back(itensor);
- }
- return ret;
-}
-
void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph)
{
TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true};
initializeBackendContext(lowered_graph.get());
- // linearize
- assert(!lowered_graph->graph().isBuildingPhase());
-
- /*************************************************
- * Backend dependent analysis & optimization phase
- *************************************************/
-
- for (auto &pair : backend_contexts)
- {
- auto &optimizer = pair.second->optimizer;
- if (optimizer)
- optimizer->optimize();
- }
+ TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
- /**********************************************************
- * Backend dependent analysis & optimization phase finished
- **********************************************************/
+ assert(!lowered_graph->graph().isBuildingPhase());
- /***********************
- * Code generation phase
- ***********************/
+ initializeSubgraphIOTensors(
+ *lowered_graph, (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
+ ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
+ // linearize
auto order = Linear::linearize(*lowered_graph);
- runTensorRegistration(lowered_graph.get(), order);
-
- std::vector<backend::ITensor *> input_tensors;
- std::vector<backend::ITensor *> output_tensors;
- if (options.is_primary_subgraph)
- {
- input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs());
- output_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getOutputs());
- }
-
Linear::dump(*lowered_graph, order);
- Linear::planTensors(*lowered_graph, order);
- TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
- TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
-
- for (auto &tensor_builder : tensor_builders)
+ for (auto &pair : backend_contexts)
{
- tensor_builder->prepare();
+ pair.second->genTensors(order, lowered_graph->op_seqs(), *lowered_graph->getLowerInfo());
}
prepareMigrantTensors(*lowered_graph);
- ExecutionBuilder builder;
-
- // Generate kernels
- lowered_graph->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &op_seq_index,
- const ir::OpSequence &op_seq) {
- auto lower_info = lowered_graph->getLowerInfo(op_seq_index);
- auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen;
- // Set TensorBuilderSet and ExecutorMap to kernel_gen of control flow
- auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get());
- if (cf_kernel_gen != nullptr)
+ // Give some runtime objects to controlflow KernelGenerator
+ for (auto &pair : backend_contexts)
+ {
+ auto cf_context = dynamic_cast<backend::controlflow::BackendContext *>(pair.second.get());
+ if (cf_context != nullptr)
{
+ auto cf_kernel_gen = cf_context->kernel_gen;
cf_kernel_gen->setTensorRegistries(tensor_regs);
cf_kernel_gen->setExecutorMap(executor_map);
}
- auto fn_seq = kernel_gen->generate(op_seq);
- if (options.he_profiling_mode)
- {
- fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
- }
- builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)});
- });
-
- for (auto &tensor_builder : tensor_builders)
- {
- tensor_builder->allocate();
}
+ ExecutionBuilder builder;
+
+ // Adjust the order of backends for the upcoming iteration
+ std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
for (auto &pair : backend_contexts)
{
- pair.second->initConsts();
+    // NOTE The controlflow backend must be processed last.
+    // This is because the Permute layer is the only operation that can have different ITensor
+    // objects for its input and output, and it requires that all the other backends' tensors
+    // are ready to use.
+ if (pair.first->config()->id() == "controlflow")
+ ordered_contexts.emplace_back(pair.first, pair.second.get());
+ else
+ ordered_contexts.emplace_front(pair.first, pair.second.get());
}
- lowered_graph->graph().operands().iterate(
- [](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
-
- auto code_map = builder.releaseCodeMap();
-
- for (auto &it : code_map)
+ // Generate kernels
+ for (auto &pair : ordered_contexts)
{
- auto op_seq_index = it.first;
- auto &fn_seq = it.second.fn_seq;
-
- fn_seq->iterate([&](exec::IFunction &ifunc) {
- ifunc.prepare();
- auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend();
- auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
- tensor_builder->postFunctionPrepare();
- });
+ auto codes = pair.second->genKernels(order, lowered_graph->op_seqs());
+ for (auto &pair : codes)
+ {
+ auto &op_seq_ind = pair.first;
+ auto &fn_seq = pair.second;
+ auto &op_seq = lowered_graph->op_seqs().at(op_seq_ind);
+ auto lower_info = lowered_graph->getLowerInfo(op_seq_ind);
+ if (options.he_profiling_mode)
+ fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
+ builder.append(op_seq_ind, {&op_seq, lower_info, std::move(fn_seq)});
+ }
}
- auto exec =
- new exec::LinearExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
- std::move(code_map), order};
+ auto code_map = builder.releaseCodeMap();
+
+ auto exec = new exec::LinearExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map),
+ order, options.tracing_ctx};
if (!options.trace_filepath.empty())
{
- std::unique_ptr<exec::IExecutionObserver> ctp =
- std::make_unique<exec::ChromeTracingObserver>(options.trace_filepath, exec->graph());
+ std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>(
+ options.trace_filepath, exec->graph(), options.tracing_ctx);
exec->addObserver(std::move(ctp));
}
initializeBackendContext(lowered_graph.get());
- auto order = Linear::linearize(*lowered_graph);
- runTensorRegistration(lowered_graph.get(), order);
-
- std::vector<backend::ITensor *> input_tensors;
- std::vector<backend::ITensor *> output_tensors;
- if (options.is_primary_subgraph)
- {
- input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs());
- output_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getOutputs());
- }
-
- TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
- // To make tensors never be deallocated, this is a workaround to use static memory planner
- for (auto &tensor_builder : tensor_builders)
- {
- lowered_graph->graph().operands().iterate(
- [&](const ir::OperandIndex &ind, const ir::Operand &) {
- if (tensor_builder->isRegistered(ind))
- {
- tensor_builder->notifyFirstUse(ind);
- }
- });
- }
+ assert(!lowered_graph->graph().isBuildingPhase());
+
+ initializeSubgraphIOTensors(
+ *lowered_graph, (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
+ ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
- for (auto &tensor_builder : tensor_builders)
+ // linearize
+  // This order is just for giving topological order info to the backends
+ // TODO When we pass a partial graph to a backend, we can remove this
+ auto order = Linear::linearize(*lowered_graph);
+ for (auto &pair : backend_contexts)
{
- tensor_builder->prepare();
+ pair.second->genTensors(order, lowered_graph->op_seqs(), *lowered_graph->getLowerInfo());
}
prepareMigrantTensors(*lowered_graph);
- ExecutionBuilder builder;
-
- // Generate kernels
- lowered_graph->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &op_seq_index,
- const ir::OpSequence &op_seq) {
- auto lower_info = lowered_graph->getLowerInfo(op_seq_index);
- auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen;
- // Set TensorBuilderSet and ExecutorMap to kernel_gen of control flow
- auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get());
- if (cf_kernel_gen != nullptr)
+ // Give some runtime objects to controlflow KernelGenerator
+ for (auto &pair : backend_contexts)
+ {
+ auto cf_context = dynamic_cast<backend::controlflow::BackendContext *>(pair.second.get());
+ if (cf_context != nullptr)
{
- assert(cf_kernel_gen != nullptr);
+ auto cf_kernel_gen = cf_context->kernel_gen;
cf_kernel_gen->setTensorRegistries(tensor_regs);
cf_kernel_gen->setExecutorMap(executor_map);
}
- auto fn_seq = kernel_gen->generate(op_seq);
- if (options.he_profiling_mode)
- {
- fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
- }
- builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)});
- });
-
- for (const auto &tensor_builder : tensor_builders)
- {
- tensor_builder->allocate();
}
+ ExecutionBuilder builder;
+
+ // Adjust the order of backends for the upcoming iteration
+ std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
for (auto &pair : backend_contexts)
{
- pair.second->initConsts();
+    // NOTE The controlflow backend must be processed last.
+    // This is because the Permute layer is the only operation that can have different ITensor
+    // objects for its input and output, and it requires that all the other backends' tensors
+    // are ready to use.
+ if (pair.first->config()->id() == "controlflow")
+ ordered_contexts.emplace_back(pair.first, pair.second.get());
+ else
+ ordered_contexts.emplace_front(pair.first, pair.second.get());
}
- lowered_graph->graph().operands().iterate(
- [](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
-
- auto code_map = builder.releaseCodeMap();
-
- for (auto &it : code_map)
+ // Generate kernels
+ for (auto &pair : ordered_contexts)
{
- auto op_seq_index = it.first;
- auto &fn_seq = it.second.fn_seq;
-
- fn_seq->iterate([&](exec::IFunction &ifunc) {
- ifunc.prepare();
- auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend();
- auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
- tensor_builder->postFunctionPrepare();
- });
+ auto codes = pair.second->genKernels(order, lowered_graph->op_seqs());
+ for (auto &pair : codes)
+ {
+ auto &op_seq_ind = pair.first;
+ auto &fn_seq = pair.second;
+ auto &op_seq = lowered_graph->op_seqs().at(op_seq_ind);
+ auto lower_info = lowered_graph->getLowerInfo(op_seq_ind);
+ if (options.he_profiling_mode)
+ fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
+ builder.append(op_seq_ind, {&op_seq, lower_info, std::move(fn_seq)});
+ }
}
+ auto code_map = builder.releaseCodeMap();
+
exec::ExecutorBase *exec = nullptr;
if (parallel)
{
- exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors, output_tensors,
- tensor_regs, std::move(code_map)};
+ exec = new exec::ParallelExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map),
+ options.tracing_ctx};
}
else
{
- auto dataflow_exec = new exec::DataflowExecutor{
- std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, std::move(code_map)};
+ auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), tensor_regs,
+ std::move(code_map), options.tracing_ctx};
if (options.he_profiling_mode)
{
std::vector<const backend::Backend *> backends;
if (!options.trace_filepath.empty())
{
- std::unique_ptr<exec::IExecutionObserver> ctp =
- std::make_unique<exec::ChromeTracingObserver>(options.trace_filepath, exec->graph());
+ std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>(
+ options.trace_filepath, exec->graph(), options.tracing_ctx);
exec->addObserver(std::move(ctp));
}
static void initializeBackendContext(compiler::LoweredGraph *lowered_graph);
static void runTensorRegistration(compiler::LoweredGraph *lowered_graph,
const std::vector<ir::OpSequenceIndex> &order);
- static std::vector<backend::ITensor *>
- initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
- const ir::OperandIndexSequence &indices);
static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph);
static exec::IExecutor *
createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
#include "Linear.h"
#include "backend/IConfig.h"
-#include "backend/IConstantInitializer.h"
-#include "backend/ITensorRegister.h"
#include "backend/Backend.h"
#include "util/logging.h"
}
}
-void Linear::planTensors(const compiler::LoweredGraph &lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order)
-{
- const auto &graph = lowered_graph.graph();
- ir::OperandIndexMap<std::shared_ptr<backend::ITensorBuilder>> tensor_builder_map;
-
- ir::OperandIndexMap<uint32_t> uses_map;
- ir::OperandIndexMap<uint32_t> def_map;
- ir::OperandIndexSequence constants;
-
- // Prepare scanning
- graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- const auto lower_info = lowered_graph.getLowerInfo(ind);
- // TODO Remove if onert doesn't support anymore such as
- // GeneratedTests.reshape_quant8_weights_as_inputs
- if (lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0 &&
- !graph.getInputs().contains(ind))
- {
- VERBOSE(LINEAR) << "Operand #" << ind.value() << " will not be used. no more process."
- << std::endl;
- return;
- }
-
- // Unused input of subgraph
- // TODO Register unused input as nullptr in tensor_builder
- if (lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0 &&
- graph.getInputs().contains(ind))
- {
- VERBOSE(LINEAR) << "Operand #" << ind.value() << " will not be used. no more process."
- << std::endl;
- return;
- }
-
- uses_map[ind] = obj.getUses().size();
- def_map[ind] = obj.getDef().valid() ? 1 : 0;
-
- bool is_const = obj.isConstant();
- if (is_const)
- {
- constants.append(ind);
- }
-
- auto factor = lower_info->def_factors().getOnlyElement();
- auto backend = factor.backend();
- auto tensor_builder = lowered_graph.backend_contexts().at(backend)->tensor_builder;
- if (!tensor_builder->isRegistered(ind))
- {
- // These tensors do not exist in any op_seq (No use and def)
- const auto info = obj.info();
- const auto backend_layout = factor.layout();
- // TODO Change tensor info to have permuted shape
- tensor_builder->registerTensorInfo(ind, info, backend_layout);
- }
-
- tensor_builder_map[ind] = tensor_builder;
- });
-
- const auto io_tensors =
- (graph.getInputs() + graph.getOutputs()) | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
-
- // If a tensor is model output, increase the use of the tensor.
- // This aim is same to above one.
- for (const auto &ind : io_tensors)
- {
- uses_map[ind]++;
- }
-
- // Start scanning to do notify{First|Last}Use for each tensor
-
- // If a tensor is a constant, increase the use of the tensor.
- // It makes the tensor not be dealloced. It means these will be deallocated last.
- // And allocate constant operands first
- VERBOSE(LINEAR) << "TENSORS as CONSTANT" << std::endl;
- for (const auto &ind : constants)
- {
- uses_map[ind]++;
- tensor_builder_map[ind]->notifyFirstUse(ind);
- }
-
- // Allocate Model's inputs
- VERBOSE(LINEAR) << "TENSORS as MODEL INPUT" << std::endl;
- for (const auto &ind : graph.getInputs() | ir::Remove::DUPLICATED)
- {
- auto tensor_builder = tensor_builder_map[ind];
- if (!tensor_builder) // for GeneratedTests.xxx_weights_as_inputs
- continue;
- tensor_builder->notifyFirstUse(ind);
- }
-
- // At each operation,
- // 1. Scan DEF of outputs. If the DEF, allocate it
- // 2. Scan DEF of inputs. If variable tensor, allocate it
- // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
- VERBOSE(LINEAR) << "TENSORS" << std::endl;
- for (const auto op_seq_ind : order)
- {
- const auto &op_seq = lowered_graph.op_seqs().at(op_seq_ind);
- for (const auto &op_idx : op_seq.operations())
- {
- for (const auto &ind : graph.operations().at(op_idx).getOutputs() | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED)
- {
- assert(def_map.find(ind) != def_map.end());
- if (def_map[ind])
- {
- def_map[ind] = 0;
- tensor_builder_map[ind]->notifyFirstUse(ind);
- }
- }
-
- // Scan variable tensors
- // This tensor has features like constant. But OperandInfo and LowerInfo treat them as
- // non-constant because of less memory usage by memory planning in here
- for (const auto &ind : graph.operations().at(op_idx).getInputs() | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED)
- {
- const auto &operand = graph.operands().at(ind);
- if (operand.info().isVariable())
- {
- // The variable tensor with buffer is not supported yet
- assert(operand.data() == nullptr);
- assert(operand.getUses().size() == 1 && !operand.getDef().valid());
- assert(lowered_graph.getLowerInfo(ind)->def_factors().size() == 1 &&
- lowered_graph.getLowerInfo(ind)->use_factors().size() == 1);
- assert(uses_map[ind] == 1 && def_map[ind] == 0);
- tensor_builder_map[ind]->notifyFirstUse(ind);
- }
- }
-
- for (const auto &ind : graph.operations().at(op_idx).getInputs() | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED)
- {
- assert(uses_map.find(ind) != uses_map.end());
- assert(uses_map[ind] > 0);
- uses_map[ind]--;
- if (uses_map[ind] == 0)
- {
- // plan for deallocation of static tensornode
- tensor_builder_map[ind]->notifyLastUse(ind);
-
- // plan for deallocation of dynamic tensor
- auto dyn_tensor_manager = tensor_builder_map[ind]->dynamicTensorManager();
- if (dyn_tensor_manager)
- {
- const auto *backend =
- lowered_graph.getLowerInfo(ind)->def_factors().getOnlyElement().backend();
- auto &tensor_registry = lowered_graph.backend_contexts().at(backend)->tensor_registry;
- auto *tensor = tensor_registry->getITensor(ind);
- assert(tensor);
- if (!io_tensors.contains(ind)) // I/O tensors cannot be deallocated
- dyn_tensor_manager->planDealloc(op_idx, tensor);
- }
- }
- }
- }
- }
-
- // Dispose and validate
- for (const auto &ind : io_tensors)
- {
- --uses_map[ind];
- if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
- {
- tensor_builder_map[ind]->notifyLastUse(ind);
- }
- }
-
- for (const auto &ind : constants)
- {
- --uses_map[ind];
- if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
- {
- tensor_builder_map[ind]->notifyLastUse(ind);
- }
- }
-
- assert(
- std::all_of(uses_map.begin(), uses_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-
- assert(
- std::all_of(def_map.begin(), def_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-}
-
} // namespace compiler
} // namespace onert
#include "ir/OpSequences.h"
#include "ir/Index.h"
-#include "backend/ITensorBuilder.h"
#include "compiler/LoweredGraph.h"
namespace onert
static std::vector<ir::OpSequenceIndex> linearize(const compiler::LoweredGraph &lowered_graph);
static void dump(const compiler::LoweredGraph &lowered_graph,
const std::vector<ir::OpSequenceIndex> &order);
- static void planTensors(const compiler::LoweredGraph &lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order);
};
} // namespace compiler
#include "compiler/BackendResolver.h"
#include "compiler/ManualScheduler.h"
#include "compiler/HEScheduler.h"
+#include "util/TracingCtx.h"
namespace onert
{
LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph}
{
+ // set tracing_ctx for copied graph
+ if (options.tracing_ctx)
+ {
+ auto subgraph_index = options.tracing_ctx->getSubgraphIndex(&graph);
+ options.tracing_ctx->setSubgraphIndex(&_graph, subgraph_index.value());
+ }
+
bool linear_executor = (options.executor == "Linear");
// Build backend contexts
.run();
// Set LowerInfo for each operand from the operand::LowerInfo holder
- manipulateLowerInfo(operands_lower_info, options.is_primary_subgraph);
+ manipulateLowerInfo(operands_lower_info);
dumpLowerInfo();
}
// Optimization passes
pass::PassRunner{}.append(std::make_unique<pass::PermutationEliminationPass>(*this)).run();
- VERBOSE(OpSequences) << "Dump after permutation insertion" << std::endl;
+ VERBOSE(LoweredGraph) << "Dump after permutation insertion" << std::endl;
+ for (auto operand : _graph.getInputs())
+ VERBOSE(LoweredGraph) << "Graph Input : " << operand << std::endl;
+ for (auto operand : _graph.getOutputs())
+ VERBOSE(LoweredGraph) << "Graph Output : " << operand << std::endl;
dumpOpSequences(_op_seqs, _graph.operations());
// Graph verifications
}
void LoweredGraph::manipulateLowerInfo(
- ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
- bool is_primary)
+ ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info)
{
const auto controlflow_backend = BackendManager::get().getControlflow();
- // TODO Rather than handling primary graph specially,
- // let the permute inserted and remove it later
- if (is_primary)
+  // TODO Rather than using NHWC, get the frontend layout of this node from the IR
+ auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC};
+ for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED)
{
- // TODO Rather than using NHWC Get frontend layout of this node from IR
- auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC};
- for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED)
- {
- auto &&lower_info = operands_lower_info.at(index);
- assert(lower_info->def_factors().empty());
- lower_info->addDefPermuteFactor(factor);
- }
- for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED)
- {
- auto &&lower_info = operands_lower_info.at(index);
- lower_info->addUsePermuteFactor(factor);
- }
+ auto &&lower_info = operands_lower_info.at(index);
+ assert(lower_info->def_factors().empty());
+ lower_info->addDefPermuteFactor(factor);
}
- else
+ for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED)
{
- for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED)
- {
- auto &&lower_info = operands_lower_info.at(index);
- if (!(lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0))
- {
- // In case of not that Graph's input is not used in any operation and not the graph's
- // output.
- // In other words, it is not unused input in Graph.
- lower_info->addDefPermuteFactor(*lower_info->use_factors().begin());
- }
- else
- {
- // In case of that an operand is Graph's input and not input or output of any operation
- lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{
- controlflow_backend,
- ir::Layout::NHWC // TODO Get frontend layout of this node from IR
- });
- }
- }
+ auto &&lower_info = operands_lower_info.at(index);
+ lower_info->addUsePermuteFactor(factor);
}
for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED)
{
sstream << (shape.dim(i)) << " ";
}
sstream << "}" << std::endl;
- sstream << " - Def ir::Operations : " << def_ops << std::endl;
- sstream << " - Use ir::Operations : " << use_ops << std::endl;
+ sstream << " - Def Operations : " << def_ops << std::endl;
+ sstream << " - Use Operations : " << use_ops << std::endl;
+ sstream << " - Data : "
+ << (object.data() ? (std::to_string(object.data()->size()) + " bytes") : "N/A")
+ << std::endl;
sstream << " - Lower Info" << std::endl;
sstream << " - Def Backends : " << def_layouts << std::endl;
sstream << " - Use Backends : " << use_layouts << std::endl;
}
// Dump final assignment
- backend_resolver->iterate([&](const ir::OperationIndex &index, const backend::Backend &backend) {
- VERBOSE(ManualScheduler) << "backend for operation #" << index.value() << ": "
- << backend.config()->id() << std::endl;
- });
+ WHEN_LOG_ENABLED(backend_resolver->iterate(
+ [&](const ir::OperationIndex &index, const backend::Backend &backend) {
+ VERBOSE(ManualScheduler) << "backend for operation #" << index.value() << ": "
+ << backend.config()->id() << std::endl;
+ }));
return backend_resolver;
}
{
ShapeValidator::ShapeValidator(const ir::Graph &graph)
- : _graph{graph}, _ctx{graph.operands()}, _current_op_seq_layout{ir::Layout::UNKNOWN}
+ : _graph{graph}, _ctx{graph.operands()}, _current_layout{ir::Layout::UNKNOWN}
{
}
// creating Compiler
assert(_graph.subgraphs() == nullptr);
- _current_op_seq_layout = _graph.layout();
+ _current_layout = _graph.layout();
_graph.operations().iterate(
[&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2);
+ if (node.getInputs().size() != 2)
+ {
+ const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
+ OP_REQUIRES(_ctx.at(crops_index).shape().rank() == 2);
+ OP_REQUIRES(_ctx.at(crops_index).shape().dim(0) == (_ctx.at(ifm_index).shape().rank() - 2));
+ OP_REQUIRES(_ctx.at(crops_index).shape().dim(1) == 2);
+ }
+
OP_REQUIRES(input_shape.C == output_shape.C);
}
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
const auto block_size = node.param().block_size;
OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank());
OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank());
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
// The kernel has only IHWO layout on frontend
const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
- const auto frontend_layout = _current_op_seq_layout;
+ const auto frontend_layout = _current_layout;
const auto output_shape = _ctx.at(output_index).shape().asFeature(frontend_layout);
const auto input_shape = _ctx.at(input_index).shape().asFeature(frontend_layout);
// TODO Remove _ctx field
const ir::Graph &_graph;
const ir::Operands &_ctx;
- ir::Layout _current_op_seq_layout;
+ ir::Layout _current_layout;
};
} // namespace compiler
}
}
-void StaticShapeInferer::visit(const ir::operation::ArgMax &op)
+void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op)
{
- const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
+ const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
const auto &input = _operands.at(input_idx);
- const auto axis_idx{op.getInputs().at(ir::operation::ArgMax::Input::AXIS)};
+ const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
const auto &axis = _operands.at(axis_idx);
// get mutable output operand
axis_value = axis_value < 0 ? axis_value + rank : axis_value;
// re-sizing output shape
- ir::Shape new_shape = shape_inference::inferArgMaxShape(input.info().shape(), axis_value, rank);
+ ir::Shape new_shape =
+ shape_inference::inferArgMinMaxShape(input.info().shape(), axis_value, rank);
output.info().shape(new_shape);
}
// even when axis is constant, output shape should be recalculated since user might call
// nnfw_set_input_tensorinfo(input, some_new_shape)
- auto axis_buf = reinterpret_cast<const int32_t *>(axis.data()->base());
- assert(axis_buf);
+ auto axis_type = axis.typeInfo().type();
+ assert(axis_type == ir::DataType::INT32 || axis_type == ir::DataType::INT64);
+
+ assert(axis.data()->base());
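+  // The axis operand may be INT32 or INT64, so read the scalar according to its actual type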
+ int32_t axis_value =
+ (axis_type == ir::DataType::INT32)
+ ? reinterpret_cast<const int32_t *>(axis.data()->base())[0]
+ : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis.data()->base())[0]);
// re-sizing output shape
- ir::Shape new_shape = shape_inference::inferExpandDimsShape(input.info().shape(), axis_buf[0]);
+ ir::Shape new_shape = shape_inference::inferExpandDimsShape(input.info().shape(), axis_value);
output.info().shape(new_shape);
}
void StaticShapeInferer::visit(const ir::operation::Fill &op)
{
- const auto input_idx{op.getInputs().at(ir::operation::Fill::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto shape_idx{op.getInputs().at(ir::operation::Fill::Input::SHAPE)};
+ const auto &shape = _operands.at(shape_idx);
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (!input.isConstant())
+ if (!shape.isConstant())
{
output.info().setDynamic();
_return_has_dynamic_tensor = true;
return;
}
- assert(input.typeInfo().type() == ir::DataType::INT32);
+ const auto dims_type = shape.typeInfo().type();
+ assert(dims_type == ir::DataType::INT32 || dims_type == ir::DataType::INT64);
- auto input_buf = reinterpret_cast<const int32_t *>(input.data()->base());
- assert(input_buf);
+ auto dims_buf = shape.data()->base();
+ assert(dims_buf);
+
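+  // SHAPE holds the output dimensions; infer the output shape with the matching element type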
+ const auto &dims_shape = shape.info().shape();
+ auto new_shape = ((dims_type == ir::DataType::INT32)
+ ? shape_inference::inferFillShape<int32_t>(
+ dims_shape, reinterpret_cast<const int32_t *>(dims_buf))
+ : shape_inference::inferFillShape<int64_t>(
+ dims_shape, reinterpret_cast<const int64_t *>(dims_buf)));
- // re-sizing output shape
- ir::Shape new_shape = shape_inference::inferFillShape(input.info().shape(), input_buf);
output.info().shape(new_shape);
}
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_COMPILER_TENSOR_BUILDERS_H__
-#define __ONERT_COMPILER_TENSOR_BUILDERS_H__
-
-#include <unordered_set>
-#include <memory>
-#include "backend/BackendContext.h"
-#include "backend/Backend.h"
-#include "backend/controlflow/Config.h"
-#include "backend/controlflow/TensorBuilder.h"
-#include "util/logging.h"
-
-namespace onert
-{
-namespace compiler
-{
-
-class TensorBuilders
-{
-public:
- TensorBuilders() = default;
-
- TensorBuilders(const onert::backend::BackendContexts &backend_contexts, bool include_controlflow)
- {
- for (const auto &e : backend_contexts)
- {
- if (e.first->config()->id() == backend::controlflow::Config::ID)
- {
- _cf_tensor_builder = std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>(
- e.second->tensor_builder);
- if (include_controlflow)
- _tensor_builders.insert(e.second->tensor_builder);
- }
- else
- {
- _tensor_builders.insert(e.second->tensor_builder);
- }
- }
- }
-
- std::unordered_set<std::shared_ptr<onert::backend::ITensorBuilder>>::const_iterator begin() const
- {
- return _tensor_builders.cbegin();
- }
- std::unordered_set<std::shared_ptr<onert::backend::ITensorBuilder>>::const_iterator end() const
- {
- return _tensor_builders.cend();
- }
-
- std::shared_ptr<backend::controlflow::TensorBuilder> getControlflowTensorBuilder() const
- {
- return _cf_tensor_builder;
- }
-
-private:
- std::unordered_set<std::shared_ptr<backend::ITensorBuilder>> _tensor_builders;
- std::shared_ptr<backend::controlflow::TensorBuilder> _cf_tensor_builder;
-};
-
-} // namespace compiler
-} // namespace onert
-
-#endif // __ONERT_COMPILER_TENSOR_BUILDERS_H__
// Generate output operand and permute operation
auto out_operand_index = _graph.addOperand(operand.shape(), operand.typeInfo());
- // change model output if operand_index is model output index
+  // Change the model output if operand_index is a model output index and the out operand is
+  // assigned to the controlflow backend
auto &model_outputs = _graph.getOutputs();
- if (model_outputs.contains(operand_index))
+ const backend::Backend *cf_backend = compiler::BackendManager::get().getControlflow();
+ if (model_outputs.contains(operand_index) && factor.backend() == cf_backend)
{
model_outputs.replace(operand_index, out_operand_index);
}
const auto &node = _graph.operations().at(node_index);
VERBOSE_F() << "Permute Op inserted, node index : " << node_index << std::endl;
- VERBOSE_F() << " - Input (original) Operand : " << operand_index << std::endl;
- VERBOSE_F() << " - Output(inserted) Operand : " << out_operand_index << std::endl;
+ VERBOSE_F() << " - Input (original) Operand : " << operand_index << "("
+ << input_factor.backend()->config()->id() << ")" << std::endl;
+ VERBOSE_F() << " - Output(inserted) Operand : " << out_operand_index << "("
+ << factor.backend()->config()->id() << ")" << std::endl;
// OpSequence
{
}
DataflowExecutor::DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<backend::ITensor *> &input_tensors,
- const std::vector<backend::ITensor *> &output_tensors,
const compiler::TensorRegistries &tensor_regs,
- compiler::CodeMap &&code_map)
- : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs},
+ compiler::CodeMap &&code_map,
+ const util::TracingCtx *tracing_ctx)
+ : ExecutorBase{std::move(lowered_graph), tensor_regs, tracing_ctx},
_code_map{std::move(code_map)}
{
VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl;
}
assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs
- _subject.notifyModelBegin(this);
+ auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph);
+
+ _subject.notifySubgraphBegin(profiling_subg_index);
while (!_ready_jobs.empty())
{
const backend::Backend *backend =
_lowered_graph->getLowerInfo()->op_seq.at(op_seq_index)->backend();
- _subject.notifyJobBegin(this, op_seq, backend);
+ _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend);
job->fn_seq()->initRunning();
job->run();
- _subject.notifyJobEnd(this, op_seq, backend);
+ _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend);
notify(job_index);
_finished_jobs[job_index] = std::move(job);
}
assert(noWaitingJobs());
- _subject.notifyModelEnd(this);
+ _subject.notifySubgraphEnd(profiling_subg_index);
// Reset input info for the next execution
_input_info = _initial_input_info;
#include <memory>
#include "exec/ExecutorBase.h"
#include "compiler/CodeMap.h"
+#include "util/TracingCtx.h"
namespace onert
{
* @param code_map OpSequence and its code map
*/
DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<backend::ITensor *> &input_tensors,
- const std::vector<backend::ITensor *> &output_tensors,
- const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map);
+ const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map,
+ const util::TracingCtx *tracing_ctx);
void executeImpl() override;
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::ArgMax &op)
+void DynamicShapeInferer::visit(const ir::operation::ArgMinMax &op)
{
- const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
+ const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
const auto input = _tensor_registry->getITensor(input_idx);
- const auto axis_idx{op.getInputs().at(ir::operation::ArgMax::Input::AXIS)};
+ const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
const auto axis = _tensor_registry->getITensor(axis_idx);
auto output_ind = op.getOutputs().at(0);
const auto rank = input_shape.rank();
axis_value = axis_value < 0 ? axis_value + rank : axis_value;
- ir::Shape new_shape = shape_inference::inferArgMaxShape(input_shape, axis_value, rank);
+ ir::Shape new_shape = shape_inference::inferArgMinMaxShape(input_shape, axis_value, rank);
output->applyShape(new_shape);
assert(output->buffer() != nullptr);
auto axis_ind = op.getInputs().at(ir::operation::ExpandDims::AXIS);
auto axis = _tensor_registry->getITensor(axis_ind);
- auto axis_buf = reinterpret_cast<const int32_t *>(axis->buffer());
- assert(axis_buf);
+ auto axis_type = axis->data_type();
+ assert(axis_type == ir::DataType::INT32 || axis_type == ir::DataType::INT64);
- auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_buf[0]);
+ assert(axis->buffer());
+ int32_t axis_value =
+ (axis_type == ir::DataType::INT32)
+ ? reinterpret_cast<const int32_t *>(axis->buffer())[0]
+ : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis->buffer())[0]);
+
+ auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_value);
output->applyShape(output_shape);
assert(output->buffer() != nullptr);
// check if output is not dynamic
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- auto input_ind = op.getInputs().at(ir::operation::Fill::Input::INPUT);
- auto input = _tensor_registry->getITensor(input_ind);
- ir::Shape input_shape = input->getShape();
+ auto shape_ind = op.getInputs().at(ir::operation::Fill::Input::SHAPE);
+ auto shape = _tensor_registry->getITensor(shape_ind);
- if ((!input->is_dynamic()) && (!output->is_dynamic()))
+ if ((!shape->is_dynamic()) && (!output->is_dynamic()))
return;
- assert(input->data_type() == ir::DataType::INT32);
+ const auto dims_type = shape->data_type();
+ assert(dims_type == ir::DataType::INT32 || dims_type == ir::DataType::INT64);
- auto input_buf = reinterpret_cast<const int32_t *>(input->buffer());
- assert(input_buf);
+ auto dims_buf = shape->buffer();
+ assert(dims_buf);
- auto output_shape = shape_inference::inferFillShape(input_shape, input_buf);
+ const auto &dims_shape = shape->getShape();
+ auto output_shape = ((dims_type == ir::DataType::INT32)
+ ? shape_inference::inferFillShape<int32_t>(
+ dims_shape, reinterpret_cast<const int32_t *>(dims_buf))
+ : shape_inference::inferFillShape<int64_t>(
+ dims_shape, reinterpret_cast<const int64_t *>(dims_buf)));
output->applyShape(output_shape);
assert(output->buffer() != nullptr);
/**
* @brief Update metrics file with new data.
*/
- void uploadOperationsExecTime() const { _json.uploadOperationsExecTime(); }
+ void storeOperationsExecTime() const { _json.storeOperationsExecTime(); }
static const int64_t NOT_FOUND = -1;
private:
_observers.emplace_back(std::move(observer));
}
-void ExecutionObservee::notifyModelBegin(IExecutor *executor)
+void ExecutionObservee::notifySubgraphBegin(ir::SubgraphIndex ind)
{
for (auto &o : _observers)
{
- o->handleBegin(executor);
+ o->handleSubgraphBegin(ind);
}
}
-void ExecutionObservee::notifyModelEnd(IExecutor *executor)
+void ExecutionObservee::notifySubgraphEnd(ir::SubgraphIndex ind)
{
for (auto &o : _observers)
{
- o->handleEnd(executor);
+ o->handleSubgraphEnd(ind);
}
}
-void ExecutionObservee::notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq,
+void ExecutionObservee::notifyJobBegin(IExecutor *executor, ir::SubgraphIndex index,
+ const ir::OpSequence *op_seq,
const backend::Backend *backend)
{
for (auto &o : _observers)
{
- o->handleBegin(executor, op_seq, backend);
+ o->handleJobBegin(executor, index, op_seq, backend);
}
}
-void ExecutionObservee::notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void ExecutionObservee::notifyJobEnd(IExecutor *executor, ir::SubgraphIndex index,
+ const ir::OpSequence *op_seq, const backend::Backend *backend)
{
for (auto &o : _observers)
{
- o->handleEnd(executor, op_seq, backend);
+ o->handleJobEnd(executor, index, op_seq, backend);
}
}
#include <list>
#include "exec/ExecutionObservers.h"
+#include "ir/Index.h"
namespace onert
{
* @param observer Observer to be added
*/
void add(std::unique_ptr<IExecutionObserver> observer);
- void notifyModelBegin(IExecutor *executor);
- void notifyModelEnd(IExecutor *executor);
- void notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq,
+ void notifySubgraphBegin(ir::SubgraphIndex ind);
+ void notifySubgraphEnd(ir::SubgraphIndex ind);
+ void notifyJobBegin(IExecutor *executor, ir::SubgraphIndex index, const ir::OpSequence *op_seq,
const backend::Backend *backend);
- void notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq,
+ void notifyJobEnd(IExecutor *executor, ir::SubgraphIndex index, const ir::OpSequence *op_seq,
const backend::Backend *backend);
private:
#include "exec/ExecutionObservers.h"
#include <string>
+#include <sstream>
#include "util/logging.h"
#include "exec/IExecutor.h"
#include "misc/polymorphic_downcast.h"
#include "ir/OpSequence.h"
#include "util/EventWriter.h"
+#include "util/Utils.h"
+
+namespace
+{
+
+void setUserData(const onert::ir::Graph &g, const onert::ir::OpSequence *op_seq,
+ decltype(EventCollector::Event::userData) &data)
+{
+ if (op_seq->size() == 0)
+ return;
+
+  // From a tensor of shape [a, b, c], this will return the string "shape(a b c)".
+  // A string like "[1, 2, 3]" would look better, but it would be treated as a list in JSON,
+  // which makes text search (e.g., Ctrl-F in Chrome Tracing) difficult.
+ auto build_shape_str = [&](onert::ir::OperandIndex operand_idx) {
+ std::string shape_str;
+ auto &shape = g.operands().at(operand_idx).info().shape();
+ for (int i = 0; i < shape.rank(); i++)
+ {
+ if (i == 0)
+ shape_str = "shape(" + std::to_string(shape.dim(i));
+ else
+ shape_str += " " + std::to_string(shape.dim(i));
+ }
+ shape_str += ")";
+
+ return shape_str;
+ };
+
+ const auto &first_op_idx = op_seq->operations().at(0);
+ const auto &first_op_node = g.operations().at(first_op_idx);
+
+ auto &inputs = first_op_node.getInputs();
+ auto size = inputs.size();
+ for (size_t i = 0; i < size; i++)
+ {
+ auto operand_idx = inputs.at(i);
+ if (operand_idx.undefined())
+ continue;
+
+ std::string key("input_shape_" + std::to_string(i));
+ std::string value = build_shape_str(operand_idx);
+ data.emplace_back(std::make_pair(key, value));
+ }
+
+ // add other userData as needed
+}
+
+} // namespace
namespace onert
{
namespace exec
{
-void ProfileObserver::handleBegin(onert::exec::IExecutor *, const ir::OpSequence *,
- const onert::backend::Backend *backend)
+void ProfileObserver::handleJobBegin(onert::exec::IExecutor *, ir::SubgraphIndex,
+ const ir::OpSequence *, const onert::backend::Backend *backend)
{
_timer = backend->config()->timer();
if (_timer == nullptr)
_timer->handleBegin();
}
-void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void ProfileObserver::handleJobEnd(IExecutor *exec, ir::SubgraphIndex, const ir::OpSequence *op_seq,
+ const backend::Backend *backend)
{
_timer->handleEnd();
const auto timer_res = _timer->getTime();
}
};
-ChromeTracingObserver::ChromeTracingObserver(const std::string &filepath, const ir::Graph &graph)
- : _base_filepath(filepath), _recorder{}, _collector{&_recorder}, _graph{graph}
+TracingObserver::TracingObserver(const std::string &filepath, const ir::Graph &graph,
+ const util::TracingCtx *tracing_ctx)
+ : _recorder{std::make_unique<EventRecorder>()}, _collector{_recorder.get()}, _graph{graph},
+ _tracing_ctx{tracing_ctx}
{
+ // TODO Remove below after using _tracing_ctx
+ UNUSED_RELEASE(_tracing_ctx);
+
+ _event_writer = EventWriter::get(filepath);
+ _event_writer->startToUse();
}
-ChromeTracingObserver::~ChromeTracingObserver()
+TracingObserver::~TracingObserver()
{
try
{
- EventWriter{_recorder}.writeToFiles(_base_filepath);
+ _event_writer->readyToFlush(std::move(_recorder));
}
catch (const std::exception &e)
{
- std::cerr << "E: Fail to record event in ChromeTracingObserver: " << e.what() << std::endl;
+ std::cerr << "E: Fail to record event in TracingObserver: " << e.what() << std::endl;
}
}
-void ChromeTracingObserver::handleBegin(IExecutor *)
+void TracingObserver::handleSubgraphBegin(ir::SubgraphIndex subg_ind)
{
+ // TODO Write subg_ind into profiling result
+ UNUSED_RELEASE(subg_ind);
_collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "runtime", "Graph"});
}
-void ChromeTracingObserver::handleBegin(IExecutor *, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void TracingObserver::handleJobBegin(IExecutor *, ir::SubgraphIndex subg_ind,
+ const ir::OpSequence *op_seq, const backend::Backend *backend)
{
+ // TODO Write subg_ind into profiling result
+ UNUSED_RELEASE(subg_ind);
+
std::string backend_id = backend->config()->id();
- _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, backend_id,
- opSequenceTag(op_seq, _graph.operations())});
+
+ auto ev = EventCollector::Event{EventCollector::Edge::BEGIN, backend_id,
+ opSequenceTag(op_seq, _graph.operations())};
+ // add shape of inputs
+ setUserData(_graph, op_seq, ev.userData);
+
+ _collector.onEvent(ev);
}
-void ChromeTracingObserver::handleEnd(IExecutor *, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void TracingObserver::handleJobEnd(IExecutor *, ir::SubgraphIndex subg_ind,
+ const ir::OpSequence *op_seq, const backend::Backend *backend)
{
+ // TODO Write subg_ind into profiling result
+ UNUSED_RELEASE(subg_ind);
+
std::string backend_id = backend->config()->id();
_collector.onEvent(EventCollector::Event{EventCollector::Edge::END, backend_id,
opSequenceTag(op_seq, _graph.operations())});
}
-void ChromeTracingObserver::handleEnd(IExecutor *)
+void TracingObserver::handleSubgraphEnd(ir::SubgraphIndex subg_ind)
{
+ // TODO Write subg_ind into profiling result
+ UNUSED_RELEASE(subg_ind);
+
_collector.onEvent(EventCollector::Event{EventCollector::Edge::END, "runtime", "Graph"});
}
-std::string ChromeTracingObserver::opSequenceTag(const ir::OpSequence *op_seq,
- const ir::Operations &operations)
+std::string TracingObserver::opSequenceTag(const ir::OpSequence *op_seq,
+ const ir::Operations &operations)
{
if (op_seq->size() == 0)
return "Empty OpSequence";
#define __ONERT_EXEC_OBSREVERS_H__
#include "exec/IFunction.h"
+#include "ir/Index.h"
#include "ir/OpSequence.h"
#include "ExecTime.h"
#include "util/ITimer.h"
#include "exec/IExecutor.h"
#include "util/EventCollector.h"
#include "util/EventRecorder.h"
+#include "util/EventWriter.h"
+#include "util/TracingCtx.h"
+#include "util/EventWriter.h"
namespace onert
{
{
public:
/// @brief Invoked just before subgraph (not individual operation) execution begins
- virtual void handleBegin(IExecutor *) { return; }
+ virtual void handleSubgraphBegin(ir::SubgraphIndex) { return; }
- virtual void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0;
- virtual void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0;
+ virtual void handleJobBegin(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ const backend::Backend *) = 0;
+ virtual void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ const backend::Backend *) = 0;
/// @brief Invoked just after subgraph (not individual operation) execution ends
- virtual void handleEnd(IExecutor *) { return; }
+ virtual void handleSubgraphEnd(ir::SubgraphIndex) { return; }
virtual ~IExecutionObserver() = default;
};
: _et(std::move(et)), _graph(graph)
{
}
- void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
- void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
+ void handleJobBegin(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ const backend::Backend *) override;
+ void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ const backend::Backend *) override;
- void handleEnd(IExecutor *) override { _et->uploadOperationsExecTime(); }
+ void handleSubgraphEnd(ir::SubgraphIndex) override { _et->storeOperationsExecTime(); }
private:
std::unique_ptr<util::ITimer> _timer;
const ir::Graph &_graph;
};
-class ChromeTracingObserver : public IExecutionObserver
+class TracingObserver : public IExecutionObserver
{
public:
- ChromeTracingObserver(const std::string &filepath, const ir::Graph &graph);
- ~ChromeTracingObserver();
- void handleBegin(IExecutor *) override;
- void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
- void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
- void handleEnd(IExecutor *) override;
+ TracingObserver(const std::string &filepath, const ir::Graph &graph,
+ const util::TracingCtx *tracing_ctx);
+ ~TracingObserver();
+ void handleSubgraphBegin(ir::SubgraphIndex) override;
+ void handleJobBegin(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ const backend::Backend *) override;
+ void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *,
+ const backend::Backend *) override;
+ void handleSubgraphEnd(ir::SubgraphIndex) override;
private:
static std::string opSequenceTag(const ir::OpSequence *op_seq, const ir::Operations &operations);
private:
- const std::string &_base_filepath;
- EventRecorder _recorder;
+ std::unique_ptr<EventRecorder> _recorder;
EventCollector _collector;
const ir::Graph &_graph;
+ EventWriter *_event_writer;
+ const util::TracingCtx *_tracing_ctx;
};
} // namespace exec
*/
#include "ExecutorBase.h"
+#include "ShapeConverter.h"
-#include "backend/ITensor.h"
#include "backend/controlflow/UserTensor.h"
-#include "backend/cpu_common/Tensor.h"
#include "util/logging.h"
+#include "misc/polymorphic_downcast.h"
namespace onert
{
{
ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
- const std::vector<backend::ITensor *> &input_tensors,
- const std::vector<backend::ITensor *> &output_tensors,
- const compiler::TensorRegistries &tensor_regs)
- : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()},
- _input_tensors{input_tensors}, _output_tensors{output_tensors}, _mutex()
+ const compiler::TensorRegistries &tensor_regs,
+ const util::TracingCtx *tracing_ctx)
+ : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()}, _mutex(),
+ _tracing_ctx(tracing_ctx)
{
- // TODO Fix the way of knowing whether it is primary or not
- bool primary_executor = !(_input_tensors.empty() && _output_tensors.empty());
- if (!primary_executor)
- {
- auto build_input_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) {
- std::vector<backend::ITensor *> list;
- for (auto ind : ind_seq)
- {
- backend::ITensor *tensor = tensor_regs.getITensor(ind);
- assert(tensor != nullptr);
- list.push_back(tensor);
- }
- return list;
- };
- auto build_output_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) {
- std::vector<backend::ITensor *> list;
- for (auto ind : ind_seq)
- {
- backend::ITensor *tensor = tensor_regs.getITensor(ind);
- assert(tensor != nullptr);
- list.push_back(tensor);
- }
- return list;
- };
- _input_tensors = build_input_tensor_list(_graph.getInputs());
- _output_tensors = build_output_tensor_list(_graph.getOutputs());
- }
+ auto build_tensor_list = [&](const auto &ind_seq, auto &tensors) {
+ assert(tensors.empty());
+ for (auto ind : ind_seq)
+ {
+ backend::ITensor *tensor = tensor_regs.getITensor(ind);
+ assert(tensor != nullptr);
+ auto io_tensor = nnfw::misc::polymorphic_downcast<backend::controlflow::IOTensor *>(tensor);
+ tensors.push_back(io_tensor);
+ }
+ };
+ build_tensor_list(_graph.getInputs(), _input_tensors);
+ build_tensor_list(_graph.getOutputs(), _output_tensors);
}
-void ExecutorBase::execute(const std::vector<backend::ITensor *> &src_tensors,
- const std::shared_ptr<IPermuteFunction> &pre_fn)
+void ExecutorBase::execute(const std::vector<backend::IPortableTensor *> &inputs,
+ const std::vector<backend::IPortableTensor *> &outputs)
{
// For thread-safety, use a mutex
// TODO If all backends used by this executor are thread-safe, the mutex is unnecessary
// Note that a deadlock occurs when an Executor is called recursively.
std::lock_guard<std::mutex> lock(_mutex);
- assert(src_tensors.size() == _graph.getInputs().size());
- assert(src_tensors.size() == _input_tensors.size());
- for (uint32_t n = 0; n < _graph.getInputs().size(); ++n)
+ assert(inputs.size() == _graph.getInputs().size());
+ assert(inputs.size() == _input_tensors.size());
+ for (uint32_t n = 0; n < inputs.size(); ++n)
{
- // when user changes input shape, the input tensor is dynamic and its memory is not allocated.
- // This code find the info to allocate dynamic tensor, and allocate memory based on the source
- // tensor's shape set by caller.
- const auto src_tensor = src_tensors[n];
+ const auto input = inputs[n];
+ assert(input->buffer() != nullptr);
auto input_tensor = _input_tensors[n];
- // If src_tensor or input_tensor is nullptr, pre_fn does not copy the tensors
- if (src_tensor != nullptr && input_tensor != nullptr)
+ assert(input_tensor != nullptr);
+ if (input != nullptr)
{
- const auto orig_input_shape = input_tensor->getShape();
+ const auto orig_input_shape = input_tensor->orig_info().shape();
const auto changed_input_shape =
- convertShape(src_tensor->getShape(), src_tensor->layout(), input_tensor->layout());
+ convertShape(input->getShape(), input->layout(), input_tensor->orig_layout());
if (orig_input_shape != changed_input_shape)
{
input_tensor->set_dynamic();
}
}
+ input_tensor->setTensor(input);
}
- // TODO Move calling permute_fn.run() into executeImpl()
- assert(pre_fn);
- pre_fn->run();
+ assert(outputs.size() == _graph.getOutputs().size());
+ assert(outputs.size() == _output_tensors.size());
+ for (uint32_t n = 0; n < outputs.size(); ++n)
+ {
+ const auto output = outputs[n];
+ // assert(output->buffer() != nullptr);
+ auto output_tensor = _output_tensors[n];
+ assert(output_tensor != nullptr);
+ output_tensor->setTensor(output);
+ }
executeImpl();
}
assert(_input_tensors.size() == desc.inputs.size());
for (uint32_t i = 0; i < _input_tensors.size(); ++i)
{
- // TODO Remove dynamic_cast
- auto *tensor = dynamic_cast<backend::controlflow::UserTensor *>(_input_tensors[i]);
- assert(tensor);
+ auto tensor = _input_tensors[i];
+
+ // TODO Check if (desc.inputs[i] == nullptr)
+ // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
+ tensor->setUserTensor(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)),
+ desc.inputs[i]->size);
+
auto input_shape = desc.dynamic_input_shapes.find(ir::IOIndex{i});
if (input_shape != desc.dynamic_input_shapes.end())
{
tensor->set_dynamic();
tensor->setShape(input_shape->second);
}
- // TODO Check if (desc.inputs[i] == nullptr)
- // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
- tensor->setBuffer(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)),
- desc.inputs[i]->size);
handleDynamicInputTensor(ir::IOIndex{i}, desc);
}
assert(_output_tensors.size() == desc.outputs.size());
for (uint32_t i = 0; i < _output_tensors.size(); ++i)
{
- // TODO Remove dynamic_cast
- auto *tensor = dynamic_cast<backend::controlflow::UserTensor *>(_output_tensors[i]);
- assert(tensor);
- tensor->set_dynamic(); // It can't be resized but shape could change
+ auto tensor = _output_tensors[i];
+
if (desc.outputs[i] == nullptr)
throw std::runtime_error{"Output " + std::to_string(i) + "'s buffer is not set."};
- tensor->setBuffer(static_cast<uint8_t *>(desc.outputs[i]->buffer), desc.outputs[i]->size);
+ tensor->setUserTensor(static_cast<uint8_t *>(desc.outputs[i]->buffer), desc.outputs[i]->size);
+ tensor->set_dynamic(); // It can't be resized but shape could change
}
executeImpl();
#ifndef __ONERT_EXEC_EXECUTOR_BASE_H__
#define __ONERT_EXEC_EXECUTOR_BASE_H__
-#include <mutex>
-
#include "IPermuteFunction.h"
-#include "exec/ExecutionObservers.h"
-#include "ShapeConverter.h"
#include "exec/IExecutor.h"
-#include "compiler/LoweredGraph.h"
-#include "ir/LowerInfoMap.h"
-#include "backend/IConfig.h"
-#include "backend/Backend.h"
#include "exec/ExecTime.h"
-#include "exec/IFunction.h"
-#include "backend/IDynamicTensorManager.h"
-#include "backend/ITensorManager.h"
#include "exec/ExecutionObservee.h"
+#include "exec/IFunction.h"
+#include "exec/IODescription.h"
+#include "ir/Graph.h"
+#include "ir/Index.h"
+#include "ir/LowerInfoMap.h"
+#include "ir/OperationIndexMap.h"
+#include "compiler/LoweredGraph.h"
#include "compiler/TensorRegistries.h"
-#include <list>
+#include "backend/controlflow/IOTensor.h"
+#include "util/TracingCtx.h"
+
+#include <cstdint>
+#include <memory>
+#include <mutex>
+#include <vector>
namespace onert
{
 * @param tensor_regs Tensor registries that are currently used
*/
ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
- const std::vector<backend::ITensor *> &input_tensors,
- const std::vector<backend::ITensor *> &output_tensors,
- const compiler::TensorRegistries &tensor_regs);
+ const compiler::TensorRegistries &tensor_regs, const util::TracingCtx *tracing_ctx);
virtual ~ExecutorBase() = default;
const ir::Graph &graph() final { return _graph; }
- /**
- * @brief Execute without IODescription
- *
- * @param src_tensor Tensor list that will be copied to input tensors of this
- * @param pre_fn The permutation function that copy from src_tensor to input tensors of this
- */
- void execute(const std::vector<backend::ITensor *> &src_tensors,
- const std::shared_ptr<IPermuteFunction> &pre_fn);
-
void execute(const IODescription &desc) final;
+ void execute(const std::vector<backend::IPortableTensor *> &inputs,
+ const std::vector<backend::IPortableTensor *> &outputs) override;
+
// Used only in Dataflow and Parallel Executors
void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>> ranks) final
{
void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); };
- const std::vector<backend::ITensor *> &getInputTensors() const { return _input_tensors; }
-
- const std::vector<backend::ITensor *> &getOutputTensors() const { return _output_tensors; }
+ const std::vector<backend::controlflow::IOTensor *> &getOutputTensors() const override
+ {
+ return _output_tensors;
+ }
protected:
/**
std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
std::unique_ptr<compiler::LoweredGraph> _lowered_graph;
const ir::Graph &_graph;
- std::vector<backend::ITensor *> _input_tensors;
- std::vector<backend::ITensor *> _output_tensors;
+ std::vector<backend::controlflow::IOTensor *> _input_tensors;
+ std::vector<backend::controlflow::IOTensor *> _output_tensors;
std::mutex _mutex;
+ const util::TracingCtx *_tracing_ctx;
private:
void handleDynamicInputTensor(ir::IOIndex input_index, const IODescription &desc);
}
assert(src_tensor != dst_tensor);
- assert(underlying_type(src_tensor->data_type()) == underlying_type(dst_tensor->data_type()));
+ if (underlying_type(src_tensor->data_type()) != underlying_type(dst_tensor->data_type()))
+ throw std::runtime_error("data type does not match");
switch (src_tensor->data_type())
{
case ir::DataType::FLOAT32:
stream.seekp(-2, std::ofstream::end);
}
-void JSON::uploadOperationsExecTime() const
+void JSON::storeOperationsExecTime() const
{
std::ofstream stream(_measurement_file);
if (!stream.is_open())
loadOperationsExecTime();
};
/**
- * @brief Update _operations_exec_time_file with new data.
+ * @brief Update _measurement_file with new data.
*/
- void uploadOperationsExecTime() const;
+ void storeOperationsExecTime() const;
private:
///@brief file containing measurements
std::string _measurement_file;
std::unordered_map<std::string, const backend::Backend *> _backends;
- std::unordered_map<
- const backend::Backend *,
- std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>
- &_measurements;
+ MeasurementData &_measurements;
+
/**
* @brief Helper function for inserting data to OperationExecTimes
*
void printOperation(const std::map<uint32_t, int64_t> &operation_info,
std::ofstream &stream) const;
/**
- * @brief Parse and load operations_exec_time from _operations_exec_time_file.
+ * @brief Parse and load _measurements from _measurement_file.
*/
void loadOperationsExecTime();
};
void LinearExecutor::executeImpl()
{
- _subject.notifyModelBegin(this);
+ auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph);
+
+ _subject.notifySubgraphBegin(profiling_subg_index);
for (auto &&code : _code)
{
const auto op_seq = code.op_seq;
#ifdef RUY_PROFILER
ruy::profiler::ScopeLabel label(seq_to_label(op_seq, _graph.operations()));
#endif
- _subject.notifyJobBegin(this, op_seq, backend);
+ _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend);
auto &fn_seq = code.fn_seq;
fn_seq->enableDynamicShapeInferer(handle_dynamic_tensor);
fn_seq->run();
- _subject.notifyJobEnd(this, op_seq, backend);
+ _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend);
}
- _subject.notifyModelEnd(this);
+ _subject.notifySubgraphEnd(profiling_subg_index);
}
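// Illustrative note: for a linear plan with two op sequences, the notifications above produce
//   SubgraphBegin -> JobBegin(op_seq_0) -> JobEnd(op_seq_0)
//                 -> JobBegin(op_seq_1) -> JobEnd(op_seq_1) -> SubgraphEnd
// all tagged with the same profiling_subg_index; TracingObserver turns these into nested
// "B"/"E" duration events.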
} // namespace exec
#include "compiler/Linear.h"
#include "exec/FunctionSequence.h"
#include "compiler/CodeMap.h"
+#include "util/TracingCtx.h"
namespace onert
{
* @param code_map OpSequence and its code map
*/
LinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<backend::ITensor *> &input_tensors,
- const std::vector<backend::ITensor *> &output_tensors,
const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map,
- const std::vector<ir::OpSequenceIndex> &order)
- : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs}
+ const std::vector<ir::OpSequenceIndex> &order, const util::TracingCtx *tracing_ctx)
+ : ExecutorBase{std::move(lowered_graph), tensor_regs, tracing_ctx}
{
for (auto index : order)
{
}
ParallelExecutor::ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<backend::ITensor *> &input_tensors,
- const std::vector<backend::ITensor *> &output_tensors,
const compiler::TensorRegistries &tensor_regs,
- compiler::CodeMap &&code_map)
- : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
- std::move(code_map)}
+ compiler::CodeMap &&code_map,
+ const util::TracingCtx *tracing_ctx)
+ : DataflowExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map), tracing_ctx}
{
VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl;
}
VERBOSE(ParallelExecutor) << "INITIAL JOBS : " << _ready_jobs.size() << std::endl;
- _subject.notifyModelBegin(this);
+ auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph);
+
+ _subject.notifySubgraphBegin(profiling_subg_index);
+
while (true)
{
std::unique_lock<std::mutex> lock{_mu_jobs};
auto op_sequence_index = _job_to_op_seq[job_index];
auto op_seq = &_lowered_graph->op_seqs().at(op_sequence_index);
auto backend = _lowered_graph->getLowerInfo()->op_seq.at(op_sequence_index)->backend();
- auto setup = [&, op_seq, backend]() { _subject.notifyJobBegin(this, op_seq, backend); };
+ auto setup = [&, op_seq, backend]() {
+ _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend);
+ };
auto teardown = [&, job_index, op_seq, backend]() {
- _subject.notifyJobEnd(this, op_seq, backend);
+ _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend);
notify(job_index);
};
// Wait for all the jobs done
_scheduler->finish();
- _subject.notifyModelEnd(this);
+ _subject.notifySubgraphEnd(profiling_subg_index);
// Reset input info for the next execution
_input_info = _initial_input_info;
#include <memory>
#include "exec/DataflowExecutor.h"
#include "ParallelScheduler.h"
+#include "util/TracingCtx.h"
namespace onert
{
* @param code_map OpSequence and its code map
*/
ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<backend::ITensor *> &input_tensors,
- const std::vector<backend::ITensor *> &output_tensors,
- const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map);
+ const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map,
+ const util::TracingCtx *tracing_ctx);
void executeImpl() override;
* @note It should be called after setting input and output buffer
*/
void execute(const exec::IODescription &desc) final;
+ void execute(const std::vector<backend::IPortableTensor *> &,
+ const std::vector<backend::IPortableTensor *> &) final
+ {
+ throw new std::runtime_error{"Interpreter does not support subgraph calls(control flow ops)"};
+ }
+ const std::vector<backend::controlflow::IOTensor *> &getOutputTensors() const final
+ {
+ throw new std::runtime_error{"Interpreter does not support this function."};
+ }
private:
const ir::Graph &_graph;
float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
nnfw::cker::DepthwiseConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
- cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr);
+ cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr, nullptr);
}
void invokeDepthwiseConv(const ExecEnv *env, const ir::Operation &node)
return sizeof(uint8_t);
case DataType::QUANT_INT8_SYMM:
case DataType::QUANT_INT8_ASYMM:
+ case DataType::QUANT_INT8_SYMM_PER_CHANNEL:
return sizeof(int8_t);
case DataType::FLOAT16:
return sizeof(float16);
VERBOSE(LIR) << start_msg << std::endl;
}
-void OperationDumper::visit(const ArgMax &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const ArgMinMax &node)
+{
+ std::string min_max = node.param().is_arg_max ? "(Max)" : "(Min)";
+ VERBOSE(LIR) << "* " << node.name() << min_max << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ArgMinMax::INPUT) << ") Axis("
+ << node.getInputs().at(ArgMinMax::AXIS) << ") " << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+}
void OperationDumper::visit(const BatchToSpaceND &node)
{
dumpUnaryInputOp(node, axis);
}
+void OperationDumper::visit(const Fill &node)
+{
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
+ VERBOSE(LIR) << " - Inputs : Shape(" << node.getInputs().at(Fill::Input::SHAPE) << ") Value("
+ << node.getInputs().at(Fill::Input::VALUE) << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+}
+
void OperationDumper::visit(const FullyConnected &node)
{
std::string inputs =
}
VERBOSE(LIR) << " - Inputs : "
<< "Cond subgraph (" << node.param().cond_subg_index << ") Body subgraph ("
- << node.param().cond_subg_index << ") Inputs(" << inputs << ")" << std::endl;
+ << node.param().body_subg_index << ") Inputs(" << inputs << ")" << std::endl;
std::string outputs;
const auto &output_indices = node.getOutputs();
for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it)
OperationDumper(const std::string &start_msg);
public:
- void visit(const operation::ArgMax &) override;
+ void visit(const operation::ArgMinMax &) override;
void visit(const operation::BatchToSpaceND &node) override;
void visit(const operation::BCQFullyConnected &node) override;
void visit(const operation::BinaryArithmetic &node) override;
void visit(const operation::ElementwiseUnary &) override;
void visit(const operation::EmbeddingLookup &) override;
void visit(const operation::ExpandDims &) override;
+ void visit(const operation::Fill &) override;
void visit(const operation::FullyConnected &node) override;
void visit(const operation::Gather &) override;
void visit(const operation::HashtableLookup &) override;
return operandType(idx1) == operandType(idx2);
}
+bool OperationValidator::isSameQuantParam(const OperandIndex &idx1, const OperandIndex &idx2)
+{
+ if (_operands.at(idx1).typeInfo().scale() != _operands.at(idx2).typeInfo().scale())
+ return false;
+
+ if (_operands.at(idx1).typeInfo().offset() != _operands.at(idx2).typeInfo().offset())
+ return false;
+
+ return true;
+}
+
bool OperationValidator::isValidType(const OperandIndex &idx, const DataType &type)
{
return operandType(idx) == type;
void OperationValidator::visit(const operation::AddN &node)
{
+ const auto output_index(node.getOutputs().at(0));
+
int size = node.getInputs().size();
for (int i = 0; i < size; i++)
{
const auto input_index(node.getInputs().at(i));
OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32}));
+ OP_REQUIRES(isSameType(input_index, output_index));
}
}
+void OperationValidator::visit(const operation::ArgMinMax &node)
+{
+ const auto input_index(node.getInputs().at(operation::ArgMinMax::Input::INPUT));
+ const auto axis_index(node.getInputs().at(operation::ArgMinMax::Input::AXIS));
+ const auto output_index(node.getOutputs().at(0));
+ const auto output_type = node.param().output_type;
+
+ OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32, DataType::UINT8,
+ DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
+ OP_REQUIRES(isValidType(axis_index, {DataType::INT32, DataType::INT64}));
+ OP_REQUIRES(isValidType(output_index, {DataType::INT32, DataType::INT64}));
+ OP_REQUIRES(isValidType(output_index, output_type));
+}
+
void OperationValidator::visit(const operation::BatchMatMul &node)
{
const auto lhs_index(node.getInputs().at(operation::BatchMatMul::Input::LHS));
const auto rhs_index(node.getInputs().at(operation::BatchMatMul::Input::RHS));
+ const auto output_index(node.getOutputs().at(0));
// Constant lhs and rhs is not implemented yet
OP_REQUIRES(!isConstant(lhs_index) && !isConstant(rhs_index));
+
+ // Allow hybrid quantization (lhs: float / rhs: qint8 / out: float)
+ OP_REQUIRES(isValidType(lhs_index, {DataType::FLOAT32, DataType::QUANT_INT8_ASYMM}));
+ OP_REQUIRES(isSameType(lhs_index, rhs_index) ||
+ ((operandType(lhs_index) == DataType::FLOAT32) &&
+ (operandType(rhs_index) == DataType::QUANT_INT8_ASYMM)));
+ OP_REQUIRES(isSameType(lhs_index, output_index));
}
void OperationValidator::visit(const operation::BatchToSpaceND &node)
{
- const auto block_size_index{node.getInputs().at(operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+ const auto input_index{node.getInputs().at(operation::BatchToSpaceND::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
- // Non-constant block_size is not implemented yet
- OP_REQUIRES(isConstant(block_size_index));
+ OP_REQUIRES(isSameType(input_index, output_index));
}
void OperationValidator::visit(const operation::BinaryArithmetic &node)
OP_REQUIRES(isValidType(output_index, DataType::BOOL8));
}
+void OperationValidator::visit(const operation::Concat &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+
+ for (auto input_index : node.getInputs())
+ {
+ OP_REQUIRES(isSameType(input_index, output_index));
+
+ // Int8 quantization requires same scale and zero point
+ if (isValidType(output_index, DataType::QUANT_INT8_ASYMM))
+ {
+ OP_REQUIRES(isSameQuantParam(input_index, output_index));
+ }
+ }
+}
+
+void OperationValidator::visit(const operation::Conv2D &node)
+{
+ const auto input_index{node.getInputs().at(operation::Conv2D::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+
+ uint32_t stride_horizontal = node.param().stride.horizontal;
+ uint32_t stride_vertical = node.param().stride.vertical;
+ uint32_t dilation_width = node.param().dilation.width_factor;
+ uint32_t dilation_height = node.param().dilation.height_factor;
+
+ OP_REQUIRES((stride_horizontal > 0) && (stride_vertical > 0));
+ OP_REQUIRES((dilation_width > 0) && (dilation_height > 0));
+ OP_REQUIRES(isSameType(input_index, output_index));
+}
+
void OperationValidator::visit(const operation::DepthToSpace &node)
{
+ const auto input_index{node.getInputs().at(operation::DepthToSpace::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+
int32_t block_size = node.param().block_size;
+ OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32, DataType::INT64,
+ DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
+ OP_REQUIRES(isSameType(input_index, output_index));
+
OP_REQUIRES(block_size > 0);
}
// Check if I/O types match
OP_REQUIRES(isSameType(output_index, input_index));
+
+ switch (node.param().op_type)
+ {
+ case operation::ElementwiseActivation::Type::ELU:
+ OP_REQUIRES(isValidType(input_index, DataType::FLOAT32));
+ break;
+ case operation::ElementwiseActivation::Type::LEAKY_RELU:
+ OP_REQUIRES(
+ isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
+ DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
+ break;
+ case operation::ElementwiseActivation::Type::LOGISTIC:
+ OP_REQUIRES(
+ isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
+ DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
+ break;
+ case operation::ElementwiseActivation::Type::RELU:
+ OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
+ DataType::QUANT_INT8_ASYMM}));
+ break;
+ case operation::ElementwiseActivation::Type::TANH:
+ OP_REQUIRES(
+ isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
+ DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
+ break;
+ }
}
void OperationValidator::visit(const operation::ElementwiseBinary &node)
OP_REQUIRES(isSameType(lhs_index, rhs_index));
OP_REQUIRES(isSameType(lhs_index, output_index));
+
+ const auto op_type = node.param().op_type;
+ if (op_type == operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND ||
+ op_type == operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR)
+ {
+ OP_REQUIRES(isValidType(lhs_index, DataType::BOOL8));
+ }
}
void OperationValidator::visit(const operation::ElementwiseUnary &node)
void OperationValidator::visit(const operation::EmbeddingLookup &node)
{
const auto lookups_index{node.getInputs().at(operation::EmbeddingLookup::Input::LOOKUPS)};
+ const auto values_index{node.getInputs().at(operation::EmbeddingLookup::Input::VALUES)};
+ const auto output_index{node.getOutputs().at(0)};
OP_REQUIRES(isValidType(lookups_index, DataType::INT32));
+
+ // TFLite: Allow hybrid type - value table & output
+ // NNAPI: Require same value table and output type
+ OP_REQUIRES(
+ isSameType(values_index, output_index) ||
+ (isValidType(output_index, DataType::FLOAT32) &&
+ (isValidType(values_index, {DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT8_SYMM}))));
}
void OperationValidator::visit(const operation::ExpandDims &node)
const auto axis_index{node.getInputs().at(operation::ExpandDims::Input::AXIS)};
OP_REQUIRES(isSameType(output_index, input_index));
- OP_REQUIRES(isValidType(axis_index, DataType::INT32));
+ OP_REQUIRES(isValidType(axis_index, {DataType::INT32, DataType::INT64}));
+}
+
+void OperationValidator::visit(const operation::Fill &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(operation::Fill::Input::SHAPE)};
+ const auto value_index{node.getInputs().at(operation::Fill::Input::VALUE)};
+
+ OP_REQUIRES(isSameType(output_index, value_index));
+ OP_REQUIRES(isValidType(input_index, {DataType::INT32, DataType::INT64}));
+ OP_REQUIRES(isValidType(output_index,
+ {DataType::FLOAT32, DataType::INT32, DataType::INT64, DataType::BOOL8}));
}
void OperationValidator::visit(const operation::HashtableLookup &node)
public:
void visit(const operation::AddN &node) override;
+ void visit(const operation::ArgMinMax &node) override;
void visit(const operation::BatchMatMul &node) override;
void visit(const operation::BatchToSpaceND &node) override;
void visit(const operation::BinaryArithmetic &node) override;
void visit(const operation::Comparison &node) override;
+ void visit(const operation::Concat &node) override;
+ void visit(const operation::Conv2D &node) override;
void visit(const operation::DepthToSpace &node) override;
void visit(const operation::DepthwiseConv2D &node) override;
void visit(const operation::ElementwiseActivation &node) override;
void visit(const operation::ElementwiseUnary &node) override;
void visit(const operation::EmbeddingLookup &node) override;
void visit(const operation::ExpandDims &node) override;
+ void visit(const operation::Fill &node) override;
void visit(const operation::HashtableLookup &node) override;
void visit(const operation::Pack &node) override;
void visit(const operation::Pad &node) override;
DataType operandType(const OperandIndex &idx);
bool isConstant(const OperandIndex &idx);
bool isSameType(const OperandIndex &idx1, const OperandIndex &idx2);
+ bool isSameQuantParam(const OperandIndex &idx1, const OperandIndex &idx2);
bool isValidType(const OperandIndex &idx, const DataType &type);
bool isValidType(const OperandIndex &idx, std::initializer_list<DataType> valid_types);
* limitations under the License.
*/
-#include "ir/operation/ArgMax.h"
-
-#include <cassert>
-
+#include "ir/operation/ArgMinMax.h"
#include "ir/OperationVisitor.h"
namespace onert
namespace operation
{
-void ArgMax::accept(OperationVisitor &v) const { v.visit(*this); }
+void ArgMinMax::accept(OperationVisitor &v) const { v.visit(*this); }
-ArgMax::ArgMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
+ArgMinMax::ArgMinMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
: Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
{ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}},
{ElementwiseUnaryType::SIN, std::string{"Sin"}},
{ElementwiseUnaryType::SQRT, std::string{"Sqrt"}},
- {ElementwiseUnaryType::SQURE, std::string{"Squre"}},
+ {ElementwiseUnaryType::SQUARE, std::string{"Square"}},
{ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}};
return name_map.at(_param.op_type);
}
{
static std::unique_ptr<IConfigSource> _source;
+static std::unique_ptr<IConfigSource> _source_ext;
void config_source(std::unique_ptr<IConfigSource> &&source) { _source = std::move(source); }
+void config_source_ext(std::unique_ptr<IConfigSource> &&source) { _source_ext = std::move(source); }
static IConfigSource *config_source()
{
auto ret = config_source()->get(key);
if (ret.empty())
{
+ // if the primary source has no value, search the external source
+ if (_source_ext.get())
+ {
+ ret = _source_ext.get()->get(key);
+ }
+ }
+ // if still not found, fall back to the defaults
+ if (ret.empty())
+ {
auto itr = defaults.find(key);
if (itr != defaults.end())
{
public:
DurationEventBuilder(const std::string &ts) : _ts{ts} {}
- DurationEvent build(const std::string &tid, const std::string &name, const std::string &ph) const
+ DurationEvent build(const EventCollector::Event &evt_collected, const std::string &ph) const
{
DurationEvent evt;
- evt.name = name;
- evt.tid = tid;
+ evt.name = evt_collected.label;
+ evt.tid = evt_collected.backend;
evt.ph = ph;
evt.ts = _ts;
+ evt.args = evt_collected.userData;
+
return evt;
}
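// Illustrative example (field values made up): with userData copied into DurationEvent::args,
// a begin record in the Chrome trace comes out roughly as
//   {"name": "<label>", "tid": "cpu", "ph": "B", "ts": "1234",
//    "args": {"input_shape_0": "shape(1 3)"}}
// where "name" and "tid" come from the collected event's label and backend.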
switch (event.edge)
{
case Edge::BEGIN:
- _rec->emit(DurationEventBuilder(ts).build(event.backend, event.label, "B"));
+ _rec->emit(DurationEventBuilder(ts).build(event, "B"));
break;
case Edge::END:
- _rec->emit(DurationEventBuilder(ts).build(event.backend, event.label, "E"));
+ _rec->emit(DurationEventBuilder(ts).build(event, "E"));
break;
}
#include "util/EventRecorder.h"
+#include <vector>
+#include <utility>
+#include <string>
+
class EventCollector
{
public:
struct Event
{
Edge edge;
+ uint32_t session_index;
+ uint32_t subg_index;
std::string backend;
+ uint32_t op_index;
+ std::string op_name;
+ uint32_t op_seq_size; // if this event is for an operation sequence of multiple operations
+
+ // TODO Deprecate this. The label may differ per writer, so let the writer decide it.
std::string label;
+
+ // user-defined data: pairs of (key, value)
+ std::vector<std::pair<std::string, std::string>> userData;
+
+ Event(Edge a_edge, const std::string &a_backend, const std::string &a_label)
+ : edge(a_edge), session_index(0), subg_index(0), backend(a_backend), op_index(0),
+ op_seq_size(0), label(a_label)
+ { /* empty */
+ }
};
public:
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/EventCollectorGlobal.h"
-
-#include <cassert>
-#include <fstream>
-#include <iostream>
-
-#include "util/ConfigSource.h"
-#include "util/EventWriter.h"
-
-namespace onert
-{
-namespace util
-{
-
-EventCollectorGlobal::EventCollectorGlobal() : _recorder{}, _collector{&_recorder}
-{
- // DO NOTHING
-}
-
-EventCollectorGlobal::~EventCollectorGlobal()
-{
- if (!_recorder.empty())
- {
- try
- {
- // TODO Need better way for saved file path than the hardcoded path
- EventWriter{_recorder}.writeToFile("trace.global.json",
- EventWriter::WriteFormat::CHROME_TRACING);
- }
- catch (const std::exception &e)
- {
- std::cerr << "E: Fail to record event in EventCollectorGlobal: " << e.what() << std::endl;
- }
- }
-}
-
-EventCollectorGlobal &EventCollectorGlobal::get()
-{
- static EventCollectorGlobal instance;
- return instance;
-}
-
-EventDurationBlock::EventDurationBlock(const std::string &tag) : _tag{tag}
-{
- auto &glob = EventCollectorGlobal::get();
- glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag});
-}
-EventDurationBlock::~EventDurationBlock()
-{
- auto &glob = EventCollectorGlobal::get();
- glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag});
-}
-
-EventDurationManual::EventDurationManual(const std::string &tag) : _tag{tag}, _pair{true} {}
-
-EventDurationManual::~EventDurationManual()
-{
- // Check if it has called begin-end pair
- assert(_pair);
-}
-
-void EventDurationManual::begin()
-{
- _pair = false;
- auto &glob = EventCollectorGlobal::get();
- glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag});
-}
-
-void EventDurationManual::end()
-{
- assert(!_pair);
- _pair = true;
- auto &glob = EventCollectorGlobal::get();
- glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag});
-}
-
-} // namespace util
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__
-#define __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__
-
-#include "util/EventRecorder.h"
-#include "util/EventCollector.h"
-
-namespace onert
-{
-namespace util
-{
-
-/**
- * @brief Singleton class for event collection from anywhere in code
- *
- */
-class EventCollectorGlobal
-{
-public:
- /**
- * @brief Get the singleton object of this class
- *
- * @return EventCollectorGlobal& Singleton object
- */
- static EventCollectorGlobal &get();
-
-public:
- /**
- * @brief Getter for event collector object
- *
- * @return EventCollector& Collector object
- */
- EventCollector &collector() { return _collector; }
-
-private:
- EventCollectorGlobal();
- ~EventCollectorGlobal();
-
-private:
- EventRecorder _recorder;
- EventCollector _collector;
-};
-
-/**
- * @brief Helper class for emitting duration event which is handled automatically with ctor/dtor
- *
- */
-class EventDurationBlock
-{
-public:
- /**
- * @brief Raise a duration event with type of BEGIN
- *
- * @param tag A label for the duration event
- */
- EventDurationBlock(const std::string &tag);
- /**
- * @brief Raise a duration event with type of END
- *
- */
- ~EventDurationBlock();
-
-private:
- std::string _tag;
-};
-
-/**
- * @brief Helper class for emitting duration event which is handled manually
- *
- * Usage:
- * {
- * ...
- * EventDurationManual duration("some tag");
- * duration.begin();
- * ...
- * ... // Code for duration
- * ...
- * duration.end();
- * }
- *
- */
-class EventDurationManual
-{
-public:
- /**
- * @brief Construct a new Event Duration Manual object
- *
- * @param tag A label for the duration object
- */
- EventDurationManual(const std::string &tag);
- /**
- * @brief Destroy the Event Duration Manual object
- *
- */
- ~EventDurationManual();
-
- /**
- * @brief Raise a duration event with type of BEGIN
- *
- */
- void begin();
- /**
- * @brief Raise a duration event with type of END
- *
- */
- void end();
-
-private:
- std::string _tag;
- bool _pair;
-};
-
-} // namespace util
-} // namespace onert
-
-/**
- * Helper Macro Definitions
- *
- * HOW TO USE
- *
- * void f(args)
- * {
- * EVENT_DURATION_FUNCTION();
- * ...
- * if(cond)
- * {
- * EVENT_DURATION_REGION("if branch");
- * ...
- * }
- * ...
- * }
- */
-
-#define EVENT_DURATION_FUNCTION() \
- ::onert::util::EventDurationBlock __event_duration__##__LINE__ { __FUNCTION__ }
-
-#define EVENT_DURATION_REGION(tag) \
- ::onert::util::EventDurationBlock __event_duration__##__LINE__ { tag }
-
-#endif // __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__
{
std::string name;
std::string tid;
- std::string ph; /* REQUIRED */
- std::string ts; /* REQUIRED */
+ std::string ph; /* REQUIRED */
+ std::string ts; /* REQUIRED */
+ std::vector<std::pair<std::string, std::string>> args; // user-defined data: pairs of (key, value)
};
struct DurationEvent : public Event
content.flds.emplace_back("tid", evt.tid);
content.flds.emplace_back("ph", evt.ph);
content.flds.emplace_back("ts", evt.ts);
+ content.args = evt.args;
}
std::string object(const DurationEvent &evt)
} // namespace
-EventWriter::EventWriter(const EventRecorder &recorder) : _recorder(recorder)
-{
- // DO NOTHING
-}
-
-void EventWriter::writeToFiles(const std::string &base_filepath)
-{
- // Note. According to an internal issue, let snpe json as just file name not '.snpe.json'
- writeToFile(base_filepath, WriteFormat::SNPE_BENCHMARK);
- writeToFile(base_filepath + ".chrome.json", WriteFormat::CHROME_TRACING);
- writeToFile(base_filepath + ".table.md", WriteFormat::MD_TABLE);
-}
-
-void EventWriter::writeToFile(const std::string &filepath, WriteFormat write_format)
-{
- std::ofstream os{filepath, std::ofstream::out};
- switch (write_format)
- {
- case WriteFormat::CHROME_TRACING:
- writeChromeTrace(os);
- break;
- case WriteFormat::SNPE_BENCHMARK:
- writeSNPEBenchmark(os);
- break;
- case WriteFormat::MD_TABLE:
- writeMDTable(os);
- break;
- default:
- assert(!"Invalid value");
- break;
- }
-}
-
-void EventWriter::writeSNPEBenchmark(std::ostream &os)
+void SNPEWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders)
{
Json::Value root;
auto &exec_data = root["Execution_Data"] = Json::Value{Json::objectValue};
// Memory
{
std::unordered_map<std::string, Stat> mem_stats;
- for (auto &evt : _recorder.counter_events())
+ for (auto &recorder : recorders)
{
- auto &mem_stat = mem_stats[evt.name];
- uint64_t val = std::stoull(evt.values.at("value"));
- mem_stat.accumulate(val);
+ for (auto &evt : recorder->counter_events())
+ {
+ auto &mem_stat = mem_stats[evt.name];
+ uint64_t val = std::stoull(evt.values.at("value"));
+ mem_stat.accumulate(val);
+ }
}
auto &mem = exec_data["memory"] = Json::Value{Json::objectValue};
// 2D keys : stats[tid][name]
std::unordered_map<std::string, std::unordered_map<std::string, Stat>> stats;
std::unordered_map<std::string, std::unordered_map<std::string, uint64_t>> begin_timestamps;
- for (auto &evt : _recorder.duration_events())
+ for (auto &recorder : recorders)
{
- auto &stat = stats[evt.tid][evt.name];
- auto &begin_ts = begin_timestamps[evt.tid][evt.name];
- uint64_t timestamp = std::stoull(evt.ts);
- if (evt.ph == "B")
+ for (auto &evt : recorder->duration_events())
{
- if (begin_ts != 0)
- throw std::runtime_error{"Invalid Data"};
- begin_ts = timestamp;
- }
- else if (evt.ph == "E")
- {
- if (begin_ts == 0 || timestamp < begin_ts)
- throw std::runtime_error{"Invalid Data"};
- stat.accumulate(timestamp - begin_ts);
- begin_ts = 0;
+ auto &stat = stats[evt.tid][evt.name];
+ auto &begin_ts = begin_timestamps[evt.tid][evt.name];
+ uint64_t timestamp = std::stoull(evt.ts);
+ if (evt.ph == "B")
+ {
+ if (begin_ts != 0)
+ throw std::runtime_error{"Invalid Data"};
+ begin_ts = timestamp;
+ }
+ else if (evt.ph == "E")
+ {
+ if (begin_ts == 0 || timestamp < begin_ts)
+ throw std::runtime_error{"Invalid Data"};
+ stat.accumulate(timestamp - begin_ts);
+ begin_ts = 0;
+ }
+ else
+ throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt.ph + "\""};
}
- else
- throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt.ph + "\""};
}
for (auto &kv : begin_timestamps)
}
}
- os << root;
+ _os << root;
}
-void EventWriter::writeChromeTrace(std::ostream &os)
+void ChromeTracingWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders)
{
- os << "{\n";
- os << " " << quote("traceEvents") << ": [\n";
+ _os << "{\n";
+ _os << " " << quote("traceEvents") << ": [\n";
- for (auto &evt : _recorder.duration_events())
+ for (auto &recorder : recorders)
{
- os << " " << object(evt) << ",\n";
+ flushOneRecord(*recorder);
}
- for (auto &evt : _recorder.counter_events())
+ _os << " { }\n";
+ _os << " ]\n";
+ _os << "}\n";
+}
+
+void ChromeTracingWriter::flushOneRecord(const EventRecorder &recorder)
+{
+ for (auto &evt : recorder.duration_events())
{
- os << " " << object(evt) << ",\n";
+ _os << " " << object(evt) << ",\n";
}
- os << " { }\n";
- os << " ]\n";
- os << "}\n";
+ for (auto &evt : recorder.counter_events())
+ {
+ _os << " " << object(evt) << ",\n";
+ }
}
-void EventWriter::writeMDTable(std::ostream &os)
+void MDTableWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &records)
+{
+ for (auto &recorder : records)
+ {
+ MDTableBuilder(recorder->duration_events(), recorder->counter_events()).build().write(_os);
+ }
+}
+
+// initialization
+std::mutex EventWriter::_mutex;
+
+void EventWriter::readyToFlush(std::unique_ptr<EventRecorder> &&recorder)
{
- MDTableBuilder(_recorder.duration_events(), _recorder.counter_events()).build().write(os);
+ {
+ std::unique_lock<std::mutex> lock{_mutex};
+
+ _recorders.emplace_back(std::move(recorder));
+
+ if (--_ref_count > 0)
+ return;
+ }
+ // The caller of this method is the last instance that uses EventWriter.
+ // Let's write log files.
+
+ // Note: per an internal issue, the SNPE JSON keeps the plain file name, not '<name>.snpe.json'
+ flush(WriteFormat::SNPE_BENCHMARK);
+ flush(WriteFormat::CHROME_TRACING);
+ flush(WriteFormat::MD_TABLE);
+}
+
+void EventWriter::flush(WriteFormat write_format)
+{
+ auto *writer = _actual_writers[write_format].get();
+ assert(writer);
+
+ writer->flush(_recorders);
}
#include "EventRecorder.h"
#include <string>
-#include <ostream>
+#include <vector>
+#include <unordered_map>
+#include <mutex>
+#include <fstream>
+
+class EventFormatWriter
+{
+public:
+ EventFormatWriter(const std::string &filepath) : _os{filepath, std::ofstream::out} {}
+ virtual ~EventFormatWriter() { /* empty */}
+
+ virtual void flush(const std::vector<std::unique_ptr<EventRecorder>> &) = 0;
+
+protected:
+ std::ofstream _os;
+};
+
+class SNPEWriter : public EventFormatWriter
+{
+public:
+ SNPEWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */}
+ void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override;
+};
+
+class ChromeTracingWriter : public EventFormatWriter
+{
+public:
+ ChromeTracingWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */}
+ void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override;
+
+private:
+ void flushOneRecord(const EventRecorder &);
+};
+
+class MDTableWriter : public EventFormatWriter
+{
+public:
+ MDTableWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */}
+ void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override;
+
+private:
+ void flushOneRecord(const EventRecorder &);
+};
class EventWriter
{
MD_TABLE,
};
-public:
- EventWriter(const EventRecorder &recorder);
+ /**
+ * @brief Returns a singleton object
+ */
+ static EventWriter *get(const std::string &filename)
+ {
+ std::unique_lock<std::mutex> lock{_mutex};
-public:
- void writeToFiles(const std::string &base_filepath);
- void writeToFile(const std::string &filepath, WriteFormat write_format);
+ static EventWriter singleton(filename);
+ return &singleton;
+ }
+
+ /**
+ * @brief Call this when an observer that uses EventWriter starts
+ */
+ void startToUse()
+ {
+ std::unique_lock<std::mutex> lock{_mutex};
+ _ref_count++;
+ }
+
+ /**
+ * @brief Call this when an observer that uses EventWriter finishes.
+ * After all such observers have called this method, the reference count reaches 0.
+ * Then EventWriter writes the profiling result files.
+ */
+ void readyToFlush(std::unique_ptr<EventRecorder> &&recorder);
private:
- void writeSNPEBenchmark(std::ostream &os);
- void writeChromeTrace(std::ostream &os);
- void writeMDTable(std::ostream &os);
+ EventWriter(const std::string &filepath) : _ref_count(0)
+ {
+ std::string snpe_log_name(filepath);
+ std::string chrome_tracing_log_name(filepath + ".chrome.json");
+ std::string md_table_log_name(filepath + ".table.md");
+
+ _actual_writers[WriteFormat::SNPE_BENCHMARK] = std::make_unique<SNPEWriter>(snpe_log_name);
+ _actual_writers[WriteFormat::CHROME_TRACING] =
+ std::make_unique<ChromeTracingWriter>(chrome_tracing_log_name);
+ _actual_writers[WriteFormat::MD_TABLE] = std::make_unique<MDTableWriter>(md_table_log_name);
+ };
+
+ void flush(WriteFormat write_format);
private:
- const EventRecorder &_recorder;
+ static std::mutex _mutex;
+
+ // number of observers (one per executor) that want to write profiling data
+ int32_t _ref_count;
+
+ // one recorder object per executor
+ std::vector<std::unique_ptr<EventRecorder>> _recorders;
+
+ std::unordered_map<WriteFormat, std::unique_ptr<EventFormatWriter>> _actual_writers;
};
#endif // __ONERT_UTIL_EVENT_WRITER_H__
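// Illustrative usage sketch (assumes only the EventWriter and EventRecorder declarations from
// this patch): each observer registers itself, then hands its recorder back when it finishes;
// the last readyToFlush() call writes all three output formats.
//
//   EventWriter *writer = EventWriter::get("trace"); // singleton, keyed by the first call's path
//   writer->startToUse();                            // e.g. in the observer's constructor
//   auto recorder = std::make_unique<EventRecorder>();
//   // ... events are collected into *recorder while the executor runs ...
//   writer->readyToFlush(std::move(recorder));       // e.g. in the observer's destructor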
return broadcastShapes(lhs_shape, rhs_shape);
}
-ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank)
+ir::Shape inferArgMinMaxShape(const ir::Shape &input_shape, int axis, int rank)
{
if (axis < 0 || axis >= rank)
{
- throw std::runtime_error("ArgMax shape inference: Wrong axis value " + std::to_string(axis));
+ throw std::runtime_error("ArgMinMax shape inference: Wrong axis value " + std::to_string(axis));
}
ir::Shape out_shape;
return out_shape;
}
-ir::Shape inferFillShape(const ir::Shape &in_shape, const int32_t *in_buf)
+template <typename T> ir::Shape inferFillShape(const ir::Shape &fill_shape, const T *shape_buf)
{
- ir::Shape out_shape(in_shape.dim(0));
+ ir::Shape out_shape(fill_shape.dim(0));
for (int out_x = 0; out_x < out_shape.rank(); ++out_x)
{
- out_shape.dim(out_x) = in_buf[out_x];
+ out_shape.dim(out_x) = static_cast<int32_t>(shape_buf[out_x]);
}
return out_shape;
}
+// template instantiation
+template ir::Shape inferFillShape(const ir::Shape &fill_shape, const int32_t *shape_buf);
+template ir::Shape inferFillShape(const ir::Shape &fill_shape, const int64_t *shape_buf);
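// Illustrative only (assuming ir::Shape{2} constructs the 1-D shape [2]): with the two
// instantiations above, a Fill whose SHAPE input holds either int32 or int64 data is handled
// the same way, e.g.
//   const int64_t shape_buf[] = {2, 3};
//   ir::Shape out = inferFillShape(ir::Shape{2}, shape_buf); // -> rank-2 shape [2, 3]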
+
ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape)
{
assert(in_shape.rank() >= 2);
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/TracingCtx.h"
+
+namespace onert
+{
+namespace util
+{
+
+// initializing static member var
+std::mutex TracingCtx::_session_id_mutex;
+
+} // namespace util
+} // namespace onert
--- /dev/null
+../../../.clang-format.8
\ No newline at end of file
 * @param subgs Reference to subgraphs
*/
explicit BaseLoader(std::unique_ptr<ir::Subgraphs> &subgs)
- : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _subgraphs(subgs), _model{nullptr}
+ : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _subgraphs(subgs), _model{nullptr}
{
_use_mmaped_data = util::getConfigBool(util::config::USE_MMAPED_DATA);
}
// Operations
template <typename OpIR, typename... Args>
const OpIR *loadOperationTo(const Operator *op, ir::Graph &subg, Args &&... args);
- void loadConv2D(const Operator *op, ir::Graph &subg);
- void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg);
- void loadTransposeConv(const Operator *op, ir::Graph &subg);
- void loadPool2D(const Operator *op, ir::Graph &subg, ir::operation::Pool2D::PoolType op_type);
- void loadReshape(const Operator *op, ir::Graph &subg);
- void loadSoftmax(const Operator *op, ir::Graph &subg);
- void loadConcatenation(const Operator *op, ir::Graph &subg);
- void loadFC(const Operator *op, ir::Graph &subg);
+
+ void loadAddV2(const Operator *op, ir::Graph &subg);
+ void loadArgMinMax(const Operator *op, ir::Graph &subg, bool is_argmax);
+ void loadBatchMatMul(const Operator *op, ir::Graph &subg);
void loadBinaryArithmetic(const Operator *op, ir::Graph &subg,
ir::operation::BinaryArithmetic::ArithmeticType op_type);
- void loadAddV2(const Operator *op, ir::Graph &subg);
- void loadPack(const Operator *op, ir::Graph &subg);
- void loadResizeBilinear(const Operator *op, ir::Graph &subg);
- void loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg);
- void loadReduce(const Operator *op, ir::Graph &subg,
- ir::operation::Reduce::ReduceType reduce_type);
- void loadReduceAll(const Operator *op, ir::Graph &subg);
+ void loadComparison(const Operator *op, ir::Graph &subg);
+ void loadConcatenation(const Operator *op, ir::Graph &subg);
+ void loadConv2D(const Operator *op, ir::Graph &subg);
+ void loadCustom(const Operator *op, ir::Graph &subg);
+ void loadDepthToSpace(const Operator *op, ir::Graph &subg);
+ void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg);
+ void loadEinsum(const Operator *op, ir::Graph &subg);
void loadElementwiseActivation(const Operator *op, ir::Graph &subg,
ir::operation::ElementwiseActivation::Type op_type,
float alpha = 0.f, float beta = 0.f);
ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type);
void loadElementwiseUnary(const Operator *op, ir::Graph &subg,
ir::operation::ElementwiseUnary::Type op_type);
+ void loadFC(const Operator *op, ir::Graph &subg);
+ void loadFusedBatchNorm(const Operator *op, ir::Graph &subg);
void loadGather(const Operator *op, ir::Graph &subg);
- void loadCustom(const Operator *op, ir::Graph &subg);
- void loadBatchMatMul(const Operator *op, ir::Graph &subg);
- void loadSqueeze(const Operator *op, ir::Graph &subg);
+ void loadIf(const Operator *op, ir::Graph &subg);
+ void loadLeakyRelu(const Operator *op, ir::Graph &subg);
+ void loadLogSoftmax(const Operator *op, ir::Graph &subg);
+ void loadOneHot(const Operator *op, ir::Graph &subg);
+ void loadPack(const Operator *op, ir::Graph &subg);
+ void loadPool2D(const Operator *op, ir::Graph &subg, ir::operation::Pool2D::PoolType op_type);
+ void loadReduce(const Operator *op, ir::Graph &subg,
+ ir::operation::Reduce::ReduceType reduce_type);
+ void loadReduceAll(const Operator *op, ir::Graph &subg);
+ void loadReshape(const Operator *op, ir::Graph &subg);
+ void loadResizeBilinear(const Operator *op, ir::Graph &subg);
+ void loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg);
+ void loadSoftmax(const Operator *op, ir::Graph &subg);
+ void loadSpaceToDepth(const Operator *op, ir::Graph &subg);
void loadSplit(const Operator *op, ir::Graph &subg);
void loadSplitV(const Operator *op, ir::Graph &subg);
+ void loadSqueeze(const Operator *op, ir::Graph &subg);
void loadStridedSlice(const Operator *op, ir::Graph &subg);
+ void loadTransposeConv(const Operator *op, ir::Graph &subg);
+ void loadUnidirectionalSequenceLSTM(const Operator *op, ir::Graph &subg);
void loadUnpack(const Operator *op, ir::Graph &subg);
- void loadComparison(const Operator *op, ir::Graph &subg);
- void loadEinsum(const Operator *op, ir::Graph &subg);
- void loadOneHot(const Operator *op, ir::Graph &subg);
- void loadIf(const Operator *op, ir::Graph &subg);
void loadWhile(const Operator *op, ir::Graph &subg);
- void loadArgMax(const Operator *op, ir::Graph &subg);
- void loadFusedBatchNorm(const Operator *op, ir::Graph &subg);
- void loadLogSoftmax(const Operator *op, ir::Graph &subg);
- void loadSpaceToDepth(const Operator *op, ir::Graph &subg);
- void loadLeakyRelu(const Operator *op, ir::Graph &subg);
- void loadUnidirectionalSequenceLSTM(const Operator *op, ir::Graph &subg);
void verifySubgraphIndex(int subg_index)
{
{
case TensorType::TensorType_FLOAT32:
return ir::DataType::FLOAT32;
+ case TensorType::TensorType_FLOAT16:
+ return ir::DataType::FLOAT16;
case TensorType::TensorType_INT32:
return ir::DataType::INT32;
- case TensorType::TensorType_BOOL:
- return ir::DataType::BOOL8;
case TensorType::TensorType_UINT8:
return ir::DataType::QUANT_UINT8_ASYMM;
- case TensorType::TensorType_INT8:
- return ir::DataType::QUANT_INT8_ASYMM;
case TensorType::TensorType_INT64:
return ir::DataType::INT64;
+ // case TensorType::TensorType_STRING:
+ case TensorType::TensorType_BOOL:
+ return ir::DataType::BOOL8;
+ case TensorType::TensorType_INT16:
+ return ir::DataType::QUANT_INT16_ASYMM;
+ // case TensorType::TensorType_COMPLEX64
+ case TensorType::TensorType_INT8:
+ return ir::DataType::QUANT_INT8_ASYMM;
+ // case TensorType::TensorType_FLOAT64
default:
throw std::runtime_error(
- std::string("Unsupported tensor type: ").append(EnumNameTensorType(type)));
+ std::string("Unsupported tensor type: ").append(EnumNameTensorType(type)));
}
}
{
size_t offset = unaligned_offset_start - aligned_offset_start;
uint8_t *mmap_base = static_cast<uint8_t *>(
- mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, _fd, aligned_offset_start));
+ mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, _fd, aligned_offset_start));
data_obj = std::make_unique<ir::CachedData>(mmap_base + offset, data_size);
munmap(mmap_base, mmap_size);
}
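For context on the hunk above: mmap() only accepts page-aligned file offsets, so the loader maps from a rounded-down offset and skips the in-page remainder before copying the payload. A standalone, hedged sketch of that bookkeeping (the helper and struct names are illustrative and not part of the patch):

#include <cstddef>
#include <sys/types.h>
#include <unistd.h>

// Illustrative only: how aligned_offset_start, offset and mmap_size relate.
struct MmapPlan
{
  off_t aligned_offset_start; // page-aligned offset handed to mmap()
  size_t offset;              // bytes to skip inside the mapping
  size_t mmap_size;           // bytes to map so the whole payload is covered
};

MmapPlan planMmap(off_t unaligned_offset_start, size_t data_size)
{
  const long page = sysconf(_SC_PAGE_SIZE);
  MmapPlan plan;
  plan.aligned_offset_start = (unaligned_offset_start / page) * page;
  plan.offset = static_cast<size_t>(unaligned_offset_start - plan.aligned_offset_start);
  plan.mmap_size = plan.offset + data_size;
  return plan;
}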
bool block2D_sparsity = dim_metadata_size == 4 && block_rank == 2;
if (!random_sparsity && !block2D_sparsity)
throw std::runtime_error(
- "sparsity is supported only for 2D tensor with random or 16x1 block sparsity.");
+ "sparsity is supported only for 2D tensor with random or 16x1 block sparsity.");
const auto *src_metadata = src_sparsity->dim_metadata()->Get(0);
if (src_metadata->format() != DimensionType::DimensionType_DENSE)
auto builtin_code = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
if (isOptionalInputTensor(idx) && !allowOptionalInputTensor(builtin_code))
throw std::runtime_error(
- std::string("loader doesn't support optional input tensor yet for ")
- .append(EnumNameBuiltinOperator(builtin_code)));
+ std::string("loader doesn't support optional input tensor yet for ")
+ .append(EnumNameBuiltinOperator(builtin_code)));
};
check_optional_input();
inputs.append(tensorIdxToOperandIdx(idx));
const auto fc = loadOperationTo<ir::operation::FullyConnected>(op, subg, param);
const auto &input_operand =
- subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::INPUT));
+ subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::INPUT));
auto &weights_operand =
- subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::WEIGHT));
+ subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::WEIGHT));
if (input_operand.typeInfo().type() == ir::DataType::FLOAT32 &&
((weights_operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM) ||
weights_operand.typeInfo().type() == ir::DataType::QUANT_INT8_ASYMM))
auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
auto attr_map = data_root.AsMap();
const auto fused_activation_func = static_cast<typename LoaderDomain::ActivationFunctionType>(
- attr_map["fused_activation_function"].AsInt8());
+ attr_map["fused_activation_function"].AsInt8());
param.activation = convertActivation(fused_activation_func);
}
}
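The custom-op options parsed above arrive as a flexbuffer-encoded map. A minimal, self-contained sketch of writing and reading such a map, assuming only the flexbuffers API shipped with flatbuffers (the attribute name mirrors the lookup above; these helpers are illustrative, not loader code):

#include <cstdint>
#include <vector>
#include "flatbuffers/flexbuffers.h"

// Build a small attribute map like the one a custom op carries.
std::vector<uint8_t> makeCustomOptions()
{
  flexbuffers::Builder fbb;
  fbb.Map([&]() { fbb.Int("fused_activation_function", 1); });
  fbb.Finish();
  return fbb.GetBuffer();
}

// Read it back the same way the parsing above does.
int8_t readActivation(const uint8_t *data, size_t size)
{
  auto root = flexbuffers::GetRoot(data, size);
  return root.AsMap()["fused_activation_function"].AsInt8();
}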
template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadDepthToSpace(const Operator *op, ir::Graph &subg)
+{
+ ir::operation::DepthToSpace::Param param;
+ const auto *options = op->builtin_options_as_DepthToSpaceOptions();
+ param.block_size = options->block_size();
+
+ loadOperationTo<ir::operation::DepthToSpace>(op, subg, param);
+}
+
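A quick, standalone illustration of what the new block_size parameter means for DEPTH_TO_SPACE (NHWC layout assumed; this helper is illustrative and not part of the loader):

#include <array>
#include <cstdint>

// DEPTH_TO_SPACE with block_size b moves depth into a b x b spatial block:
// {N, H, W, C} -> {N, H * b, W * b, C / (b * b)}.
std::array<int32_t, 4> depthToSpaceShape(const std::array<int32_t, 4> &nhwc, int32_t block_size)
{
  return {nhwc[0], nhwc[1] * block_size, nhwc[2] * block_size,
          nhwc[3] / (block_size * block_size)};
}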
+template <typename LoaderDomain>
void BaseLoader<LoaderDomain>::loadBinaryArithmetic(
- const Operator *op, ir::Graph &subg, ir::operation::BinaryArithmetic::ArithmeticType op_type)
+ const Operator *op, ir::Graph &subg, ir::operation::BinaryArithmetic::ArithmeticType op_type)
{
ir::operation::BinaryArithmetic::Param param;
param.arithmetic_type = op_type;
template <typename LoaderDomain>
void BaseLoader<LoaderDomain>::loadElementwiseActivation(
- const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type,
- float alpha, float beta)
+ const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type,
+ float alpha, float beta)
{
ir::operation::ElementwiseActivation::Param param;
param.op_type = op_type;
template <typename LoaderDomain>
void BaseLoader<LoaderDomain>::loadElementwiseBinary(
- const Operator *op, ir::Graph &subg,
- ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type)
+ const Operator *op, ir::Graph &subg,
+ ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type)
{
ir::operation::ElementwiseBinary::Param param;
param.op_type = op_type;
}
};
qasymm8ToUint8(
- subg.operands().at(eu->getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)));
+ subg.operands().at(eu->getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)));
qasymm8ToUint8(subg.operands().at(eu->getOutputs().at(0)));
}
}
break;
default:
throw std::runtime_error(
- std::string("Wrong loaded operation: ").append(EnumNameBuiltinOperator(builtin_op)) +
- " as " + EnumNameBuiltinOperator(BuiltinOperator::BuiltinOperator_BATCH_MATMUL));
+ std::string("Wrong loaded operation: ").append(EnumNameBuiltinOperator(builtin_op)) +
+ " as " + EnumNameBuiltinOperator(BuiltinOperator::BuiltinOperator_BATCH_MATMUL));
}
loadOperationTo<ir::operation::BatchMatMul>(op, subg, param);
// Mapping from custom op name string to BuiltinOP enum
std::map<std::string, BuiltinOP> builtin_map = {
- {"AddV2", BuiltinOP::AddV2},
- {"All", BuiltinOP::ReduceAll},
- {"MatrixBandPart", BuiltinOP::MatrixBandPart},
- {"BatchMatMulV2", BuiltinOP::BatchMatMul},
- {"Einsum", BuiltinOP::Einsum},
- {"FusedBatchNormV3", BuiltinOP::FusedBatchNorm},
- {"BroadcastTo", BuiltinOP::BroadcastTo},
- {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform},
- {"Erf", BuiltinOP::Erf},
+ {"AddV2", BuiltinOP::AddV2},
+ {"All", BuiltinOP::ReduceAll},
+ {"MatrixBandPart", BuiltinOP::MatrixBandPart},
+ {"BatchMatMulV2", BuiltinOP::BatchMatMul},
+ {"Einsum", BuiltinOP::Einsum},
+ {"FusedBatchNormV3", BuiltinOP::FusedBatchNorm},
+ {"BroadcastTo", BuiltinOP::BroadcastTo},
+ {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform},
+ {"Erf", BuiltinOP::Erf},
};
try
break;
default:
throw std::runtime_error{
- "Loader: Custom OP map is defined but operation loader function is not defined"};
+ "Loader: Custom OP map is defined but operation loader function is not defined"};
}
return;
break;
default:
throw std::runtime_error(
- std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
+ std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
}
loadOperationTo<ir::operation::Comparison>(op, subg, param);
}
template <typename LoaderDomain>
-void BaseLoader<LoaderDomain>::loadArgMax(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain>::loadArgMinMax(const Operator *op, ir::Graph &subg, bool is_argmax)
{
- ir::operation::ArgMax::Param param;
- const auto output_type = op->builtin_options_as_ArgMaxOptions()->output_type();
- switch (output_type)
- {
- case TensorType::TensorType_INT32:
- case TensorType::TensorType_INT64:
- param.output_type = tensorTypeToDataType(output_type);
- break;
- default:
- throw std::runtime_error("ArgMax: `output_type` must be either int32 or int64.");
- }
- auto am = loadOperationTo<ir::operation::ArgMax>(op, subg, param);
+ ir::operation::ArgMinMax::Param param;
+ const auto output_type = is_argmax ? op->builtin_options_as_ArgMaxOptions()->output_type()
+ : op->builtin_options_as_ArgMinOptions()->output_type();
+ param.output_type = tensorTypeToDataType(output_type);
+ param.is_arg_max = is_argmax;
- auto &axisOperand = subg.operands().at(am->getInputs().at(ir::operation::ArgMax::Input::AXIS));
- if (!(axisOperand.operandSize() == 4 && (axisOperand.typeInfo().type() == ir::DataType::INT32 ||
- axisOperand.typeInfo().type() == ir::DataType::INT64)))
- throw std::runtime_error("ArgMax: `axis` with an int32 or int64 element is only supported.");
+ loadOperationTo<ir::operation::ArgMinMax>(op, subg, param);
}
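The unified loader above differs only in which options table supplies output_type and in the is_arg_max flag. A standalone sketch of the reduction it selects between (illustrative semantics, not loader code):

#include <algorithm>
#include <cstdint>
#include <iterator>
#include <vector>

// One routine serves both ops, toggled by a flag, mirroring is_arg_max.
int64_t argMinMax(const std::vector<float> &values, bool is_arg_max)
{
  const auto it = is_arg_max ? std::max_element(values.begin(), values.end())
                             : std::min_element(values.begin(), values.end());
  return static_cast<int64_t>(std::distance(values.begin(), it));
}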
template <typename LoaderDomain>
{
auto builtin_code = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
throw std::runtime_error(std::string("loader doesn't support optional output tensor yet for ")
- .append(EnumNameBuiltinOperator(builtin_code)));
+ .append(EnumNameBuiltinOperator(builtin_code)));
}
for (size_t i = 0; i < ir::operation::LSTM::Output::OUTPUT; ++i)
{
case BuiltinOperator::BuiltinOperator_PACK:
loadPack(op, subg);
return;
+ case BuiltinOperator::BuiltinOperator_ELU:
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::ELU);
+ return;
case BuiltinOperator::BuiltinOperator_RELU:
loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU,
ir::operation::ElementwiseActivation::infinity, 0.f);
case BuiltinOperator::BuiltinOperator_SQRT:
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SQRT);
return;
+ case BuiltinOperator::BuiltinOperator_SQUARE:
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SQUARE);
+ return;
case BuiltinOperator::BuiltinOperator_SQUARED_DIFFERENCE:
loadOperationTo<ir::operation::SquaredDifference>(op, subg);
return;
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::NEG);
return;
case BuiltinOperator::BuiltinOperator_ARG_MAX:
- loadArgMax(op, subg);
+ loadArgMinMax(op, subg, true);
+ return;
+ case BuiltinOperator::BuiltinOperator_ARG_MIN:
+ loadArgMinMax(op, subg, false);
return;
case BuiltinOperator::BuiltinOperator_LOG:
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOG);
case BuiltinOperator::BuiltinOperator_LOGICAL_NOT:
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOGICAL_NOT);
return;
+ case BuiltinOperator::BuiltinOperator_LOGICAL_AND:
+ loadElementwiseBinary(op, subg,
+ ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND);
+ return;
case BuiltinOperator::BuiltinOperator_LOGICAL_OR:
loadElementwiseBinary(op, subg,
ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR);
case BuiltinOperator::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM:
loadUnidirectionalSequenceLSTM(op, subg);
return;
+ case BuiltinOperator::BuiltinOperator_DEPTH_TO_SPACE:
+ loadDepthToSpace(op, subg);
+ return;
default:
throw std::runtime_error(
- std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
+ std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
}
}
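The RELU-family cases above route through loadElementwiseActivation with explicit (alpha, beta) bounds, e.g. RELU passes infinity and 0, which act as clamp limits for that family. A standalone illustration of that clamp-style parameterization (assumed semantics for the bounded variants, not the kernel itself):

#include <algorithm>

// alpha is the upper bound, beta the lower bound; RELU uses (infinity, 0),
// a RELU6-style op would use (6, 0).
float boundedActivation(float x, float alpha, float beta)
{
  return std::min(std::max(x, beta), alpha);
}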
param.activation = convertActivation(options->fused_activation_function());
std::unique_ptr<ir::Operation> new_op(
- new ir::operation::BCQFullyConnected(inputs, outputs, param));
+ new ir::operation::BCQFullyConnected(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6]
{
static const ActivationFunctionType values[] = {
- ActivationFunctionType_NONE, ActivationFunctionType_RELU,
- ActivationFunctionType_RELU_N1_TO_1, ActivationFunctionType_RELU6,
- ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT};
+ ActivationFunctionType_NONE, ActivationFunctionType_RELU, ActivationFunctionType_RELU_N1_TO_1,
+ ActivationFunctionType_RELU6, ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT};
return values;
}
inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[3]
{
static const FullyConnectedOptionsWeightsFormat values[] = {
- FullyConnectedOptionsWeightsFormat_DEFAULT,
- FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8,
- FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32};
+ FullyConnectedOptionsWeightsFormat_DEFAULT, FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8,
+ FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32};
return values;
}
const circle::CustomQuantization *details_as_CustomQuantization() const
{
return details_type() == circle::QuantizationDetails_CustomQuantization
- ? static_cast<const circle::CustomQuantization *>(details())
- : nullptr;
+ ? static_cast<const circle::CustomQuantization *>(details())
+ : nullptr;
}
int32_t quantized_dimension() const { return GetField<int32_t>(VT_QUANTIZED_DIMENSION, 0); }
bool Verify(flatbuffers::Verifier &verifier) const
};
inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(
- flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<float>> min = 0,
- flatbuffers::Offset<flatbuffers::Vector<float>> max = 0,
- flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0,
- flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0,
- circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE,
- flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<float>> min = 0,
+ flatbuffers::Offset<flatbuffers::Vector<float>> max = 0,
+ flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0,
+ circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE,
+ flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
{
QuantizationParametersBuilder builder_(_fbb);
builder_.add_quantized_dimension(quantized_dimension);
}
inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr,
- const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr,
- const std::vector<int64_t> *zero_point = nullptr,
- circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE,
- flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr,
+ const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr,
+ const std::vector<int64_t> *zero_point = nullptr,
+ circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE,
+ flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
{
auto min__ = min ? _fbb.CreateVector<float>(*min) : 0;
auto max__ = max ? _fbb.CreateVector<float>(*max) : 0;
const circle::Int32Vector *array_segments_as_Int32Vector() const
{
return array_segments_type() == circle::SparseIndexVector_Int32Vector
- ? static_cast<const circle::Int32Vector *>(array_segments())
- : nullptr;
+ ? static_cast<const circle::Int32Vector *>(array_segments())
+ : nullptr;
}
const circle::Uint16Vector *array_segments_as_Uint16Vector() const
{
return array_segments_type() == circle::SparseIndexVector_Uint16Vector
- ? static_cast<const circle::Uint16Vector *>(array_segments())
- : nullptr;
+ ? static_cast<const circle::Uint16Vector *>(array_segments())
+ : nullptr;
}
const circle::Uint8Vector *array_segments_as_Uint8Vector() const
{
return array_segments_type() == circle::SparseIndexVector_Uint8Vector
- ? static_cast<const circle::Uint8Vector *>(array_segments())
- : nullptr;
+ ? static_cast<const circle::Uint8Vector *>(array_segments())
+ : nullptr;
}
circle::SparseIndexVector array_indices_type() const
{
const circle::Int32Vector *array_indices_as_Int32Vector() const
{
return array_indices_type() == circle::SparseIndexVector_Int32Vector
- ? static_cast<const circle::Int32Vector *>(array_indices())
- : nullptr;
+ ? static_cast<const circle::Int32Vector *>(array_indices())
+ : nullptr;
}
const circle::Uint16Vector *array_indices_as_Uint16Vector() const
{
return array_indices_type() == circle::SparseIndexVector_Uint16Vector
- ? static_cast<const circle::Uint16Vector *>(array_indices())
- : nullptr;
+ ? static_cast<const circle::Uint16Vector *>(array_indices())
+ : nullptr;
}
const circle::Uint8Vector *array_indices_as_Uint8Vector() const
{
return array_indices_type() == circle::SparseIndexVector_Uint8Vector
- ? static_cast<const circle::Uint8Vector *>(array_indices())
- : nullptr;
+ ? static_cast<const circle::Uint8Vector *>(array_indices())
+ : nullptr;
}
bool Verify(flatbuffers::Verifier &verifier) const
{
};
inline flatbuffers::Offset<DimensionMetadata> CreateDimensionMetadata(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::DimensionType format = circle::DimensionType_DENSE, int32_t dense_size = 0,
- circle::SparseIndexVector array_segments_type = circle::SparseIndexVector_NONE,
- flatbuffers::Offset<void> array_segments = 0,
- circle::SparseIndexVector array_indices_type = circle::SparseIndexVector_NONE,
- flatbuffers::Offset<void> array_indices = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, circle::DimensionType format = circle::DimensionType_DENSE,
+ int32_t dense_size = 0,
+ circle::SparseIndexVector array_segments_type = circle::SparseIndexVector_NONE,
+ flatbuffers::Offset<void> array_segments = 0,
+ circle::SparseIndexVector array_indices_type = circle::SparseIndexVector_NONE,
+ flatbuffers::Offset<void> array_indices = 0)
{
DimensionMetadataBuilder builder_(_fbb);
builder_.add_array_indices(array_indices);
const flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>> *dim_metadata() const
{
return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>> *>(
- VT_DIM_METADATA);
+ VT_DIM_METADATA);
}
bool Verify(flatbuffers::Verifier &verifier) const
{
fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map);
}
void add_dim_metadata(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
- dim_metadata)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
+ dim_metadata)
{
fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata);
}
};
inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
- dim_metadata = 0)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
+ dim_metadata = 0)
{
SparsityParametersBuilder builder_(_fbb);
builder_.add_dim_metadata(dim_metadata);
}
inline flatbuffers::Offset<SparsityParameters> CreateSparsityParametersDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr,
- const std::vector<int32_t> *block_map = nullptr,
- const std::vector<flatbuffers::Offset<circle::DimensionMetadata>> *dim_metadata = nullptr)
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr,
+ const std::vector<int32_t> *block_map = nullptr,
+ const std::vector<flatbuffers::Offset<circle::DimensionMetadata>> *dim_metadata = nullptr)
{
auto traversal_order__ = traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0;
auto block_map__ = block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0;
auto dim_metadata__ =
- dim_metadata
- ? _fbb.CreateVector<flatbuffers::Offset<circle::DimensionMetadata>>(*dim_metadata)
- : 0;
+ dim_metadata ? _fbb.CreateVector<flatbuffers::Offset<circle::DimensionMetadata>>(*dim_metadata)
+ : 0;
return circle::CreateSparsityParameters(_fbb, traversal_order__, block_map__, dim_metadata__);
}
}
inline flatbuffers::Offset<Tensor> CreateTensorDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr,
- circle::TensorType type = circle::TensorType_FLOAT32, uint32_t buffer = 0,
- const char *name = nullptr,
- flatbuffers::Offset<circle::QuantizationParameters> quantization = 0, bool is_variable = false,
- flatbuffers::Offset<circle::SparsityParameters> sparsity = 0,
- const std::vector<int32_t> *shape_signature = nullptr)
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr,
+ circle::TensorType type = circle::TensorType_FLOAT32, uint32_t buffer = 0,
+ const char *name = nullptr, flatbuffers::Offset<circle::QuantizationParameters> quantization = 0,
+ bool is_variable = false, flatbuffers::Offset<circle::SparsityParameters> sparsity = 0,
+ const std::vector<int32_t> *shape_signature = nullptr)
{
auto shape__ = shape ? _fbb.CreateVector<int32_t>(*shape) : 0;
auto name__ = name ? _fbb.CreateString(name) : 0;
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
};
inline flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(
- flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
- int32_t stride_w = 0, int32_t stride_h = 0,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
- int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
+ flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
+ int32_t stride_w = 0, int32_t stride_h = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
{
Conv2DOptionsBuilder builder_(_fbb);
builder_.add_dilation_h_factor(dilation_h_factor);
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
};
inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(
- flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
- int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, int32_t filter_height = 0,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
+ int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, int32_t filter_height = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
Pool2DOptionsBuilder builder_(_fbb);
builder_.add_filter_height(filter_height);
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
};
inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
- flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
- int32_t stride_w = 0, int32_t stride_h = 0, int32_t depth_multiplier = 0,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
- int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
+ flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
+ int32_t stride_w = 0, int32_t stride_h = 0, int32_t depth_multiplier = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
{
DepthwiseConv2DOptionsBuilder builder_(_fbb);
builder_.add_dilation_h_factor(dilation_h_factor);
fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0);
}
void add_num_columns_per_channel(
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel)
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel)
{
fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel);
}
void add_embedding_dim_per_channel(
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel)
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel)
{
fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL,
embedding_dim_per_channel);
};
inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0)
{
ConcatEmbeddingsOptionsBuilder builder_(_fbb);
builder_.add_embedding_dim_per_channel(embedding_dim_per_channel);
const std::vector<int32_t> *embedding_dim_per_channel = nullptr)
{
auto num_columns_per_channel__ =
- num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0;
+ num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0;
auto embedding_dim_per_channel__ =
- embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0;
+ embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0;
return circle::CreateConcatEmbeddingsOptions(_fbb, num_channels, num_columns_per_channel__,
embedding_dim_per_channel__);
}
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool asymmetric_quantize_inputs() const
{
};
inline flatbuffers::Offset<SVDFOptions> CreateSVDFOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
- bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool asymmetric_quantize_inputs = false)
{
SVDFOptionsBuilder builder_(_fbb);
builder_.add_rank(rank);
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool asymmetric_quantize_inputs() const
{
};
inline flatbuffers::Offset<RNNOptions> CreateRNNOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
- bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool asymmetric_quantize_inputs = false)
{
RNNOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool asymmetric_quantize_inputs() const
{
};
inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
- flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
- bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool asymmetric_quantize_inputs = false)
{
SequenceRNNOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; }
bool asymmetric_quantize_inputs() const
};
inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(
- flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
- bool merge_outputs = false, bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ bool merge_outputs = false, bool asymmetric_quantize_inputs = false)
{
BidirectionalSequenceRNNOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
circle::FullyConnectedOptionsWeightsFormat weights_format() const
{
return static_cast<circle::FullyConnectedOptionsWeightsFormat>(
- GetField<int8_t>(VT_WEIGHTS_FORMAT, 0));
+ GetField<int8_t>(VT_WEIGHTS_FORMAT, 0));
}
bool keep_num_dims() const { return GetField<uint8_t>(VT_KEEP_NUM_DIMS, 0) != 0; }
bool asymmetric_quantize_inputs() const
};
inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
- circle::FullyConnectedOptionsWeightsFormat weights_format =
- circle::FullyConnectedOptionsWeightsFormat_DEFAULT,
- bool keep_num_dims = false, bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ circle::FullyConnectedOptionsWeightsFormat weights_format =
+ circle::FullyConnectedOptionsWeightsFormat_DEFAULT,
+ bool keep_num_dims = false, bool asymmetric_quantize_inputs = false)
{
FullyConnectedOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
};
inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
ConcatenationOptionsBuilder builder_(_fbb);
builder_.add_axis(axis);
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
};
inline flatbuffers::Offset<AddOptions> CreateAddOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
AddOptionsBuilder builder_(_fbb);
builder_.add_fused_activation_function(fused_activation_function);
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
};
inline flatbuffers::Offset<MulOptions> CreateMulOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
MulOptionsBuilder builder_(_fbb);
builder_.add_fused_activation_function(fused_activation_function);
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
};
inline flatbuffers::Offset<L2NormOptions> CreateL2NormOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
L2NormOptionsBuilder builder_(_fbb);
builder_.add_fused_activation_function(fused_activation_function);
fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f);
}
explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
+ : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
};
inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f,
- circle::LSTMKernelType kernel_type = circle::LSTMKernelType_FULL,
- bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ float cell_clip = 0.0f, float proj_clip = 0.0f,
+ circle::LSTMKernelType kernel_type = circle::LSTMKernelType_FULL,
+ bool asymmetric_quantize_inputs = false)
{
LSTMOptionsBuilder builder_(_fbb);
builder_.add_proj_clip(proj_clip);
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
}
explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
+ : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>
CreateUnidirectionalSequenceLSTMOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false,
- bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false,
+ bool asymmetric_quantize_inputs = false)
{
UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
builder_.add_proj_clip(proj_clip);
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
}
explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
+ : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
};
inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false,
- bool time_major = true, bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+ float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false,
+ bool time_major = true, bool asymmetric_quantize_inputs = false)
{
BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
builder_.add_proj_clip(proj_clip);
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
};
inline flatbuffers::Offset<SubOptions> CreateSubOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
SubOptionsBuilder builder_(_fbb);
builder_.add_fused_activation_function(fused_activation_function);
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
};
inline flatbuffers::Offset<DivOptions> CreateDivOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
DivOptionsBuilder builder_(_fbb);
builder_.add_fused_activation_function(fused_activation_function);
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
};
inline flatbuffers::Offset<BCQFullyConnectedOptions> CreateBCQFullyConnectedOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t weights_hidden_size = 0,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t weights_hidden_size = 0,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
BCQFullyConnectedOptionsBuilder builder_(_fbb);
builder_.add_weights_hidden_size(weights_hidden_size);
circle::ActivationFunctionType fused_activation_function() const
{
return static_cast<circle::ActivationFunctionType>(
- GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+ GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
}
bool Verify(flatbuffers::Verifier &verifier) const
{
};
inline flatbuffers::Offset<InstanceNormOptions> CreateInstanceNormOptions(
- flatbuffers::FlatBufferBuilder &_fbb, float epsilon = 0.0f,
- circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb, float epsilon = 0.0f,
+ circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
{
InstanceNormOptionsBuilder builder_(_fbb);
builder_.add_epsilon(epsilon);
const circle::Conv2DOptions *builtin_options_as_Conv2DOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_Conv2DOptions
- ? static_cast<const circle::Conv2DOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::Conv2DOptions *>(builtin_options())
+ : nullptr;
}
const circle::DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_DepthwiseConv2DOptions
- ? static_cast<const circle::DepthwiseConv2DOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::DepthwiseConv2DOptions *>(builtin_options())
+ : nullptr;
}
const circle::ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ConcatEmbeddingsOptions
- ? static_cast<const circle::ConcatEmbeddingsOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ConcatEmbeddingsOptions *>(builtin_options())
+ : nullptr;
}
const circle::LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_LSHProjectionOptions
- ? static_cast<const circle::LSHProjectionOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::LSHProjectionOptions *>(builtin_options())
+ : nullptr;
}
const circle::Pool2DOptions *builtin_options_as_Pool2DOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_Pool2DOptions
- ? static_cast<const circle::Pool2DOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::Pool2DOptions *>(builtin_options())
+ : nullptr;
}
const circle::SVDFOptions *builtin_options_as_SVDFOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SVDFOptions
- ? static_cast<const circle::SVDFOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SVDFOptions *>(builtin_options())
+ : nullptr;
}
const circle::RNNOptions *builtin_options_as_RNNOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_RNNOptions
- ? static_cast<const circle::RNNOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::RNNOptions *>(builtin_options())
+ : nullptr;
}
const circle::FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_FullyConnectedOptions
- ? static_cast<const circle::FullyConnectedOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::FullyConnectedOptions *>(builtin_options())
+ : nullptr;
}
const circle::SoftmaxOptions *builtin_options_as_SoftmaxOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SoftmaxOptions
- ? static_cast<const circle::SoftmaxOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SoftmaxOptions *>(builtin_options())
+ : nullptr;
}
const circle::ConcatenationOptions *builtin_options_as_ConcatenationOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ConcatenationOptions
- ? static_cast<const circle::ConcatenationOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ConcatenationOptions *>(builtin_options())
+ : nullptr;
}
const circle::AddOptions *builtin_options_as_AddOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_AddOptions
- ? static_cast<const circle::AddOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::AddOptions *>(builtin_options())
+ : nullptr;
}
const circle::L2NormOptions *builtin_options_as_L2NormOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_L2NormOptions
- ? static_cast<const circle::L2NormOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::L2NormOptions *>(builtin_options())
+ : nullptr;
}
const circle::LocalResponseNormalizationOptions *
builtin_options_as_LocalResponseNormalizationOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_LocalResponseNormalizationOptions
- ? static_cast<const circle::LocalResponseNormalizationOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::LocalResponseNormalizationOptions *>(builtin_options())
+ : nullptr;
}
const circle::LSTMOptions *builtin_options_as_LSTMOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_LSTMOptions
- ? static_cast<const circle::LSTMOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::LSTMOptions *>(builtin_options())
+ : nullptr;
}
const circle::ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ResizeBilinearOptions
- ? static_cast<const circle::ResizeBilinearOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ResizeBilinearOptions *>(builtin_options())
+ : nullptr;
}
const circle::CallOptions *builtin_options_as_CallOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_CallOptions
- ? static_cast<const circle::CallOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::CallOptions *>(builtin_options())
+ : nullptr;
}
const circle::ReshapeOptions *builtin_options_as_ReshapeOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ReshapeOptions
- ? static_cast<const circle::ReshapeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ReshapeOptions *>(builtin_options())
+ : nullptr;
}
const circle::SkipGramOptions *builtin_options_as_SkipGramOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SkipGramOptions
- ? static_cast<const circle::SkipGramOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SkipGramOptions *>(builtin_options())
+ : nullptr;
}
const circle::SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SpaceToDepthOptions
- ? static_cast<const circle::SpaceToDepthOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SpaceToDepthOptions *>(builtin_options())
+ : nullptr;
}
const circle::EmbeddingLookupSparseOptions *
builtin_options_as_EmbeddingLookupSparseOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_EmbeddingLookupSparseOptions
- ? static_cast<const circle::EmbeddingLookupSparseOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::EmbeddingLookupSparseOptions *>(builtin_options())
+ : nullptr;
}
const circle::MulOptions *builtin_options_as_MulOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_MulOptions
- ? static_cast<const circle::MulOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::MulOptions *>(builtin_options())
+ : nullptr;
}
const circle::PadOptions *builtin_options_as_PadOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_PadOptions
- ? static_cast<const circle::PadOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::PadOptions *>(builtin_options())
+ : nullptr;
}
const circle::GatherOptions *builtin_options_as_GatherOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_GatherOptions
- ? static_cast<const circle::GatherOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::GatherOptions *>(builtin_options())
+ : nullptr;
}
const circle::BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_BatchToSpaceNDOptions
- ? static_cast<const circle::BatchToSpaceNDOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::BatchToSpaceNDOptions *>(builtin_options())
+ : nullptr;
}
const circle::SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SpaceToBatchNDOptions
- ? static_cast<const circle::SpaceToBatchNDOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SpaceToBatchNDOptions *>(builtin_options())
+ : nullptr;
}
const circle::TransposeOptions *builtin_options_as_TransposeOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_TransposeOptions
- ? static_cast<const circle::TransposeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::TransposeOptions *>(builtin_options())
+ : nullptr;
}
const circle::ReducerOptions *builtin_options_as_ReducerOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ReducerOptions
- ? static_cast<const circle::ReducerOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ReducerOptions *>(builtin_options())
+ : nullptr;
}
const circle::SubOptions *builtin_options_as_SubOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SubOptions
- ? static_cast<const circle::SubOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SubOptions *>(builtin_options())
+ : nullptr;
}
const circle::DivOptions *builtin_options_as_DivOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_DivOptions
- ? static_cast<const circle::DivOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::DivOptions *>(builtin_options())
+ : nullptr;
}
const circle::SqueezeOptions *builtin_options_as_SqueezeOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SqueezeOptions
- ? static_cast<const circle::SqueezeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SqueezeOptions *>(builtin_options())
+ : nullptr;
}
const circle::SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SequenceRNNOptions
- ? static_cast<const circle::SequenceRNNOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SequenceRNNOptions *>(builtin_options())
+ : nullptr;
}
const circle::StridedSliceOptions *builtin_options_as_StridedSliceOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_StridedSliceOptions
- ? static_cast<const circle::StridedSliceOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::StridedSliceOptions *>(builtin_options())
+ : nullptr;
}
const circle::ExpOptions *builtin_options_as_ExpOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ExpOptions
- ? static_cast<const circle::ExpOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ExpOptions *>(builtin_options())
+ : nullptr;
}
const circle::TopKV2Options *builtin_options_as_TopKV2Options() const
{
return builtin_options_type() == circle::BuiltinOptions_TopKV2Options
- ? static_cast<const circle::TopKV2Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::TopKV2Options *>(builtin_options())
+ : nullptr;
}
const circle::SplitOptions *builtin_options_as_SplitOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SplitOptions
- ? static_cast<const circle::SplitOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SplitOptions *>(builtin_options())
+ : nullptr;
}
const circle::LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_LogSoftmaxOptions
- ? static_cast<const circle::LogSoftmaxOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::LogSoftmaxOptions *>(builtin_options())
+ : nullptr;
}
const circle::CastOptions *builtin_options_as_CastOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_CastOptions
- ? static_cast<const circle::CastOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::CastOptions *>(builtin_options())
+ : nullptr;
}
const circle::DequantizeOptions *builtin_options_as_DequantizeOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_DequantizeOptions
- ? static_cast<const circle::DequantizeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::DequantizeOptions *>(builtin_options())
+ : nullptr;
}
const circle::MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_MaximumMinimumOptions
- ? static_cast<const circle::MaximumMinimumOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::MaximumMinimumOptions *>(builtin_options())
+ : nullptr;
}
const circle::ArgMaxOptions *builtin_options_as_ArgMaxOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ArgMaxOptions
- ? static_cast<const circle::ArgMaxOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ArgMaxOptions *>(builtin_options())
+ : nullptr;
}
const circle::LessOptions *builtin_options_as_LessOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_LessOptions
- ? static_cast<const circle::LessOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::LessOptions *>(builtin_options())
+ : nullptr;
}
const circle::NegOptions *builtin_options_as_NegOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_NegOptions
- ? static_cast<const circle::NegOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::NegOptions *>(builtin_options())
+ : nullptr;
}
const circle::PadV2Options *builtin_options_as_PadV2Options() const
{
return builtin_options_type() == circle::BuiltinOptions_PadV2Options
- ? static_cast<const circle::PadV2Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::PadV2Options *>(builtin_options())
+ : nullptr;
}
const circle::GreaterOptions *builtin_options_as_GreaterOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_GreaterOptions
- ? static_cast<const circle::GreaterOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::GreaterOptions *>(builtin_options())
+ : nullptr;
}
const circle::GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_GreaterEqualOptions
- ? static_cast<const circle::GreaterEqualOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::GreaterEqualOptions *>(builtin_options())
+ : nullptr;
}
const circle::LessEqualOptions *builtin_options_as_LessEqualOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_LessEqualOptions
- ? static_cast<const circle::LessEqualOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::LessEqualOptions *>(builtin_options())
+ : nullptr;
}
const circle::SelectOptions *builtin_options_as_SelectOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SelectOptions
- ? static_cast<const circle::SelectOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SelectOptions *>(builtin_options())
+ : nullptr;
}
const circle::SliceOptions *builtin_options_as_SliceOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SliceOptions
- ? static_cast<const circle::SliceOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SliceOptions *>(builtin_options())
+ : nullptr;
}
const circle::TransposeConvOptions *builtin_options_as_TransposeConvOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_TransposeConvOptions
- ? static_cast<const circle::TransposeConvOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::TransposeConvOptions *>(builtin_options())
+ : nullptr;
}
const circle::SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SparseToDenseOptions
- ? static_cast<const circle::SparseToDenseOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SparseToDenseOptions *>(builtin_options())
+ : nullptr;
}
const circle::TileOptions *builtin_options_as_TileOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_TileOptions
- ? static_cast<const circle::TileOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::TileOptions *>(builtin_options())
+ : nullptr;
}
const circle::ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ExpandDimsOptions
- ? static_cast<const circle::ExpandDimsOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ExpandDimsOptions *>(builtin_options())
+ : nullptr;
}
const circle::EqualOptions *builtin_options_as_EqualOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_EqualOptions
- ? static_cast<const circle::EqualOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::EqualOptions *>(builtin_options())
+ : nullptr;
}
const circle::NotEqualOptions *builtin_options_as_NotEqualOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_NotEqualOptions
- ? static_cast<const circle::NotEqualOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::NotEqualOptions *>(builtin_options())
+ : nullptr;
}
const circle::ShapeOptions *builtin_options_as_ShapeOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ShapeOptions
- ? static_cast<const circle::ShapeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ShapeOptions *>(builtin_options())
+ : nullptr;
}
const circle::PowOptions *builtin_options_as_PowOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_PowOptions
- ? static_cast<const circle::PowOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::PowOptions *>(builtin_options())
+ : nullptr;
}
const circle::ArgMinOptions *builtin_options_as_ArgMinOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ArgMinOptions
- ? static_cast<const circle::ArgMinOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ArgMinOptions *>(builtin_options())
+ : nullptr;
}
const circle::FakeQuantOptions *builtin_options_as_FakeQuantOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_FakeQuantOptions
- ? static_cast<const circle::FakeQuantOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::FakeQuantOptions *>(builtin_options())
+ : nullptr;
}
const circle::PackOptions *builtin_options_as_PackOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_PackOptions
- ? static_cast<const circle::PackOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::PackOptions *>(builtin_options())
+ : nullptr;
}
const circle::LogicalOrOptions *builtin_options_as_LogicalOrOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_LogicalOrOptions
- ? static_cast<const circle::LogicalOrOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::LogicalOrOptions *>(builtin_options())
+ : nullptr;
}
const circle::OneHotOptions *builtin_options_as_OneHotOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_OneHotOptions
- ? static_cast<const circle::OneHotOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::OneHotOptions *>(builtin_options())
+ : nullptr;
}
const circle::LogicalAndOptions *builtin_options_as_LogicalAndOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_LogicalAndOptions
- ? static_cast<const circle::LogicalAndOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::LogicalAndOptions *>(builtin_options())
+ : nullptr;
}
const circle::LogicalNotOptions *builtin_options_as_LogicalNotOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_LogicalNotOptions
- ? static_cast<const circle::LogicalNotOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::LogicalNotOptions *>(builtin_options())
+ : nullptr;
}
const circle::UnpackOptions *builtin_options_as_UnpackOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_UnpackOptions
- ? static_cast<const circle::UnpackOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::UnpackOptions *>(builtin_options())
+ : nullptr;
}
const circle::FloorDivOptions *builtin_options_as_FloorDivOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_FloorDivOptions
- ? static_cast<const circle::FloorDivOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::FloorDivOptions *>(builtin_options())
+ : nullptr;
}
const circle::SquareOptions *builtin_options_as_SquareOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SquareOptions
- ? static_cast<const circle::SquareOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SquareOptions *>(builtin_options())
+ : nullptr;
}
const circle::ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ZerosLikeOptions
- ? static_cast<const circle::ZerosLikeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ZerosLikeOptions *>(builtin_options())
+ : nullptr;
}
const circle::FillOptions *builtin_options_as_FillOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_FillOptions
- ? static_cast<const circle::FillOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::FillOptions *>(builtin_options())
+ : nullptr;
}
const circle::BidirectionalSequenceLSTMOptions *
builtin_options_as_BidirectionalSequenceLSTMOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_BidirectionalSequenceLSTMOptions
- ? static_cast<const circle::BidirectionalSequenceLSTMOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::BidirectionalSequenceLSTMOptions *>(builtin_options())
+ : nullptr;
}
const circle::BidirectionalSequenceRNNOptions *
builtin_options_as_BidirectionalSequenceRNNOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_BidirectionalSequenceRNNOptions
- ? static_cast<const circle::BidirectionalSequenceRNNOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::BidirectionalSequenceRNNOptions *>(builtin_options())
+ : nullptr;
}
const circle::UnidirectionalSequenceLSTMOptions *
builtin_options_as_UnidirectionalSequenceLSTMOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_UnidirectionalSequenceLSTMOptions
- ? static_cast<const circle::UnidirectionalSequenceLSTMOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::UnidirectionalSequenceLSTMOptions *>(builtin_options())
+ : nullptr;
}
const circle::FloorModOptions *builtin_options_as_FloorModOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_FloorModOptions
- ? static_cast<const circle::FloorModOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::FloorModOptions *>(builtin_options())
+ : nullptr;
}
const circle::RangeOptions *builtin_options_as_RangeOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_RangeOptions
- ? static_cast<const circle::RangeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::RangeOptions *>(builtin_options())
+ : nullptr;
}
const circle::ResizeNearestNeighborOptions *
builtin_options_as_ResizeNearestNeighborOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ResizeNearestNeighborOptions
- ? static_cast<const circle::ResizeNearestNeighborOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ResizeNearestNeighborOptions *>(builtin_options())
+ : nullptr;
}
const circle::LeakyReluOptions *builtin_options_as_LeakyReluOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_LeakyReluOptions
- ? static_cast<const circle::LeakyReluOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::LeakyReluOptions *>(builtin_options())
+ : nullptr;
}
const circle::SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SquaredDifferenceOptions
- ? static_cast<const circle::SquaredDifferenceOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SquaredDifferenceOptions *>(builtin_options())
+ : nullptr;
}
const circle::MirrorPadOptions *builtin_options_as_MirrorPadOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_MirrorPadOptions
- ? static_cast<const circle::MirrorPadOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::MirrorPadOptions *>(builtin_options())
+ : nullptr;
}
const circle::AbsOptions *builtin_options_as_AbsOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_AbsOptions
- ? static_cast<const circle::AbsOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::AbsOptions *>(builtin_options())
+ : nullptr;
}
const circle::SplitVOptions *builtin_options_as_SplitVOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SplitVOptions
- ? static_cast<const circle::SplitVOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SplitVOptions *>(builtin_options())
+ : nullptr;
}
const circle::UniqueOptions *builtin_options_as_UniqueOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_UniqueOptions
- ? static_cast<const circle::UniqueOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::UniqueOptions *>(builtin_options())
+ : nullptr;
}
const circle::ReverseV2Options *builtin_options_as_ReverseV2Options() const
{
return builtin_options_type() == circle::BuiltinOptions_ReverseV2Options
- ? static_cast<const circle::ReverseV2Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ReverseV2Options *>(builtin_options())
+ : nullptr;
}
const circle::AddNOptions *builtin_options_as_AddNOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_AddNOptions
- ? static_cast<const circle::AddNOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::AddNOptions *>(builtin_options())
+ : nullptr;
}
const circle::GatherNdOptions *builtin_options_as_GatherNdOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_GatherNdOptions
- ? static_cast<const circle::GatherNdOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::GatherNdOptions *>(builtin_options())
+ : nullptr;
}
const circle::CosOptions *builtin_options_as_CosOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_CosOptions
- ? static_cast<const circle::CosOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::CosOptions *>(builtin_options())
+ : nullptr;
}
const circle::WhereOptions *builtin_options_as_WhereOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_WhereOptions
- ? static_cast<const circle::WhereOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::WhereOptions *>(builtin_options())
+ : nullptr;
}
const circle::RankOptions *builtin_options_as_RankOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_RankOptions
- ? static_cast<const circle::RankOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::RankOptions *>(builtin_options())
+ : nullptr;
}
const circle::ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ReverseSequenceOptions
- ? static_cast<const circle::ReverseSequenceOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ReverseSequenceOptions *>(builtin_options())
+ : nullptr;
}
const circle::MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_MatrixDiagOptions
- ? static_cast<const circle::MatrixDiagOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::MatrixDiagOptions *>(builtin_options())
+ : nullptr;
}
const circle::QuantizeOptions *builtin_options_as_QuantizeOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_QuantizeOptions
- ? static_cast<const circle::QuantizeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::QuantizeOptions *>(builtin_options())
+ : nullptr;
}
const circle::MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_MatrixSetDiagOptions
- ? static_cast<const circle::MatrixSetDiagOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::MatrixSetDiagOptions *>(builtin_options())
+ : nullptr;
}
const circle::HardSwishOptions *builtin_options_as_HardSwishOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_HardSwishOptions
- ? static_cast<const circle::HardSwishOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::HardSwishOptions *>(builtin_options())
+ : nullptr;
}
const circle::IfOptions *builtin_options_as_IfOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_IfOptions
- ? static_cast<const circle::IfOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::IfOptions *>(builtin_options())
+ : nullptr;
}
const circle::WhileOptions *builtin_options_as_WhileOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_WhileOptions
- ? static_cast<const circle::WhileOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::WhileOptions *>(builtin_options())
+ : nullptr;
}
const circle::DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_DepthToSpaceOptions
- ? static_cast<const circle::DepthToSpaceOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::DepthToSpaceOptions *>(builtin_options())
+ : nullptr;
}
const circle::NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const
{
return builtin_options_type() == circle::BuiltinOptions_NonMaxSuppressionV4Options
- ? static_cast<const circle::NonMaxSuppressionV4Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::NonMaxSuppressionV4Options *>(builtin_options())
+ : nullptr;
}
const circle::NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const
{
return builtin_options_type() == circle::BuiltinOptions_NonMaxSuppressionV5Options
- ? static_cast<const circle::NonMaxSuppressionV5Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::NonMaxSuppressionV5Options *>(builtin_options())
+ : nullptr;
}
const circle::ScatterNdOptions *builtin_options_as_ScatterNdOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_ScatterNdOptions
- ? static_cast<const circle::ScatterNdOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::ScatterNdOptions *>(builtin_options())
+ : nullptr;
}
const circle::SelectV2Options *builtin_options_as_SelectV2Options() const
{
return builtin_options_type() == circle::BuiltinOptions_SelectV2Options
- ? static_cast<const circle::SelectV2Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SelectV2Options *>(builtin_options())
+ : nullptr;
}
const circle::DensifyOptions *builtin_options_as_DensifyOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_DensifyOptions
- ? static_cast<const circle::DensifyOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::DensifyOptions *>(builtin_options())
+ : nullptr;
}
const circle::SegmentSumOptions *builtin_options_as_SegmentSumOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_SegmentSumOptions
- ? static_cast<const circle::SegmentSumOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::SegmentSumOptions *>(builtin_options())
+ : nullptr;
}
const circle::BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_BatchMatMulOptions
- ? static_cast<const circle::BatchMatMulOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::BatchMatMulOptions *>(builtin_options())
+ : nullptr;
}
const circle::BCQGatherOptions *builtin_options_as_BCQGatherOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_BCQGatherOptions
- ? static_cast<const circle::BCQGatherOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::BCQGatherOptions *>(builtin_options())
+ : nullptr;
}
const circle::BCQFullyConnectedOptions *builtin_options_as_BCQFullyConnectedOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_BCQFullyConnectedOptions
- ? static_cast<const circle::BCQFullyConnectedOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::BCQFullyConnectedOptions *>(builtin_options())
+ : nullptr;
}
const circle::InstanceNormOptions *builtin_options_as_InstanceNormOptions() const
{
return builtin_options_type() == circle::BuiltinOptions_InstanceNormOptions
- ? static_cast<const circle::InstanceNormOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const circle::InstanceNormOptions *>(builtin_options())
+ : nullptr;
}
const flatbuffers::Vector<uint8_t> *custom_options() const
{
static_cast<int8_t>(custom_options_format), 0);
}
void add_mutating_variable_inputs(
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs)
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs)
{
fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs);
}
};
inline flatbuffers::Offset<Operator> CreateOperator(
- flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
- circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE,
- flatbuffers::Offset<void> builtin_options = 0,
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
- circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS,
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+ circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE,
+ flatbuffers::Offset<void> builtin_options = 0,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
+ circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS,
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0)
{
OperatorBuilder builder_(_fbb);
builder_.add_intermediates(intermediates);
}
inline flatbuffers::Offset<Operator> CreateOperatorDirect(
- flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
- const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr,
- circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE,
- flatbuffers::Offset<void> builtin_options = 0,
- const std::vector<uint8_t> *custom_options = nullptr,
- circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS,
- const std::vector<uint8_t> *mutating_variable_inputs = nullptr,
- const std::vector<int32_t> *intermediates = nullptr)
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
+ const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr,
+ circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE,
+ flatbuffers::Offset<void> builtin_options = 0,
+ const std::vector<uint8_t> *custom_options = nullptr,
+ circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS,
+ const std::vector<uint8_t> *mutating_variable_inputs = nullptr,
+ const std::vector<int32_t> *intermediates = nullptr)
{
auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0;
auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0;
auto custom_options__ = custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0;
auto mutating_variable_inputs__ =
- mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0;
+ mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0;
auto intermediates__ = intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0;
return circle::CreateOperator(_fbb, opcode_index, inputs__, outputs__, builtin_options_type,
builtin_options, custom_options__, custom_options_format,
const flatbuffers::Vector<flatbuffers::Offset<circle::Operator>> *operators() const
{
return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::Operator>> *>(
- VT_OPERATORS);
+ VT_OPERATORS);
}
const flatbuffers::String *name() const
{
fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs);
}
void add_operators(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>> operators)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>> operators)
{
fbb_.AddOffset(SubGraph::VT_OPERATORS, operators);
}
};
inline flatbuffers::Offset<SubGraph> CreateSubGraph(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>> tensors = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>> operators = 0,
- flatbuffers::Offset<flatbuffers::String> name = 0,
- circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>> tensors = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>> operators = 0,
+ flatbuffers::Offset<flatbuffers::String> name = 0,
+ circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST)
{
SubGraphBuilder builder_(_fbb);
builder_.add_name(name);
}
inline flatbuffers::Offset<SubGraph> CreateSubGraphDirect(
- flatbuffers::FlatBufferBuilder &_fbb,
- const std::vector<flatbuffers::Offset<circle::Tensor>> *tensors = nullptr,
- const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr,
- const std::vector<flatbuffers::Offset<circle::Operator>> *operators = nullptr,
- const char *name = nullptr, circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<flatbuffers::Offset<circle::Tensor>> *tensors = nullptr,
+ const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr,
+ const std::vector<flatbuffers::Offset<circle::Operator>> *operators = nullptr,
+ const char *name = nullptr, circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST)
{
auto tensors__ = tensors ? _fbb.CreateVector<flatbuffers::Offset<circle::Tensor>>(*tensors) : 0;
auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0;
auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0;
auto operators__ =
- operators ? _fbb.CreateVector<flatbuffers::Offset<circle::Operator>>(*operators) : 0;
+ operators ? _fbb.CreateVector<flatbuffers::Offset<circle::Operator>>(*operators) : 0;
auto name__ = name ? _fbb.CreateString(name) : 0;
return circle::CreateSubGraph(_fbb, tensors__, inputs__, outputs__, operators__, name__,
data_format);
const flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>> *operator_codes() const
{
return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>> *>(
- VT_OPERATOR_CODES);
+ VT_OPERATOR_CODES);
}
const flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>> *subgraphs() const
{
return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>> *>(
- VT_SUBGRAPHS);
+ VT_SUBGRAPHS);
}
const flatbuffers::String *description() const
{
const flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>> *metadata() const
{
return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>> *>(
- VT_METADATA);
+ VT_METADATA);
}
bool Verify(flatbuffers::Verifier &verifier) const
{
flatbuffers::uoffset_t start_;
void add_version(uint32_t version) { fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0); }
void add_operator_codes(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>>
- operator_codes)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>>
+ operator_codes)
{
fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes);
}
void add_subgraphs(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>> subgraphs)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>> subgraphs)
{
fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs);
}
fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer);
}
void add_metadata(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata)
{
fbb_.AddOffset(Model::VT_METADATA, metadata);
}
};
inline flatbuffers::Offset<Model> CreateModel(
- flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>>
- operator_codes = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>> subgraphs = 0,
- flatbuffers::Offset<flatbuffers::String> description = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>> buffers = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>>
+ operator_codes = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>> subgraphs = 0,
+ flatbuffers::Offset<flatbuffers::String> description = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>> buffers = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata = 0)
{
ModelBuilder builder_(_fbb);
builder_.add_metadata(metadata);
}
inline flatbuffers::Offset<Model> CreateModelDirect(
- flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
- const std::vector<flatbuffers::Offset<circle::OperatorCode>> *operator_codes = nullptr,
- const std::vector<flatbuffers::Offset<circle::SubGraph>> *subgraphs = nullptr,
- const char *description = nullptr,
- const std::vector<flatbuffers::Offset<circle::Buffer>> *buffers = nullptr,
- const std::vector<int32_t> *metadata_buffer = nullptr,
- const std::vector<flatbuffers::Offset<circle::Metadata>> *metadata = nullptr)
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+ const std::vector<flatbuffers::Offset<circle::OperatorCode>> *operator_codes = nullptr,
+ const std::vector<flatbuffers::Offset<circle::SubGraph>> *subgraphs = nullptr,
+ const char *description = nullptr,
+ const std::vector<flatbuffers::Offset<circle::Buffer>> *buffers = nullptr,
+ const std::vector<int32_t> *metadata_buffer = nullptr,
+ const std::vector<flatbuffers::Offset<circle::Metadata>> *metadata = nullptr)
{
auto operator_codes__ =
- operator_codes ? _fbb.CreateVector<flatbuffers::Offset<circle::OperatorCode>>(*operator_codes)
- : 0;
+ operator_codes ? _fbb.CreateVector<flatbuffers::Offset<circle::OperatorCode>>(*operator_codes)
+ : 0;
auto subgraphs__ =
- subgraphs ? _fbb.CreateVector<flatbuffers::Offset<circle::SubGraph>>(*subgraphs) : 0;
+ subgraphs ? _fbb.CreateVector<flatbuffers::Offset<circle::SubGraph>>(*subgraphs) : 0;
auto description__ = description ? _fbb.CreateString(description) : 0;
auto buffers__ = buffers ? _fbb.CreateVector<flatbuffers::Offset<circle::Buffer>>(*buffers) : 0;
auto metadata_buffer__ = metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0;
auto metadata__ =
- metadata ? _fbb.CreateVector<flatbuffers::Offset<circle::Metadata>>(*metadata) : 0;
+ metadata ? _fbb.CreateVector<flatbuffers::Offset<circle::Metadata>>(*metadata) : 0;
return circle::CreateModel(_fbb, version, operator_codes__, subgraphs__, description__, buffers__,
metadata_buffer__, metadata__);
}
// TODO Support multiple subgraphs
ANeuralNetworksCompilation::ANeuralNetworksCompilation(const ANeuralNetworksModel *model) noexcept
- : _subgraphs{model->getSubGraphs()}, _compiler{new onert::compiler::Compiler{_subgraphs}}
+ : _subgraphs{model->getSubGraphs()}, _tracing_ctx{std::make_unique<onert::util::TracingCtx>(
+ _subgraphs.get())},
+ _compiler{new onert::compiler::Compiler{_subgraphs, _tracing_ctx.get()}}
{
if (model->allowedToFp16())
{
#include "ir/Graph.h"
#include "ir/Subgraphs.h"
#include "exec/IExecutor.h"
+#include "util/TracingCtx.h"
struct ANeuralNetworksCompilation
{
private:
std::shared_ptr<onert::ir::Subgraphs> _subgraphs;
+ // TODO Refine the ownership of TracingCtx
+ // In the case of the nnfw API, nnfw_session owns the TracingCtx.
+ // In the case of nnapi, there is no concept of a session, so the primary model might own the
+ // TracingCtx.
+ // Since we do not yet support multiple models with nnapi in ONE, implement this later and make
+ // it work with a single model for now.
+ std::unique_ptr<onert::util::TracingCtx> _tracing_ctx;
+
std::shared_ptr<onert::compiler::Compiler> _compiler;
std::shared_ptr<onert::exec::ExecutorMap> _executors;
};
#include "util/logging.h"
ANeuralNetworksEvent::ANeuralNetworksEvent(const std::shared_ptr<onert::exec::Execution> &execution)
- : _execution{execution}
+ : _execution{execution}
{
// DO NOTHING
}
const auto type_info = _execution->primary_subgraph().operands().at(operand_index).typeInfo();
const auto shape = (type != nullptr)
- ? NNAPIConvert::getShape(type)
- : _execution->primary_subgraph().operands().at(operand_index).shape();
+ ? NNAPIConvert::getShape(type)
+ : _execution->primary_subgraph().operands().at(operand_index).shape();
// NOTE The nnapi does not provide a way to set io_layout and does not support changing layout.
// In other words, we can assume that io_layout from nnapi is always the same as the layout of the used
const auto type_info = _execution->primary_subgraph().operands().at(operand_index).typeInfo();
const auto shape = (type != nullptr)
- ? NNAPIConvert::getShape(type)
- : _execution->primary_subgraph().operands().at(operand_index).shape();
+ ? NNAPIConvert::getShape(type)
+ : _execution->primary_subgraph().operands().at(operand_index).shape();
// ANeuralNetworksExecution::setInput() uses only shape information
ANeuralNetworksOperandType optional_input_type;
const auto type_info = _execution->primary_subgraph().operands().at(operand_index).typeInfo();
const auto shape = (type != nullptr)
- ? NNAPIConvert::getShape(type)
- : _execution->primary_subgraph().operands().at(operand_index).shape();
+ ? NNAPIConvert::getShape(type)
+ : _execution->primary_subgraph().operands().at(operand_index).shape();
// NOTE The nnapi does not provide a way to set io_layout and does not support changing layout.
// In other words, we can assume that io_layout from nnapi is always the same as the layout of the used
{
public:
ANeuralNetworksExecution(const std::shared_ptr<onert::exec::ExecutorMap> &executors)
- : _execution{std::make_shared<onert::exec::Execution>(executors)}
+ : _execution{std::make_shared<onert::exec::Execution>(executors)}
{
// DO NOTHING
}
// ANeuralNetworksModel
//
ANeuralNetworksModel::ANeuralNetworksModel() noexcept
- : _optional_operands{}, _operand_usages{}, _allowFloat32toFloat16{false}
+ : _optional_operands{}, _operand_usages{}, _allowFloat32toFloat16{false}
{
_graph = std::make_shared<onert::ir::Graph>();
}
if (copy)
{
_graph->operands().at(ind).data(
- std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(buffer), length));
+ std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(buffer), length));
}
else
{
_graph->operands().at(ind).data(
- std::make_unique<ExternalData>(reinterpret_cast<const uint8_t *>(buffer), length));
+ std::make_unique<ExternalData>(reinterpret_cast<const uint8_t *>(buffer), length));
}
}
catch (const std::exception &e)
if (type == ANEURALNETWORKS_FULLY_CONNECTED)
{
const auto &input_operand =
- _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::INPUT));
+ _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::INPUT));
auto &weights_operand =
- _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::WEIGHT));
+ _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::WEIGHT));
if (input_operand.typeInfo().type() == onert::ir::DataType::FLOAT32 &&
weights_operand.typeInfo().type() == onert::ir::DataType::QUANT_UINT8_ASYMM)
{
case ANEURALNETWORKS_BOOL:
case ANEURALNETWORKS_TENSOR_BOOL8:
return DataType::BOOL8;
+ case ANEURALNETWORKS_TENSOR_FLOAT16:
+ case ANEURALNETWORKS_FLOAT16:
+ return DataType::FLOAT16;
+ case ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL:
+ return DataType::QUANT_INT8_SYMM_PER_CHANNEL;
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED:
+ return DataType::QUANT_INT8_ASYMM;
default:
throw std::runtime_error("Unsupported type");
}
}
OperationFactory::Generator getElementwiseBinaryGenerator(
- const onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type)
+ const onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type)
{
return [op_type](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 2);
param.arithmetic_type = op_type;
const auto activation_index = OperandIndex{init_param.inputs[2]};
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
return new operation::BinaryArithmetic{inputs, outputs, param};
};
const auto activation_index = OperandIndex{init_param.inputs[6]};
param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
param.stride = makeStride(operands, hstride_index, vstride_index);
param.kw = getUint32Scalar(operands, kw_index);
param.kh = operands.at(kh_index).asScalar<uint32_t>();
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
else // support explicit padding
{
param.kw = getUint32Scalar(operands, kw_index);
param.kh = getUint32Scalar(operands, kh_index);
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
return new operation::Pool2D{inputs, outputs, param};
};
const auto activation_index = OperandIndex{init_param.inputs[7]};
param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
param.stride = makeStride(operands, hstride_index, vstride_index);
param.multiplier = getUint32Scalar(operands, multiplier_index);
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
else
{
param.stride = makeStride(operands, hstride_index, vstride_index);
param.multiplier = getUint32Scalar(operands, multiplier_index);
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
// TODO set dilation
operation::FullyConnected::Param param;
const auto activation_index = OperandIndex{init_param.inputs[3]};
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
param.weights_format = FullyConnectedWeightsFormat::Default;
return new operation::FullyConnected{inputs, outputs, param};
};
_map[ANEURALNETWORKS_CAST] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST);
// ANEURALNETWORKS_CAST_EX is deprecated
// TODO Remove ANEURALNETWORKS_CAST_EX
const auto activation_index = OperandIndex{init_param.inputs[6]};
param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
param.stride = makeStride(operands, hstride_index, vstride_index);
param.dilation.width_factor = 1;
param.dilation.height_factor = 1;
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
else if (init_param.input_count == 10) // support explicit padding
{
param.dilation.height_factor = 1;
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
else if (init_param.input_count == 13) // support dilation
{
param.dilation.height_factor = height_factor;
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
else
{
};
_map[ANEURALNETWORKS_ADD] =
- getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::ADD);
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::ADD);
_map[ANEURALNETWORKS_ADDV2_EX] = _map[ANEURALNETWORKS_ADD];
_map[ANEURALNETWORKS_REDUCE_SUM] =
- getReduceGenerator(onert::ir::operation::Reduce::ReduceType::SUM);
+ getReduceGenerator(onert::ir::operation::Reduce::ReduceType::SUM);
// ANEURALNETWORKS_REDUCE_SUM_EX is deprecated
// TODO Remove ANEURALNETWORKS_REDUCE_SUM_EX
_map[ANEURALNETWORKS_REDUCE_SUM_EX] = _map[ANEURALNETWORKS_REDUCE_SUM];
_map[ANEURALNETWORKS_SUB] =
- getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB);
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB);
_map[ANEURALNETWORKS_SLICE] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
param.begin_mask = operands.at(OperandIndex{init_param.inputs[4]}).asScalar<std::int32_t>();
param.end_mask = operands.at(OperandIndex{init_param.inputs[5]}).asScalar<std::int32_t>();
param.shrink_axis_mask =
- operands.at(OperandIndex{init_param.inputs[6]}).asScalar<std::int32_t>();
+ operands.at(OperandIndex{init_param.inputs[6]}).asScalar<std::int32_t>();
return new operation::StridedSlice{inputs, outputs, param};
};
_map[ANEURALNETWORKS_TRANSPOSE] = createSimpleBinaryOp<operation::Transpose>;
_map[ANEURALNETWORKS_MUL] =
- getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::MUL);
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::MUL);
_map[ANEURALNETWORKS_SQUEEZE] = [](const OperationFactory::Param &init_param,
Operands &operands) {
};
_map[ANEURALNETWORKS_TANH] = getElementwiseActivationGenerator(
- onert::ir::operation::ElementwiseActivation::Type::TANH, 1.f, 1.f);
+ onert::ir::operation::ElementwiseActivation::Type::TANH, 1.f, 1.f);
_map[ANEURALNETWORKS_LOG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOG);
- _map[ANEURALNETWORKS_LOGISTIC] = getElementwiseActivationGenerator(
- onert::ir::operation::ElementwiseActivation::Type::LOGISTIC);
+ _map[ANEURALNETWORKS_LOGISTIC] =
+ getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::LOGISTIC);
_map[ANEURALNETWORKS_DIV] =
- getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::DIV);
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::DIV);
_map[ANEURALNETWORKS_EXP] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::EXP);
_map[ANEURALNETWORKS_EXPAND_DIMS] = createSimpleBinaryOp<operation::ExpandDims>;
_map[ANEURALNETWORKS_GREATER] =
- getComparisonGenerator(operation::Comparison::ComparisonType::Greater);
+ getComparisonGenerator(operation::Comparison::ComparisonType::Greater);
_map[ANEURALNETWORKS_GREATER_EQUAL] =
- getComparisonGenerator(operation::Comparison::ComparisonType::GreaterEqual);
+ getComparisonGenerator(operation::Comparison::ComparisonType::GreaterEqual);
_map[ANEURALNETWORKS_LESS] = getComparisonGenerator(operation::Comparison::ComparisonType::Less);
_map[ANEURALNETWORKS_LESS_EQUAL] =
- getComparisonGenerator(operation::Comparison::ComparisonType::LessEqual);
+ getComparisonGenerator(operation::Comparison::ComparisonType::LessEqual);
_map[ANEURALNETWORKS_NOT_EQUAL] =
- getComparisonGenerator(operation::Comparison::ComparisonType::NotEqual);
+ getComparisonGenerator(operation::Comparison::ComparisonType::NotEqual);
_map[ANEURALNETWORKS_EQUAL] =
- getComparisonGenerator(operation::Comparison::ComparisonType::Equal);
+ getComparisonGenerator(operation::Comparison::ComparisonType::Equal);
// ANEURALNETWORKS_GREATER_EQUAL_EX is deprecated
// TODO Remove ANEURALNETWORKS_GREATER_EQUAL_EX
};
_map[ANEURALNETWORKS_REDUCE_ALL] =
- getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ALL);
+ getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ALL);
_map[ANEURALNETWORKS_REDUCE_ANY] =
- getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ANY);
+ getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ANY);
_map[ANEURALNETWORKS_REDUCE_MAX] =
- getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MAX);
+ getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MAX);
// ANEURALNETWORKS_REDUCE_MAX_EX is deprecated
// TODO Remove ANEURALNETWORKS_REDUCE_MAX_EX
return new operation::Comparison{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_LOGICAL_AND] = getElementwiseBinaryGenerator(
- operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND);
+ _map[ANEURALNETWORKS_LOGICAL_AND] =
+ getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND);
// ANEURALNETWORKS_LOGICAL_AND_EX is deprecated
// TODO Remove ANEURALNETWORKS_LOGICAL_AND_EX
};
_map[ANEURALNETWORKS_RSQRT] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT);
_map[ANEURALNETWORKS_SELECT] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
_map[ANEURALNETWORKS_RSQRT_EX] = _map[ANEURALNETWORKS_RSQRT];
_map[ANEURALNETWORKS_RELU] =
- getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU,
- onert::ir::operation::ElementwiseActivation::infinity, 0);
+ getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU,
+ onert::ir::operation::ElementwiseActivation::infinity, 0);
_map[ANEURALNETWORKS_RESIZE_BILINEAR] = [](const OperationFactory::Param &init_param,
Operands &operands) {
};
_map[ANEURALNETWORKS_RELU1] = getElementwiseActivationGenerator(
- onert::ir::operation::ElementwiseActivation::Type::RELU, 1.f, -1.f);
+ onert::ir::operation::ElementwiseActivation::Type::RELU, 1.f, -1.f);
_map[ANEURALNETWORKS_RELU6] = getElementwiseActivationGenerator(
- onert::ir::operation::ElementwiseActivation::Type::RELU, 6.f, 0.f);
+ onert::ir::operation::ElementwiseActivation::Type::RELU, 6.f, 0.f);
_map[ANEURALNETWORKS_REVERSE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 2 && init_param.output_count == 1);
operation::RNN::Param param;
const auto activation_index = OperandIndex{init_param.inputs[5]};
param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
return new operation::RNN{inputs, outputs, param};
};
_map[ANEURALNETWORKS_FLOOR] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::FLOOR);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::FLOOR);
_map[ANEURALNETWORKS_SPACE_TO_BATCH_ND] = [](const OperationFactory::Param &init_param,
Operands &) {
const auto vstride_index = OperandIndex{init_param.inputs[5]};
param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
param.stride = makeStride(operands, hstride_index, vstride_index);
return new operation::TransposeConv{inputs, outputs, param};
};
_map[ANEURALNETWORKS_SQRT] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT);
// ANEURALNETWORKS_SQRT_EX is deprecated
// TODO Remove ANEURALNETWORKS_SQRT_EX
_map[ANEURALNETWORKS_SQRT_EX] = _map[ANEURALNETWORKS_SQRT];
- _map[ANEURALNETWORKS_LOGICAL_OR] = getElementwiseBinaryGenerator(
- operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR);
+ _map[ANEURALNETWORKS_LOGICAL_OR] =
+ getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR);
// ANEURALNETWORKS_LOGICAL_OR_EX is deprecated
// TODO Remove ANEURALNETWORKS_LOGICAL_OR_EX
};
_map[ANEURALNETWORKS_LOGICAL_NOT] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT);
// ANEURALNETWORKS_LOGICAL_NOT_EX is deprecated
// TODO Remove ANEURALNETWORKS_LOGICAL_NOT_EX
// 2 -> Cell State Out Tensor Index
const OperandIndex scratch_buffer_index;
OperandIndex output_state_index =
- init_param.output_count >= 2 ? OperandIndex{init_param.outputs[1]} : OperandIndex();
+ init_param.output_count >= 2 ? OperandIndex{init_param.outputs[1]} : OperandIndex();
OperandIndex cell_state_index =
- init_param.output_count >= 3 ? OperandIndex{init_param.outputs[2]} : OperandIndex();
+ init_param.output_count >= 3 ? OperandIndex{init_param.outputs[2]} : OperandIndex();
const OperandIndex output_index = OperandIndex{init_param.outputs[0]};
OperandIndexSequence outputs{scratch_buffer_index, output_state_index, cell_state_index,
output_index};
// 1 -> Axis Tensor Index
OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- operation::ArgMax::Param param;
+ operation::ArgMinMax::Param param;
// NNAPI ARGMAX output type is always int32
param.output_type = DataType::INT32;
+ param.is_arg_max = true;
- return new operation::ArgMax{inputs, outputs, param};
+ return new operation::ArgMinMax{inputs, outputs, param};
};
// ANEURALNETWORKS_ARGMAX_EX is deprecated
// TODO Remove ANEURALNETWORKS_ARGMAX_EX
_map[ANEURALNETWORKS_ARGMAX_EX] = _map[ANEURALNETWORKS_ARGMAX];
+ _map[ANEURALNETWORKS_ARGMIN] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Axis Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+
+ operation::ArgMinMax::Param param;
+ // NNAPI ARGMIN output type is always int32
+ param.output_type = DataType::INT32;
+ param.is_arg_max = false;
+
+ return new operation::ArgMinMax{inputs, outputs, param};
+ };
+
_map[ANEURALNETWORKS_DEQUANTIZE] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::DEQUANTIZE);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::DEQUANTIZE);
_map[ANEURALNETWORKS_MEAN] = [](const OperationFactory::Param &init_param, Operands &operands) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
};
_map[ANEURALNETWORKS_REDUCE_MIN] =
- getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MIN);
+ getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MIN);
// ANEURALNETWORKS_REDUCE_MIN_EX is deprecated
// TODO Remove ANEURALNETWORKS_REDUCE_MIN_EX
_map[ANEURALNETWORKS_PAD_V2] = _map[ANEURALNETWORKS_PAD];
_map[ANEURALNETWORKS_MINIMUM] =
- getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MIN);
+ getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MIN);
_map[ANEURALNETWORKS_MAXIMUM] =
- getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MAX);
+ getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MAX);
_map[ANEURALNETWORKS_ONE_HOT_EX] = [](const OperationFactory::Param &init_param,
Operands &operands) {
};
_map[ANEURALNETWORKS_COS_EX] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::COS);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::COS);
_map[ANEURALNETWORKS_SIN] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SIN);
};
_map[ANEURALNETWORKS_REDUCE_PROD] =
- getReduceGenerator(onert::ir::operation::Reduce::ReduceType::PROD);
+ getReduceGenerator(onert::ir::operation::Reduce::ReduceType::PROD);
_map[ANEURALNETWORKS_ROUND_EX] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ROUND);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ROUND);
_map[ANEURALNETWORKS_RANGE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
_map[ANEURALNETWORKS_FILL_EX] = createSimpleBinaryOp<operation::Fill>;
_map[ANEURALNETWORKS_ZEROS_LIKE_EX] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ZEROS_LIKE);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ZEROS_LIKE);
// Each input should be interpreted as follows:
// 0 -> Input Tensor Index
// 1 -> Multiple Tensor Index
};
_map[ANEURALNETWORKS_QUANTIZE] =
- getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::QUANTIZE);
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::QUANTIZE);
}
Operation *OperationFactory::create(ANeuralNetworksOperationType type,
public:
using Generator =
- std::function<onert::ir::Operation *(const OperationFactory::Param &, onert::ir::Operands &)>;
+ std::function<onert::ir::Operation *(const OperationFactory::Param &, onert::ir::Operands &)>;
public:
static OperationFactory &get();
inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6]
{
static const ActivationFunctionType values[] = {
- ActivationFunctionType_NONE, ActivationFunctionType_RELU,
- ActivationFunctionType_RELU_N1_TO_1, ActivationFunctionType_RELU6,
- ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT};
+ ActivationFunctionType_NONE, ActivationFunctionType_RELU, ActivationFunctionType_RELU_N1_TO_1,
+ ActivationFunctionType_RELU6, ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT};
return values;
}
inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[2]
{
static const FullyConnectedOptionsWeightsFormat values[] = {
- FullyConnectedOptionsWeightsFormat_DEFAULT,
- FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8};
+ FullyConnectedOptionsWeightsFormat_DEFAULT,
+ FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8};
return values;
}
const CustomQuantization *details_as_CustomQuantization() const
{
return details_type() == QuantizationDetails_CustomQuantization
- ? static_cast<const CustomQuantization *>(details())
- : nullptr;
+ ? static_cast<const CustomQuantization *>(details())
+ : nullptr;
}
int32_t quantized_dimension() const { return GetField<int32_t>(VT_QUANTIZED_DIMENSION, 0); }
bool Verify(flatbuffers::Verifier &verifier) const
}
inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr,
- const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr,
- const std::vector<int64_t> *zero_point = nullptr,
- QuantizationDetails details_type = QuantizationDetails_NONE,
- flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr,
+ const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr,
+ const std::vector<int64_t> *zero_point = nullptr,
+ QuantizationDetails details_type = QuantizationDetails_NONE,
+ flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
{
return onert_tflite::CreateQuantizationParameters(
- _fbb, min ? _fbb.CreateVector<float>(*min) : 0, max ? _fbb.CreateVector<float>(*max) : 0,
- scale ? _fbb.CreateVector<float>(*scale) : 0,
- zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0, details_type, details,
- quantized_dimension);
+ _fbb, min ? _fbb.CreateVector<float>(*min) : 0, max ? _fbb.CreateVector<float>(*max) : 0,
+ scale ? _fbb.CreateVector<float>(*scale) : 0,
+ zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0, details_type, details,
+ quantized_dimension);
}
struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
const Int32Vector *array_segments_as_Int32Vector() const
{
return array_segments_type() == SparseIndexVector_Int32Vector
- ? static_cast<const Int32Vector *>(array_segments())
- : nullptr;
+ ? static_cast<const Int32Vector *>(array_segments())
+ : nullptr;
}
const Uint16Vector *array_segments_as_Uint16Vector() const
{
return array_segments_type() == SparseIndexVector_Uint16Vector
- ? static_cast<const Uint16Vector *>(array_segments())
- : nullptr;
+ ? static_cast<const Uint16Vector *>(array_segments())
+ : nullptr;
}
const Uint8Vector *array_segments_as_Uint8Vector() const
{
return array_segments_type() == SparseIndexVector_Uint8Vector
- ? static_cast<const Uint8Vector *>(array_segments())
- : nullptr;
+ ? static_cast<const Uint8Vector *>(array_segments())
+ : nullptr;
}
SparseIndexVector array_indices_type() const
{
const Int32Vector *array_indices_as_Int32Vector() const
{
return array_indices_type() == SparseIndexVector_Int32Vector
- ? static_cast<const Int32Vector *>(array_indices())
- : nullptr;
+ ? static_cast<const Int32Vector *>(array_indices())
+ : nullptr;
}
const Uint16Vector *array_indices_as_Uint16Vector() const
{
return array_indices_type() == SparseIndexVector_Uint16Vector
- ? static_cast<const Uint16Vector *>(array_indices())
- : nullptr;
+ ? static_cast<const Uint16Vector *>(array_indices())
+ : nullptr;
}
const Uint8Vector *array_indices_as_Uint8Vector() const
{
return array_indices_type() == SparseIndexVector_Uint8Vector
- ? static_cast<const Uint8Vector *>(array_indices())
- : nullptr;
+ ? static_cast<const Uint8Vector *>(array_indices())
+ : nullptr;
}
bool Verify(flatbuffers::Verifier &verifier) const
{
const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata() const
{
return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *>(
- VT_DIM_METADATA);
+ VT_DIM_METADATA);
}
bool Verify(flatbuffers::Verifier &verifier) const
{
fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map);
}
void add_dim_metadata(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata)
{
fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata);
}
};
inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata =
- 0)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata = 0)
{
SparsityParametersBuilder builder_(_fbb);
builder_.add_dim_metadata(dim_metadata);
}
inline flatbuffers::Offset<SparsityParameters> CreateSparsityParametersDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr,
- const std::vector<int32_t> *block_map = nullptr,
- const std::vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata = nullptr)
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr,
+ const std::vector<int32_t> *block_map = nullptr,
+ const std::vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata = nullptr)
{
return onert_tflite::CreateSparsityParameters(
- _fbb, traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0,
- block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0,
- dim_metadata ? _fbb.CreateVector<flatbuffers::Offset<DimensionMetadata>>(*dim_metadata) : 0);
+ _fbb, traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0,
+ block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0,
+ dim_metadata ? _fbb.CreateVector<flatbuffers::Offset<DimensionMetadata>>(*dim_metadata) : 0);
}
struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
}
inline flatbuffers::Offset<Tensor> CreateTensorDirect(
- flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr,
- TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr,
- flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false,
- flatbuffers::Offset<SparsityParameters> sparsity = 0,
- const std::vector<int32_t> *shape_signature = nullptr)
+ flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr,
+ TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr,
+ flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false,
+ flatbuffers::Offset<SparsityParameters> sparsity = 0,
+ const std::vector<int32_t> *shape_signature = nullptr)
{
return onert_tflite::CreateTensor(
- _fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type, buffer,
- name ? _fbb.CreateString(name) : 0, quantization, is_variable, sparsity,
- shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0);
+ _fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type, buffer,
+ name ? _fbb.CreateString(name) : 0, quantization, is_variable, sparsity,
+ shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0);
}
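CreateTensorDirect follows the same pattern at the tensor level; only shape and name need real data for a minimal tensor. A short sketch with made-up values, assuming nothing beyond the helper shown above:
// Illustrative sketch, not part of this patch.
flatbuffers::FlatBufferBuilder fbb;
std::vector<int32_t> shape{1, 224, 224, 3};
auto tensor = onert_tflite::CreateTensorDirect(fbb, &shape, onert_tflite::TensorType_FLOAT32,
                                               /*buffer=*/0, "input");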
struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
};
inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
- flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0,
- int32_t stride_h = 0, int32_t depth_multiplier = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
+ flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0,
+ int32_t stride_h = 0, int32_t depth_multiplier = 0,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
{
DepthwiseConv2DOptionsBuilder builder_(_fbb);
builder_.add_dilation_h_factor(dilation_h_factor);
fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0);
}
void add_num_columns_per_channel(
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel)
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel)
{
fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel);
}
void add_embedding_dim_per_channel(
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel)
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel)
{
fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL,
embedding_dim_per_channel);
};
inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0)
{
ConcatEmbeddingsOptionsBuilder builder_(_fbb);
builder_.add_embedding_dim_per_channel(embedding_dim_per_channel);
const std::vector<int32_t> *embedding_dim_per_channel = nullptr)
{
return onert_tflite::CreateConcatEmbeddingsOptions(
- _fbb, num_channels,
- num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0,
- embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0);
+ _fbb, num_channels,
+ num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0,
+ embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0);
}
struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
};
inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
- flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ bool asymmetric_quantize_inputs = false)
{
SequenceRNNOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
};
inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(
- flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- bool merge_outputs = false, bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ bool merge_outputs = false, bool asymmetric_quantize_inputs = false)
{
BidirectionalSequenceRNNOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
};
inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- FullyConnectedOptionsWeightsFormat weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT,
- bool keep_num_dims = false, bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ FullyConnectedOptionsWeightsFormat weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT,
+ bool keep_num_dims = false, bool asymmetric_quantize_inputs = false)
{
FullyConnectedOptionsBuilder builder_(_fbb);
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
};
inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
- flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+ flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
{
ConcatenationOptionsBuilder builder_(_fbb);
builder_.add_axis(axis);
fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f);
}
explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
+ : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
}
explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
+ : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>
CreateUnidirectionalSequenceLSTMOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false,
- bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false,
+ bool asymmetric_quantize_inputs = false)
{
UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
builder_.add_proj_clip(proj_clip);
static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
}
explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
- : fbb_(_fbb)
+ : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
};
inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(
- flatbuffers::FlatBufferBuilder &_fbb,
- ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
- float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false,
- bool time_major = true, bool asymmetric_quantize_inputs = false)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+ float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false,
+ bool time_major = true, bool asymmetric_quantize_inputs = false)
{
BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
builder_.add_proj_clip(proj_clip);
const std::vector<int32_t> *squeeze_dims = nullptr)
{
return onert_tflite::CreateSqueezeOptions(
- _fbb, squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0);
+ _fbb, squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0);
}
struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
const char *custom_code = nullptr, int32_t version = 1)
{
return onert_tflite::CreateOperatorCode(
- _fbb, builtin_code, custom_code ? _fbb.CreateString(custom_code) : 0, version);
+ _fbb, builtin_code, custom_code ? _fbb.CreateString(custom_code) : 0, version);
}
struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
const Conv2DOptions *builtin_options_as_Conv2DOptions() const
{
return builtin_options_type() == BuiltinOptions_Conv2DOptions
- ? static_cast<const Conv2DOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const Conv2DOptions *>(builtin_options())
+ : nullptr;
}
const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const
{
return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions
- ? static_cast<const DepthwiseConv2DOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const DepthwiseConv2DOptions *>(builtin_options())
+ : nullptr;
}
const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const
{
return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions
- ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options())
+ : nullptr;
}
const LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const
{
return builtin_options_type() == BuiltinOptions_LSHProjectionOptions
- ? static_cast<const LSHProjectionOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const LSHProjectionOptions *>(builtin_options())
+ : nullptr;
}
const Pool2DOptions *builtin_options_as_Pool2DOptions() const
{
return builtin_options_type() == BuiltinOptions_Pool2DOptions
- ? static_cast<const Pool2DOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const Pool2DOptions *>(builtin_options())
+ : nullptr;
}
const SVDFOptions *builtin_options_as_SVDFOptions() const
{
return builtin_options_type() == BuiltinOptions_SVDFOptions
- ? static_cast<const SVDFOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SVDFOptions *>(builtin_options())
+ : nullptr;
}
const RNNOptions *builtin_options_as_RNNOptions() const
{
return builtin_options_type() == BuiltinOptions_RNNOptions
- ? static_cast<const RNNOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const RNNOptions *>(builtin_options())
+ : nullptr;
}
const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const
{
return builtin_options_type() == BuiltinOptions_FullyConnectedOptions
- ? static_cast<const FullyConnectedOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const FullyConnectedOptions *>(builtin_options())
+ : nullptr;
}
const SoftmaxOptions *builtin_options_as_SoftmaxOptions() const
{
return builtin_options_type() == BuiltinOptions_SoftmaxOptions
- ? static_cast<const SoftmaxOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SoftmaxOptions *>(builtin_options())
+ : nullptr;
}
const ConcatenationOptions *builtin_options_as_ConcatenationOptions() const
{
return builtin_options_type() == BuiltinOptions_ConcatenationOptions
- ? static_cast<const ConcatenationOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ConcatenationOptions *>(builtin_options())
+ : nullptr;
}
const AddOptions *builtin_options_as_AddOptions() const
{
return builtin_options_type() == BuiltinOptions_AddOptions
- ? static_cast<const AddOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const AddOptions *>(builtin_options())
+ : nullptr;
}
const L2NormOptions *builtin_options_as_L2NormOptions() const
{
return builtin_options_type() == BuiltinOptions_L2NormOptions
- ? static_cast<const L2NormOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const L2NormOptions *>(builtin_options())
+ : nullptr;
}
const LocalResponseNormalizationOptions *
builtin_options_as_LocalResponseNormalizationOptions() const
{
return builtin_options_type() == BuiltinOptions_LocalResponseNormalizationOptions
- ? static_cast<const LocalResponseNormalizationOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const LocalResponseNormalizationOptions *>(builtin_options())
+ : nullptr;
}
const LSTMOptions *builtin_options_as_LSTMOptions() const
{
return builtin_options_type() == BuiltinOptions_LSTMOptions
- ? static_cast<const LSTMOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const LSTMOptions *>(builtin_options())
+ : nullptr;
}
const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const
{
return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions
- ? static_cast<const ResizeBilinearOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ResizeBilinearOptions *>(builtin_options())
+ : nullptr;
}
const CallOptions *builtin_options_as_CallOptions() const
{
return builtin_options_type() == BuiltinOptions_CallOptions
- ? static_cast<const CallOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const CallOptions *>(builtin_options())
+ : nullptr;
}
const ReshapeOptions *builtin_options_as_ReshapeOptions() const
{
return builtin_options_type() == BuiltinOptions_ReshapeOptions
- ? static_cast<const ReshapeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ReshapeOptions *>(builtin_options())
+ : nullptr;
}
const SkipGramOptions *builtin_options_as_SkipGramOptions() const
{
return builtin_options_type() == BuiltinOptions_SkipGramOptions
- ? static_cast<const SkipGramOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SkipGramOptions *>(builtin_options())
+ : nullptr;
}
const SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const
{
return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions
- ? static_cast<const SpaceToDepthOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SpaceToDepthOptions *>(builtin_options())
+ : nullptr;
}
const EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const
{
return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions
- ? static_cast<const EmbeddingLookupSparseOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const EmbeddingLookupSparseOptions *>(builtin_options())
+ : nullptr;
}
const MulOptions *builtin_options_as_MulOptions() const
{
return builtin_options_type() == BuiltinOptions_MulOptions
- ? static_cast<const MulOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const MulOptions *>(builtin_options())
+ : nullptr;
}
const PadOptions *builtin_options_as_PadOptions() const
{
return builtin_options_type() == BuiltinOptions_PadOptions
- ? static_cast<const PadOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const PadOptions *>(builtin_options())
+ : nullptr;
}
const GatherOptions *builtin_options_as_GatherOptions() const
{
return builtin_options_type() == BuiltinOptions_GatherOptions
- ? static_cast<const GatherOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const GatherOptions *>(builtin_options())
+ : nullptr;
}
const BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const
{
return builtin_options_type() == BuiltinOptions_BatchToSpaceNDOptions
- ? static_cast<const BatchToSpaceNDOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const BatchToSpaceNDOptions *>(builtin_options())
+ : nullptr;
}
const SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const
{
return builtin_options_type() == BuiltinOptions_SpaceToBatchNDOptions
- ? static_cast<const SpaceToBatchNDOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SpaceToBatchNDOptions *>(builtin_options())
+ : nullptr;
}
const TransposeOptions *builtin_options_as_TransposeOptions() const
{
return builtin_options_type() == BuiltinOptions_TransposeOptions
- ? static_cast<const TransposeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const TransposeOptions *>(builtin_options())
+ : nullptr;
}
const ReducerOptions *builtin_options_as_ReducerOptions() const
{
return builtin_options_type() == BuiltinOptions_ReducerOptions
- ? static_cast<const ReducerOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ReducerOptions *>(builtin_options())
+ : nullptr;
}
const SubOptions *builtin_options_as_SubOptions() const
{
return builtin_options_type() == BuiltinOptions_SubOptions
- ? static_cast<const SubOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SubOptions *>(builtin_options())
+ : nullptr;
}
const DivOptions *builtin_options_as_DivOptions() const
{
return builtin_options_type() == BuiltinOptions_DivOptions
- ? static_cast<const DivOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const DivOptions *>(builtin_options())
+ : nullptr;
}
const SqueezeOptions *builtin_options_as_SqueezeOptions() const
{
return builtin_options_type() == BuiltinOptions_SqueezeOptions
- ? static_cast<const SqueezeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SqueezeOptions *>(builtin_options())
+ : nullptr;
}
const SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const
{
return builtin_options_type() == BuiltinOptions_SequenceRNNOptions
- ? static_cast<const SequenceRNNOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SequenceRNNOptions *>(builtin_options())
+ : nullptr;
}
const StridedSliceOptions *builtin_options_as_StridedSliceOptions() const
{
return builtin_options_type() == BuiltinOptions_StridedSliceOptions
- ? static_cast<const StridedSliceOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const StridedSliceOptions *>(builtin_options())
+ : nullptr;
}
const ExpOptions *builtin_options_as_ExpOptions() const
{
return builtin_options_type() == BuiltinOptions_ExpOptions
- ? static_cast<const ExpOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ExpOptions *>(builtin_options())
+ : nullptr;
}
const TopKV2Options *builtin_options_as_TopKV2Options() const
{
return builtin_options_type() == BuiltinOptions_TopKV2Options
- ? static_cast<const TopKV2Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const TopKV2Options *>(builtin_options())
+ : nullptr;
}
const SplitOptions *builtin_options_as_SplitOptions() const
{
return builtin_options_type() == BuiltinOptions_SplitOptions
- ? static_cast<const SplitOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SplitOptions *>(builtin_options())
+ : nullptr;
}
const LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const
{
return builtin_options_type() == BuiltinOptions_LogSoftmaxOptions
- ? static_cast<const LogSoftmaxOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const LogSoftmaxOptions *>(builtin_options())
+ : nullptr;
}
const CastOptions *builtin_options_as_CastOptions() const
{
return builtin_options_type() == BuiltinOptions_CastOptions
- ? static_cast<const CastOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const CastOptions *>(builtin_options())
+ : nullptr;
}
const DequantizeOptions *builtin_options_as_DequantizeOptions() const
{
return builtin_options_type() == BuiltinOptions_DequantizeOptions
- ? static_cast<const DequantizeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const DequantizeOptions *>(builtin_options())
+ : nullptr;
}
const MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const
{
return builtin_options_type() == BuiltinOptions_MaximumMinimumOptions
- ? static_cast<const MaximumMinimumOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const MaximumMinimumOptions *>(builtin_options())
+ : nullptr;
}
const ArgMaxOptions *builtin_options_as_ArgMaxOptions() const
{
return builtin_options_type() == BuiltinOptions_ArgMaxOptions
- ? static_cast<const ArgMaxOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ArgMaxOptions *>(builtin_options())
+ : nullptr;
}
const LessOptions *builtin_options_as_LessOptions() const
{
return builtin_options_type() == BuiltinOptions_LessOptions
- ? static_cast<const LessOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const LessOptions *>(builtin_options())
+ : nullptr;
}
const NegOptions *builtin_options_as_NegOptions() const
{
return builtin_options_type() == BuiltinOptions_NegOptions
- ? static_cast<const NegOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const NegOptions *>(builtin_options())
+ : nullptr;
}
const PadV2Options *builtin_options_as_PadV2Options() const
{
return builtin_options_type() == BuiltinOptions_PadV2Options
- ? static_cast<const PadV2Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const PadV2Options *>(builtin_options())
+ : nullptr;
}
const GreaterOptions *builtin_options_as_GreaterOptions() const
{
return builtin_options_type() == BuiltinOptions_GreaterOptions
- ? static_cast<const GreaterOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const GreaterOptions *>(builtin_options())
+ : nullptr;
}
const GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const
{
return builtin_options_type() == BuiltinOptions_GreaterEqualOptions
- ? static_cast<const GreaterEqualOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const GreaterEqualOptions *>(builtin_options())
+ : nullptr;
}
const LessEqualOptions *builtin_options_as_LessEqualOptions() const
{
return builtin_options_type() == BuiltinOptions_LessEqualOptions
- ? static_cast<const LessEqualOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const LessEqualOptions *>(builtin_options())
+ : nullptr;
}
const SelectOptions *builtin_options_as_SelectOptions() const
{
return builtin_options_type() == BuiltinOptions_SelectOptions
- ? static_cast<const SelectOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SelectOptions *>(builtin_options())
+ : nullptr;
}
const SliceOptions *builtin_options_as_SliceOptions() const
{
return builtin_options_type() == BuiltinOptions_SliceOptions
- ? static_cast<const SliceOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SliceOptions *>(builtin_options())
+ : nullptr;
}
const TransposeConvOptions *builtin_options_as_TransposeConvOptions() const
{
return builtin_options_type() == BuiltinOptions_TransposeConvOptions
- ? static_cast<const TransposeConvOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const TransposeConvOptions *>(builtin_options())
+ : nullptr;
}
const SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const
{
return builtin_options_type() == BuiltinOptions_SparseToDenseOptions
- ? static_cast<const SparseToDenseOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SparseToDenseOptions *>(builtin_options())
+ : nullptr;
}
const TileOptions *builtin_options_as_TileOptions() const
{
return builtin_options_type() == BuiltinOptions_TileOptions
- ? static_cast<const TileOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const TileOptions *>(builtin_options())
+ : nullptr;
}
const ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const
{
return builtin_options_type() == BuiltinOptions_ExpandDimsOptions
- ? static_cast<const ExpandDimsOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ExpandDimsOptions *>(builtin_options())
+ : nullptr;
}
const EqualOptions *builtin_options_as_EqualOptions() const
{
return builtin_options_type() == BuiltinOptions_EqualOptions
- ? static_cast<const EqualOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const EqualOptions *>(builtin_options())
+ : nullptr;
}
const NotEqualOptions *builtin_options_as_NotEqualOptions() const
{
return builtin_options_type() == BuiltinOptions_NotEqualOptions
- ? static_cast<const NotEqualOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const NotEqualOptions *>(builtin_options())
+ : nullptr;
}
const ShapeOptions *builtin_options_as_ShapeOptions() const
{
return builtin_options_type() == BuiltinOptions_ShapeOptions
- ? static_cast<const ShapeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ShapeOptions *>(builtin_options())
+ : nullptr;
}
const PowOptions *builtin_options_as_PowOptions() const
{
return builtin_options_type() == BuiltinOptions_PowOptions
- ? static_cast<const PowOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const PowOptions *>(builtin_options())
+ : nullptr;
}
const ArgMinOptions *builtin_options_as_ArgMinOptions() const
{
return builtin_options_type() == BuiltinOptions_ArgMinOptions
- ? static_cast<const ArgMinOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ArgMinOptions *>(builtin_options())
+ : nullptr;
}
const FakeQuantOptions *builtin_options_as_FakeQuantOptions() const
{
return builtin_options_type() == BuiltinOptions_FakeQuantOptions
- ? static_cast<const FakeQuantOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const FakeQuantOptions *>(builtin_options())
+ : nullptr;
}
const PackOptions *builtin_options_as_PackOptions() const
{
return builtin_options_type() == BuiltinOptions_PackOptions
- ? static_cast<const PackOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const PackOptions *>(builtin_options())
+ : nullptr;
}
const LogicalOrOptions *builtin_options_as_LogicalOrOptions() const
{
return builtin_options_type() == BuiltinOptions_LogicalOrOptions
- ? static_cast<const LogicalOrOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const LogicalOrOptions *>(builtin_options())
+ : nullptr;
}
const OneHotOptions *builtin_options_as_OneHotOptions() const
{
return builtin_options_type() == BuiltinOptions_OneHotOptions
- ? static_cast<const OneHotOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const OneHotOptions *>(builtin_options())
+ : nullptr;
}
const LogicalAndOptions *builtin_options_as_LogicalAndOptions() const
{
return builtin_options_type() == BuiltinOptions_LogicalAndOptions
- ? static_cast<const LogicalAndOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const LogicalAndOptions *>(builtin_options())
+ : nullptr;
}
const LogicalNotOptions *builtin_options_as_LogicalNotOptions() const
{
return builtin_options_type() == BuiltinOptions_LogicalNotOptions
- ? static_cast<const LogicalNotOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const LogicalNotOptions *>(builtin_options())
+ : nullptr;
}
const UnpackOptions *builtin_options_as_UnpackOptions() const
{
return builtin_options_type() == BuiltinOptions_UnpackOptions
- ? static_cast<const UnpackOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const UnpackOptions *>(builtin_options())
+ : nullptr;
}
const FloorDivOptions *builtin_options_as_FloorDivOptions() const
{
return builtin_options_type() == BuiltinOptions_FloorDivOptions
- ? static_cast<const FloorDivOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const FloorDivOptions *>(builtin_options())
+ : nullptr;
}
const SquareOptions *builtin_options_as_SquareOptions() const
{
return builtin_options_type() == BuiltinOptions_SquareOptions
- ? static_cast<const SquareOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SquareOptions *>(builtin_options())
+ : nullptr;
}
const ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const
{
return builtin_options_type() == BuiltinOptions_ZerosLikeOptions
- ? static_cast<const ZerosLikeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ZerosLikeOptions *>(builtin_options())
+ : nullptr;
}
const FillOptions *builtin_options_as_FillOptions() const
{
return builtin_options_type() == BuiltinOptions_FillOptions
- ? static_cast<const FillOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const FillOptions *>(builtin_options())
+ : nullptr;
}
const BidirectionalSequenceLSTMOptions *
builtin_options_as_BidirectionalSequenceLSTMOptions() const
{
return builtin_options_type() == BuiltinOptions_BidirectionalSequenceLSTMOptions
- ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options())
+ : nullptr;
}
const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const
{
return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions
- ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options())
+ : nullptr;
}
const UnidirectionalSequenceLSTMOptions *
builtin_options_as_UnidirectionalSequenceLSTMOptions() const
{
return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions
- ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options())
+ : nullptr;
}
const FloorModOptions *builtin_options_as_FloorModOptions() const
{
return builtin_options_type() == BuiltinOptions_FloorModOptions
- ? static_cast<const FloorModOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const FloorModOptions *>(builtin_options())
+ : nullptr;
}
const RangeOptions *builtin_options_as_RangeOptions() const
{
return builtin_options_type() == BuiltinOptions_RangeOptions
- ? static_cast<const RangeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const RangeOptions *>(builtin_options())
+ : nullptr;
}
const ResizeNearestNeighborOptions *builtin_options_as_ResizeNearestNeighborOptions() const
{
return builtin_options_type() == BuiltinOptions_ResizeNearestNeighborOptions
- ? static_cast<const ResizeNearestNeighborOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ResizeNearestNeighborOptions *>(builtin_options())
+ : nullptr;
}
const LeakyReluOptions *builtin_options_as_LeakyReluOptions() const
{
return builtin_options_type() == BuiltinOptions_LeakyReluOptions
- ? static_cast<const LeakyReluOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const LeakyReluOptions *>(builtin_options())
+ : nullptr;
}
const SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const
{
return builtin_options_type() == BuiltinOptions_SquaredDifferenceOptions
- ? static_cast<const SquaredDifferenceOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SquaredDifferenceOptions *>(builtin_options())
+ : nullptr;
}
const MirrorPadOptions *builtin_options_as_MirrorPadOptions() const
{
return builtin_options_type() == BuiltinOptions_MirrorPadOptions
- ? static_cast<const MirrorPadOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const MirrorPadOptions *>(builtin_options())
+ : nullptr;
}
const AbsOptions *builtin_options_as_AbsOptions() const
{
return builtin_options_type() == BuiltinOptions_AbsOptions
- ? static_cast<const AbsOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const AbsOptions *>(builtin_options())
+ : nullptr;
}
const SplitVOptions *builtin_options_as_SplitVOptions() const
{
return builtin_options_type() == BuiltinOptions_SplitVOptions
- ? static_cast<const SplitVOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SplitVOptions *>(builtin_options())
+ : nullptr;
}
const UniqueOptions *builtin_options_as_UniqueOptions() const
{
return builtin_options_type() == BuiltinOptions_UniqueOptions
- ? static_cast<const UniqueOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const UniqueOptions *>(builtin_options())
+ : nullptr;
}
const ReverseV2Options *builtin_options_as_ReverseV2Options() const
{
return builtin_options_type() == BuiltinOptions_ReverseV2Options
- ? static_cast<const ReverseV2Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const ReverseV2Options *>(builtin_options())
+ : nullptr;
}
const AddNOptions *builtin_options_as_AddNOptions() const
{
return builtin_options_type() == BuiltinOptions_AddNOptions
- ? static_cast<const AddNOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const AddNOptions *>(builtin_options())
+ : nullptr;
}
const GatherNdOptions *builtin_options_as_GatherNdOptions() const
{
return builtin_options_type() == BuiltinOptions_GatherNdOptions
- ? static_cast<const GatherNdOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const GatherNdOptions *>(builtin_options())
+ : nullptr;
}
const CosOptions *builtin_options_as_CosOptions() const
{
return builtin_options_type() == BuiltinOptions_CosOptions
- ? static_cast<const CosOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const CosOptions *>(builtin_options())
+ : nullptr;
}
const WhereOptions *builtin_options_as_WhereOptions() const
{
return builtin_options_type() == BuiltinOptions_WhereOptions
- ? static_cast<const WhereOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const WhereOptions *>(builtin_options())
+ : nullptr;
}
const RankOptions *builtin_options_as_RankOptions() const
{
return builtin_options_type() == BuiltinOptions_RankOptions
- ? static_cast<const RankOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const RankOptions *>(builtin_options())
+ : nullptr;
}
const ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const
{
return builtin_options_type() == BuiltinOptions_ReverseSequenceOptions
- ? static_cast<const ReverseSequenceOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ReverseSequenceOptions *>(builtin_options())
+ : nullptr;
}
const MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const
{
return builtin_options_type() == BuiltinOptions_MatrixDiagOptions
- ? static_cast<const MatrixDiagOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const MatrixDiagOptions *>(builtin_options())
+ : nullptr;
}
const QuantizeOptions *builtin_options_as_QuantizeOptions() const
{
return builtin_options_type() == BuiltinOptions_QuantizeOptions
- ? static_cast<const QuantizeOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const QuantizeOptions *>(builtin_options())
+ : nullptr;
}
const MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const
{
return builtin_options_type() == BuiltinOptions_MatrixSetDiagOptions
- ? static_cast<const MatrixSetDiagOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const MatrixSetDiagOptions *>(builtin_options())
+ : nullptr;
}
const HardSwishOptions *builtin_options_as_HardSwishOptions() const
{
return builtin_options_type() == BuiltinOptions_HardSwishOptions
- ? static_cast<const HardSwishOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const HardSwishOptions *>(builtin_options())
+ : nullptr;
}
const IfOptions *builtin_options_as_IfOptions() const
{
return builtin_options_type() == BuiltinOptions_IfOptions
- ? static_cast<const IfOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const IfOptions *>(builtin_options())
+ : nullptr;
}
const WhileOptions *builtin_options_as_WhileOptions() const
{
return builtin_options_type() == BuiltinOptions_WhileOptions
- ? static_cast<const WhileOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const WhileOptions *>(builtin_options())
+ : nullptr;
}
const DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const
{
return builtin_options_type() == BuiltinOptions_DepthToSpaceOptions
- ? static_cast<const DepthToSpaceOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const DepthToSpaceOptions *>(builtin_options())
+ : nullptr;
}
const NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const
{
return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV4Options
- ? static_cast<const NonMaxSuppressionV4Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const NonMaxSuppressionV4Options *>(builtin_options())
+ : nullptr;
}
const NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const
{
return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV5Options
- ? static_cast<const NonMaxSuppressionV5Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const NonMaxSuppressionV5Options *>(builtin_options())
+ : nullptr;
}
const ScatterNdOptions *builtin_options_as_ScatterNdOptions() const
{
return builtin_options_type() == BuiltinOptions_ScatterNdOptions
- ? static_cast<const ScatterNdOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const ScatterNdOptions *>(builtin_options())
+ : nullptr;
}
const SelectV2Options *builtin_options_as_SelectV2Options() const
{
return builtin_options_type() == BuiltinOptions_SelectV2Options
- ? static_cast<const SelectV2Options *>(builtin_options())
- : nullptr;
+ ? static_cast<const SelectV2Options *>(builtin_options())
+ : nullptr;
}
const DensifyOptions *builtin_options_as_DensifyOptions() const
{
return builtin_options_type() == BuiltinOptions_DensifyOptions
- ? static_cast<const DensifyOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const DensifyOptions *>(builtin_options())
+ : nullptr;
}
const SegmentSumOptions *builtin_options_as_SegmentSumOptions() const
{
return builtin_options_type() == BuiltinOptions_SegmentSumOptions
- ? static_cast<const SegmentSumOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const SegmentSumOptions *>(builtin_options())
+ : nullptr;
}
const BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const
{
return builtin_options_type() == BuiltinOptions_BatchMatMulOptions
- ? static_cast<const BatchMatMulOptions *>(builtin_options())
- : nullptr;
+ ? static_cast<const BatchMatMulOptions *>(builtin_options())
+ : nullptr;
}
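The Operator table exposes one builtin_options_as_X() accessor per option type; each checks builtin_options_type() before casting, so a caller can probe options without touching the raw union. A dispatch sketch, where op is an assumed const Operator pointer obtained from a parsed model:
// Illustrative sketch, not part of this patch; `op` is an assumed const onert_tflite::Operator *.
if (const onert_tflite::Conv2DOptions *conv = op->builtin_options_as_Conv2DOptions())
{
  // Conv2D-specific fields are available through conv
}
else if (const onert_tflite::FullyConnectedOptions *fc =
           op->builtin_options_as_FullyConnectedOptions())
{
  // FullyConnected-specific fields are available through fc
}
// ... and so on for the remaining option types listed above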
const flatbuffers::Vector<uint8_t> *custom_options() const
{
static_cast<int8_t>(custom_options_format), 0);
}
void add_mutating_variable_inputs(
- flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs)
+ flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs)
{
fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs);
}
const std::vector<int32_t> *intermediates = nullptr)
{
return onert_tflite::CreateOperator(
- _fbb, opcode_index, inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
- outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type, builtin_options,
- custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0, custom_options_format,
- mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0,
- intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0);
+ _fbb, opcode_index, inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
+ outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type, builtin_options,
+ custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0, custom_options_format,
+ mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0,
+ intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0);
}
struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
};
inline flatbuffers::Offset<SubGraph> CreateSubGraph(
- flatbuffers::FlatBufferBuilder &_fbb,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators = 0,
- flatbuffers::Offset<flatbuffers::String> name = 0)
+ flatbuffers::FlatBufferBuilder &_fbb,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators = 0,
+ flatbuffers::Offset<flatbuffers::String> name = 0)
{
SubGraphBuilder builder_(_fbb);
builder_.add_name(name);
return builder_.Finish();
}
-inline flatbuffers::Offset<SubGraph>
-CreateSubGraphDirect(flatbuffers::FlatBufferBuilder &_fbb,
- const std::vector<flatbuffers::Offset<Tensor>> *tensors = nullptr,
- const std::vector<int32_t> *inputs = nullptr,
- const std::vector<int32_t> *outputs = nullptr,
- const std::vector<flatbuffers::Offset<Operator>> *operators = nullptr,
- const char *name = nullptr)
+inline flatbuffers::Offset<SubGraph> CreateSubGraphDirect(
+ flatbuffers::FlatBufferBuilder &_fbb,
+ const std::vector<flatbuffers::Offset<Tensor>> *tensors = nullptr,
+ const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr,
+ const std::vector<flatbuffers::Offset<Operator>> *operators = nullptr, const char *name = nullptr)
{
return onert_tflite::CreateSubGraph(
- _fbb, tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0,
- inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
- outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0,
- operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators) : 0,
- name ? _fbb.CreateString(name) : 0);
+ _fbb, tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0,
+ inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
+ outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0,
+ operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators) : 0,
+ name ? _fbb.CreateString(name) : 0);
}
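CreateSubGraphDirect assembles the per-subgraph pieces in one call. A hedged sketch; the two offset vectors are hypothetical and would normally hold results from the CreateTensor*/CreateOperator* helpers above:
// Illustrative sketch, not part of this patch; the offset vectors are assumed to be
// filled elsewhere with CreateTensorDirect / CreateOperator results.
flatbuffers::FlatBufferBuilder fbb;
std::vector<flatbuffers::Offset<onert_tflite::Tensor>> tensors;
std::vector<flatbuffers::Offset<onert_tflite::Operator>> operators;
std::vector<int32_t> inputs{0};
std::vector<int32_t> outputs{1};
auto subgraph =
  onert_tflite::CreateSubGraphDirect(fbb, &tensors, &inputs, &outputs, &operators, "main");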
struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *operator_codes() const
{
return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *>(
- VT_OPERATOR_CODES);
+ VT_OPERATOR_CODES);
}
const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *subgraphs() const
{
flatbuffers::uoffset_t start_;
void add_version(uint32_t version) { fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0); }
void add_operator_codes(
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes)
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes)
{
fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes);
}
};
inline flatbuffers::Offset<Model> CreateModel(
- flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs = 0,
- flatbuffers::Offset<flatbuffers::String> description = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers = 0,
- flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata = 0)
+ flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs = 0,
+ flatbuffers::Offset<flatbuffers::String> description = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers = 0,
+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata = 0)
{
ModelBuilder builder_(_fbb);
builder_.add_metadata(metadata);
const std::vector<flatbuffers::Offset<Metadata>> *metadata = nullptr)
{
return onert_tflite::CreateModel(
- _fbb, version,
- operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(*operator_codes) : 0,
- subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs) : 0,
- description ? _fbb.CreateString(description) : 0,
- buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0,
- metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0,
- metadata ? _fbb.CreateVector<flatbuffers::Offset<Metadata>>(*metadata) : 0);
+ _fbb, version,
+ operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(*operator_codes) : 0,
+ subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs) : 0,
+ description ? _fbb.CreateString(description) : 0,
+ buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0,
+ metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0,
+ metadata ? _fbb.CreateVector<flatbuffers::Offset<Metadata>>(*metadata) : 0);
}
inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj,
--- /dev/null
+../../../.clang-format.8
\ No newline at end of file
--- /dev/null
+../../../.clang-format.8
\ No newline at end of file
std::unique_ptr<BackendContext>
newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
{
- return std::unique_ptr<BackendContext>(
- new BackendContext{this, nullptr, nullptr, nullptr, nullptr});
+ return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr});
}
};
std::unique_ptr<BackendContext>
newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
{
- return std::unique_ptr<BackendContext>(
- new BackendContext{this, nullptr, nullptr, nullptr, nullptr});
+ return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr});
}
};
std::unique_ptr<BackendContext>
newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
{
- return std::unique_ptr<BackendContext>(
- new BackendContext{this, nullptr, nullptr, nullptr, nullptr});
+ return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr});
}
};
for (auto &backend : backends)
setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time);
}
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
}
// Set permute time from one backend to another. This method is needed since ExecutionTime has only
setPermutationTime(et, backend, other_backend, false, operand_size, exec_time);
}
}
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
}
//
//
// SetUp/TearDown methods run before/after each test and perform actions common to each test
-class SchedulerTest : public ::testing::Test
+class HESchedulerTest : public ::testing::Test
{
protected:
void SetUp() override
std::string _original_profiling_mode;
};
-class SchedulerTestWithExecutorParam : public SchedulerTest,
- public testing::WithParamInterface<std::string>
+class HESchedulerTestWithExecutorParam : public HESchedulerTest,
+ public testing::WithParamInterface<std::string>
{
};
//
// Test scheduler behavior for a straight graph with known execution time of all nodes and permutes.
-TEST_P(SchedulerTestWithExecutorParam, straight_graph_known_exec_time)
+TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time)
{
setExecutor(GetParam());
setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1);
setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1);
setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1);
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
// Test scheduler
auto backend_contexts = buildBackendContexts(*graph);
}
// Test scheduler behavior for a branched graph with known execution time of all nodes and permutes
-TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time)
+TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time)
{
const int64_t NPU_ET = 5000;
setExecutor(GetParam());
auto graph(createBranchedGraph());
subgs.push(ir::SubgraphIndex{0}, graph);
OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
- sub_op_idx(5);
+ sub_op_idx(5);
// Set default execution and transfer time
setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000);
setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET);
setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000);
setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000);
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
// Test scheduler
auto backend_contexts = buildBackendContexts(*graph);
if (GetParam() == PARALLEL)
{
branch1_expected_backend =
- br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu";
+ br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu";
branch2_expected_backend = branch1_expected_backend == "npu" ? "gpu" : "npu";
}
* branching or scheduler assigns another backend to a node*/
setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1);
setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1);
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
// Test scheduler
auto backend_contexts = buildBackendContexts(*graph);
// HESchedulerTestWithExecutorParam tests are parameterized with the executor name and run three
// times - once for each executor
-INSTANTIATE_TEST_CASE_P(AllExecutors, SchedulerTestWithExecutorParam,
+INSTANTIATE_TEST_CASE_P(AllExecutors, HESchedulerTestWithExecutorParam,
testing::Values(LINEAR, DATAFLOW, PARALLEL));
// Test scheduler behavior for branched graph and enabled profiling mode
-TEST_F(SchedulerTest, branched_graph_profiling_mode)
+TEST_F(HESchedulerTest, branched_graph_profiling_mode)
{
const int ET = 1e5;
auto graph(createBranchedGraph());
subgs.push(ir::SubgraphIndex{0}, graph);
OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
- sub_op_idx(5);
+ sub_op_idx(5);
// Test 1
// Expected behaviour: scheduler assigns backends to nodes with unknown execution time
setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET);
setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET);
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
// Test scheduler
auto backend_contexts = buildBackendContexts(*graph);
setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET);
setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
// Test scheduler
auto backend_contexts = buildBackendContexts(*graph);
#include "compiler/Compiler.h"
#include "exec/Execution.h"
#include "ir/operation/BinaryArithmetic.h"
+#include "util/TracingCtx.h"
namespace
{
auto operand_rhs2 = graph->addOperand(shape, type);
auto operand_result2 = graph->addOperand(shape, type);
graph->operands()
- .at(operand_rhs2)
- .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
+ .at(operand_rhs2)
+ .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
// 2nd add operations (result2 <= result1 + rhs2)
operation::BinaryArithmetic::Param param1;
param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
auto output_set1 = OperandIndexSequence{operand_result1};
graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+ std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
operation::BinaryArithmetic::Param param2;
param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
param2.activation = Activation::NONE;
auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
auto output_set2 = OperandIndexSequence{operand_result2};
graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
+ std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
// Identify model inputs and outputs
graph->addInput(operand_lhs);
graph->addInput(operand_rhs1);
// Compile
auto subgs = std::make_shared<onert::ir::Subgraphs>();
subgs->push(onert::ir::SubgraphIndex{0}, graph);
- onert::compiler::Compiler compiler{subgs};
+ tracing_ctx = std::make_unique<onert::util::TracingCtx>(subgs.get());
+ onert::compiler::Compiler compiler{subgs, tracing_ctx.get()};
executors = compiler.compile();
}
public:
std::shared_ptr<Graph> graph;
std::shared_ptr<onert::exec::ExecutorMap> executors;
+ std::unique_ptr<onert::util::TracingCtx> tracing_ctx;
};
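Note that the fixture now owns the TracingCtx it hands to the Compiler and keeps it alive next to the executors, presumably because the Compiler stores only the raw pointer. A condensed sketch of the new call shape, restating the pattern used in the fixture above (graph is the fixture's shared_ptr<Graph>):
// Illustrative sketch of the new Compiler call shape, not part of this patch;
// `graph` is an assumed std::shared_ptr<Graph> built as in the fixture above.
auto subgs = std::make_shared<onert::ir::Subgraphs>();
subgs->push(onert::ir::SubgraphIndex{0}, graph);
auto tracing_ctx = std::make_unique<onert::util::TracingCtx>(subgs.get());
onert::compiler::Compiler compiler{subgs, tracing_ctx.get()};
auto executors = compiler.compile();
// The fixture keeps tracing_ctx as a member, which suggests it should outlive the executors.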
TEST(ExecInstance, simple)
// Make new executor: compile again
auto subgs = std::make_shared<onert::ir::Subgraphs>();
subgs->push(onert::ir::SubgraphIndex{0}, graph);
- onert::compiler::Compiler compiler{subgs};
+ auto tracing_ctx = std::make_unique<onert::util::TracingCtx>(subgs.get());
+ onert::compiler::Compiler compiler{subgs, tracing_ctx.get()};
std::shared_ptr<onert::exec::ExecutorMap> executors2 = compiler.compile();
onert::exec::Execution execution2{executors2};
public:
Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4],
std::shared_ptr<onert::exec::ExecutorMap> &executors)
- : _input1{input1}, _input2{input2}, _output{output}, _executors{executors}
+ : _input1{input1}, _input2{input2}, _output{output}, _executors{executors}
{
// DO NOTHING
}
et.updateOperationExecTime(b, "op1", true, 100, 100);
et.updateOperationExecTime(b, "op1", true, 200, 200);
et.updateOperationExecTime(b, "op1", false, 100, 888);
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
}
{
ExecTime et(bs);
ASSERT_EQ(time, 150);
time = et.getOperationExecTime(b, "op1", false, 100);
ASSERT_EQ(time, 888);
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
}
// clean up
EXPECT_EQ(remove("exec_time.json"), 0);
ExecTime et(bs);
et.updateOperationExecTime(b, "op1", true, 100, 100);
et.updateOperationExecTime(b, "op1", true, 200, 200);
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
}
{
ExecTime et(bs);
// Check interpolation
time = et.getOperationExecTime(b, "op1", true, 200);
ASSERT_EQ(time, 200);
- et.uploadOperationsExecTime();
+ et.storeOperationsExecTime();
}
// clean up
EXPECT_EQ(remove("exec_time.json"), 0);
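The rename from uploadOperationsExecTime() to storeOperationsExecTime() runs through all of these tests; as they show, the call persists the recorded times (exec_time.json here) and a fresh ExecTime instance reads them back. A round-trip sketch mirroring the calls above, with bs and b standing for the backend set and backend used in these tests and made-up numbers:
// Illustrative round-trip sketch, not part of this patch; `bs` and `b` mirror the
// backend-set and backend variables used in the tests above.
{
  ExecTime et(bs);
  et.updateOperationExecTime(b, "op1", true, 100, 100);
  et.storeOperationsExecTime(); // previously uploadOperationsExecTime()
}
{
  ExecTime et(bs); // a fresh instance reads back what was stored
  auto time = et.getOperationExecTime(b, "op1", true, 100);
  (void)time;
}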
auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
auto output_set = OperandIndexSequence{operand_result};
_graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
+ std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
// Identify model inputs and outputs
_executors = std::make_shared<ExecutorMap>();
_executors->insert(
- std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
+ std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
}
void CreateTwoStepModel()
auto operand_rhs2 = _graph->addOperand(shape, type);
auto operand_result2 = _graph->addOperand(shape, type);
_graph->operands()
- .at(operand_rhs2)
- .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
+ .at(operand_rhs2)
+ .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
// 2nd add operations (result2 <= result1 + rhs2)
auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
auto output_set1 = OperandIndexSequence{operand_result1};
_graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+ std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
operation::BinaryArithmetic::Param param2;
param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
auto output_set2 = OperandIndexSequence{operand_result2};
_graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
+ std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
// Identify model inputs and outputs
_executors = std::make_shared<ExecutorMap>();
_executors->insert(
- std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
+ std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
}
void CreateUnspecifiedDimensionsModel()
auto operand_activation = _graph->addOperand(shape_scalar, type_scalar);
_graph->operands()
- .at(operand_activation)
- .data(
- std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&_activation_value), 4));
+ .at(operand_activation)
+ .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&_activation_value), 4));
auto operand_result = _graph->addOperand(shape, type);
auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
auto output_set = OperandIndexSequence{operand_result};
_graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
+ std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
// Identify model inputs and outputs
_executors = std::make_shared<ExecutorMap>();
_executors->insert(
- std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
+ std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
}
void createExecution() { _execution = std::make_unique<Execution>(_executors); }
public:
SimpleMock(const onert::ir::OperandIndexSequence &inputs,
const onert::ir::OperandIndexSequence &outputs)
- : Operation{onert::ir::OperandConstraint::createAny()}
+ : Operation{onert::ir::OperandConstraint::createAny()}
{
setInputs(inputs);
setOutputs(outputs);
// MockNode1
auto operand_index1 = graph.addOperand(shape, type);
auto mocknode_index1 =
- graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
+ graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
// MockNode2
auto operand_index2 = graph.addOperand(shape, type);
auto mocknode_index2 =
- graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
+ graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
// MockNode3(two input)
auto multiinput_index = graph.addOperation(
- std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
+ std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
graph.finishBuilding();
Padding padding{PaddingType::SAME};
operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
Padding padding{PaddingType::VALID};
operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
Padding padding{4, 3, 2, 1};
operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
Padding padding{PaddingType::SAME};
operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
ASSERT_THROW(onert::shape_inference::inferPoolShape(in_shape, avg_pool_param),
std::runtime_error);
}
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
param =
- operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
+ operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
ASSERT_EQ(infered_out_shape.rank(), 4);
operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3,
Activation::NONE, Dilation{1, 1}};
auto infered_out_shape =
- onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+ onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
ASSERT_EQ(in_shape.rank(), perm.size());
ASSERT_EQ(expected.rank(), perm.size());
auto inferred_out_shape =
- onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size());
+ onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size());
// post-conditions
ASSERT_EQ(inferred_out_shape.rank(), perm.size());
for (int32_t dim = 0; dim < expected.rank(); dim++)
{
auto check = [&](Shape &in_shape, Shape &cluster_shape, std::vector<int> cluster,
Shape &expected) {
- auto actual = onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape,
- cluster.data());
+ auto actual =
+ onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape, cluster.data());
ASSERT_EQ(actual.rank(), expected.rank());
for (int32_t dim = 0; dim < expected.rank(); dim++)
--- /dev/null
+../.clang-format.8
\ No newline at end of file
template <typename InIter1, typename InIter2>
static auto findMaxDifference(InIter1 first1, InIter1 last1, InIter2 first2)
- -> decltype(*first1 - *first2)
+ -> decltype(*first1 - *first2)
{
auto max_difference = std::abs(*first1 - *first2);
for (; first1 != last1; ++first1, ++first2)
const float tolerance = 0.01f;
auto max_difference =
- findMaxDifference(outputs[0].begin(), outputs[0].end(), std::begin(ref_data));
+ findMaxDifference(outputs[0].begin(), outputs[0].end(), std::begin(ref_data));
int exit_code = 0;
if (max_difference > tolerance)
return()
endif(NOT BUILD_ONERT)
+# GCC compiler versions below 6.2 do not support building this test
+if (CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.2)
+ return()
+endif()
+
+if (ANDROID_BOOST_ROOT)
+ set(BOOST_ROOT ${ANDROID_BOOST_ROOT})
+endif (ANDROID_BOOST_ROOT)
+
+nnfw_find_package(Boost REQUIRED)
nnfw_find_package(GTest)
set(RUNTIME_NNAPI_TEST_SRC_INC ${CMAKE_CURRENT_SOURCE_DIR}/include
${CMAKE_CURRENT_SOURCE_DIR}/src)
target_include_directories(${RUNTIME_NNAPI_TEST} PRIVATE ${RUNTIME_NNAPI_TEST_SRC_INC})
+target_include_directories(${RUNTIME_NNAPI_TEST} PRIVATE ${Boost_INCLUDE_DIRS})
# Define NNTEST_ONLY_PUBLIC_API to avoid android dependency
target_compile_definitions(${RUNTIME_NNAPI_TEST} PRIVATE NNTEST_ONLY_PUBLIC_API)
--- /dev/null
+GeneratedTests.abs_
+GeneratedTests.abs_dynamic_nnfw
+GeneratedTests.add_dynamic_nnfw
+GeneratedTests.argmax_3_axis_as_input_nnfw
+GeneratedTests.argmax_3_axis_as_input_nnfw_quant8
+GeneratedTests.argmax_dynamic_nnfw
+GeneratedTests.batch_matmul_ex_dynamic_nnfw
+GeneratedTests.batch_matmul_ex_float_adj_x
+GeneratedTests.batch_matmul_ex_float_adj_y
+GeneratedTests.batch_matmul_ex_float_batch2
+GeneratedTests.batch_matmul_ex_float_broadcast
+GeneratedTests.batch_matmul_ex_float_broadcast2_adj_xy
+GeneratedTests.batch_matmul_ex_float_broadcast_adj_x
+GeneratedTests.batch_matmul_ex_float_simple
+GeneratedTests.broadcast_to_ex_1D_nnfw
+GeneratedTests.broadcast_to_ex_2D_nnfw
+GeneratedTests.broadcast_to_ex_dynamic_2D_nnfw
+GeneratedTests.broadcast_to_ex_dynamic_3D_nnfw
+GeneratedTests.cast_dynamic_float32_to_int32_nnfw
+GeneratedTests.cast_float16_to_float16
+GeneratedTests.cast_float16_to_float32
+GeneratedTests.cast_float16_to_float32_relaxed
+GeneratedTests.cast_float16_to_int32
+GeneratedTests.cast_float16_to_quant8
+GeneratedTests.cast_float16_to_quant8_overflow
+GeneratedTests.cast_float32_to_float16
+GeneratedTests.cast_float32_to_float16_relaxed
+GeneratedTests.cast_float32_to_int32_nnfw
+GeneratedTests.cast_int32_to_float16
+GeneratedTests.cast_quant8_to_float16
+GeneratedTests.concat_dynamic_nnfw
+GeneratedTests.conv_dynamic_nnfw
+GeneratedTests.conv_float_channels_weights_as_inputs
+GeneratedTests.conv_float_channels_weights_as_inputs_relaxed
+GeneratedTests.conv_float_large_weights_as_inputs
+GeneratedTests.conv_float_large_weights_as_inputs_relaxed
+GeneratedTests.conv_float_weights_as_inputs
+GeneratedTests.conv_float_weights_as_inputs_relaxed
+GeneratedTests.conv_quant8_channels_weights_as_inputs
+GeneratedTests.conv_quant8_large_weights_as_inputs
+GeneratedTests.conv_quant8_overflow_weights_as_inputs
+GeneratedTests.conv_quant8_weights_as_inputs
+GeneratedTests.conv2d_dilation_nnfw
+GeneratedTests.conv2d_dilation_nnfw_quant8
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
+GeneratedTests.conv2d_dilation_nnfw_2
+GeneratedTests.conv2d_dilation_nnfw_quant8_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
+GeneratedTests.cos_ex_1D_float_nnfw
+GeneratedTests.cos_ex_4D_float_nnfw
+GeneratedTests.cos_ex_dynamic_nnfw
+GeneratedTests.dequantize_v1_2_3d_quant8_symm
+GeneratedTests.dequantize_v1_2_4d_quant8_symm
+GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim
+GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim
+GeneratedTests.dequantize_v1_2
+GeneratedTests.dequantize_v1_2_zero_sized
+GeneratedTests.dequantize_v1_2_zero_sized_float16
+GeneratedTests.div_dynamic_nnfw
+GeneratedTests.einsum_ex_float_matmul_2x2_2
+GeneratedTests.einsum_ex_float_matmul_3x2_3
+GeneratedTests.einsum_ex_float_matmul_3x3_4
+GeneratedTests.einsum_ex_float_matmul_4x4_4
+GeneratedTests.einsum_ex_float_matmul_4x4_4_2
+GeneratedTests.equal_dynamic_float_nnfw
+GeneratedTests.exp_
+GeneratedTests.exp_dynamic_nnfw
+GeneratedTests.expand_dims_dynamic_nnfw_1
+GeneratedTests.expand_dims_dynamic_nnfw_2
+GeneratedTests.fill_ex_1D_float
+GeneratedTests.fill_ex_4D_float
+GeneratedTests.fill_ex_dynamic_nnfw
+GeneratedTests.fully_connected_dynamic_nnfw
+GeneratedTests.fully_connected_float_2_weights_as_inputs
+GeneratedTests.fusedbatchnorm_ex_dynamic_nnfw
+GeneratedTests.fusedbatchnorm_ex_float_fusedbatchnorm_1141
+GeneratedTests.gather_dynamic_nnfw
+GeneratedTests.gather_float16
+GeneratedTests.gather_float16_2
+GeneratedTests.gather_float16_3
+GeneratedTests.gather_float16_4
+GeneratedTests.gather_float16_5
+GeneratedTests.gather_float16_6
+GeneratedTests.gather_float16_7
+GeneratedTests.gather_float16_8
+GeneratedTests.greater_dynamic_float_nnfw
+GeneratedTests.greater_equal_dynamic_float_nnfw
+GeneratedTests.l2_normalization_quant8_nnfw
+GeneratedTests.less_dynamic_float_nnfw
+GeneratedTests.less_equal_dynamic_float_nnfw
+GeneratedTests.log_4D_float_nnfw
+GeneratedTests.log_dynamic_nnfw
+GeneratedTests.log_softmax_nnfw
+GeneratedTests.log_softmax_nnfw_2
+GeneratedTests.log_softmax_nnfw_3
+GeneratedTests.log_softmax_nnfw_4
+GeneratedTests.log_softmax_nnfw_5
+GeneratedTests.log_softmax_nnfw_quant8
+GeneratedTests.logical_not
+GeneratedTests.logical_not_1D_nnfw
+GeneratedTests.logical_not_4D_nnfw
+GeneratedTests.logical_not_dynamic_nnfw
+GeneratedTests.logical_or_broadcast
+GeneratedTests.logical_or_dynamic_nnfw
+GeneratedTests.logistic_dynamic_nnfw
+GeneratedTests.lsh_projection
+GeneratedTests.lsh_projection_2
+GeneratedTests.lsh_projection_weights_as_inputs
+GeneratedTests.lstm
+GeneratedTests.lstm2
+GeneratedTests.lstm2_state
+GeneratedTests.lstm2_state2
+GeneratedTests.lstm3
+GeneratedTests.lstm3_state
+GeneratedTests.lstm3_state2
+GeneratedTests.lstm3_state3
+GeneratedTests.lstm_state
+GeneratedTests.lstm_state2
+GeneratedTests.matrix_band_part_ex_4D_float
+GeneratedTests.matrix_band_part_ex_dynamic_nnfw
+GeneratedTests.maximum_dynamic_nnfw
+GeneratedTests.minimum_dynamic_nnfw
+GeneratedTests.minimum_int32
+GeneratedTests.mul_dynamic_nnfw
+GeneratedTests.neg
+GeneratedTests.neg_dynamic_nnfw
+GeneratedTests.not_equal_dynamic_float_nnfw
+GeneratedTests.one_hot_ex_dynamic_nnfw
+GeneratedTests.pack_ex_dynamic_nnfw
+GeneratedTests.pad_dynamic_nnfw
+GeneratedTests.pad_v2_1_float
+GeneratedTests.pad_v2_1_quant8
+GeneratedTests.pad_v2_all_dims
+GeneratedTests.pad_v2_all_dims_quant8
+GeneratedTests.pad_v2_low_rank
+GeneratedTests.pad_v2_low_rank_quant8
+GeneratedTests.pow_2D_float_nnfw
+GeneratedTests.pow_broadcast_float_nnfw
+GeneratedTests.pow_broadcast_float_nnfw_2
+GeneratedTests.pow_broadcast_float_nnfw_3
+GeneratedTests.pow_dynamic_nnfw
+GeneratedTests.quantize_quant8
+GeneratedTests.quantize_quant8_2
+GeneratedTests.quantize_quant8_3
+GeneratedTests.quantize_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
+GeneratedTests.range_ex_float_1
+GeneratedTests.range_ex_float_1_all_constant_inputs
+GeneratedTests.range_ex_float_1_dynamic_nnfw
+GeneratedTests.range_ex_float_2
+GeneratedTests.range_ex_float_2_dynamic_nnfw
+GeneratedTests.reduce_all
+GeneratedTests.reduce_all_2
+GeneratedTests.reduce_all_2D_nnfw
+GeneratedTests.reduce_all_3
+GeneratedTests.reduce_all_4D_nnfw
+GeneratedTests.reduce_all_dynamic_nnfw
+GeneratedTests.reduce_any
+GeneratedTests.reduce_any_2
+GeneratedTests.reduce_any_2D_nnfw
+GeneratedTests.reduce_any_3
+GeneratedTests.reduce_any_4D_nnfw
+GeneratedTests.reduce_mean_dynamic_1_nnfw
+GeneratedTests.reduce_mean_dynamic_2_nnfw
+GeneratedTests.reduce_min_dynamic_nnfw
+GeneratedTests.reduce_prod
+GeneratedTests.reduce_prod_2
+GeneratedTests.reduce_prod_2D_float_nnfw
+GeneratedTests.reduce_prod_3
+GeneratedTests.reduce_prod_4
+GeneratedTests.reduce_prod_4D_float_nnfw
+GeneratedTests.reduce_prod_4D_float_reducing_C_nnfw
+GeneratedTests.reduce_prod_4D_float_reducing_HW_nnfw
+GeneratedTests.reduce_prod_dynamic_1_nnfw
+GeneratedTests.reduce_prod_dynamic_2_nnfw
+GeneratedTests.reduce_sum_dynamic_1_nnfw
+GeneratedTests.reduce_sum_dynamic_2_nnfw
+GeneratedTests.reshape_dynamic_nnfw
+GeneratedTests.resize_nearest_neighbor_shape_nchw_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8_2
+GeneratedTests.reverse_ex_1d
+GeneratedTests.reverse_ex_3d
+GeneratedTests.reverse_ex_dynamic_1D
+GeneratedTests.reverse_ex_dynamic_3D
+GeneratedTests.rnn
+GeneratedTests.rnn_state
+GeneratedTests.round_ex_1D_float
+GeneratedTests.round_ex_4D_float
+GeneratedTests.round_ex_dynamic_nnfw
+GeneratedTests.rsqrt
+GeneratedTests.rsqrt_dynamic_nnfw
+GeneratedTests.select_v1_2_five_dim
+GeneratedTests.select_v1_2_five_dim_quant8
+GeneratedTests.select_v1_2_one_dim
+GeneratedTests.select_v1_2_one_dim_quant8
+GeneratedTests.select_v1_2_two_dim
+GeneratedTests.select_v1_2_two_dim_quant8
+GeneratedTests.select_v2_ex_broadcast_1d_single_value
+GeneratedTests.select_v2_ex_broadcast_2d_one
+GeneratedTests.select_v2_ex_broadcast_2d_two
+GeneratedTests.select_v2_ex_broadcast_2d_two_dynamic_nnfw
+GeneratedTests.select_v2_ex_broadcast_less_4d
+GeneratedTests.select_v2_ex_float
+GeneratedTests.shape_ex_dynamic_nnfw
+GeneratedTests.sin_1D_float_nnfw
+GeneratedTests.sin_4D_float_nnfw
+GeneratedTests.sin_dynamic_nnfw
+GeneratedTests.slice
+GeneratedTests.slice_2
+GeneratedTests.slice_3
+GeneratedTests.slice_4
+GeneratedTests.slice_5
+GeneratedTests.slice_6
+GeneratedTests.slice_7
+GeneratedTests.slice_8
+GeneratedTests.slice_dynamic_nnfw
+GeneratedTests.slice_zero_sized
+GeneratedTests.slice_zero_sized_quant8
+GeneratedTests.softmax_dynamic_nnfw
+GeneratedTests.space_to_batch_dynamic_float_nnfw
+GeneratedTests.split_dynamic_float_nnfw
+GeneratedTests.split_float_5_axis_as_input_nnfw
+GeneratedTests.split_v_ex_1D_float_1_nnfw
+GeneratedTests.split_v_ex_1D_float_2_nnfw
+GeneratedTests.split_v_ex_1D_int32_nnfw
+GeneratedTests.split_v_ex_4D_float_1_nnfw
+GeneratedTests.split_v_ex_4D_float_2_nnfw
+GeneratedTests.split_v_ex_4D_float_3_nnfw
+GeneratedTests.split_v_ex_4D_float_4_nnfw
+GeneratedTests.split_v_ex_4D_int32_1_nnfw
+GeneratedTests.split_v_ex_4D_int32_2_nnfw
+GeneratedTests.split_v_ex_4D_int32_3_nnfw
+GeneratedTests.split_v_ex_4D_int32_4_nnfw
+GeneratedTests.sqrt_
+GeneratedTests.squared_difference_ex_dynamic_nnfw
+GeneratedTests.squeeze_dynamic_float_nnfw
+GeneratedTests.stateless_random_uniform_ex_nnfw
+GeneratedTests.strided_slice_dynamic_nnfw
+GeneratedTests.sub_dynamic_nnfw
+GeneratedTests.sub_v1_2_zero_sized
+GeneratedTests.sub_v1_2_zero_sized_quant8
+GeneratedTests.svdf
+GeneratedTests.svdf2
+GeneratedTests.svdf_bias_present
+GeneratedTests.svdf_state
+GeneratedTests.tanh_v1_2
+GeneratedTests.tanh_v1_2_zero_sized
+GeneratedTests.tanh_v1_2_zero_sized_quant8
+GeneratedTests.tanh_v1_dynamic_nnfw
+GeneratedTests.tile_1
+GeneratedTests.tile_1_dynamic_float32_nnfw
+GeneratedTests.tile_1_float16
+GeneratedTests.tile_1_quant8
+GeneratedTests.tile_2
+GeneratedTests.tile_2_dynamic_float32_nnfw
+GeneratedTests.tile_2_float16
+GeneratedTests.tile_2_int32
+GeneratedTests.tile_2_quant8
+GeneratedTests.tile_3
+GeneratedTests.tile_3_dynamic_float32_nnfw
+GeneratedTests.tile_3_float16
+GeneratedTests.tile_3_int32
+GeneratedTests.tile_3_quant8
+GeneratedTests.transpose_dynamic_nnfw
+GeneratedTests.transpose_float_1_perms_as_input_nnfw
+GeneratedTests.transpose_v1_2_zero_sized
+GeneratedTests.transpose_v1_2_zero_sized_quant8
+GeneratedTests.unidirectional_sequence_lstm_1step
+GeneratedTests.unidirectional_sequence_lstm_batch_major_norm_peephole_projection
+GeneratedTests.unidirectional_sequence_lstm_batch_major_peephole_projection_bias
+GeneratedTests.unidirectional_sequence_lstm_dynamic_nnfw
+GeneratedTests.unidirectional_sequence_lstm_layer_norm_cifg_peephole
+GeneratedTests.unidirectional_sequence_lstm_norm_peephole_projection
+GeneratedTests.unpack_ex_dynamic_nnfw
+GeneratedTests.zeros_like_ex_2D_float
+GeneratedTests.zeros_like_ex_4D_int32
+GeneratedTests.zeros_like_ex_dynamic_float32
--- /dev/null
+GeneratedTests.abs_
+GeneratedTests.abs_dynamic_nnfw
+GeneratedTests.add_dynamic_nnfw
+GeneratedTests.argmax_3_axis_as_input_nnfw
+GeneratedTests.argmax_3_axis_as_input_nnfw_quant8
+GeneratedTests.argmax_dynamic_nnfw
+GeneratedTests.batch_matmul_ex_dynamic_nnfw
+GeneratedTests.batch_matmul_ex_float_adj_x
+GeneratedTests.batch_matmul_ex_float_adj_y
+GeneratedTests.batch_matmul_ex_float_batch2
+GeneratedTests.batch_matmul_ex_float_broadcast
+GeneratedTests.batch_matmul_ex_float_broadcast2_adj_xy
+GeneratedTests.batch_matmul_ex_float_broadcast_adj_x
+GeneratedTests.batch_matmul_ex_float_simple
+GeneratedTests.broadcast_to_ex_1D_nnfw
+GeneratedTests.broadcast_to_ex_2D_nnfw
+GeneratedTests.broadcast_to_ex_dynamic_2D_nnfw
+GeneratedTests.broadcast_to_ex_dynamic_3D_nnfw
+GeneratedTests.cast_dynamic_float32_to_int32_nnfw
+GeneratedTests.cast_float16_to_float16
+GeneratedTests.cast_float16_to_float32
+GeneratedTests.cast_float16_to_float32_relaxed
+GeneratedTests.cast_float16_to_int32
+GeneratedTests.cast_float16_to_quant8
+GeneratedTests.cast_float16_to_quant8_overflow
+GeneratedTests.cast_float32_to_float16
+GeneratedTests.cast_float32_to_float16_relaxed
+GeneratedTests.cast_int32_to_float16
+GeneratedTests.cast_quant8_to_float16
+GeneratedTests.concat_dynamic_nnfw
+GeneratedTests.conv_dynamic_nnfw
+GeneratedTests.conv_float_channels_weights_as_inputs
+GeneratedTests.conv_float_channels_weights_as_inputs_relaxed
+GeneratedTests.conv_float_large_weights_as_inputs
+GeneratedTests.conv_float_large_weights_as_inputs_relaxed
+GeneratedTests.conv_float_weights_as_inputs
+GeneratedTests.conv_float_weights_as_inputs_relaxed
+GeneratedTests.conv_quant8_channels_weights_as_inputs
+GeneratedTests.conv_quant8_large_weights_as_inputs
+GeneratedTests.conv_quant8_overflow_weights_as_inputs
+GeneratedTests.conv_quant8_weights_as_inputs
+GeneratedTests.conv2d_dilation_nnfw
+GeneratedTests.conv2d_dilation_nnfw_quant8
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
+GeneratedTests.conv2d_dilation_nnfw_2
+GeneratedTests.conv2d_dilation_nnfw_quant8_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
+GeneratedTests.cos_ex_1D_float_nnfw
+GeneratedTests.cos_ex_4D_float_nnfw
+GeneratedTests.cos_ex_dynamic_nnfw
+GeneratedTests.dequantize_v1_2_3d_quant8_symm
+GeneratedTests.dequantize_v1_2_4d_quant8_symm
+GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim
+GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim
+GeneratedTests.dequantize_v1_2
+GeneratedTests.dequantize_v1_2_zero_sized
+GeneratedTests.dequantize_v1_2_zero_sized_float16
+GeneratedTests.div_dynamic_nnfw
+GeneratedTests.einsum_ex_float_matmul_2x2_2
+GeneratedTests.einsum_ex_float_matmul_3x2_3
+GeneratedTests.einsum_ex_float_matmul_3x3_4
+GeneratedTests.einsum_ex_float_matmul_4x4_4
+GeneratedTests.einsum_ex_float_matmul_4x4_4_2
+GeneratedTests.equal_boolean
+GeneratedTests.equal_dynamic_float_nnfw
+GeneratedTests.exp_
+GeneratedTests.exp_2D_float_nnfw
+GeneratedTests.exp_dynamic_nnfw
+GeneratedTests.expand_dims_dynamic_nnfw_1
+GeneratedTests.expand_dims_dynamic_nnfw_2
+GeneratedTests.fill_ex_1D_float
+GeneratedTests.fill_ex_4D_float
+GeneratedTests.fill_ex_dynamic_nnfw
+GeneratedTests.fully_connected_dynamic_nnfw
+GeneratedTests.fully_connected_hybrid_1_nnfw
+GeneratedTests.fusedbatchnorm_ex_dynamic_nnfw
+GeneratedTests.fusedbatchnorm_ex_float_fusedbatchnorm_1141
+GeneratedTests.gather_dynamic_nnfw
+GeneratedTests.gather_float16
+GeneratedTests.gather_float16_2
+GeneratedTests.gather_float16_3
+GeneratedTests.gather_float16_4
+GeneratedTests.gather_float16_5
+GeneratedTests.gather_float16_6
+GeneratedTests.gather_float16_7
+GeneratedTests.gather_float16_8
+GeneratedTests.greater_dynamic_float_nnfw
+GeneratedTests.greater_equal_boolean
+GeneratedTests.greater_equal_dynamic_float_nnfw
+GeneratedTests.l2_normalization_quant8_nnfw
+GeneratedTests.less_boolean
+GeneratedTests.less_dynamic_float_nnfw
+GeneratedTests.less_equal_dynamic_float_nnfw
+GeneratedTests.log_4D_float_nnfw
+GeneratedTests.log_dynamic_nnfw
+GeneratedTests.log_softmax_nnfw
+GeneratedTests.log_softmax_nnfw_2
+GeneratedTests.log_softmax_nnfw_3
+GeneratedTests.log_softmax_nnfw_4
+GeneratedTests.log_softmax_nnfw_5
+GeneratedTests.log_softmax_nnfw_quant8
+GeneratedTests.logical_not
+GeneratedTests.logical_not_1D_nnfw
+GeneratedTests.logical_not_4D_nnfw
+GeneratedTests.logical_not_dynamic_nnfw
+GeneratedTests.logical_or_dynamic_nnfw
+GeneratedTests.logistic_dynamic_nnfw
+GeneratedTests.lsh_projection
+GeneratedTests.lsh_projection_2
+GeneratedTests.lsh_projection_weights_as_inputs
+GeneratedTests.lstm
+GeneratedTests.lstm2
+GeneratedTests.lstm2_state
+GeneratedTests.lstm2_state2
+GeneratedTests.lstm3
+GeneratedTests.lstm3_state
+GeneratedTests.lstm3_state2
+GeneratedTests.lstm3_state3
+GeneratedTests.lstm_state
+GeneratedTests.lstm_state2
+GeneratedTests.matrix_band_part_ex_4D_float
+GeneratedTests.matrix_band_part_ex_dynamic_nnfw
+GeneratedTests.maximum_dynamic_nnfw
+GeneratedTests.minimum_dynamic_nnfw
+GeneratedTests.mul_dynamic_nnfw
+GeneratedTests.neg
+GeneratedTests.neg_dynamic_nnfw
+GeneratedTests.not_equal_boolean
+GeneratedTests.not_equal_dynamic_float_nnfw
+GeneratedTests.one_hot_ex_dynamic_nnfw
+GeneratedTests.pack_ex_dynamic_nnfw
+GeneratedTests.pad_dynamic_nnfw
+GeneratedTests.pad_v2_1_float
+GeneratedTests.pad_v2_1_quant8
+GeneratedTests.pad_v2_all_dims
+GeneratedTests.pad_v2_all_dims_quant8
+GeneratedTests.pad_v2_low_rank
+GeneratedTests.pad_v2_low_rank_quant8
+GeneratedTests.pow_2D_float_nnfw
+GeneratedTests.pow_broadcast_float_nnfw
+GeneratedTests.pow_broadcast_float_nnfw_2
+GeneratedTests.pow_broadcast_float_nnfw_3
+GeneratedTests.pow_dynamic_nnfw
+GeneratedTests.quantize_quant8
+GeneratedTests.quantize_quant8_2
+GeneratedTests.quantize_quant8_3
+GeneratedTests.quantize_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
+GeneratedTests.range_ex_float_1
+GeneratedTests.range_ex_float_1_all_constant_inputs
+GeneratedTests.range_ex_float_1_dynamic_nnfw
+GeneratedTests.range_ex_float_2
+GeneratedTests.range_ex_float_2_dynamic_nnfw
+GeneratedTests.reduce_all
+GeneratedTests.reduce_all_2
+GeneratedTests.reduce_all_2D_nnfw
+GeneratedTests.reduce_all_3
+GeneratedTests.reduce_all_4D_nnfw
+GeneratedTests.reduce_all_dynamic_nnfw
+GeneratedTests.reduce_any
+GeneratedTests.reduce_any_2
+GeneratedTests.reduce_any_2D_nnfw
+GeneratedTests.reduce_any_3
+GeneratedTests.reduce_any_4D_nnfw
+GeneratedTests.reduce_max_2D_int32_nnfw
+GeneratedTests.reduce_max_quant8
+GeneratedTests.reduce_mean_dynamic_1_nnfw
+GeneratedTests.reduce_mean_dynamic_2_nnfw
+GeneratedTests.reduce_min_dynamic_nnfw
+GeneratedTests.reduce_prod
+GeneratedTests.reduce_prod_2
+GeneratedTests.reduce_prod_2D_float_nnfw
+GeneratedTests.reduce_prod_3
+GeneratedTests.reduce_prod_4
+GeneratedTests.reduce_prod_4D_float_nnfw
+GeneratedTests.reduce_prod_4D_float_reducing_C_nnfw
+GeneratedTests.reduce_prod_4D_float_reducing_HW_nnfw
+GeneratedTests.reduce_prod_dynamic_1_nnfw
+GeneratedTests.reduce_prod_dynamic_2_nnfw
+GeneratedTests.reduce_sum_dynamic_1_nnfw
+GeneratedTests.reduce_sum_dynamic_2_nnfw
+GeneratedTests.reshape_dynamic_nnfw
+GeneratedTests.resize_nearest_neighbor_shape_nhwc
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_shape_nchw
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_scale_nchw
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_2
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_3
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_3
+GeneratedTests.resize_nearest_neighbor_shape_nchw_3
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_3
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_3
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_3
+GeneratedTests.resize_nearest_neighbor_scale_nchw_3
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_3
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_4
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_4
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_5
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_5
+GeneratedTests.resize_nearest_neighbor_shape_nchw_5
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_5
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_5
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_5
+GeneratedTests.resize_nearest_neighbor_scale_nchw_5
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_5
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_6
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_6
+GeneratedTests.resize_nearest_neighbor_shape_nchw_6
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_6
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_6
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_6
+GeneratedTests.resize_nearest_neighbor_scale_nchw_6
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_6
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_7
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_7
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_8
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8_2
+GeneratedTests.reverse_ex_1d
+GeneratedTests.reverse_ex_3d
+GeneratedTests.reverse_ex_dynamic_1D
+GeneratedTests.reverse_ex_dynamic_3D
+GeneratedTests.rnn
+GeneratedTests.rnn_state
+GeneratedTests.round_ex_1D_float
+GeneratedTests.round_ex_4D_float
+GeneratedTests.round_ex_dynamic_nnfw
+GeneratedTests.rsqrt
+GeneratedTests.rsqrt_dynamic_nnfw
+GeneratedTests.select_v1_2_five_dim
+GeneratedTests.select_v1_2_five_dim_quant8
+GeneratedTests.select_v1_2_one_dim
+GeneratedTests.select_v1_2_one_dim_quant8
+GeneratedTests.select_v1_2_two_dim
+GeneratedTests.select_v1_2_two_dim_quant8
+GeneratedTests.select_v2_ex_broadcast_1d_single_value
+GeneratedTests.select_v2_ex_broadcast_2d_one
+GeneratedTests.select_v2_ex_broadcast_2d_two
+GeneratedTests.select_v2_ex_broadcast_2d_two_dynamic_nnfw
+GeneratedTests.select_v2_ex_broadcast_less_4d
+GeneratedTests.select_v2_ex_float
+GeneratedTests.shape_ex_dynamic_nnfw
+GeneratedTests.sin_1D_float_nnfw
+GeneratedTests.sin_4D_float_nnfw
+GeneratedTests.sin_dynamic_nnfw
+GeneratedTests.slice
+GeneratedTests.slice_2
+GeneratedTests.slice_3
+GeneratedTests.slice_4
+GeneratedTests.slice_5
+GeneratedTests.slice_6
+GeneratedTests.slice_7
+GeneratedTests.slice_8
+GeneratedTests.slice_dynamic_nnfw
+GeneratedTests.slice_zero_sized
+GeneratedTests.slice_zero_sized_quant8
+GeneratedTests.softmax_dynamic_nnfw
+GeneratedTests.space_to_batch_float_1_nnfw
+GeneratedTests.space_to_batch_float_2
+GeneratedTests.space_to_batch_float_3
+GeneratedTests.space_to_batch_dynamic_float_nnfw
+GeneratedTests.space_to_batch_quant8_1_nnfw
+GeneratedTests.space_to_batch_quant8_2
+GeneratedTests.space_to_batch_quant8_2_nnfw
+GeneratedTests.space_to_batch_quant8_3
+GeneratedTests.split_dynamic_float_nnfw
+GeneratedTests.split_float_5_axis_as_input_nnfw
+GeneratedTests.split_v_ex_1D_float_1_nnfw
+GeneratedTests.split_v_ex_1D_float_2_nnfw
+GeneratedTests.split_v_ex_1D_int32_nnfw
+GeneratedTests.split_v_ex_4D_float_1_nnfw
+GeneratedTests.split_v_ex_4D_float_2_nnfw
+GeneratedTests.split_v_ex_4D_float_3_nnfw
+GeneratedTests.split_v_ex_4D_float_4_nnfw
+GeneratedTests.split_v_ex_4D_int32_1_nnfw
+GeneratedTests.split_v_ex_4D_int32_2_nnfw
+GeneratedTests.split_v_ex_4D_int32_3_nnfw
+GeneratedTests.split_v_ex_4D_int32_4_nnfw
+GeneratedTests.sqrt_
+GeneratedTests.squared_difference_ex_dynamic_nnfw
+GeneratedTests.squeeze_dynamic_float_nnfw
+GeneratedTests.stateless_random_uniform_ex_nnfw
+GeneratedTests.strided_slice_dynamic_nnfw
+GeneratedTests.sub_dynamic_nnfw
+GeneratedTests.sub_v1_2_zero_sized
+GeneratedTests.sub_v1_2_zero_sized_quant8
+GeneratedTests.svdf
+GeneratedTests.svdf2
+GeneratedTests.svdf_bias_present
+GeneratedTests.svdf_state
+GeneratedTests.tanh_v1_2
+GeneratedTests.tanh_v1_2_zero_sized
+GeneratedTests.tanh_v1_2_zero_sized_quant8
+GeneratedTests.tanh_v1_dynamic_nnfw
+GeneratedTests.tile_1
+GeneratedTests.tile_1_dynamic_float32_nnfw
+GeneratedTests.tile_1_float16
+GeneratedTests.tile_1_quant8
+GeneratedTests.tile_2
+GeneratedTests.tile_2_dynamic_float32_nnfw
+GeneratedTests.tile_2_float16
+GeneratedTests.tile_2_int32
+GeneratedTests.tile_2_quant8
+GeneratedTests.tile_3
+GeneratedTests.tile_3_dynamic_float32_nnfw
+GeneratedTests.tile_3_float16
+GeneratedTests.tile_3_int32
+GeneratedTests.tile_3_quant8
+GeneratedTests.topk_v2
+GeneratedTests.topk_v2_1D_float_nnfw
+GeneratedTests.topk_v2_1D_int32_nnfw
+GeneratedTests.topk_v2_1D_quant8_nnfw
+GeneratedTests.topk_v2_2
+GeneratedTests.topk_v2_2D_float_nnfw
+GeneratedTests.topk_v2_2D_int32_nnfw
+GeneratedTests.topk_v2_2D_quant8_nnfw
+GeneratedTests.topk_v2_3
+GeneratedTests.topk_v2_4
+GeneratedTests.topk_v2_5
+GeneratedTests.topk_v2_6
+GeneratedTests.transpose_dynamic_nnfw
+GeneratedTests.transpose_float_1_perms_as_input_nnfw
+GeneratedTests.transpose_v1_2_zero_sized
+GeneratedTests.transpose_v1_2_zero_sized_quant8
+GeneratedTests.unidirectional_sequence_lstm_1step
+GeneratedTests.unidirectional_sequence_lstm_batch_major_norm_peephole_projection
+GeneratedTests.unidirectional_sequence_lstm_batch_major_peephole_projection_bias
+GeneratedTests.unidirectional_sequence_lstm_dynamic_nnfw
+GeneratedTests.unidirectional_sequence_lstm_layer_norm_cifg_peephole
+GeneratedTests.unidirectional_sequence_lstm_norm_peephole_projection
+GeneratedTests.unpack_ex_dynamic_nnfw
+GeneratedTests.zeros_like_ex_2D_float
+GeneratedTests.zeros_like_ex_4D_int32
+GeneratedTests.zeros_like_ex_dynamic_float32
--- /dev/null
+GeneratedTests.abs_
+GeneratedTests.cast_float16_to_float16
+GeneratedTests.cast_float16_to_float32
+GeneratedTests.cast_float16_to_float32_relaxed
+GeneratedTests.cast_float16_to_int32
+GeneratedTests.cast_float16_to_quant8
+GeneratedTests.cast_float16_to_quant8_overflow
+GeneratedTests.cast_float32_to_float16
+GeneratedTests.cast_float32_to_float16_relaxed
+GeneratedTests.cast_float32_to_quant8_overflow
+GeneratedTests.cast_float32_to_quant8_overflow_relaxed
+GeneratedTests.cast_int32_to_float16
+GeneratedTests.cast_int32_to_quant8_overflow
+GeneratedTests.cast_quant8_to_float16
+GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim
+GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim
+GeneratedTests.dequantize_v1_2
+GeneratedTests.dequantize_v1_2_zero_sized
+GeneratedTests.dequantize_v1_2_zero_sized_float16
+GeneratedTests.embedding_lookup
+GeneratedTests.embedding_lookup_2d_nnfw
+GeneratedTests.embedding_lookup_4d_nnfw
+GeneratedTests.equal_broadcast_float_nnfw
+GeneratedTests.exp_
+GeneratedTests.floor_
+GeneratedTests.gather_float16
+GeneratedTests.gather_float16_2
+GeneratedTests.gather_float16_3
+GeneratedTests.gather_float16_4
+GeneratedTests.gather_float16_5
+GeneratedTests.gather_float16_6
+GeneratedTests.gather_float16_7
+GeneratedTests.gather_float16_8
+GeneratedTests.hashtable_lookup_float
+GeneratedTests.hashtable_lookup_float_4D_nnfw
+GeneratedTests.hashtable_lookup_quant8
+GeneratedTests.l2_pool_float
+GeneratedTests.l2_pool_float_2
+GeneratedTests.l2_pool_float_large
+GeneratedTests.local_response_norm_float_1
+GeneratedTests.local_response_norm_float_2
+GeneratedTests.local_response_norm_float_3
+GeneratedTests.local_response_norm_float_4
+GeneratedTests.logical_not
+GeneratedTests.lsh_projection
+GeneratedTests.lsh_projection_2
+GeneratedTests.lsh_projection_weights_as_inputs
+GeneratedTests.lstm2
+GeneratedTests.lstm2_state
+GeneratedTests.lstm2_state2
+GeneratedTests.maximum_broadcast_quant8
+GeneratedTests.maximum_overflow
+GeneratedTests.maximum_simple_quant8
+GeneratedTests.minimum_broadcast_quant8
+GeneratedTests.minimum_overflow
+GeneratedTests.minimum_simple_quant8
+GeneratedTests.neg
+GeneratedTests.neg_3D_int_nnfw
+GeneratedTests.neg_4D_int_nnfw
+GeneratedTests.prelu
+GeneratedTests.prelu_broadcast_float_1_nnfw
+GeneratedTests.prelu_broadcast_quant8_1_nnfw
+GeneratedTests.prelu_float_1_nnfw
+GeneratedTests.prelu_quant8
+GeneratedTests.prelu_quant8_1_nnfw
+GeneratedTests.prelu_quant8_2
+GeneratedTests.prelu_quant8_3
+GeneratedTests.prelu_quant8_4
+GeneratedTests.prelu_weight_as_input
+GeneratedTests.prelu_weight_as_input_quant8
+GeneratedTests.prelu_weight_as_input_quant8_2
+GeneratedTests.prelu_weight_as_input_quant8_3
+GeneratedTests.prelu_weight_as_input_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
+GeneratedTests.reduce_max_quant8
+GeneratedTests.reduce_max_quant8_1_nnfw
+GeneratedTests.reduce_max_quant8_2
+GeneratedTests.reduce_max_quant8_2_nnfw
+GeneratedTests.reduce_max_quant8_3
+GeneratedTests.reduce_max_quant8_4
+GeneratedTests.reduce_min_quant8
+GeneratedTests.reduce_min_quant8_2
+GeneratedTests.reduce_min_quant8_3
+GeneratedTests.reduce_min_quant8_4
+GeneratedTests.relu1_float_1
+GeneratedTests.relu1_float_2
+GeneratedTests.relu1_quant8_1
+GeneratedTests.relu1_quant8_2
+GeneratedTests.relu6_quant8_1
+GeneratedTests.relu6_quant8_2
+GeneratedTests.relu_quant8_1
+GeneratedTests.relu_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_shape_nchw
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_scale_nchw
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_2
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_2
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_2
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_2
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_3
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_3
+GeneratedTests.resize_nearest_neighbor_shape_nchw_3
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_3
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_3
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_3
+GeneratedTests.resize_nearest_neighbor_scale_nchw_3
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_3
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_4
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_4
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_4
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_4
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_4
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_5
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_5
+GeneratedTests.resize_nearest_neighbor_shape_nchw_5
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_5
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_5
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_5
+GeneratedTests.resize_nearest_neighbor_scale_nchw_5
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_5
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_6
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_6
+GeneratedTests.resize_nearest_neighbor_shape_nchw_6
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_6
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_6
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_6
+GeneratedTests.resize_nearest_neighbor_scale_nchw_6
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_6
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_7
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_7
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_7
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_7
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_7
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_8
+GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_8
+GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_8
+GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_8
+GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_2
+GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8_2
+GeneratedTests.rnn
+GeneratedTests.rnn_state
+GeneratedTests.rsqrt
+GeneratedTests.select_v1_2_five_dim
+GeneratedTests.select_v1_2_five_dim_quant8
+GeneratedTests.select_v1_2_one_dim_quant8
+GeneratedTests.select_v1_2_two_dim_quant8
+GeneratedTests.slice_5
+GeneratedTests.slice_6
+GeneratedTests.slice_8
+GeneratedTests.slice_zero_sized
+GeneratedTests.slice_zero_sized_quant8
+GeneratedTests.sqrt_
+GeneratedTests.sqrt_1D_float_nnfw
+GeneratedTests.sqrt_2D_float_nnfw
+GeneratedTests.sqrt_3D_float_nnfw
+GeneratedTests.sqrt_4D_float_nnfw
+GeneratedTests.strided_slice_qaunt8_10
+GeneratedTests.strided_slice_qaunt8_11
+GeneratedTests.strided_slice_quant8_1
+GeneratedTests.strided_slice_quant8_2
+GeneratedTests.strided_slice_quant8_3
+GeneratedTests.strided_slice_quant8_4
+GeneratedTests.strided_slice_quant8_5
+GeneratedTests.strided_slice_quant8_6
+GeneratedTests.strided_slice_quant8_7
+GeneratedTests.strided_slice_quant8_8
+GeneratedTests.strided_slice_quant8_9
+GeneratedTests.sub_v1_2_zero_sized
+GeneratedTests.sub_v1_2_zero_sized_quant8
+GeneratedTests.svdf
+GeneratedTests.svdf2
+GeneratedTests.svdf_bias_present
+GeneratedTests.svdf_state
+GeneratedTests.tanh_v1_2
+GeneratedTests.tanh_v1_2_zero_sized
+GeneratedTests.tanh_v1_2_zero_sized_quant8
+GeneratedTests.tile_1_float16
+GeneratedTests.tile_1_quant8
+GeneratedTests.tile_2_float16
+GeneratedTests.tile_2_int32
+GeneratedTests.tile_2_quant8
+GeneratedTests.tile_3_float16
+GeneratedTests.tile_3_int32
+GeneratedTests.tile_3_quant8
+GeneratedTests.topk_v2
+GeneratedTests.topk_v2_1D_float_nnfw
+GeneratedTests.topk_v2_1D_int32_nnfw
+GeneratedTests.topk_v2_1D_quant8_nnfw
+GeneratedTests.topk_v2_2
+GeneratedTests.topk_v2_2D_float_nnfw
+GeneratedTests.topk_v2_2D_int32_nnfw
+GeneratedTests.topk_v2_2D_quant8_nnfw
+GeneratedTests.topk_v2_3
+GeneratedTests.topk_v2_4
+GeneratedTests.topk_v2_5
+GeneratedTests.topk_v2_6
+GeneratedTests.transpose_conv_ex_float_1
+GeneratedTests.transpose_conv_ex_float_2
+GeneratedTests.transpose_conv_ex_float_3
+GeneratedTests.transpose_conv_ex_float_4
+GeneratedTests.transpose_v1_2_zero_sized
+GeneratedTests.transpose_v1_2_zero_sized_quant8
GeneratedTests.cast_int32_to_float16
GeneratedTests.cast_int32_to_quant8_overflow
GeneratedTests.cast_quant8_to_float16
-GeneratedTests.depth_to_space_float_1
-GeneratedTests.depth_to_space_float_2
-GeneratedTests.depth_to_space_float_3
-GeneratedTests.depth_to_space_quant8_1
-GeneratedTests.depth_to_space_quant8_2
GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim
GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim
GeneratedTests.dequantize_v1_2
GeneratedTests.local_response_norm_float_2
GeneratedTests.local_response_norm_float_3
GeneratedTests.local_response_norm_float_4
-GeneratedTests.logical_and_1D_nnfw
-GeneratedTests.logical_and_2D_nnfw
-GeneratedTests.logical_and_3D_nnfw
-GeneratedTests.logical_and_4D_nnfw
-GeneratedTests.logical_and_broadcast
-GeneratedTests.logical_and_broadcast_4D_2D_nnfw
-GeneratedTests.logical_and_broadcast_nnfw
-GeneratedTests.logical_and_simple
GeneratedTests.logical_not
GeneratedTests.lsh_projection
GeneratedTests.lsh_projection_2
GeneratedTests.cast_int32_to_float16
GeneratedTests.cast_int32_to_quant8_overflow
GeneratedTests.cast_quant8_to_float16
-GeneratedTests.depth_to_space_float_1
-GeneratedTests.depth_to_space_float_2
-GeneratedTests.depth_to_space_float_3
-GeneratedTests.depth_to_space_quant8_1
-GeneratedTests.depth_to_space_quant8_2
GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim
GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim
GeneratedTests.dequantize_v1_2
GeneratedTests.local_response_norm_float_2
GeneratedTests.local_response_norm_float_3
GeneratedTests.local_response_norm_float_4
-GeneratedTests.logical_and_1D_nnfw
-GeneratedTests.logical_and_2D_nnfw
-GeneratedTests.logical_and_3D_nnfw
-GeneratedTests.logical_and_4D_nnfw
-GeneratedTests.logical_and_broadcast
-GeneratedTests.logical_and_broadcast_4D_2D_nnfw
-GeneratedTests.logical_and_broadcast_nnfw
-GeneratedTests.logical_and_simple
GeneratedTests.logical_not
GeneratedTests.lsh_projection
GeneratedTests.lsh_projection_2
GeneratedTests.argmax_neg_axis_int32_nnfw
GeneratedTests.argmax_quant8_neg_axis_nnfw
GeneratedTests.argmax_quant8_nnfw
+GeneratedTests.argmin_1
+GeneratedTests.argmin_1_quant8
+GeneratedTests.argmin_2
+GeneratedTests.argmin_2_quant8
+GeneratedTests.argmin_3
+GeneratedTests.argmin_3_quant8
GeneratedTests.avg_pool_quant8_1
GeneratedTests.avg_pool_quant8_2
GeneratedTests.avg_pool_quant8_3
GeneratedTests.cast_int32_to_float16
GeneratedTests.cast_int32_to_quant8_overflow
GeneratedTests.cast_quant8_to_float16
-GeneratedTests.depth_to_space_float_1
-GeneratedTests.depth_to_space_float_2
-GeneratedTests.depth_to_space_float_3
-GeneratedTests.depth_to_space_quant8_1
-GeneratedTests.depth_to_space_quant8_2
GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim
GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim
GeneratedTests.dequantize_v1_2
GeneratedTests.local_response_norm_float_2
GeneratedTests.local_response_norm_float_3
GeneratedTests.local_response_norm_float_4
-GeneratedTests.logical_and_1D_nnfw
-GeneratedTests.logical_and_2D_nnfw
-GeneratedTests.logical_and_3D_nnfw
-GeneratedTests.logical_and_4D_nnfw
-GeneratedTests.logical_and_broadcast
-GeneratedTests.logical_and_broadcast_4D_2D_nnfw
-GeneratedTests.logical_and_broadcast_nnfw
-GeneratedTests.logical_and_simple
GeneratedTests.logical_not
GeneratedTests.lsh_projection
GeneratedTests.lsh_projection_2
mOldComputeMode = Execution::setComputeMode(GetParam());
#endif
// Fix for onert: Fix file path for linux
+#ifndef __ANDROID__
char cacheDirTemp[] = "/tmp/TestCompilationCachingXXXXXX";
- //char cacheDirTemp[] = "/data/local/tmp/TestCompilationCachingXXXXXX";
+#else
+ char cacheDirTemp[] = "/data/local/tmp/TestCompilationCachingXXXXXX";
+#endif
char* cacheDir = mkdtemp(cacheDirTemp);
ASSERT_NE(cacheDir, nullptr);
mCacheDir = cacheDir;
// This file tests all the validations done by the Neural Networks API.
namespace {
+#ifndef PATH_MAX
#define PATH_MAX 256
+#endif
static int shmem_num = 0;
static int shmem_create_region(size_t size)
{
char temp[PATH_MAX];
+#ifndef __ANDROID__
snprintf(temp, sizeof(temp), "/tmp/nn-shmem-%d-%d-XXXXXXXXX", getpid(), shmem_num++);
+#else
+ snprintf(temp, sizeof(temp), "/data/local/tmp/nn-shmem-%d-%d-XXXXXXXXX", getpid(), shmem_num++);
+#endif
// Set umask and restore it after generating the temporary file to avoid a security issue
mode_t umaskPrev = umask(S_IRUSR|S_IWUSR);
target_compile_definitions(${RUNTIME_NNFW_API_TEST} PRIVATE TEST_ACL_BACKEND)
endif(ARMCompute_FOUND)
+nnfw_find_package(Xnnpack QUIET)
+if(Xnnpack_FOUND)
+ target_compile_definitions(${RUNTIME_NNFW_API_TEST} PRIVATE TEST_XNNPACK_BACKEND)
+endif(Xnnpack_FOUND)
+
set(RUNTIME_NNFW_API_TEST_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/include
${CMAKE_CURRENT_SOURCE_DIR}/src)
target_include_directories(${RUNTIME_NNFW_API_TEST} PRIVATE ${RUNTIME_NNFW_API_TEST_INCLUDE})
for (auto &ctx : _subgraph_contexts)
subgraphs.push_back(buildSubGraph(ctx));
auto model =
- circle::CreateModelDirect(_fbb, 3, &_opcodes, &subgraphs, "CircleGen generated", &_buffers);
+ circle::CreateModelDirect(_fbb, 3, &_opcodes, &subgraphs, "CircleGen generated", &_buffers);
_fbb.Finish(model);
return CircleBuffer{std::move(_fbb)};
}
circle::BuiltinOptions_ArgMaxOptions, options);
}
+uint32_t CircleGen::addOperatorArgMin(const OperatorParams ¶ms, circle::TensorType output_type)
+{
+ auto options = circle::CreateArgMinOptions(_fbb, output_type).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_ARG_MIN,
+ circle::BuiltinOptions_ArgMinOptions, options);
+}
+
uint32_t CircleGen::addOperatorAveragePool2D(const OperatorParams ¶ms, circle::Padding padding,
int stride_w, int stride_h, int filter_w, int filter_h,
circle::ActivationFunctionType actfn)
{
auto options =
- circle::CreatePool2DOptions(_fbb, padding, stride_w, stride_h, filter_w, filter_h, actfn)
- .Union();
+ circle::CreatePool2DOptions(_fbb, padding, stride_w, stride_h, filter_w, filter_h, actfn)
+ .Union();
return addOperatorWithOptions(params, circle::BuiltinOperator_AVERAGE_POOL_2D,
circle::BuiltinOptions_Pool2DOptions, options);
}
circle::BuiltinOptions_ConcatenationOptions, options);
}
+uint32_t CircleGen::addOperatorConv2D(const OperatorParams ¶ms, circle::Padding padding,
+ int stride_w, int stride_h,
+ circle::ActivationFunctionType actfn, int dilation_w,
+ int dilation_h)
+{
+ auto options =
+ circle::CreateConv2DOptions(_fbb, padding, stride_w, stride_h, actfn, dilation_w, dilation_h)
+ .Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_CONV_2D,
+ circle::BuiltinOptions_Conv2DOptions, options);
+}
+
uint32_t CircleGen::addOperatorCos(const OperatorParams ¶ms)
{
auto options = circle::CreateCosOptions(_fbb).Union();
circle::BuiltinOptions_CosOptions, options);
}
+uint32_t CircleGen::addOperatorDepthToSpace(const OperatorParams ¶ms, int32_t block_size)
+{
+ auto options = circle::CreateDepthToSpaceOptions(_fbb, block_size).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_DEPTH_TO_SPACE,
+ circle::BuiltinOptions_DepthToSpaceOptions, options);
+}
+
uint32_t CircleGen::addOperatorDepthwiseConv2D(const OperatorParams ¶ms,
circle::Padding padding, int stride_w, int stride_h,
int depth_multiplier,
int dilation_h)
{
auto options =
- circle::CreateDepthwiseConv2DOptions(_fbb, padding, stride_w, stride_h, depth_multiplier,
- actfn, dilation_w, dilation_h)
- .Union();
+ circle::CreateDepthwiseConv2DOptions(_fbb, padding, stride_w, stride_h, depth_multiplier, actfn,
+ dilation_w, dilation_h)
+ .Union();
return addOperatorWithOptions(params, circle::BuiltinOperator_DEPTHWISE_CONV_2D,
circle::BuiltinOptions_DepthwiseConv2DOptions, options);
}
+uint32_t CircleGen::addOperatorElu(const OperatorParams ¶ms)
+{
+ return addOperatorWithOptions(params, circle::BuiltinOperator_ELU, circle::BuiltinOptions_NONE,
+ 0);
+}
+
uint32_t CircleGen::addOperatorEqual(const OperatorParams ¶ms)
{
auto options = circle::CreateEqualOptions(_fbb).Union();
circle::BuiltinOptions_EqualOptions, options);
}
+uint32_t CircleGen::addOperatorExpandDims(const OperatorParams ¶ms)
+{
+ auto options = circle::CreateExpandDimsOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_EXPAND_DIMS,
+ circle::BuiltinOptions_ExpandDimsOptions, options);
+}
+
uint32_t
CircleGen::addOperatorFullyConnected(const OperatorParams ¶ms,
circle::FullyConnectedOptionsWeightsFormat weights_format)
{
auto options =
- circle::CreateFullyConnectedOptions(_fbb, circle::ActivationFunctionType_NONE, weights_format)
- .Union();
+ circle::CreateFullyConnectedOptions(_fbb, circle::ActivationFunctionType_NONE, weights_format)
+ .Union();
return addOperatorWithOptions(params, circle::BuiltinOperator_FULLY_CONNECTED,
circle::BuiltinOptions_FullyConnectedOptions, options);
}
circle::BuiltinOptions_LogSoftmaxOptions, options);
}
+uint32_t CircleGen::addOperatorMean(const OperatorParams ¶ms, bool keep_dims)
+{
+ auto options = circle::CreateReducerOptions(_fbb, keep_dims).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_MEAN,
+ circle::BuiltinOptions_ReducerOptions, options);
+}
+
uint32_t CircleGen::addOperatorNeg(const OperatorParams ¶ms)
{
auto options = circle::CreatePadOptions(_fbb).Union();
bool half_pixel_centers)
{
auto options =
- circle::CreateResizeBilinearOptions(_fbb, align_corners, half_pixel_centers).Union();
+ circle::CreateResizeBilinearOptions(_fbb, align_corners, half_pixel_centers).Union();
return addOperatorWithOptions(params, circle::BuiltinOperator_RESIZE_BILINEAR,
circle::BuiltinOptions_ResizeBilinearOptions, options);
}
{
auto options = circle::CreateStridedSliceOptions(_fbb, begin_mask, end_mask, ellipsis_mask,
new_axis_mask, shrink_axis_mask)
- .Union();
+ .Union();
return addOperatorWithOptions(params, circle::BuiltinOperator_STRIDED_SLICE,
circle::BuiltinOptions_StridedSliceOptions, options);
}
circle::BuiltinOptions_TransposeOptions, options);
}
+uint32_t CircleGen::addOperatorSqrt(const OperatorParams &params)
+{
+ return addOperatorWithOptions(params, circle::BuiltinOperator_SQRT, circle::BuiltinOptions_NONE,
+ 0);
+}
+
+uint32_t CircleGen::addOperatorSquare(const OperatorParams &params)
+{
+ auto options = circle::CreateSquareOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_SQUARE,
+ circle::BuiltinOptions_SquareOptions, options);
+}
+
// NOTE Please add addOperator functions ABOVE this line
//
// % How to add a new addOperatorXXX function
// 2. Change enum BuiltinOperator
// 3. Change enum BuiltinOptions
// 4. Change CreateXXXOptions accordingly
+//
+// If the operator does not have an options table, remove the CreateXXXOptions call and
+// call addOperatorWithOptions with options_type = circle::BuiltinOptions_NONE and options = 0.
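+//
+// A minimal sketch of that case (XXX is a placeholder operator name, not a real builtin;
+// it mirrors addOperatorSqrt/addOperatorElu above):
+//
+//   uint32_t CircleGen::addOperatorXXX(const OperatorParams &params)
+//   {
+//     return addOperatorWithOptions(params, circle::BuiltinOperator_XXX,
+//                                   circle::BuiltinOptions_NONE, 0);
+//   }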
// ===== Add Operator methods end =====
flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order;
flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map;
flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
- dim_metadata;
+ dim_metadata;
traversal_order = _fbb.CreateVector(sp.traversal_order);
block_map = _fbb.CreateVector(sp.block_map);
auto fb_array_segments = circle::CreateUint16VectorDirect(_fbb, &it._array_segments.u16);
auto fb_array_indices = circle::CreateUint16VectorDirect(_fbb, &it._array_indices.u16);
auto dim_metadata = circle::CreateDimensionMetadata(
- _fbb, it._format, it._dense_size, it._array_segments_type, fb_array_segments.Union(),
- it._array_indices_type, fb_array_indices.Union());
+ _fbb, it._format, it._dense_size, it._array_segments_type, fb_array_segments.Union(),
+ it._array_indices_type, fb_array_indices.Union());
dim_metadata_vec.emplace_back(dim_metadata);
}
dim_metadata = _fbb.CreateVector(dim_metadata_vec);
DimMetaData() = delete;
DimMetaData(SparseDimensionType format, std::vector<uint16_t> array_segments,
std::vector<uint16_t> array_indices)
- : _format{format},
- _array_segments_type(SparseIndexVectorType::SparseIndexVector_Uint16Vector),
- _array_indices_type(SparseIndexVectorType::SparseIndexVector_Uint16Vector)
+ : _format{format},
+ _array_segments_type(SparseIndexVectorType::SparseIndexVector_Uint16Vector),
+ _array_indices_type(SparseIndexVectorType::SparseIndexVector_Uint16Vector)
{
_array_segments.u16 = array_segments;
_array_indices.u16 = array_indices;
}
DimMetaData(SparseDimensionType format, int32_t dense_size)
- : _format{format}, _dense_size{dense_size}
+ : _format{format}, _dense_size{dense_size}
{
}
SparseDimensionType _format{circle::DimensionType_DENSE};
uint32_t addOperatorAddN(const OperatorParams &params);
uint32_t addOperatorArgMax(const OperatorParams &params,
circle::TensorType output_type = circle::TensorType::TensorType_INT32);
+ uint32_t addOperatorArgMin(const OperatorParams &params,
+ circle::TensorType output_type = circle::TensorType::TensorType_INT32);
uint32_t addOperatorAveragePool2D(const OperatorParams &params, circle::Padding padding,
int stride_w, int stride_h, int filter_w, int filter_h,
circle::ActivationFunctionType actfn);
circle::TensorType output_type);
uint32_t addOperatorConcatenation(const OperatorParams &params, int axis,
circle::ActivationFunctionType actfn);
+ uint32_t addOperatorConv2D(const OperatorParams &params, circle::Padding padding, int stride_w,
+ int stride_h, circle::ActivationFunctionType actfn, int dilation_w = 1,
+ int dilation_h = 1);
uint32_t addOperatorCos(const OperatorParams &params);
+ uint32_t addOperatorDepthToSpace(const OperatorParams &params, int32_t block_size);
uint32_t addOperatorDepthwiseConv2D(const OperatorParams &params, circle::Padding padding,
int stride_w, int stride_h, int depth_multiplier,
circle::ActivationFunctionType actfn, int dilation_w = 1,
int dilation_h = 1);
+ uint32_t addOperatorElu(const OperatorParams &params);
uint32_t addOperatorEqual(const OperatorParams &params);
+ uint32_t addOperatorExpandDims(const OperatorParams &params);
uint32_t addOperatorFill(const OperatorParams &params);
uint32_t addOperatorFloor(const OperatorParams &params);
uint32_t addOperatorFullyConnected(const OperatorParams &params,
circle::FullyConnectedOptionsWeightsFormat weights_format =
- circle::FullyConnectedOptionsWeightsFormat_DEFAULT);
+ circle::FullyConnectedOptionsWeightsFormat_DEFAULT);
uint32_t addOperatorIf(const OperatorParams &params, uint32_t then_subg, uint32_t else_subg);
uint32_t addOperatorInstanceNorm(const OperatorParams &params, float epsilon,
circle::ActivationFunctionType actfn);
uint32_t addOperatorLeakyRelu(const OperatorParams &params, float alpha);
uint32_t addOperatorLess(const OperatorParams &params);
uint32_t addOperatorLogSoftmax(const OperatorParams &params);
+ uint32_t addOperatorMean(const OperatorParams &params, bool keep_dims);
uint32_t addOperatorNeg(const OperatorParams &params);
uint32_t addOperatorOneHot(const OperatorParams &params, int32_t axis);
uint32_t addOperatorPad(const OperatorParams &params);
uint32_t addOperatorSelect(const OperatorParams &params);
uint32_t addOperatorSelectV2(const OperatorParams &params);
uint32_t addOperatorSplit(const OperatorParams &params, int32_t num_split);
+ uint32_t addOperatorSqrt(const OperatorParams &params);
+ uint32_t addOperatorSquare(const OperatorParams &params);
uint32_t addOperatorStridedSlice(const OperatorParams &params, int32_t begin_mask = 0,
int32_t end_mask = 0, int32_t ellipsis_mask = 0,
int32_t new_axis_mask = 0, int32_t shrink_axis_mask = 0);
* limitations under the License.
*/
+#ifndef __NNFW_API_TEST_GEN_MODEL_TEST_H__
+#define __NNFW_API_TEST_GEN_MODEL_TEST_H__
+
#include <gtest/gtest.h>
#include <nnfw_internal.h>
_backends.push_back(backend);
}
#endif
- if (backend == "cpu")
+ if (backend == "cpu" || backend == "ruy")
{
_backends.push_back(backend);
}
+#ifdef TEST_XNNPACK_BACKEND
+ if (backend == "xnnpack")
+ {
+ _backends.push_back(backend);
+ }
+#endif
}
}
*/
void expectFailCompile() { _expected_fail_compile = true; }
+ /**
+ * @brief Expect failure during execution
+ */
+ void expectFailExecution() { _expected_fail_execution = true; }
+
private:
CircleBuffer _cbuf;
std::vector<TestCaseData> _test_cases;
std::unordered_map<uint32_t, size_t> _output_sizes;
bool _expected_fail_model_load{false};
bool _expected_fail_compile{false};
+ bool _expected_fail_execution{false};
};
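+// A minimal usage sketch for the new flag (assumption: negative tests opt in the same way
+// they already do with expectFailCompile()):
+//
+//   _context = std::make_unique<GenModelTestContext>(cgen.finish());
+//   _context->expectFailExecution();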
/**
NNFW_ENSURE_SUCCESS(nnfw_create_session(&_so.session));
auto &cbuf = _context->cbuf();
auto model_load_result =
- nnfw_load_circle_from_buffer(_so.session, cbuf.buffer(), cbuf.size());
+ nnfw_load_circle_from_buffer(_so.session, cbuf.buffer(), cbuf.size());
if (_context->expected_fail_model_load())
{
ASSERT_NE(model_load_result, NNFW_STATUS_NO_ERROR);
if (_context->expected_fail_compile())
{
- ASSERT_EQ(nnfw_prepare(_so.session), NNFW_STATUS_ERROR);
+ ASSERT_NE(nnfw_prepare(_so.session), NNFW_STATUS_NO_ERROR);
NNFW_ENSURE_SUCCESS(nnfw_close_session(_so.session));
continue;
if (test_case.expected_fail_run())
{
- ASSERT_EQ(nnfw_run(_so.session), NNFW_STATUS_ERROR);
+ ASSERT_NE(nnfw_run(_so.session), NNFW_STATUS_NO_ERROR);
continue;
}
SessionObjectGeneric _so;
std::unique_ptr<GenModelTestContext> _context;
};
+
+#endif // __NNFW_API_TEST_GEN_MODEL_TEST_H__
#include "fixtures.h"
#include "CircleGen.h"
#include "GenModelTest.h"
+#include "NNPackages.h"
// This macro can be used instead of using NNFW_ENSURE_SUCCESS especially with negative test.
// E.g., setInputOutput() is written with this macro and the following check is available to check
// NOTE Must match `enum TestPackages`
const char *TEST_PACKAGE_NAMES[] = {
- // for validation test
- "add", "add_no_manifest", "add_invalid_manifest",
+ // for validation test
+ "add",
+ "add_no_manifest",
+ "add_invalid_manifest",
- // for dynamic tensor test
- "while_dynamic", "if_dynamic",
+ // for dynamic tensor test
+ "while_dynamic",
+ "if_dynamic",
};
NNPackages &NNPackages::get()
char raw_dir[1024];
char cwd[1024];
strncpy(raw_dir, argv0, sizeof(raw_dir) - 1);
- dirname(raw_dir);
- if (raw_dir[0] == '/')
+ char *dir_path = dirname(raw_dir);
+ if (dir_path[0] == '/')
{
// If it is an absolute path, just use it
- _base_path = raw_dir;
+ _base_path = dir_path;
}
else
{
getcwd(cwd, sizeof(cwd));
_base_path = cwd;
_base_path += "/";
- _base_path += raw_dir;
+ _base_path += dir_path;
}
}
uint8_t input_buf[new_dim * sizeof(float)];
NNFW_ENSURE_SUCCESS(
- nnfw_set_input(session, 0, t_input.dtype, &input_buf, new_dim * sizeof(float)));
+ nnfw_set_input(session, 0, t_input.dtype, &input_buf, new_dim * sizeof(float)));
uint8_t output_buf[new_dim * sizeof(float)];
NNFW_ENSURE_SUCCESS(
- nnfw_set_output(session, 0, t_output.dtype, &output_buf, new_dim * sizeof(float)));
+ nnfw_set_output(session, 0, t_output.dtype, &output_buf, new_dim * sizeof(float)));
NNFW_ENSURE_SUCCESS(nnfw_run(session));
// Calling set_input/set_output again may look redundant, but the nnstreamer use case does call them again.
// Either way, the runtime should still work.
NNFW_ENSURE_SUCCESS(
- nnfw_set_input(session, 0, t_input.dtype, &input_buf, new_dim * sizeof(float)));
+ nnfw_set_input(session, 0, t_input.dtype, &input_buf, new_dim * sizeof(float)));
NNFW_ENSURE_SUCCESS(
- nnfw_set_output(session, 0, t_output.dtype, &output_buf, new_dim * sizeof(float)));
+ nnfw_set_output(session, 0, t_output.dtype, &output_buf, new_dim * sizeof(float)));
NNFW_ENSURE_SUCCESS(nnfw_run(session));
}
std::vector<float> out_buf{-1, -1};
NNFW_ENSURE_SUCCESS(
- nnfw_set_input(session, 0, ti_new.dtype, in_buf.data(), in_buf.size() * sizeof(float)));
+ nnfw_set_input(session, 0, ti_new.dtype, in_buf.data(), in_buf.size() * sizeof(float)));
NNFW_ENSURE_SUCCESS(
- nnfw_set_output(session, 0, ti_new.dtype, out_buf.data(), out_buf.size() * sizeof(float)));
+ nnfw_set_output(session, 0, ti_new.dtype, out_buf.data(), out_buf.size() * sizeof(float)));
NNFW_ENSURE_SUCCESS(nnfw_run(session));
{
// load model twice
ASSERT_EQ(nnfw_load_model_from_file(
- _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()),
+ _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()),
NNFW_STATUS_INVALID_STATE);
}
{
// Load model twice
ASSERT_EQ(nnfw_load_model_from_file(
- _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()),
+ _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()),
NNFW_STATUS_INVALID_STATE);
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fixtures.h"
+
+TEST_F(ValidationTestTwoSessions, neg_two_sessions_create)
+{
+ ASSERT_EQ(nnfw_create_session(&_session1), NNFW_STATUS_NO_ERROR);
+ ASSERT_EQ(nnfw_create_session(nullptr), NNFW_STATUS_UNEXPECTED_NULL);
+
+ ASSERT_EQ(nnfw_close_session(_session1), NNFW_STATUS_NO_ERROR);
+}
+
+class AveragePoolModel
+{
+public:
+ AveragePoolModel(int N, int H, int W, int C)
+ {
+ CircleGen cgen;
+ int in = cgen.addTensor({{N, H, W, C}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{N, H / 2, W / 2, C}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ cbuf = cgen.finish();
+ };
+
+ CircleBuffer cbuf;
+};
+
+TEST_F(ValidationTestTwoSessionsCreated, two_sessions_run_simple_model)
+{
+ constexpr int N = 64, H = 64, W = 64, C = 3;
+ AveragePoolModel model(N, H, W, C);
+
+ NNFW_ENSURE_SUCCESS(
+ nnfw_load_circle_from_buffer(_session1, model.cbuf.buffer(), model.cbuf.size()));
+ NNFW_ENSURE_SUCCESS(
+ nnfw_load_circle_from_buffer(_session2, model.cbuf.buffer(), model.cbuf.size()));
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session1, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session2, "cpu"));
+
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session1));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session2));
+
+ constexpr int input_count = N * H * W * C;
+ constexpr int output_count = N * H / 2 * W / 2 * C;
+
+ std::vector<float> in_buf1(input_count); // any value
+ std::vector<float> out_buf1(output_count);
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_input(_session1, 0, NNFW_TYPE_TENSOR_FLOAT32, in_buf1.data(),
+ in_buf1.size() * sizeof(float)));
+ NNFW_ENSURE_SUCCESS(nnfw_set_output(_session1, 0, NNFW_TYPE_TENSOR_FLOAT32, out_buf1.data(),
+ out_buf1.size() * sizeof(float)));
+
+ std::vector<float> in_buf2(input_count); // any value
+ std::vector<float> out_buf2(output_count);
+
+ NNFW_ENSURE_SUCCESS(nnfw_set_input(_session2, 0, NNFW_TYPE_TENSOR_FLOAT32, in_buf2.data(),
+ in_buf2.size() * sizeof(float)));
+ NNFW_ENSURE_SUCCESS(nnfw_set_output(_session2, 0, NNFW_TYPE_TENSOR_FLOAT32, out_buf2.data(),
+ out_buf2.size() * sizeof(float)));
+
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(_session1));
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(_session2));
+
+ NNFW_ENSURE_SUCCESS(nnfw_await(_session1));
+ NNFW_ENSURE_SUCCESS(nnfw_await(_session2));
+
+ SUCCEED();
+}
+
+// TODO Write a two-session test that runs large models on separate threads
{
// Existing model must load successfully
ASSERT_EQ(nnfw_load_model_from_file(
- _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()),
+ _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()),
NNFW_STATUS_NO_ERROR);
}
TEST_F(ValidationTestSessionCreated, neg_load_session_1)
{
ASSERT_EQ(nnfw_load_model_from_file(
- _session, NNPackages::get().getModelAbsolutePath("nonexisting_directory").c_str()),
+ _session, NNPackages::get().getModelAbsolutePath("nonexisting_directory").c_str()),
NNFW_STATUS_ERROR);
}
// Too long path
const std::string long_path(1024, 'x');
ASSERT_EQ(nnfw_load_model_from_file(
- _session, NNPackages::get().getModelAbsolutePath(long_path.c_str()).c_str()),
+ _session, NNPackages::get().getModelAbsolutePath(long_path.c_str()).c_str()),
NNFW_STATUS_ERROR);
}
TEST_F(ValidationTestSessionCreated, neg_load_invalid_package_1)
{
ASSERT_EQ(
- nnfw_load_model_from_file(
- _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD_NO_MANIFEST).c_str()),
- NNFW_STATUS_ERROR);
+ nnfw_load_model_from_file(
+ _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD_NO_MANIFEST).c_str()),
+ NNFW_STATUS_ERROR);
ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestSessionCreated, neg_load_invalid_package_2)
{
- ASSERT_EQ(nnfw_load_model_from_file(
- _session,
- NNPackages::get().getModelAbsolutePath(NNPackages::ADD_INVALID_MANIFEST).c_str()),
- NNFW_STATUS_ERROR);
+ ASSERT_EQ(
+ nnfw_load_model_from_file(
+ _session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD_INVALID_MANIFEST).c_str()),
+ NNFW_STATUS_ERROR);
ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_INVALID_STATE);
}
{
// Invalid state
ASSERT_EQ(nnfw_load_model_from_file(
- nullptr, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()),
+ nullptr, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()),
NNFW_STATUS_UNEXPECTED_NULL);
}
#include <nnfw_internal.h>
#include "NNPackages.h"
+#include "CircleGen.h"
#define NNFW_ENSURE_SUCCESS(EXPR) ASSERT_EQ((EXPR), NNFW_STATUS_NO_ERROR)
{
ValidationTestSingleSession::SetUp();
ASSERT_EQ(nnfw_create_session(&_session), NNFW_STATUS_NO_ERROR);
+ ASSERT_NE(_session, nullptr);
}
void TearDown() override
}
};
+inline CircleBuffer genAddModel()
+{
+ CircleGen cgen;
+ std::vector<float> rhs_data{2};
+ uint32_t rhs_buf = cgen.addBuffer(rhs_data);
+ int lhs = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, 0, "X_input"});
+ int rhs = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, rhs_buf, "y_var"});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, 0, "ADD_TOP"});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs}, {out});
+ return cgen.finish();
+}
+
template <int PackageNo> class ValidationTestModelLoaded : public ValidationTestSessionCreated
{
protected:
void SetUp() override
{
ValidationTestSessionCreated::SetUp();
- ASSERT_EQ(nnfw_load_model_from_file(_session,
- NNPackages::get().getModelAbsolutePath(PackageNo).c_str()),
- NNFW_STATUS_NO_ERROR);
- ASSERT_NE(_session, nullptr);
+ if (PackageNo == NNPackages::ADD)
+ {
+ auto cbuf = genAddModel();
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(_session, cbuf.buffer(), cbuf.size()));
+ }
+ else
+ {
+ // TODO Eventually, downloaded-model tests will be removed.
+ NNFW_ENSURE_SUCCESS(nnfw_load_model_from_file(
+ _session, NNPackages::get().getModelAbsolutePath(PackageNo).c_str()));
+ }
}
void TearDown() override { ValidationTestSessionCreated::TearDown(); }
EXPECT_EQ(input_elements, 1);
_input.resize(input_elements);
ASSERT_EQ(
- nnfw_set_input(_session, 0, ti_input.dtype, _input.data(), sizeof(float) * input_elements),
- NNFW_STATUS_NO_ERROR);
+ nnfw_set_input(_session, 0, ti_input.dtype, _input.data(), sizeof(float) * input_elements),
+ NNFW_STATUS_NO_ERROR);
nnfw_tensorinfo ti_output;
ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &ti_output), NNFW_STATUS_NO_ERROR);
uint64_t input_elements = num_elems(ti_input);
_input.resize(input_elements);
ASSERT_EQ(
- nnfw_set_input(_session, 0, ti_input->dtype, _input.data(), sizeof(float) * input_elements),
- NNFW_STATUS_NO_ERROR);
+ nnfw_set_input(_session, 0, ti_input->dtype, _input.data(), sizeof(float) * input_elements),
+ NNFW_STATUS_NO_ERROR);
_output.resize(40000); // Give sufficient size for the output
- ASSERT_EQ(nnfw_set_output(_session, 0, ti_input->dtype, _output.data(),
- sizeof(float) * _output.size()),
- NNFW_STATUS_NO_ERROR);
+ ASSERT_EQ(
+ nnfw_set_output(_session, 0, ti_input->dtype, _output.data(), sizeof(float) * _output.size()),
+ NNFW_STATUS_NO_ERROR);
}
protected:
{
ValidationTest::SetUp();
- auto model_path = NNPackages::get().getModelAbsolutePath(NNPackages::ADD);
for (auto &obj : _objects)
{
ASSERT_EQ(nnfw_create_session(&obj.session), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_load_model_from_file(obj.session, model_path.c_str()), NNFW_STATUS_NO_ERROR);
+
+ auto cbuf = genAddModel();
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(obj.session, cbuf.buffer(), cbuf.size()));
ASSERT_EQ(nnfw_prepare(obj.session), NNFW_STATUS_NO_ERROR);
uint32_t num_inputs;
std::array<SessionObject, NUM_SESSIONS> _objects;
};
+class ValidationTestTwoSessions : public ValidationTest
+{
+protected:
+ nnfw_session *_session1 = nullptr;
+ nnfw_session *_session2 = nullptr;
+};
+
+class ValidationTestTwoSessionsCreated : public ValidationTestTwoSessions
+{
+protected:
+ void SetUp() override
+ {
+ ValidationTestTwoSessions::SetUp();
+ ASSERT_EQ(nnfw_create_session(&_session1), NNFW_STATUS_NO_ERROR);
+ ASSERT_EQ(nnfw_create_session(&_session2), NNFW_STATUS_NO_ERROR);
+ ASSERT_NE(_session1, nullptr);
+ ASSERT_NE(_session2, nullptr);
+ }
+
+ void TearDown() override
+ {
+ ASSERT_EQ(nnfw_close_session(_session1), NNFW_STATUS_NO_ERROR);
+ ASSERT_EQ(nnfw_close_session(_session2), NNFW_STATUS_NO_ERROR);
+ ValidationTestTwoSessions::TearDown();
+ }
+};
+
#endif // __NNFW_API_TEST_FIXTURES_H__
}
catch (std::runtime_error &e)
{
+ std::cerr << "[WARNING] Test models are not loaded, so some tests will fail" << std::endl;
std::cerr << e.what() << std::endl;
- return -1;
}
return RUN_ALL_TESTS();
cgen.setInputsAndOutputs({in1, in2, in3}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_AddN_TypeDiff)
+{
+ CircleGen cgen;
+
+ int in1 = cgen.addTensor({{8}, circle::TensorType::TensorType_FLOAT32});
+ int in2 = cgen.addTensor({{8}, circle::TensorType::TensorType_FLOAT32});
+ int in3 = cgen.addTensor({{8}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{8}, circle::TensorType::TensorType_INT32});
+
+ cgen.addOperatorAddN({{in1, in2, in3}, {out}});
+ cgen.setInputsAndOutputs({in1, in2, in3}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->expectFailModelLoad();
SUCCEED();
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-#include <memory>
-
-TEST_F(GenModelTest, OneOp_ArgMax_AxisToConst)
-{
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT32;
- std::vector<int32_t> axis_data{1};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(TestCaseData{}.addInput<float>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_ArgMax_Int64_AxisToConst)
-{
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT64;
- std::vector<int32_t> axis_data{1};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(TestCaseData{}.addInput<float>({1, 4, 2, 3}).addOutput<int64_t>({1, 0}));
- _context->setBackends({"acl_cl"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_ArgMax_AxisToVar)
-{
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT32;
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in, axis}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(TestCaseData{}
- .addInput<float>({1, 4, 2, 3})
- .addInput<int32_t>({-3})
- .addOutput<int32_t>({1, 0}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_ArgMax_InvalidAxis0)
-{
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT32;
- std::vector<int32_t> axis_data{4};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_ArgMax_InvalidAxis1)
-{
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT32;
- std::vector<int32_t> axis_data{-3};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{2}, output_type});
- cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+struct ArgMinMaxVariationParam
+{
+ TestCaseData tcd;
+ bool is_argmax = true;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class ArgMinMaxVariation : public GenModelTest,
+ public ::testing::WithParamInterface<ArgMinMaxVariationParam>
+{
+};
+
+// Input shape: {1, 2, 2, 1}
+// Reduce axis: 1
+// Output shape: {1, 2, 1}
+// Output type: Int32
+TEST_P(ArgMinMaxVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<int32_t> axis_data{1};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+// Test with different input types and values
+INSTANTIATE_TEST_CASE_P(
+ GenModelTest, ArgMinMaxVariation,
+ ::testing::Values(
+ // ArgMax, float input
+ ArgMinMaxVariationParam{TestCaseData{}.addInput<float>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}),
+ true},
+ // ArgMax, int32 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<int32_t>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}), true,
+ circle::TensorType::TensorType_INT32},
+ // ArgMax, uint8 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<uint8_t>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}), true,
+ circle::TensorType::TensorType_UINT8, 1.0, 1},
+ // ArgMax, int8 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<int8_t>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}), true,
+ circle::TensorType::TensorType_INT8, 1.0, 1},
+ // ArgMin, float input
+ ArgMinMaxVariationParam{TestCaseData{}.addInput<float>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}),
+ false},
+ // ArgMin, int32 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<int32_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false,
+ circle::TensorType::TensorType_INT32},
+ // ArgMin, uint8 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<uint8_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false,
+ circle::TensorType::TensorType_UINT8, 1.0, 1},
+ // ArgMin, int8 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<int8_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false,
+ circle::TensorType::TensorType_INT8, 1.0, 1}));
+
+TEST_F(GenModelTest, OneOp_ArgMax_Int64_AxisToConst)
+{
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT64;
+ std::vector<int32_t> axis_data{1};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}.addInput<float>({1, 4, 2, 3}).addOutput<int64_t>({1, 0}));
+ _context->setBackends({"acl_cl", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_ArgMax_AxisToVar)
+{
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in, axis}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}
+ .addInput<float>({1, 4, 2, 3})
+ .addInput<int32_t>({-3})
+ .addOutput<int32_t>({1, 0}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_ArgMax_InvalidAxis0)
+{
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<int32_t> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_ArgMax_InvalidAxis1)
+{
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<int32_t> axis_data{-3};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{2}, output_type});
+ cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_ArgMax_InType)
+{
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<int32_t> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_ArgMax_AxisType)
+{
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_FLOAT32;
+ std::vector<float> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_ArgMax_OutType)
+{
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_FLOAT32;
+ std::vector<int32_t> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_ArgMax_paramType)
+{
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<int32_t> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ cgen.addOperatorArgMax({{in, axis}, {out}}, circle::TensorType::TensorType_INT64);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- TestCaseData{}.addInput<int32_t>({1, 2, 3, 4}).addOutput<float>({1, 2, 3, 4}));
+ TestCaseData{}.addInput<int32_t>({1, 2, 3, 4}).addOutput<float>({1, 2, 3, 4}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- TestCaseData{}.addInput<float>({1, 2, 3, 4}).addOutput<int32_t>({1, 2, 3, 4}));
+ TestCaseData{}.addInput<float>({1, 2, 3, 4}).addOutput<int32_t>({1, 2, 3, 4}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- TestCaseData{}.addInput<bool>({true, false, true, true}).addOutput<float>({1, 0, 1, 1}));
+ TestCaseData{}.addInput<bool>({true, false, true, true}).addOutput<float>({1, 0, 1, 1}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(TestCaseData{}
- .addInput<bool>({true, false, true, true})
- .addOutput(std::vector<uint8_t>{1, 0, 1, 1}));
+ .addInput<bool>({true, false, true, true})
+ .addOutput(std::vector<uint8_t>{1, 0, 1, 1}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- TestCaseData{}.addInput<bool>({true, false, true, true}).addOutput<int32_t>({1, 0, 1, 1}));
+ TestCaseData{}.addInput<bool>({true, false, true, true}).addOutput<int32_t>({1, 0, 1, 1}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(uniformTCD<float>(
- {{1, 3, 2, 4}, {5, 4, 7, 4}},
- {{0, 0, 0, 0, 0, 6, 7, 0, 0, 9, 8, 0, 0, 0, 0, 0}, {5, 6, 4, 7, 7, 9, 4, 8}}));
+ {{1, 3, 2, 4}, {5, 4, 7, 4}},
+ {{0, 0, 0, 0, 0, 6, 7, 0, 0, 9, 8, 0, 0, 0, 0, 0}, {5, 6, 4, 7, 7, 9, 4, 8}}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
}
-TEST_F(GenModelTest, OneOp_Concat)
+struct ConcatVariationParam
{
- CircleGen cgen;
+ TestCaseData tcd;
+ circle::TensorType type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class ConcatVariation : public GenModelTest,
+ public ::testing::WithParamInterface<ConcatVariationParam>
+{
+};
- int input1 = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
- int input2 = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
- int output = cgen.addTensor({{4, 3}, circle::TensorType::TensorType_FLOAT32});
+// Input shape: {2, 3} / {2, 3}
+// Output shape: {4, 3}
+TEST_P(ConcatVariation, Test)
+{
+ auto &param = GetParam();
+ CircleGen cgen;
+ int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
cgen.addOperatorConcatenation({{input1, input2}, {output}}, 0,
circle::ActivationFunctionType_NONE);
cgen.setInputsAndOutputs({input1, input2}, {output});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
- {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}));
+ _context->addTestCase(param.tcd);
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
}
+INSTANTIATE_TEST_CASE_P(
+ GenModelTest, ConcatVariation,
+ ::testing::Values(
+ // Float
+ ConcatVariationParam{uniformTCD<float>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}})},
+ // Uint8
+ ConcatVariationParam{uniformTCD<uint8_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
+ circle::TensorType::TensorType_UINT8, 1.0f, -2},
+ // Int8
+ ConcatVariationParam{uniformTCD<int8_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
+ circle::TensorType::TensorType_INT8, 1.0f, -2},
+ // Int16
+ // TODO Enable when nnfw api supports int16 type
+ // ConcatVariationParam{
+ // uniformTCD<int16_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ // {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
+ // circle::TensorType::TensorType_INT16, 1.0f, 0},
+ // Int32
+ ConcatVariationParam{uniformTCD<int32_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
+ circle::TensorType::TensorType_INT32},
+ // Int64
+ ConcatVariationParam{uniformTCD<int64_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
+ circle::TensorType::TensorType_INT64}));
+
TEST_F(GenModelTest, OneOp_Concat_Subtensor_4D)
{
CircleGen cgen;
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(uniformTCD<float>(
- {
- // inputs
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, // in1
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0} // in2
- },
- {
- // outputs
- {1, 2, 3, 4, 5}, // s_out1
- {6, 7, 8, 9, 10}, // s_out2
- {11, 12, 13, 14, 15}, // s_out3
- {16, 17, 18, 19, 20}, // s_out4
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // c_out1
- {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // c_out2
- {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // c_out3
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // a_out1
- {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // a_out2
- {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // a_out3
- {1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3,
- 4, 5, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20} // final_out
- }));
+ {
+ // inputs
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, // in1
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0} // in2
+ },
+ {
+ // outputs
+ {1, 2, 3, 4, 5}, // s_out1
+ {6, 7, 8, 9, 10}, // s_out2
+ {11, 12, 13, 14, 15}, // s_out3
+ {16, 17, 18, 19, 20}, // s_out4
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // c_out1
+ {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // c_out2
+ {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // c_out3
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // a_out1
+ {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // a_out2
+ {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // a_out3
+ {1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3,
+ 4, 5, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20} // final_out
+ }));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_Conv2D)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE, 1, 1);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}},
+ {{47, -4, -25, 9, 10, 10, -13, 11, -14, -26, -12, 26, 20, 40, 1, 3, 11, 4}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "ruy", "xnnpack"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Conv2D_Stride)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 2, 2,
+ circle::ActivationFunctionType_NONE, 1, 1);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}},
+ {{22, 27, -10, -2, 5, -8, 7, 3, -14, -26, -10, 18, 4, -13, -28, 9, 14, 1}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "ruy", "xnnpack"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Conv2D_Dilation)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE, 2, 2);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}},
+ {{-52, 7}}));
+ _context->setBackends({"cpu", "ruy", "xnnpack"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_Type)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT16});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE, 1, 1);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_Stride)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 0, 0,
+ circle::ActivationFunctionType_NONE, 1, 1);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_Dilation)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE, 0, 0);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct DepthToSpaceVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class DepthToSpaceVariation : public GenModelTest,
+ public ::testing::WithParamInterface<DepthToSpaceVariationParam>
+{
+};
+
+INSTANTIATE_TEST_CASE_P(
+ GenModelTest, DepthToSpaceVariation,
+ ::testing::Values(
+ // Float
+ DepthToSpaceVariationParam{
+ uniformTCD<float>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}})},
+ // Int32
+ DepthToSpaceVariationParam{
+ uniformTCD<int32_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
+ circle::TensorType::TensorType_INT32},
+ // Int64
+ DepthToSpaceVariationParam{
+ uniformTCD<int64_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
+ circle::TensorType::TensorType_INT64},
+ // Uint8
+ DepthToSpaceVariationParam{
+ uniformTCD<uint8_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
+ circle::TensorType::TensorType_UINT8, 1.0f, -2},
+ // Int8
+ DepthToSpaceVariationParam{
+ uniformTCD<int8_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
+ circle::TensorType::TensorType_INT8, 1.0f, -2}));
+
+// Input shape: {1, 1, 2, 4}
+// Block size: 2
+// Output shape: {1, 2, 4, 1}
+TEST_P(DepthToSpaceVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 1, 2, 4}, param.type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 4, 1}, param.type}, param.scale, param.zero_point);
+ cgen.addOperatorDepthToSpace({{in}, {out}}, 2);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_DepthToSpace_Blocksize)
+{
+ CircleGen cgen;
+ circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
+ int in = cgen.addTensor({{1, 1, 2, 4}, data_type});
+ int out = cgen.addTensor({{1, 2, 4, 1}, data_type});
+ cgen.addOperatorDepthToSpace({{in}, {out}}, -2);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(uniformTCD<float>({{1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}},
{{71, -34, 99, -20, 91, -26, 127, -4}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
SUCCEED();
}
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(uniformTCD<float>({{
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
- 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
+ 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
}},
{{13, 14, 0, 0, 0, 0, 11, 12, 5, 6, 0, 0, 0, 0, 3, 4}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
SUCCEED();
}
_context->addTestCase(uniformTCD<float>({{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
{{4, 0, 3, 0, 0, 0, 2, 0, 1}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
SUCCEED();
}
CircleGen cgen;
uint32_t ker_buf = cgen.addBuffer(std::vector<uint8_t>{
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
- 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
- 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0));
int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_UINT8}, 0.5, 0);
int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_UINT8, ker_buf}, 0.5, 0);
TEST_P(DepthwiseConv2DVariation, Test)
{
// Same input is used for all tests but output differs
- static const std::vector<uint8_t> input64{0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
- 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2,
- 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8,
- 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
+ static const std::vector<uint8_t> input64{
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2,
+ 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
auto ¶m = GetParam();
_context = std::make_unique<GenModelTestContext>(genSimpleDepthwiseConv2DQuantizedModel(
- param.stride, param.input_depth, param.depth_multiplier));
+ param.stride, param.input_depth, param.depth_multiplier));
std::vector<uint8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
_context->addTestCase(uniformTCD<uint8_t>({ref_input}, {param.ref_output}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU
// kernels.
INSTANTIATE_TEST_CASE_P(
- GenModelTest, DepthwiseConv2DVariation,
- ::testing::Values(
- // Stride == 1
- DepthwiseConv2DVariationParam{1, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DVariationParam{1, 4, 2, std::vector<uint8_t>{0, 0, 2, 3, 0, 2, 6, 9}},
- DepthwiseConv2DVariationParam{
- 1, 2, 8, std::vector<uint8_t>{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}},
- DepthwiseConv2DVariationParam{1, 2, 2, std::vector<uint8_t>{0, 1, 4, 6}},
- DepthwiseConv2DVariationParam{1, 2, 1, std::vector<uint8_t>{2, 5}},
- DepthwiseConv2DVariationParam{1, 1, 2, std::vector<uint8_t>{2, 4}},
- DepthwiseConv2DVariationParam{1, 1, 4, std::vector<uint8_t>{0, 2, 3, 5}},
- DepthwiseConv2DVariationParam{1, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
- DepthwiseConv2DVariationParam{
- 1, 4, 4, std::vector<uint8_t>{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}},
- DepthwiseConv2DVariationParam{1, 12, 1,
- std::vector<uint8_t>{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}},
- // Stride == 2
- DepthwiseConv2DVariationParam{2, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
- DepthwiseConv2DVariationParam{2, 2, 1, std::vector<uint8_t>{2, 5}},
- DepthwiseConv2DVariationParam{2, 1, 8, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DVariationParam{
- 2, 1, 32, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5,
- 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DVariationParam{2, 1, 20, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2,
- 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DVariationParam{
- 2, 1, 16, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DVariationParam{2, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DVariationParam{
- 2, 8, 2, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DVariationParam{
- 2, 16, 1, std::vector<uint8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}}));
+ GenModelTest, DepthwiseConv2DVariation,
+ ::testing::Values(
+ // Stride == 1
+ DepthwiseConv2DVariationParam{1, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DVariationParam{1, 4, 2, std::vector<uint8_t>{0, 0, 2, 3, 0, 2, 6, 9}},
+ DepthwiseConv2DVariationParam{
+ 1, 2, 8, std::vector<uint8_t>{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}},
+ DepthwiseConv2DVariationParam{1, 2, 2, std::vector<uint8_t>{0, 1, 4, 6}},
+ DepthwiseConv2DVariationParam{1, 2, 1, std::vector<uint8_t>{2, 5}},
+ DepthwiseConv2DVariationParam{1, 1, 2, std::vector<uint8_t>{2, 4}},
+ DepthwiseConv2DVariationParam{1, 1, 4, std::vector<uint8_t>{0, 2, 3, 5}},
+ DepthwiseConv2DVariationParam{1, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DVariationParam{
+ 1, 4, 4, std::vector<uint8_t>{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}},
+ DepthwiseConv2DVariationParam{1, 12, 1,
+ std::vector<uint8_t>{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}},
+ // Stride == 2
+ DepthwiseConv2DVariationParam{2, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DVariationParam{2, 2, 1, std::vector<uint8_t>{2, 5}},
+ DepthwiseConv2DVariationParam{2, 1, 8, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DVariationParam{2, 1, 32, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3,
+ 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2,
+ 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DVariationParam{
+ 2, 1, 20, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DVariationParam{
+ 2, 1, 16, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DVariationParam{2, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DVariationParam{
+ 2, 8, 2, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DVariationParam{
+ 2, 16, 1, std::vector<uint8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}}));
TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_InvalidPaddingType)
{
_context = std::make_unique<GenModelTestContext>(genNegTestDepthwiseConv2DModel(
- static_cast<circle::Padding>(99), 1, 1, 1, circle::ActivationFunctionType_NONE));
+ static_cast<circle::Padding>(99), 1, 1, 1, circle::ActivationFunctionType_NONE));
_context->expectFailModelLoad();
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
SUCCEED();
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_Elu)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorElu({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
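+ // Reference outputs follow ELU: f(x) = x for x > 0 and exp(x) - 1 for x <= 0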
+ _context->addTestCase(
+ uniformTCD<float>({{0, -6, 2, -4, 3, -2, 10, -0.1}},
+ {{0.0, -0.997521, 2.0, -0.981684, 3.0, -0.864665, 10.0, -0.0951626}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Elu_Type)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_UINT8}, 1.0f, 0);
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorElu({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(TestCaseData{}
- .addInput<float>({0.1, 0.3, 0.5, 0.7})
- .addInput<float>({0.1, 0.2, 0.3, 0.4})
- .addOutput<bool>({true, false, false, false}));
+ .addInput<float>({0.1, 0.3, 0.5, 0.7})
+ .addInput<float>({0.1, 0.2, 0.3, 0.4})
+ .addOutput<bool>({true, false, false, false}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_ExpandDims)
+{
+ CircleGen cgen;
+
+ std::vector<int32_t> axis_data{1};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int in = cgen.addTensor({{1, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int out = cgen.addTensor({{1, 1, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorExpandDims({{in, axis}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<float>({0.1, 0.3, 0.5, 0.7}).addOutput<float>({0.1, 0.3, 0.5, 0.7}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_ExpandDims_Int64AxisNeg)
+{
+ CircleGen cgen;
+
+ std::vector<int64_t> axis_data{-1};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int in = cgen.addTensor({{1, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT64, axis_buf});
+ int out = cgen.addTensor({{1, 4, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorExpandDims({{in, axis}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<float>({0.1, 0.3, 0.5, 0.7}).addOutput<float>({0.1, 0.3, 0.5, 0.7}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_ExpandDims_Axis)
+{
+ CircleGen cgen;
+
+ std::vector<int32_t> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int in = cgen.addTensor({{1, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int out = cgen.addTensor({{1, 1, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorExpandDims({{in, axis}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_ExpandDims_AxisNegInput)
+{
+ CircleGen cgen;
+
+ int in = cgen.addTensor({{1, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 1, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorExpandDims({{in, axis}, {out}});
+ cgen.setInputsAndOutputs({in, axis}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.5, 0.7})
+ .addInput<int32_t>({-5})
+ .addOutput<float>({0.1, 0.3, 0.5, 0.7})
+ .expectFailRun());
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
#include "GenModelTest.h"
-TEST_F(GenModelTest, OneOp_Fill_Int32)
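+// Parameters for the Fill variation tests: expected input/output data, a pointer to the raw
+// constant fill value, and the tensor type of that value (float32 by default).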
+struct FillVariationParam
{
- CircleGen cgen;
- std::vector<int32_t> value_data{13};
- uint32_t value_buf = cgen.addBuffer(value_data);
-
- int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
- int value = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, value_buf});
- int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_INT32});
- cgen.addOperatorFill({{in, value}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
+ TestCaseData tcd;
+ const uint8_t *value_data = nullptr;
+ circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
+};
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int32_t>({13, 13, 13, 13, 13, 13}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
+class FillVariation : public GenModelTest, public ::testing::WithParamInterface<FillVariationParam>
+{
+};
-TEST_F(GenModelTest, OneOp_Fill_Int64)
+// value is constant
+TEST_P(FillVariation, Test)
{
+ auto &param = GetParam();
+
CircleGen cgen;
- std::vector<int64_t> value_data{13};
- uint32_t value_buf = cgen.addBuffer(value_data);
- int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
- int value = cgen.addTensor({{1}, circle::TensorType::TensorType_INT64, value_buf});
- int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_INT64});
- cgen.addOperatorFill({{in, value}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
+ size_t value_size =
+ (param.data_type == circle::TensorType::TensorType_INT64) ? sizeof(int64_t) : sizeof(int32_t);
+ uint32_t value_buf = cgen.addBuffer(param.value_data, value_size);
+
+ int dims = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int value = cgen.addTensor({{1}, param.data_type, value_buf});
+ int out = cgen.addTensor({{2, 3}, param.data_type});
+ cgen.addOperatorFill({{dims, value}, {out}});
+ cgen.setInputsAndOutputs({dims}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int64_t>({13, 13, 13, 13, 13, 13}));
+ _context->addTestCase(param.tcd);
_context->setBackends({"cpu"});
SUCCEED();
}
-TEST_F(GenModelTest, OneOp_Fill_Float32)
+const int32_t test_int32 = 13;
+const int64_t test_int64 = 1052;
+const float test_float = 5.2;
+
+// Test with different value types
+INSTANTIATE_TEST_CASE_P(
+ GenModelTest, FillVariation,
+ ::testing::Values(
+ // float value
+ FillVariationParam{
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<float>({5.2, 5.2, 5.2, 5.2, 5.2, 5.2}),
+ reinterpret_cast<const uint8_t *>(&test_float)},
+ // int32 value
+ FillVariationParam{
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int32_t>({13, 13, 13, 13, 13, 13}),
+ reinterpret_cast<const uint8_t *>(&test_int32), circle::TensorType::TensorType_INT32},
+ // int64 value
+ FillVariationParam{
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int64_t>({1052, 1052, 1052, 1052, 1052,
+ 1052}),
+ reinterpret_cast<const uint8_t *>(&test_int64), circle::TensorType::TensorType_INT64}));
+
+TEST_F(GenModelTest, OneOp_Fill_Int64_Shape)
{
CircleGen cgen;
std::vector<float> value_data{1.3};
uint32_t value_buf = cgen.addBuffer(value_data);
- int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int dims = cgen.addTensor({{2}, circle::TensorType::TensorType_INT64});
int value = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, value_buf});
int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorFill({{in, value}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
+ cgen.addOperatorFill({{dims, value}, {out}});
+ cgen.setInputsAndOutputs({dims}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<float>({1.3, 1.3, 1.3, 1.3, 1.3, 1.3}));
+ TestCaseData{}.addInput<int64_t>({2, 3}).addOutput<float>({1.3, 1.3, 1.3, 1.3, 1.3, 1.3}));
_context->setBackends({"cpu"});
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int32_t>({13, 13, 13, 13, 13, 13}));
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int32_t>({13, 13, 13, 13, 13, 13}));
_context->setBackends({"cpu"});
_context->expectFailModelLoad();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int64_t>({13, 13, 13, 13, 13, 13}));
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int64_t>({13, 13, 13, 13, 13, 13}));
_context->setBackends({"cpu"});
_context->expectFailModelLoad();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<float>({1.3, 1.3, 1.3, 1.3, 1.3, 1.3}));
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<float>({1.3, 1.3, 1.3, 1.3, 1.3, 1.3}));
_context->setBackends({"cpu"});
_context->expectFailModelLoad();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- uniformTCD<float>({{1, 3, 2, 1}}, {{2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 6}}));
- _context->setBackends({"cpu", "acl_neon"});
+ uniformTCD<float>({{1, 3, 2, 1}}, {{2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 6}}));
+ _context->setBackends({"cpu", "acl_neon", "xnnpack", "ruy"});
SUCCEED();
}
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- uniformTCD<float>({{1, 3, 2, 1}}, {{2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 6}}));
+ uniformTCD<float>({{1, 3, 2, 1}}, {{2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 6}}));
_context->setBackends({"cpu"});
SUCCEED();
uint32_t bias_buf = cgen.addBuffer(bias_data);
int input = cgen.addTensor({{1, 4}, circle::TensorType::TensorType_FLOAT32});
CircleGen::SparsityParams sp{
- {0, 1, 2, 3},
- {0, 1},
- {{CircleGen::SparseDimensionType::DimensionType_DENSE, 1},
- {CircleGen::SparseDimensionType::DimensionType_SPARSE_CSR, {0, 2}, {0, 3}},
- {CircleGen::SparseDimensionType::DimensionType_DENSE, 16},
- {CircleGen::SparseDimensionType::DimensionType_DENSE, 1}}};
+ {0, 1, 2, 3},
+ {0, 1},
+ {{CircleGen::SparseDimensionType::DimensionType_DENSE, 1},
+ {CircleGen::SparseDimensionType::DimensionType_SPARSE_CSR, {0, 2}, {0, 3}},
+ {CircleGen::SparseDimensionType::DimensionType_DENSE, 16},
+ {CircleGen::SparseDimensionType::DimensionType_DENSE, 1}}};
int weight = cgen.addTensor({{16, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf}, sp);
int bias = cgen.addTensor({{16}, circle::TensorType::TensorType_FLOAT32, bias_buf});
int output = cgen.addTensor({{1, 16}, circle::TensorType::TensorType_FLOAT32});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- uniformTCD<float>({{1, 3, 2, 1}}, {{2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 6}}));
+ uniformTCD<float>({{1, 3, 2, 1}}, {{2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 6}}));
_context->setBackends({"cpu"});
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- uniformTCD<float>({{3, -1, -1, 1, -2, 0, -2, 1}},
- {{-4, -2, 9, -6, 8, 13, 5, 18, 5, -3, -7, -2, -16, -5, -1, -1}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ uniformTCD<float>({{3, -1, -1, 1, -2, 0, -2, 1}},
+ {{-4, -2, 9, -6, 8, 13, 5, 18, 5, -3, -7, -2, -16, -5, -1, -1}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack", "ruy"});
SUCCEED();
}
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- uniformTCD<float>({{3, -1, -1, 1, -2, 0, -2, 1}},
- {{-4, -2, 9, -6, 8, 13, 5, 18, 5, -3, -7, -2, -16, -5, -1, -1}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ uniformTCD<float>({{3, -1, -1, 1, -2, 0, -2, 1}},
+ {{-4, -2, 9, -6, 8, 13, 5, 18, 5, -3, -7, -2, -16, -5, -1, -1}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack", "ruy"});
_context->expectFailCompile();
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- uniformTCD<float>({{0, 3, 4, 0, 5, 12, 0, 8, 15, 0, 7, 24}},
- {{0, 0.6, 0.8, 0, 0.38461539149284363, 0.92307698726654053, 0,
- 0.47058823704719543, 0.88235294818878174, 0, 0.28, 0.96}}));
+ uniformTCD<float>({{0, 3, 4, 0, 5, 12, 0, 8, 15, 0, 7, 24}},
+ {{0, 0.6, 0.8, 0, 0.38461539149284363, 0.92307698726654053, 0,
+ 0.47058823704719543, 0.88235294818878174, 0, 0.28, 0.96}}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- uniformTCD<float>({{0, 1.0, 3.0, 1.0, -1.0, -2.0f}}, {{0, 1.0, 3.0, 1.0, -0.5, -1.0}}));
- _context->setBackends({"acl_cl", "acl_neon"});
+ uniformTCD<float>({{0, 1.0, 3.0, 1.0, -1.0, -2.0f}}, {{0, 1.0, 3.0, 1.0, -0.5, -1.0}}));
+ _context->setBackends({"cpu", "acl_cl", "acl_neon"});
SUCCEED();
}
cgen.setInputsAndOutputs({in}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon"});
+ _context->setBackends({"cpu", "acl_cl", "acl_neon"});
_context->expectFailModelLoad();
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->setBackends({"cpu"});
_context->addTestCase(uniformTCD<float>(
- {{0, -6, 2, 4, 3, -2, 10, 1}},
- {{-.00247565, -6.00247, -2.12692, -.126928, -.00671534, -5.00671, -.000123374, -9.00012}}));
+ {{0, -6, 2, 4, 3, -2, 10, 1}},
+ {{-.00247565, -6.00247, -2.12692, -.126928, -.00671534, -5.00671, -.000123374, -9.00012}}));
SUCCEED();
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+CircleBuffer genSimpleMeanModel()
+{
+ CircleGen cgen;
+ uint32_t axis_buf = cgen.addBuffer(std::vector<int32_t>{1, 2});
+ int in = cgen.addTensor({{1, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ int axis = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32, axis_buf});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorMean({{in, axis}, {out}}, true);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen.finish();
+}
+
+TEST_F(GenModelTest, OneOp_Mean)
+{
+ auto model = genSimpleMeanModel();
+ _context = std::make_unique<GenModelTestContext>(std::move(model));
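+ // Mean over axes {1, 2} of the 1x3x3x1 input: mean(1..9) = 5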
+ _context->addTestCase(uniformTCD<float>({{1, 2, 3, 4, 5, 6, 7, 8, 9}}, {{5}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+CircleBuffer genWrongMeanModel()
+{
+ CircleGen cgen;
+ uint32_t axis_buf = cgen.addBuffer(std::vector<int32_t>{1, 2});
+ int in = cgen.addTensor({{1, 3, 3, 1}, circle::TensorType::TensorType_BOOL});
+ int axis = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32, axis_buf});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorMean({{in, axis}, {out}}, true);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen.finish();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Mean)
+{
+ auto model = genWrongMeanModel();
+ _context = std::make_unique<GenModelTestContext>(std::move(model));
+ _context->addTestCase(uniformTCD<float>({{1, 2, 3, 4, 5, 6, 7, 8, 9}}, {{5}}));
+ _context->setBackends({"cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(TestCaseData{}
- .addInput<int32_t>({1, 2, 0, 2})
- .addInput<float>({1})
- .addOutput<float>({0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1}));
+ .addInput<int32_t>({1, 2, 0, 2})
+ .addInput<float>({1})
+ .addOutput<float>({0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(TestCaseData{}
- .addInput<int32_t>({1, 2, 0, 2})
- .addInput<float>({1})
- .addInput<float>({-1})
- .addOutput<float>({-1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1}));
+ .addInput<int32_t>({1, 2, 0, 2})
+ .addInput<float>({1})
+ .addInput<float>({-1})
+ .addOutput<float>({-1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(TestCaseData{}
- .addInput<int32_t>({1, 2, 0, -1})
- .addInput<float>({1})
- .addOutput<float>({0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0}));
+ .addInput<int32_t>({1, 2, 0, -1})
+ .addInput<float>({1})
+ .addOutput<float>({0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(TestCaseData{}
- .addInput<int32_t>({1, 2, 0, -1})
- .addInput<float>({1})
- .addInput<float>({0})
- .addOutput<float>({0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0}));
+ .addInput<int32_t>({1, 2, 0, -1})
+ .addInput<float>({1})
+ .addInput<float>({0})
+ .addOutput<float>({0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
cgen.setInputsAndOutputs({in}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- uniformTCD<float>({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}}));
+ uniformTCD<float>({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
std::vector<float> padding_value_data{3.0};
uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
int padding_value =
- cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
+ cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- uniformTCD<float>({{1, 2, 3, 4}}, {{3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 4, 3, 3, 3, 3, 3}}));
+ uniformTCD<float>({{1, 2, 3, 4}}, {{3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 4, 3, 3, 3, 3, 3}}));
_context->setBackends({"cpu"});
SUCCEED();
std::vector<float> padding_value_data{3.0};
uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
int padding_value =
- cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
+ cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
std::vector<float> padding_value_data{3.0};
uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
int padding_value =
- cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
+ cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
std::vector<float> padding_value_data{3.0};
uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
int padding_value =
- cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
+ cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
int out = cgen.addTensor({{2, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
cgen.setInputsAndOutputs({in}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- TestCaseData{}
- .addInput<float>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18})
- .addOutput<int32_t>({4}));
+ TestCaseData{}
+ .addInput<float>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18})
+ .addOutput<int32_t>({4}));
_context->setBackends({"cpu"});
SUCCEED();
cgen.addOperatorRank({{in}, {out}});
cgen.setInputsAndOutputs({in}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<int32_t>(
- {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}}, {{4}}));
+ _context->addTestCase(
+ uniformTCD<int32_t>({{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}}, {{4}}));
_context->setBackends({"cpu"});
SUCCEED();
cgen.setInputsAndOutputs({in}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>(
- {{1, 1, 2, 2}}, {{1, 1, 1, 1.666666667, 1.666666667, 1.666666667, 2, 2, 2}}));
+ _context->addTestCase(
+ uniformTCD<float>({{1, 1, 2, 2}}, {{1, 1, 1, 1.666666667, 1.666666667, 1.666666667, 2, 2, 2}}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- uniformTCD<float>({{3, 4, 6, 10, 9, 10, 12, 16}},
- {{3, 4, 3, 4, 6, 10, 3, 4, 3, 4, 6, 10, 9, 10, 9, 10, 12, 16}}));
+ uniformTCD<float>({{3, 4, 6, 10, 9, 10, 12, 16}},
+ {{3, 4, 3, 4, 6, 10, 3, 4, 3, 4, 6, 10, 9, 10, 9, 10, 12, 16}}));
_context->setBackends({"acl_cl"});
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->setBackends({"acl_cl", "cpu"});
_context->addTestCase(uniformTCD<float>(
- {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}},
- {{5, 6, 3, 4, 1, 2, 11, 12, 9, 10, 7, 8, 17, 18, 15, 16, 13, 14, 23, 24, 21, 22, 19, 20}}));
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}},
+ {{5, 6, 3, 4, 1, 2, 11, 12, 9, 10, 7, 8, 17, 18, 15, 16, 13, 14, 23, 24, 21, 22, 19, 20}}));
SUCCEED();
}
cgen.setInputsAndOutputs({in}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- TestCaseData{}
- .addInput<float>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18})
- .addOutput<int32_t>({1, 3, 3, 2}));
+ TestCaseData{}
+ .addInput<float>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18})
+ .addOutput<int32_t>({1, 3, 3, 2}));
_context->setBackends({"cpu"});
SUCCEED();
cgen.setInputsAndOutputs({in}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- TestCaseData{}
- .addInput<float>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18})
- .addOutput<int64_t>({1, 3, 3, 2}));
+ TestCaseData{}
+ .addInput<float>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18})
+ .addOutput<int64_t>({1, 3, 3, 2}));
_context->setBackends({"cpu"});
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- uniformTCD<float>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6}, {3, 4, 7, 8}}));
+ uniformTCD<float>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6}, {3, 4, 7, 8}}));
_context->setBackends({"cpu", "acl_cl", "acl_neon"});
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(TestCaseData{}
- .addInput<int32_t>({1})
- .addInput<float>({1, 2, 3, 4, 5, 6, 7, 8})
- .addOutput<float>({1, 2, 5, 6})
- .addOutput<float>({3, 4, 7, 8}));
+ .addInput<int32_t>({1})
+ .addInput<float>({1, 2, 3, 4, 5, 6, 7, 8})
+ .addOutput<float>({1, 2, 5, 6})
+ .addOutput<float>({3, 4, 7, 8}));
_context->setBackends({"cpu"});
SUCCEED();
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+CircleGen genSimpleSqrtModel(circle::TensorType type)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, type});
+ cgen.addOperatorSqrt({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen;
+}
+
+TEST_F(GenModelTest, OneOp_Sqrt_f32)
+{
+ CircleGen cgen = genSimpleSqrtModel(circle::TensorType::TensorType_FLOAT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<float>({1, 4, 9, 16}).addOutput<float>({1, 2, 3, 4}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Sqrt_i32)
+{
+ CircleGen cgen = genSimpleSqrtModel(circle::TensorType::TensorType_INT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}.addInput<int>({1, 4, 9, 16}).addOutput<float>({1, 2, 3, 4}));
+ _context->setBackends({"cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+CircleGen genSimpleSquareModel(circle::TensorType type)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, type});
+ cgen.addOperatorSquare({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen;
+}
+
+TEST_F(GenModelTest, OneOp_Square_f32)
+{
+ CircleGen cgen = genSimpleSquareModel(circle::TensorType::TensorType_FLOAT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<float>({1, 2, 3, 4}).addOutput<float>({1, 4, 9, 16}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Square_i32)
+{
+ CircleGen cgen = genSimpleSquareModel(circle::TensorType::TensorType_INT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}.addInput<int>({1, 2, 3, 4}).addOutput<float>({1, 4, 9, 16}));
+ _context->setBackends({"cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
- uniformTCD<float>({{1, 2, 3, 4, 5, 6}}, {{1, 2, 3, 1, 2, 3, 4, 5, 6, 4, 5, 6}}));
+ uniformTCD<float>({{1, 2, 3, 4, 5, 6}}, {{1, 2, 3, 1, 2, 3, 4, 5, 6, 4, 5, 6}}));
_context->setBackends({"cpu"});
SUCCEED();
cgen.setInputsAndOutputs({in}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>(
- {{11, 12, 13, 21, 22, 23}},
- {{11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23,
- 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23}}));
+ _context->addTestCase(
+ uniformTCD<float>({{11, 12, 13, 21, 22, 23}},
+ {{11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23,
+ 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23}}));
_context->setBackends({"cpu"});
SUCCEED();
cgen.setInputsAndOutputs({in, multiplies}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(TestCaseData{}
- .addInput<float>({11, 12, 13, 21, 22, 23})
- .addInput<int32_t>({2, 3, 1})
- .addOutput<float>({11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23,
- 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23,
- 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23}));
+ _context->addTestCase(
+ TestCaseData{}
+ .addInput<float>({11, 12, 13, 21, 22, 23})
+ .addInput<int32_t>({2, 3, 1})
+ .addOutput<float>({11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23,
+ 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23, 11, 12, 13, 21, 22, 23}));
_context->setBackends({"cpu"});
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(TestCaseData{}
- .addInput<float>({1, 2, 3, 4, 5, 6})
- .addInput<int32_t>({1, 2})
- .addOutput<float>({1, 2, 3, 1, 2, 3, 4, 5, 6, 4, 5, 6}));
+ .addInput<float>({1, 2, 3, 4, 5, 6})
+ .addInput<int32_t>({1, 2})
+ .addOutput<float>({1, 2, 3, 1, 2, 3, 4, 5, 6, 4, 5, 6}));
_context->setBackends({"cpu"});
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(uniformTCD<float>(
- {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
- 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
- 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
- 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
- 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
- 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119}},
- {{0, 1, 2, 3, 4, 20, 21, 22, 23, 24, 40, 41, 42, 43, 44, 60, 61, 62,
- 63, 64, 80, 81, 82, 83, 84, 100, 101, 102, 103, 104, 5, 6, 7, 8, 9, 25,
- 26, 27, 28, 29, 45, 46, 47, 48, 49, 65, 66, 67, 68, 69, 85, 86, 87, 88,
- 89, 105, 106, 107, 108, 109, 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51,
- 52, 53, 54, 70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114,
- 15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55, 56, 57, 58, 59, 75, 76, 77,
- 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119}}));
+ {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+ 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
+ 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
+ 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
+ 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119}},
+ {{0, 1, 2, 3, 4, 20, 21, 22, 23, 24, 40, 41, 42, 43, 44, 60, 61, 62, 63, 64,
+ 80, 81, 82, 83, 84, 100, 101, 102, 103, 104, 5, 6, 7, 8, 9, 25, 26, 27, 28, 29,
+ 45, 46, 47, 48, 49, 65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 106, 107, 108, 109,
+ 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51, 52, 53, 54, 70, 71, 72, 73, 74,
+ 90, 91, 92, 93, 94, 110, 111, 112, 113, 114, 15, 16, 17, 18, 19, 35, 36, 37, 38, 39,
+ 55, 56, 57, 58, 59, 75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119}}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(TestCaseData{}
- .addInput<float>({1, 2, 3, 4, 5, 6})
- .addInput<int32_t>({0, 2, 1, 3})
- .addOutput<float>({1, 4, 2, 5, 3, 6}));
+ .addInput<float>({1, 2, 3, 4, 5, 6})
+ .addInput<int32_t>({0, 2, 1, 3})
+ .addOutput<float>({1, 4, 2, 5, 3, 6}));
_context->setBackends({"cpu"});
SUCCEED();
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(TestCaseData{}
- .addInput<float>({1, 2, 3, 4, 5, 6})
- .addInput<int32_t>({})
- .addOutput<float>({1, 4, 2, 5, 3, 6}));
+ .addInput<float>({1, 2, 3, 4, 5, 6})
+ .addInput<int32_t>({})
+ .addOutput<float>({1, 4, 2, 5, 3, 6}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
$RUN_TEST_SH --driverbin=$BENCHMARK_DRIVER_BIN $MODEL > $LOG_FILE 2>&1
RET=$?
if [[ $RET -ne 0 ]]; then
- echo "Profiling $MODEL aborted in run#$j... exit code: $RET"xX
+ echo "Profiling $MODEL aborted in run#$j... exit code: $RET"
exit $RET
fi
echo "finished"
$RUN_TEST_SH --driverbin=$BENCHMARK_DRIVER_BIN $MODEL > $LOG_FILE 2>&1
RET=$?
if [[ $RET -ne 0 ]]; then
- echo "Profiling $MODEL aborted in run#$j... exit code: $RET"xX
+ echo "Profiling $MODEL aborted in run#$j... exit code: $RET"
exit $RET
fi
echo "finished"
$SHELL_CMD $RUN_TEST_SH --driverbin=$BENCHMARK_DRIVER_BIN $MODEL > $LOG_FILE 2>&1
RET=$?
if [[ $RET -ne 0 ]]; then
- echo "Profiling $MODEL aborted in run#$j... exit code: $RET"xX
+ echo "Profiling $MODEL aborted in run#$j... exit code: $RET"
exit $RET
fi
echo "finished"
~Allocation() { free(data_); }
void *data() const { return data_; }
void *alloc(uint64_t sz) { return data_ = malloc(sz); }
+
private:
void *data_;
};
data_set.read(inputs[i].data(), H5::PredType::NATIVE_UINT8);
else
throw std::runtime_error(
- "model input type is qasymm8, bool or uint8. But h5 data type is different.");
+ "model input type is qasymm8, bool or uint8. But h5 data type is different.");
break;
default:
throw std::runtime_error("nnpkg_run can load f32, i32, qasymm8, bool and uint8.");
case NNFW_TYPE_TENSOR_FLOAT32:
{
H5::DataSet data_set =
- value_group.createDataSet(std::to_string(i), H5::PredType::IEEE_F32BE, data_space);
+ value_group.createDataSet(std::to_string(i), H5::PredType::IEEE_F32BE, data_space);
data_set.write(outputs[i].data(), H5::PredType::NATIVE_FLOAT);
break;
}
case NNFW_TYPE_TENSOR_INT32:
{
H5::DataSet data_set =
- value_group.createDataSet(std::to_string(i), H5::PredType::STD_I32LE, data_space);
+ value_group.createDataSet(std::to_string(i), H5::PredType::STD_I32LE, data_space);
data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT32);
break;
}
case NNFW_TYPE_TENSOR_INT64:
{
H5::DataSet data_set =
- value_group.createDataSet(std::to_string(i), H5::PredType::STD_I64LE, data_space);
+ value_group.createDataSet(std::to_string(i), H5::PredType::STD_I64LE, data_space);
data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT64);
break;
}
case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
{
H5::DataSet data_set =
- value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8BE, data_space);
+ value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8BE, data_space);
data_set.write(outputs[i].data(), H5::PredType::NATIVE_UINT8);
break;
}
case NNFW_TYPE_TENSOR_BOOL:
{
H5::DataSet data_set =
- value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8LE, data_space);
+ value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8LE, data_space);
data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
break;
}
uint64_t bufsize_for(const nnfw_tensorinfo *ti)
{
static int elmsize[] = {
- sizeof(float), /* NNFW_TYPE_TENSOR_FLOAT32 */
- sizeof(int), /* NNFW_TYPE_TENSOR_INT32 */
- sizeof(uint8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM */
- sizeof(bool), /* NNFW_TYPE_TENSOR_BOOL = 3 */
- sizeof(uint8_t), /* NNFW_TYPE_TENSOR_UINT8 = 4 */
- sizeof(int64_t), /* NNFW_TYPE_TENSOR_INT64 = 5 */
+ sizeof(float), /* NNFW_TYPE_TENSOR_FLOAT32 */
+ sizeof(int), /* NNFW_TYPE_TENSOR_INT32 */
+ sizeof(uint8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM */
+ sizeof(bool), /* NNFW_TYPE_TENSOR_BOOL = 3 */
+ sizeof(uint8_t), /* NNFW_TYPE_TENSOR_UINT8 = 4 */
+ sizeof(int64_t), /* NNFW_TYPE_TENSOR_INT64 = 5 */
};
return elmsize[ti->dtype] * num_elems(ti);
// TODO Apply verbose level to phases
const int verbose = args.getVerboseLevel();
benchmark::Phases phases(
- benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
+ benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
nnfw_session *session = nullptr;
NNPR_ENSURE_STATUS(nnfw_create_session(&session));
}
outputs[i].alloc(output_size_in_bytes);
NNPR_ENSURE_STATUS(
- nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes));
+ nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes));
NNPR_ENSURE_STATUS(nnfw_set_output_layout(session, i, NNFW_LAYOUT_CHANNELS_LAST));
}
// only warmup.
if (verbose == 0)
{
- phases.run("WARMUP",
- [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
- args.getWarmupRuns());
- phases.run("EXECUTE",
- [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
- args.getNumRuns(), true);
+ phases.run(
+ "WARMUP",
+ [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
+ args.getWarmupRuns());
+ phases.run(
+ "EXECUTE",
+ [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
+ args.getNumRuns(), true);
}
else
{
- phases.run("WARMUP",
- [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
- [&](const benchmark::Phase &phase, uint32_t nth) {
- std::cout << "... "
- << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
- << std::endl;
- },
- args.getWarmupRuns());
- phases.run("EXECUTE",
- [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
- [&](const benchmark::Phase &phase, uint32_t nth) {
- std::cout << "... "
- << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
- << std::endl;
- },
- args.getNumRuns(), true);
+ phases.run(
+ "WARMUP",
+ [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
+ [&](const benchmark::Phase &phase, uint32_t nth) {
+ std::cout << "... "
+ << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
+ << std::endl;
+ },
+ args.getWarmupRuns());
+ phases.run(
+ "EXECUTE",
+ [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
+ [&](const benchmark::Phase &phase, uint32_t nth) {
+ std::cout << "... "
+ << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
+ << std::endl;
+ },
+ args.getNumRuns(), true);
}
#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
std::exit(-1);
}
NNPR_ENSURE_STATUS(
- nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), input_size_in_bytes));
+ nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), input_size_in_bytes));
NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
}
};
add_executable(tflite_loader_test_tool ${SOURCES})
target_include_directories(tflite_loader_test_tool PRIVATE ${Boost_INCLUDE_DIRS})
-target_link_libraries(tflite_loader_test_tool onert_core onert tflite_loader)
+target_link_libraries(tflite_loader_test_tool nnfw-dev)
target_link_libraries(tflite_loader_test_tool nnfw_lib_tflite nnfw_lib_misc)
target_link_libraries(tflite_loader_test_tool ${Boost_PROGRAM_OPTIONS_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY})
* limitations under the License.
*/
-#include "tflite/ext/kernels/register.h"
-
#include "args.h"
-#include "tflite/InterpreterSession.h"
-#include "tflite/Assert.h"
-#include "tflite/Diff.h"
-#include "misc/tensor/IndexIterator.h"
-#include <iostream>
-#include <fstream>
+#include <nnfw_experimental.h>
+#include <nnfw_internal.h>
-#include "compiler/Compiler.h"
-#include "exec/Execution.h"
-#include "ir/Graph.h"
+#include <misc/EnvVar.h>
+#include <misc/RandomGenerator.h>
-#include "tflite_loader.h"
+#include <tflite/Assert.h>
+#include <tflite/InterpreterSession.h>
+#include <tflite/ext/kernels/register.h>
+#include <iostream>
+#include <fstream>
#include <memory>
const int RUN_FAILED = 1;
const int FILE_ERROR = 2;
const float DIFFERENCE_THRESHOLD = 10e-5;
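+// Aborts with the given message whenever an nnfw API call does not return NNFW_STATUS_NO_ERROR.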
+#define NNFW_ASSERT_FAIL(expr, msg) \
+ if ((expr) != NNFW_STATUS_NO_ERROR) \
+ { \
+ std::cerr << msg << std::endl; \
+ exit(-1); \
+ }
+
// Read vector of floats from selected file
-std::vector<float> readData(const string &path)
+void readData(const string &path, std::vector<uint8_t> &dest)
{
std::ifstream in(path);
if (!in.good())
in.seekg(0, std::ifstream::end);
size_t len = in.tellg();
in.seekg(0, std::ifstream::beg);
- assert(len % sizeof(float) == 0);
- size_t size = len / sizeof(float);
- std::vector<float> vec(size);
- for (size_t i = 0; i < size; ++i)
+
+ assert(dest.size() == len);
+ in.read(reinterpret_cast<char *>(dest.data()), len);
+}
+
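+// Fills the destination byte buffer with random values of element type T drawn from the shared RandomGenerator.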
+template <typename T>
+void randomData(nnfw::misc::RandomGenerator &randgen, std::vector<uint8_t> &dest)
+{
+ size_t elements = dest.size() / sizeof(T);
+ assert(dest.size() % sizeof(T) == 0);
+
+ std::vector<T> vec(elements);
+ for (uint64_t i = 0; i < elements; i++)
{
- in.read(reinterpret_cast<char *>(&vec[i]), sizeof(float));
+ vec[i] = randgen.generate<T>();
}
- return vec;
+ memcpy(dest.data(), vec.data(), elements * sizeof(T));
}
-std::vector<float> randomData(nnfw::misc::RandomGenerator &randgen, const uint64_t size)
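+// Bool tensors use one byte per element, so write an explicit 0 or 1 rather than a raw random byte.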
+void randomBoolData(nnfw::misc::RandomGenerator &randgen, std::vector<uint8_t> &dest)
{
- std::vector<float> vec(size);
- for (uint64_t i = 0; i < size; i++)
+ size_t elements = dest.size();
+ std::vector<uint8_t> vec(elements);
+ for (uint64_t i = 0; i < elements; i++)
{
- vec[i] = randgen.generate<float>();
+ bool value = randgen.generate<bool>();
+ dest[i] = value ? 1 : 0;
}
- return vec;
}
-void executeGraph(const std::shared_ptr<onert::ir::Graph> &g,
- const std::vector<std::vector<float>> &inputs,
- std::vector<std::vector<float>> &outputs)
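+// Returns the total number of elements described by a tensor info, i.e. the product of all dimensions.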
+inline uint64_t num_elems(const nnfw_tensorinfo *ti)
{
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, g);
- auto compiler = new onert::compiler::Compiler(subgs);
- std::shared_ptr<onert::exec::ExecutorMap> executors;
- // Compilation
- try
+ uint64_t n = 1;
+ for (uint32_t i = 0; i < ti->rank; ++i)
{
- executors = compiler->compile();
+ n *= ti->dims[i];
}
- catch (const std::exception &e)
+ return n;
+}
+
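+// Size in bytes of one element for each NNFW tensor type (bool and the 8-bit types are 1 byte).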
+inline size_t sizeOfNnfwType(NNFW_TYPE type)
+{
+ switch (type)
{
- std::cerr << "[Execution] Can't compile model" << std::endl;
- std::cerr << e.what() << std::endl;
- exit(-1);
+ case NNFW_TYPE_TENSOR_BOOL:
+ case NNFW_TYPE_TENSOR_UINT8:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ return 1;
+ case NNFW_TYPE_TENSOR_FLOAT32:
+ case NNFW_TYPE_TENSOR_INT32:
+ return 4;
+ case NNFW_TYPE_TENSOR_INT64:
+ return 8;
+ default:
+ throw std::runtime_error{"Invalid tensor type"};
}
+}
- std::cout << "[Execution] Graph compiled!" << std::endl;
-
- auto execution = std::make_shared<onert::exec::Execution>(executors);
-
- // Setting IO
- try
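+// Element-wise exact comparison between the TFLite reference buffer and the onert output;
+// logs every mismatching element and returns true only when all elements match.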
+template <typename T>
+bool compareBuffersExact(const T *ref_buf, const std::vector<uint8_t> &act_buf, uint32_t index)
+{
+ bool match = true;
+ for (uint32_t e = 0; e < act_buf.size() / sizeof(T); e++)
{
- // Verify input shapes
- auto num_inputs = inputs.size();
- for (size_t i = 0; i < num_inputs; i++)
- {
- auto input_operand_idx = g->getInputs().at(i);
- auto input_shape = g->operands().at(input_operand_idx).shape();
- assert(inputs[i].size() == input_shape.num_elements());
- }
+ T ref = ref_buf[e];
+ T act = reinterpret_cast<const T *>(act_buf.data())[e];
- // Set output shapes
- auto num_outputs = g->getOutputs().size();
- outputs.resize(num_outputs);
- for (uint32_t i = 0; i < num_outputs; i++)
+ if (ref != act)
{
- auto output_operand_idx = g->getOutputs().at(i);
- auto output_shape = g->operands().at(output_operand_idx).shape();
- outputs[i].resize(output_shape.num_elements());
+ std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref
+ << ", act: " << act << std::endl;
+ match = false;
}
-
- for (size_t i = 0; i < num_inputs; i++)
- execution->setInput(onert::ir::IOIndex(i), inputs[i].data(),
- inputs[i].size() * sizeof(float));
- for (uint32_t i = 0; i < num_outputs; i++)
- execution->setOutput(onert::ir::IOIndex(i), outputs[i].data(),
- outputs[i].size() * sizeof(float));
- }
- catch (const std::exception &e)
- {
- std::cerr << "[Execution] Can't set model IO" << std::endl;
- std::cerr << e.what() << '\n';
- exit(-1);
}
- try
- {
- execution->execute();
- }
- catch (const std::exception &e)
+ return match;
+}
+
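+// Bool outputs are compared after normalizing each byte, so any non-zero value on either side counts as true.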
+bool compareBuffersExactBool(const uint8_t *ref_buf, const std::vector<uint8_t> &act_buf,
+ uint32_t index)
+{
+ bool match = true;
+ for (uint32_t e = 0; e < act_buf.size() / sizeof(uint8_t); e++)
{
- std::cerr << "[Execution] Can't execute" << std::endl;
- std::cerr << e.what() << '\n';
- exit(-1);
+ uint8_t ref_raw = ref_buf[e];
+ bool ref = (ref_raw != 0 ? true : false);
+ uint8_t act_raw = reinterpret_cast<const uint8_t *>(act_buf.data())[e];
+ bool act = (act_raw != 0 ? true : false);
+ if (ref != act)
+ {
+ std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref
+ << ", act: " << act << std::endl;
+ match = false;
+ }
}
- std::cout << "[Execution] Done!" << std::endl;
-
- delete compiler;
+ return match;
}
int main(const int argc, char **argv)
}
std::cout << "[Execution] Stage start!" << std::endl;
- std::shared_ptr<onert::ir::Graph> test_graph;
// Loading
- try
+ nnfw_session *onert_session = nullptr;
+ NNFW_ASSERT_FAIL(nnfw_create_session(&onert_session), "[ ERROR ] Failure during session creation");
+ if (onert_session == nullptr)
{
- test_graph =
- onert::tflite_loader::loadModel(tflite_file.c_str())->at(onert::ir::SubgraphIndex{0});
- }
- catch (std::exception &e)
- {
- std::cerr << "[ ERROR ] "
- << "Failure during model load" << std::endl;
- std::cerr << e.what() << std::endl;
+ std::cerr << "[ ERROR ] Failure to open session" << std::endl;
exit(-1);
}
- // TODO: Support another input/output types
- for (const auto &input_idx : test_graph->getInputs())
- {
- const auto input_type = test_graph->operands().at(input_idx).typeInfo().type();
- assert(input_type == onert::ir::DataType::FLOAT32 && "Only FLOAT32 inputs are supported");
- }
- for (const auto &output_idx : test_graph->getOutputs())
- {
- const auto output_type = test_graph->operands().at(output_idx).typeInfo().type();
- assert(output_type == onert::ir::DataType::FLOAT32 && "Only FLOAT32 outputs are supported");
- }
+ NNFW_ASSERT_FAIL(nnfw_load_model_from_modelfile(onert_session, tflite_file.c_str()),
+ "[ ERROR ] Failure during model load");
+
+ uint32_t num_inputs;
+ uint32_t num_outputs;
+ NNFW_ASSERT_FAIL(nnfw_input_size(onert_session, &num_inputs),
+ "[ ERROR ] Failure during get model inputs");
+ NNFW_ASSERT_FAIL(nnfw_output_size(onert_session, &num_outputs),
+ "[ ERROR ] Failure during get model outputs");
std::cout << "[Execution] Model is deserialized!" << std::endl;
- auto num_inputs = test_graph->getInputs().size();
- std::vector<std::vector<float>> inputs(num_inputs);
+
+ // Compile
+ NNFW_ASSERT_FAIL(nnfw_prepare(onert_session), "[ ERROR ] Failure during model prepare");
+
+ std::cout << "[Execution] Model compiled!" << std::endl;
+
+ // Prepare input/output data
+ std::vector<std::vector<uint8_t>> inputs(num_inputs);
+ std::vector<std::vector<uint8_t>> outputs(num_outputs);
+
bool generate_data = data_files.empty();
bool read_data = data_files.size() == num_inputs;
- if (num_inputs == 0)
- {
- std::cerr << "[ ERROR ] "
- << "No inputs in model => execution is not possible" << std::endl;
- exit(1);
- }
if (!generate_data && !read_data)
{
std::cerr << "[ ERROR ] "
const int seed = 1; /* TODO Add an option for seed value */
nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
- try
+
+ for (uint32_t i = 0; i < num_inputs; i++)
{
- for (uint32_t i = 0; i < num_inputs; i++)
+ nnfw_tensorinfo ti_input;
+ NNFW_ASSERT_FAIL(nnfw_input_tensorinfo(onert_session, i, &ti_input),
+ "[ ERROR ] Failure during get input data info");
+ size_t input_size = num_elems(&ti_input) * sizeOfNnfwType(ti_input.dtype);
+
+ inputs[i].resize(input_size);
+
+ if (generate_data)
{
- if (generate_data)
+ switch (ti_input.dtype)
{
- uint64_t sz =
- test_graph->operands().at(test_graph->getInputs().at(i)).shape().num_elements();
- inputs[i] = randomData(randgen, sz);
+ case NNFW_TYPE_TENSOR_BOOL:
+ randomBoolData(randgen, inputs[i]);
+ break;
+ case NNFW_TYPE_TENSOR_UINT8:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+ randomData<uint8_t>(randgen, inputs[i]);
+ break;
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ randomData<int8_t>(randgen, inputs[i]);
+ break;
+ case NNFW_TYPE_TENSOR_FLOAT32:
+ randomData<float>(randgen, inputs[i]);
+ break;
+ case NNFW_TYPE_TENSOR_INT32:
+ randomData<int32_t>(randgen, inputs[i]);
+ break;
+ case NNFW_TYPE_TENSOR_INT64:
+ randomData<uint64_t>(randgen, inputs[i]);
+ break;
+ default:
+ std::cerr << "[ ERROR ] "
+ << "Unsupported input data type" << std::endl;
+ exit(-1);
+ break;
}
- else /* read_data */
- inputs[i] = readData(data_files[i]);
}
+ else /* read_data */
+ readData(data_files[i], inputs[i]);
+
+ NNFW_ASSERT_FAIL(nnfw_set_input(onert_session, i, ti_input.dtype, inputs[i].data(), input_size),
+ "[ ERROR ] Failure to set input tensor buffer");
}
- catch (std::exception &e)
+
+ std::cout << "[Execution] Input data is defined!" << std::endl;
+
+ for (uint32_t i = 0; i < num_outputs; i++)
{
- std::cerr << "[ ERROR ] "
- << "Failure during input data generation" << std::endl;
- std::cerr << e.what() << std::endl;
- exit(-1);
+ nnfw_tensorinfo ti_output;
+ NNFW_ASSERT_FAIL(nnfw_output_tensorinfo(onert_session, i, &ti_output),
+ "[ ERROR ] Failure during get output tensor info");
+
+ uint64_t output_elements = num_elems(&ti_output);
+ size_t output_size = output_elements * sizeOfNnfwType(ti_output.dtype);
+ outputs[i].resize(output_size);
+
+ NNFW_ASSERT_FAIL(
+ nnfw_set_output(onert_session, i, ti_output.dtype, outputs[i].data(), output_size),
+ "[ ERROR ] Failure to set output tensor buffer");
}
- std::cout << "[Execution] Input data is defined!" << std::endl;
- std::vector<std::vector<float>> outputs;
- // Run graph
- executeGraph(test_graph, inputs, outputs);
+ // Execute
+ NNFW_ASSERT_FAIL(nnfw_run(onert_session), "[Execution] Can't execute");
+
+ std::cout << "[Execution] Done!" << std::endl;
+
// Compare with tflite
std::cout << "[Comparison] Stage start!" << std::endl;
// Read tflite model
std::cerr << e.what() << std::endl;
exit(FILE_ERROR);
}
- interpreter->SetNumThreads(2);
+ interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(-1));
auto sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get());
sess->prepare();
for (uint32_t i = 0; i < num_inputs; i++)
{
auto input_tensor = interpreter->tensor(interpreter->inputs().at(i));
- memcpy(input_tensor->data.f, inputs[i].data(), inputs[i].size() * sizeof(float));
+ memcpy(input_tensor->data.uint8, inputs[i].data(), inputs[i].size());
}
if (!sess->run())
{
std::cout << "[Comparison] TFLite run done!" << std::endl;
// Calculate max difference over all outputs
- float max_difference = 0.0f;
- auto num_outputs = test_graph->getOutputs().size();
+ float max_float_difference = 0.0f;
+ bool find_unmatched_output = false;
+
for (uint32_t out_idx = 0; out_idx < num_outputs; out_idx++)
{
- const auto &tflite_output_tensor = interpreter->tensor(interpreter->outputs().at(out_idx));
- const auto &nnfw_output_tensor = outputs[out_idx];
-
- if (nnfw_output_tensor.size() != tflite_output_tensor->bytes / sizeof(float))
- std::cout << "[Comparison] Different size of outputs!" << std::endl;
- // Check max difference
- float *tflite_out_ptr = tflite_output_tensor->data.f;
- for (const auto &nnfw_out : nnfw_output_tensor)
- {
- if (std::abs(nnfw_out - *tflite_out_ptr) > max_difference)
- max_difference = std::abs(nnfw_out - *tflite_out_ptr);
+ nnfw_tensorinfo ti;
+ nnfw_output_tensorinfo(onert_session, out_idx, &ti);
+
+ bool matched = true;
+ // Check output tensor values
+
+ const auto &ref_output = interpreter->tensor(interpreter->outputs().at(out_idx))->data;
+ const auto &output = outputs[out_idx];
- tflite_out_ptr++;
+ switch (ti.dtype)
+ {
+ case NNFW_TYPE_TENSOR_BOOL:
+ matched = compareBuffersExactBool(ref_output.uint8, output, out_idx);
+ break;
+ case NNFW_TYPE_TENSOR_UINT8:
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+ matched = compareBuffersExact<uint8_t>(ref_output.uint8, output, out_idx);
+ break;
+ case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+ matched = compareBuffersExact<int8_t>(ref_output.int8, output, out_idx);
+ break;
+ case NNFW_TYPE_TENSOR_INT32:
+ matched = compareBuffersExact<int32_t>(ref_output.i32, output, out_idx);
+ break;
+ case NNFW_TYPE_TENSOR_FLOAT32:
+ // TODO better way for handling FP error?
+ for (uint32_t e = 0; e < num_elems(&ti); e++)
+ {
+ float refval = ref_output.f[e];
+ float val = reinterpret_cast<const float *>(output.data())[e];
+ if (std::abs(refval - val) > max_float_difference)
+ max_float_difference = std::abs(refval - val);
+
+ if (max_float_difference > DIFFERENCE_THRESHOLD)
+ matched = false;
+ }
+ break;
+ case NNFW_TYPE_TENSOR_INT64:
+ matched = compareBuffersExact<int64_t>(ref_output.i64, output, out_idx);
+ break;
+ default:
+ throw std::runtime_error{"Invalid tensor type"};
}
+
+ if (!matched)
+ find_unmatched_output = true;
}
// Print results
- std::cout << "[Comparison] Max difference: " << max_difference << std::endl;
+ std::cout << "[Comparison] Max float difference: " << max_float_difference << std::endl;
int ret = 0;
- if (max_difference > DIFFERENCE_THRESHOLD)
+ if (find_unmatched_output)
{
- std::cout << "[Comparison] Outputs is not equal!" << std::endl;
+ std::cout << "[Comparison] Outputs are not equal!" << std::endl;
+ if (max_float_difference > DIFFERENCE_THRESHOLD)
+ {
+ std::cout << "[Comparison] Float outputs are not equal!" << std::endl;
+ }
ret = 1;
}
else
}
std::cout << "[Comparison] Done!" << std::endl;
+ nnfw_close_session(onert_session);
+
return ret;
}
#include "bin_image.h"
BinImage::BinImage(unsigned int width, unsigned int height, unsigned int channels)
- : _width(width), _height(height), _channels(channels)
+ : _width(width), _height(height), _channels(channels)
{
}
{
TensorLoader::TensorLoader(tflite::Interpreter &interpreter)
- : _interpreter(interpreter), _raw_data(nullptr)
+ : _interpreter(interpreter), _raw_data(nullptr)
{
}
// TODO Apply verbose level to phases
const int verbose = args.getVerboseLevel();
benchmark::Phases phases(
- benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
+ benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
std::unique_ptr<FlatBufferModel> model;
std::unique_ptr<Interpreter> interpreter;
for (uint32_t axis = 0; axis < tensor->dims->size; axis++, offset++)
{
new_dim[axis] =
- ((offset < dim_values) ? args.getInputShapes()[offset] : tensor->dims->data[axis]);
+ ((offset < dim_values) ? args.getInputShapes()[offset] : tensor->dims->data[axis]);
}
interpreter->ResizeInputTensor(id, new_dim);
int32_t value = 0;
nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
- // Gather operation: index should be within input coverage.
- tensor_view.at(ind) = value;
- value++;
- };
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ // TODO Generate random values
+ // Gather operation: index should be within input coverage.
+ tensor_view.at(ind) = value;
+ value++;
+ };
}
else if (tensor->type == kTfLiteUInt8)
{
auto tensor_view = nnfw::tflite::TensorView<uint8_t>::make(*interpreter, o);
auto fp = static_cast<uint8_t (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<uint8_t>);
+ const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
+ &nnfw::misc::RandomGenerator::generate<uint8_t>);
const nnfw::misc::tensor::Object<uint8_t> data(tensor_view.shape(),
std::bind(fp, randgen, _1, _2));
nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
- tensor_view.at(ind) = value;
- };
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ const auto value = data.at(ind);
+ tensor_view.at(ind) = value;
+ };
}
else if (tensor->type == kTfLiteBool)
{
auto tensor_view = nnfw::tflite::TensorView<bool>::make(*interpreter, o);
auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<bool>);
+ const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
+ &nnfw::misc::RandomGenerator::generate<bool>);
const nnfw::misc::tensor::Object<bool> data(tensor_view.shape(),
std::bind(fp, randgen, _1, _2));
nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
- tensor_view.at(ind) = value;
- };
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ const auto value = data.at(ind);
+ tensor_view.at(ind) = value;
+ };
}
else
{
// only warmup.
if (verbose == 0)
{
- phases.run("WARMUP", [&](const benchmark::Phase &, uint32_t) { sess->run(); },
- args.getWarmupRuns());
- phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { sess->run(); },
- args.getNumRuns(), true);
+ phases.run(
+ "WARMUP", [&](const benchmark::Phase &, uint32_t) { sess->run(); }, args.getWarmupRuns());
+ phases.run(
+ "EXECUTE", [&](const benchmark::Phase &, uint32_t) { sess->run(); }, args.getNumRuns(), true);
}
else
{
- phases.run("WARMUP", [&](const benchmark::Phase &, uint32_t) { sess->run(); },
- [&](const benchmark::Phase &phase, uint32_t nth) {
- std::cout << "... "
- << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
- << std::endl;
- },
- args.getWarmupRuns());
- phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { sess->run(); },
- [&](const benchmark::Phase &phase, uint32_t nth) {
- std::cout << "... "
- << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
- << std::endl;
- },
- args.getNumRuns(), true);
+ phases.run(
+ "WARMUP", [&](const benchmark::Phase &, uint32_t) { sess->run(); },
+ [&](const benchmark::Phase &phase, uint32_t nth) {
+ std::cout << "... "
+ << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
+ << std::endl;
+ },
+ args.getWarmupRuns());
+ phases.run(
+ "EXECUTE", [&](const benchmark::Phase &, uint32_t) { sess->run(); },
+ [&](const benchmark::Phase &phase, uint32_t nth) {
+ std::cout << "... "
+ << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" << std::endl;
+ },
+ args.getNumRuns(), true);
}
sess->teardown();
// TODO Apply verbose level to phases
const int verbose = args.getVerboseLevel();
benchmark::Phases phases(
- benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
+ benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
std::unique_ptr<tflite::FlatBufferModel> model;
std::unique_ptr<tflite::Interpreter> interpreter;
}
else
{
- model = tflite::FlatBufferModel::BuildFromFile(args.getTFLiteFilename().c_str(),
- &error_reporter);
+ model =
+ tflite::FlatBufferModel::BuildFromFile(args.getTFLiteFilename().c_str(), &error_reporter);
}
if (model == nullptr)
{
int32_t value = 0;
nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
- // Gather operation: index should be within input coverage.
- tensor_view.at(ind) = value;
- value++;
- };
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ // TODO Generate random values
+ // Gather operation: index should be within input coverage.
+ tensor_view.at(ind) = value;
+ value++;
+ };
}
else if (tensor->type == kTfLiteUInt8)
{
uint8_t value = 0;
nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
- tensor_view.at(ind) = value;
- value = (value + 1) & 0xFF;
- };
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ // TODO Generate random values
+ tensor_view.at(ind) = value;
+ value = (value + 1) & 0xFF;
+ };
}
else if (tensor->type == kTfLiteBool)
{
auto tensor_view = TFLiteVanillaRun::TensorView<bool>::make(*interpreter, o);
auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<bool>);
+ const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
+ &nnfw::misc::RandomGenerator::generate<bool>);
const nnfw::misc::tensor::Object<bool> data(tensor_view.shape(),
std::bind(fp, randgen, _1, _2));
nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
- tensor_view.at(ind) = value;
- };
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ const auto value = data.at(ind);
+ tensor_view.at(ind) = value;
+ };
}
else
{
// only warmup.
if (verbose == 0)
{
- phases.run("WARMUP", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); },
- args.getWarmupRuns());
- phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); },
- args.getNumRuns(), true);
+ phases.run(
+ "WARMUP", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); },
+ args.getWarmupRuns());
+ phases.run(
+ "EXECUTE", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); },
+ args.getNumRuns(), true);
}
else
{
- phases.run("WARMUP", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); },
- [&](const benchmark::Phase &phase, uint32_t nth) {
- std::cout << "... "
- << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
- << std::endl;
- },
- args.getWarmupRuns());
- phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); },
- [&](const benchmark::Phase &phase, uint32_t nth) {
- std::cout << "... "
- << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
- << std::endl;
- },
- args.getNumRuns(), true);
+ phases.run(
+ "WARMUP", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); },
+ [&](const benchmark::Phase &phase, uint32_t nth) {
+ std::cout << "... "
+ << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
+ << std::endl;
+ },
+ args.getWarmupRuns());
+ phases.run(
+ "EXECUTE", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); },
+ [&](const benchmark::Phase &phase, uint32_t nth) {
+ std::cout << "... "
+ << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" << std::endl;
+ },
+ args.getNumRuns(), true);
}
std::cout << "output tensor indices = [";
--- /dev/null
+../.clang-format.8
\ No newline at end of file
# other development supports
__UbuntuPackages+=" ocl-icd-opencl-dev"
__UbuntuPackages+=" libhdf5-dev"
-__UbuntuBoostPackages=" llibboost-all-dev"
+__UbuntuBoostPackages=" libboost-all-dev"
# symlinks fixer
__UbuntuPackages+=" symlinks"
} // namespace
-#define NONIUS_LOCAL_BENCHMARK(name, ...) \
- namespace \
- { \
- static ::nonius::benchmark_registrar \
- NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, \
- __VA_ARGS__); \
+#define NONIUS_LOCAL_BENCHMARK(name, ...) \
+ namespace \
+ { \
+ static ::nonius::benchmark_registrar \
+ NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, __VA_ARGS__); \
}
NONIUS_LOCAL_BENCHMARK("CLDirectConvolutionLayer_NCHW", [](nonius::chronometer meter) {
} // namespace
-#define NONIUS_LOCAL_BENCHMARK(name, ...) \
- namespace \
- { \
- static ::nonius::benchmark_registrar \
- NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, \
- __VA_ARGS__); \
+#define NONIUS_LOCAL_BENCHMARK(name, ...) \
+ namespace \
+ { \
+ static ::nonius::benchmark_registrar \
+ NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, __VA_ARGS__); \
}
NONIUS_LOCAL_BENCHMARK("CLDeconvolutionLayer_NCHW", [](nonius::chronometer meter) {
} // namespace
-#define NONIUS_LOCAL_BENCHMARK(name, ...) \
- namespace \
- { \
- static ::nonius::benchmark_registrar \
- NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, \
- __VA_ARGS__); \
+#define NONIUS_LOCAL_BENCHMARK(name, ...) \
+ namespace \
+ { \
+ static ::nonius::benchmark_registrar \
+ NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, __VA_ARGS__); \
}
NONIUS_LOCAL_BENCHMARK("NEDirectConvolutionLayer_NCHW", [](nonius::chronometer meter) {
} // namespace
-#define NONIUS_LOCAL_BENCHMARK(name, ...) \
- namespace \
- { \
- static ::nonius::benchmark_registrar \
- NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, \
- __VA_ARGS__); \
+#define NONIUS_LOCAL_BENCHMARK(name, ...) \
+ namespace \
+ { \
+ static ::nonius::benchmark_registrar \
+ NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, __VA_ARGS__); \
}
NONIUS_LOCAL_BENCHMARK("NEDeconvolutionLayer_NCHW", [](nonius::chronometer meter) {
# See the License for the specific language governing permissions and
# limitations under the License.
-import os
import argparse
+from os.path import dirname, realpath, join
class Backend:
class KernelReporter(object):
def __init__(self, args):
- # TODO: Remove os defendency - '/'
- if args.base[0] != '/':
- self.onertBase = os.getcwd() + '/' + args.base
- else:
- self.onertBase = args.base
+ root_path = dirname(dirname(dirname(realpath(__file__))))
+ self.onertBase = join(root_path, "runtime", "onert")
if args.md5:
self.printMD5 = True
else:
default='cpu,acl_cl,acl_neon',
help="backend list to report (use comma)")
arg_parser.add_argument("--md5", action='store_true', help="Print for md5")
- arg_parser.add_argument("base", type=str, help="onert base directory")
args = arg_parser.parse_args()
report = KernelReporter(args)
+++ /dev/null
-if(NOT BUILD_NNAPI_QUICKCHECK)
- return()
-endif(NOT BUILD_NNAPI_QUICKCHECK)
-
-file(GLOB_RECURSE NNAPI_QUICKCHECK_LIB_SOURCES "lib/*.cpp")
-file(GLOB_RECURSE NNAPI_QUICKCHECK_LIB_TESTS "lib/*.test.cpp")
-list(REMOVE_ITEM NNAPI_QUICKCHECK_LIB_SOURCES ${NNAPI_QUICKCHECK_LIB_TESTS})
-
-add_library(nnapi_quickcheck_common ${NNAPI_QUICKCHECK_LIB_SOURCES})
-target_include_directories(nnapi_quickcheck_common PUBLIC "inc")
-target_link_libraries(nnapi_quickcheck_common nnfw_lib_misc)
-target_link_libraries(nnapi_quickcheck_common nnfw_lib_tflite)
-
-add_executable(nnapi_quickcheck_lib_env_test "lib/env.test.cpp")
-target_link_libraries(nnapi_quickcheck_lib_env_test nnapi_quickcheck_common)
-
-function(add_nnapi_quickcheck NAME)
- add_executable(nnapi_quickcheck_${NAME} "tests/${NAME}.cpp")
- nnfw_find_package(GTest)
- target_link_libraries(nnapi_quickcheck_${NAME} gtest gtest_main pthread)
- target_link_libraries(nnapi_quickcheck_${NAME} nnapi_quickcheck_common)
-endfunction(add_nnapi_quickcheck)
-
-add_nnapi_quickcheck(add_1)
-add_nnapi_quickcheck(add_2)
-add_nnapi_quickcheck(add_3)
-add_nnapi_quickcheck(add_4)
-add_nnapi_quickcheck(add_5)
-add_nnapi_quickcheck(add_6)
-add_nnapi_quickcheck(add_7)
-add_nnapi_quickcheck(add_8)
-add_nnapi_quickcheck(add_9)
-add_nnapi_quickcheck(add_quan_1)
-add_nnapi_quickcheck(div_1)
-add_nnapi_quickcheck(div_2)
-add_nnapi_quickcheck(sub_1)
-add_nnapi_quickcheck(sub_2)
-add_nnapi_quickcheck(sub_3)
-add_nnapi_quickcheck(sub_4)
-add_nnapi_quickcheck(sub_5)
-add_nnapi_quickcheck(sub_6)
-add_nnapi_quickcheck(mul_1)
-add_nnapi_quickcheck(mul_2)
-add_nnapi_quickcheck(mul_quan_1)
-add_nnapi_quickcheck(relu_1)
-add_nnapi_quickcheck(relu_quan_1)
-add_nnapi_quickcheck(relu_2)
-add_nnapi_quickcheck(relu_3)
-add_nnapi_quickcheck(relu6_1)
-add_nnapi_quickcheck(relu6_quan_1)
-add_nnapi_quickcheck(relu1_1)
-add_nnapi_quickcheck(conv_1)
-add_nnapi_quickcheck(conv_quan_1)
-add_nnapi_quickcheck(dconv_1)
-add_nnapi_quickcheck(dconv_quan_1)
-add_nnapi_quickcheck(max_pool_1)
-add_nnapi_quickcheck(max_pool_quan_1)
-add_nnapi_quickcheck(avg_pool_1)
-add_nnapi_quickcheck(avg_pool_quan_1)
-add_nnapi_quickcheck(concat_1)
-add_nnapi_quickcheck(concat_quan_1)
-add_nnapi_quickcheck(reshape_1)
-add_nnapi_quickcheck(reshape_quan_1)
-add_nnapi_quickcheck(fully_connected_1)
-add_nnapi_quickcheck(fully_connected_quan_1)
-add_nnapi_quickcheck(softmax_1)
-add_nnapi_quickcheck(softmax_2)
-add_nnapi_quickcheck(softmax_quan_1)
-add_nnapi_quickcheck(resize_bilinear_1)
-add_nnapi_quickcheck(topk_v2_1)
-add_nnapi_quickcheck(cast_1)
-add_nnapi_quickcheck(cast_q_to_f_1)
-add_nnapi_quickcheck(cast_2)
-add_nnapi_quickcheck(gather_1)
-add_nnapi_quickcheck(gather_2)
-add_nnapi_quickcheck(dequantize_1)
-add_nnapi_quickcheck(tanh_1)
-add_nnapi_quickcheck(logistic_quan_1)
-add_nnapi_quickcheck(split_1)
-add_nnapi_quickcheck(split_2)
-add_nnapi_quickcheck(split_3)
-add_nnapi_quickcheck(split_4)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ENV_UTILS_H__
-#define __ENV_UTILS_H__
-
-#include <string>
-
-#include <cstdint>
-
-class IntVar
-{
-public:
- IntVar(const std::string &name, int32_t value);
-
-public:
- int32_t operator()(void) const { return _value; }
-
-private:
- int32_t _value;
-};
-
-class FloatVar
-{
-public:
- FloatVar(const std::string &name, float value);
-
-public:
- float operator()(void) const { return _value; }
-
-private:
- float _value;
-};
-
-class StrVar
-{
-public:
- StrVar(const std::string &name, const std::string &value);
-
-public:
- const std::string &operator()(void) const { return _value; }
-
-private:
- std::string _value;
-};
-
-#endif // __ENV_UTILS_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "env.h"
-
-#include "misc/environment.h"
-
-//
-// Integer variable
-//
-IntVar::IntVar(const std::string &name, int32_t value) : _value{value}
-{
- nnfw::misc::env::IntAccessor{name}.access(_value);
-}
-
-//
-// Float variable
-//
-FloatVar::FloatVar(const std::string &name, float value) : _value{value}
-{
- nnfw::misc::env::FloatAccessor{name}.access(_value);
-}
-
-//
-// String variable
-//
-#include <cstdlib>
-
-StrVar::StrVar(const std::string &name, const std::string &value) : _value{value}
-{
- auto env = std::getenv(name.c_str());
-
- if (env)
- {
- _value = std::string{env};
- }
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_add_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "add_1.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT_N = RIGHT_N_Value();
- const int32_t RIGHT_C = RIGHT_C_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_N = std::max(LEFT_N, RIGHT_N);
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_N);
- PRINT_VALUE(RIGHT_C);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */,
- quantization);
-
- // Add Convolution Node
- //
- // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Add and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Left from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_C, 3)
-INT_VALUE(LEFT_H, 16)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT_N, 1)
-INT_VALUE(RIGHT_C, 3)
-INT_VALUE(RIGHT_H, 16)
-INT_VALUE(RIGHT_W, 16)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_add_2, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "add_2.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT_N = RIGHT_N_Value();
- const int32_t RIGHT_C = RIGHT_C_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_N = std::max(LEFT_N, RIGHT_N);
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_N);
- PRINT_VALUE(RIGHT_C);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Configure left data
- const uint32_t left_size = LEFT_N * LEFT_C * LEFT_H * LEFT_W;
- float left_data[left_size] = {
- 0.0f,
- };
-
- // Fill left data with random data
- {
- std::normal_distribution<float> left_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < left_size; ++off)
- {
- left_data[off++] = left_dist(random);
- }
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadOnly(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization,
- reinterpret_cast<const char *>(left_data),
- left_size * sizeof(float));
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */,
- quantization);
-
- // Add Convolution Node
- //
- // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Add and store the result into Tensor #0
- // - Read LHS from Tensor #1
- // - Read RHS from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1));
-
- interp.SetInputs({2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_C, 3)
-INT_VALUE(LEFT_H, 16)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT_N, 1)
-INT_VALUE(RIGHT_C, 3)
-INT_VALUE(RIGHT_H, 16)
-INT_VALUE(RIGHT_W, 16)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/TensorShapeUtils.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_add_3, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
-#define STR_VALUE(NAME, VALUE) StrVar NAME##_Value(#NAME, VALUE);
-#include "add_3.lst"
-#undef STR_VALUE
-
- const auto LHS_SHAPE = nnfw::misc::tensor::Shape::from(LHS_SHAPE_Value());
- const auto RHS_SHAPE = nnfw::misc::tensor::Shape::from(RHS_SHAPE_Value());
- const auto OUT_SHAPE = nnfw::tflite::broadcast(LHS_SHAPE, RHS_SHAPE);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LHS_SHAPE);
- PRINT_VALUE(RHS_SHAPE);
- PRINT_VALUE(OUT_SHAPE);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- using nnfw::tflite::as_dims;
-
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
-
- quantization.scale = 1;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- as_dims(OUT_SHAPE), quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- as_dims(LHS_SHAPE), quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- as_dims(RHS_SHAPE), quantization);
-
- // Add Convolution Node
- //
- // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Add and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Left from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = 0;
- param.tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(param.verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(param.tolerance);
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef STR_VALUE
-#error "STR_VALUE should be defined"
-#endif // STR_VALUE
-
-STR_VALUE(LHS_SHAPE, "1,3,16,16")
-STR_VALUE(RHS_SHAPE, "1,3,16,16")
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_add_4, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "add_4.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT_N = RIGHT_N_Value();
- const int32_t RIGHT_C = RIGHT_C_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_N = std::max(LEFT_N, RIGHT_N);
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_N);
- PRINT_VALUE(RIGHT_C);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */,
- quantization);
-
- // Add Convolution Node
- //
- // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Add and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Left from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_C, 2)
-INT_VALUE(LEFT_H, 16)
-INT_VALUE(LEFT_W, 8)
-
-INT_VALUE(RIGHT_N, 1)
-INT_VALUE(RIGHT_C, 2)
-INT_VALUE(RIGHT_H, 1)
-INT_VALUE(RIGHT_W, 8)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_add_5, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "add_5.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT = RIGHT_Value();
-
- const int32_t OFM_N = LEFT_N;
- const int32_t OFM_C = LEFT_C;
- const int32_t OFM_H = LEFT_H;
- const int32_t OFM_W = LEFT_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT} /* dims */, quantization);
-
- // Add Convolution Node
- //
- // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Add and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Left from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_C, 3)
-INT_VALUE(LEFT_H, 8)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT, 1)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_add_6, simple_test)
-{
- int verbose = 1;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "add_6.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT = RIGHT_Value();
-
- const int32_t OFM_H = LEFT_H;
- const int32_t OFM_W = LEFT_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_H, OFM_W} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_H, LEFT_W} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT} /* dims */, quantization);
-
- // Add Convolution Node
- //
- // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Add and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Left from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_H, 8)
-INT_VALUE(LEFT_W, 2)
-
-INT_VALUE(RIGHT, 1)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_add_7, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "add_7.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT_C = RIGHT_C_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_C = LEFT_C;
- const int32_t OFM_H = LEFT_H;
- const int32_t OFM_W = LEFT_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_C);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_H, RIGHT_W, RIGHT_C} /* dims */, quantization);
-
- // Add Convolution Node
- //
- // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Add and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Left from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_C, 3)
-INT_VALUE(LEFT_H, 8)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT_C, 3)
-INT_VALUE(RIGHT_H, 8)
-INT_VALUE(RIGHT_W, 1)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_add_8, simple_test)
-{
- int verbose = 1;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "add_8.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT_N = RIGHT_N_Value();
- const int32_t RIGHT_C = RIGHT_C_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_N = std::max(LEFT_N, RIGHT_N);
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_N);
- PRINT_VALUE(RIGHT_C);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Configure left data
- const uint32_t left_size = LEFT_N * LEFT_C * LEFT_H * LEFT_W;
- const uint32_t right_size = RIGHT_N * RIGHT_C * RIGHT_H * RIGHT_W;
- float left_data[left_size] = {
- 0.0f,
- };
- float right_data[right_size] = {
- 0.0f,
- };
-
- // Fill left data with random data
- {
- std::normal_distribution<float> left_dist(-1.0f, +1.0f);
- int value = 10;
- for (uint32_t off = 0; off < left_size; ++off)
- {
- left_data[off] = value;
- std::cout << left_data[off] << std::endl;
- }
- value = 1;
- for (uint32_t off = 0; off < right_size; ++off)
- {
- right_data[off] = value++;
- std::cout << right_data[off] << std::endl;
- }
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadOnly(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization,
- reinterpret_cast<const char *>(left_data),
- left_size * sizeof(float));
-
- // Configure input(s)
- interp.SetTensorParametersReadOnly(
- 2, kTfLiteFloat32 /* type */, "right" /* name */, {RIGHT_C} /* dims */, quantization,
- //{RIGHT_W, RIGHT_C} /* dims */, quantization,
- reinterpret_cast<const char *>(right_data), right_size * sizeof(float));
-
- // Add Convolution Node
- //
- // NOTE AddNodeWithParameters take the ownership of param, and deallocate it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Add and store the result into Tensor #0
- // - Read LHS from Tensor #1
- // - Read RHS from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1));
-
- interp.SetInputs({});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_H, 3)
-INT_VALUE(LEFT_W, 2)
-INT_VALUE(LEFT_C, 4)
-
-INT_VALUE(RIGHT_N, 1)
-INT_VALUE(RIGHT_H, 1)
-INT_VALUE(RIGHT_W, 1)
-INT_VALUE(RIGHT_C, 4)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_add_9, simple_test)
-{
- int verbose = 1;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "add_9.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT_N = RIGHT_N_Value();
- const int32_t RIGHT_C = RIGHT_C_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_N = std::max(LEFT_N, RIGHT_N);
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_VALUE(LEFT_C);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_N);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_VALUE(RIGHT_C);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
- PRINT_VALUE(OFM_C);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Configure left data
- const uint32_t left_size = LEFT_N * LEFT_C * LEFT_H * LEFT_W;
- const uint32_t right_size = RIGHT_N * RIGHT_C * RIGHT_H * RIGHT_W;
- float left_data[left_size] = {
- 0.0f,
- };
- float right_data[right_size] = {
- 0.0f,
- };
-
-  // Fill left data with a constant and right data with an increasing sequence
-  {
- float value = 10.0f;
- for (uint32_t off = 0; off < left_size; ++off)
- {
- left_data[off] = value;
- }
- value = 1.0f;
- for (uint32_t off = 0; off < right_size; ++off)
- {
- right_data[off] = value++;
- }
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadOnly(
- 1, kTfLiteFloat32 /* type */, "left" /* name */, {LEFT_W, LEFT_C} /* dims */, quantization,
- reinterpret_cast<const char *>(left_data), left_size * sizeof(float));
-
- // Configure input(s)
- interp.SetTensorParametersReadOnly(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */,
- quantization, reinterpret_cast<const char *>(right_data),
- right_size * sizeof(float));
-
-    // Add ADD Node
-    //
-    // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free
-    // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Add and store the result into Tensor #0
- // - Read LHS from Tensor #1
- // - Read RHS from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1));
-
- interp.SetInputs({});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_H, 1)
-INT_VALUE(LEFT_W, 3)
-INT_VALUE(LEFT_C, 4)
-
-INT_VALUE(RIGHT_N, 1)
-INT_VALUE(RIGHT_H, 2)
-INT_VALUE(RIGHT_W, 3)
-INT_VALUE(RIGHT_C, 4)
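The element-wise ADD tests above size their output tensor as the per-dimension maximum of the
two operand shapes (OFM_N = max(LEFT_N, RIGHT_N), and so on). A minimal standalone sketch of
that arithmetic, using the shape values from the .lst just above; this snippet is not part of
the removed sources:

#include <algorithm>
#include <array>
#include <cstddef>
#include <iostream>

int main()
{
  // (N, H, W, C) taken from the .lst above
  const std::array<int, 4> left{1, 1, 3, 4};
  const std::array<int, 4> right{1, 2, 3, 4};

  std::array<int, 4> ofm{};
  for (std::size_t i = 0; i < ofm.size(); ++i)
    ofm[i] = std::max(left[i], right[i]); // per-dimension max, as the tests compute OFM_*

  std::cout << ofm[0] << "x" << ofm[1] << "x" << ofm[2] << "x" << ofm[3] << std::endl; // 1x2x3x4
  return 0;
}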
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_add_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "add_quan_1.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT_N = RIGHT_N_Value();
- const int32_t RIGHT_C = RIGHT_C_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_N = std::max(LEFT_N, RIGHT_N);
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_N);
- PRINT_VALUE(RIGHT_C);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- quantization.scale = 2.0f;
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- quantization.scale = 1.0f;
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteUInt8 /* type */, "right" /* name */,
- {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */,
- quantization);
-
-    // Add ADD Node
-    //
-    // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free
-    // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Add and store the result into Tensor #0
- // - Read Left from Tensor #1
-    // - Read Right from Tensor #2
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_ADD, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
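As a quick, standalone illustration of the dequantization formula quoted from 'context.h' in the
tests above (real_value = scale * (quantized_value - zero_point)), using the parameters this
quantized ADD test sets (input scale 1.0, output scale 2.0, zero_point 0); the sketch is not
part of the removed sources:

#include <cstdint>
#include <iostream>

// real_value = scale * (quantized_value - zero_point)
float dequantize(std::uint8_t q, float scale, std::int32_t zero_point)
{
  return scale * static_cast<float>(static_cast<std::int32_t>(q) - zero_point);
}

int main()
{
  // With the output tensor parameters used above (scale = 2.0f, zero_point = 0),
  // a stored uint8 value of 3 represents 6.0f.
  std::cout << dequantize(3, 2.0f, 0) << std::endl; // prints 6
  return 0;
}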
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_C, 3)
-INT_VALUE(LEFT_H, 16)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT_N, 1)
-INT_VALUE(RIGHT_C, 3)
-INT_VALUE(RIGHT_H, 16)
-INT_VALUE(RIGHT_W, 16)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <iostream>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_avg_pool_1, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Set random test parameters
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "avg_pool_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t KER_H = KER_H_Value();
- const int32_t KER_W = KER_W_Value();
-
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = (IFM_H - KER_H) + 1;
- const int32_t OFM_W = (IFM_W - KER_W) + 1;
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(KER_H);
- PRINT_VALUE(KER_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- quantization.scale = 1;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
-    // Add Average Pooling Node
-    //
-    // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free
-    // So, param should be allocated with malloc
- auto param = make_alloc<TfLitePoolParams>();
-
- param->padding = kTfLitePaddingValid;
- param->stride_width = 1;
- param->stride_height = 1;
- param->filter_width = KER_W;
- param->filter_height = KER_H;
- param->activation = kTfLiteActNone;
-
-    // Run Average Pooling and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_AVERAGE_POOL_2D, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(KER_H, 3)
-INT_VALUE(KER_W, 4)
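The pooling tests above derive the output size for VALID padding as OFM_H = (IFM_H - KER_H) + 1
(stride 1). A small standalone check of that arithmetic with the values from the .lst just
above; the snippet is not part of the removed sources:

#include <cassert>

// VALID-padding output extent for a given input, kernel and stride
constexpr int valid_out(int in, int ker, int stride) { return (in - ker) / stride + 1; }

int main()
{
  // From the .lst above: IFM_H = 3, IFM_W = 4, KER_H = 3, KER_W = 4, stride 1
  assert(valid_out(3, 3, 1) == 1); // OFM_H
  assert(valid_out(4, 4, 1) == 1); // OFM_W
  return 0;
}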
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <iostream>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_avg_pool_1, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Set random test parameters
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "avg_pool_quan_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t KER_H = KER_H_Value();
- const int32_t KER_W = KER_W_Value();
-
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = (IFM_H - KER_H) + 1;
- const int32_t OFM_W = (IFM_W - KER_W) + 1;
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(KER_H);
- PRINT_VALUE(KER_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
- quantization.scale = 1.0f;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
-    // Add Average Pooling Node
-    //
-    // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free
-    // So, param should be allocated with malloc
- auto param = make_alloc<TfLitePoolParams>();
-
- param->padding = kTfLitePaddingValid;
- param->stride_width = 1;
- param->stride_height = 1;
- param->filter_width = KER_W;
- param->filter_height = KER_H;
- param->activation = kTfLiteActNone;
-
-    // Run Average Pooling and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_AVERAGE_POOL_2D, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(KER_H, 3)
-INT_VALUE(KER_W, 4)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_cast_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "cast_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_N = IFM_N_Value();
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t OFM_N = IFM_N;
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_N);
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- TfLiteQuantizationParams quantization;
-
- quantization.scale = 1;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Add Cast Node
- // Run CAST and store the result into Tensor #0
- // - Read input from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_CAST, 1));
-
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_N, 1)
-INT_VALUE(IFM_C, 3)
-INT_VALUE(IFM_H, 320)
-INT_VALUE(IFM_W, 320)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_cast_2, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "cast_2.lst"
-#undef INT_VALUE
-
- const int32_t IFM_N = IFM_N_Value();
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t OFM_N = IFM_N;
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_N);
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input
- interp.SetTensorParametersReadWrite(1, kTfLiteInt32 /* type */, "input" /* name */,
- {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Add Cast Node
- // Run CAST and store the result into Tensor #0
- // - Read input from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_CAST, 1));
-
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_N, 1)
-INT_VALUE(IFM_C, 3)
-INT_VALUE(IFM_H, 320)
-INT_VALUE(IFM_W, 320)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_cast_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "cast_q_to_f_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_N = IFM_N_Value();
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t OFM_N = IFM_N;
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_N);
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- TfLiteQuantizationParams quantization;
-
- quantization.scale = 1;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Add Cast Node
- // Run CAST and store the result into Tensor #0
- // - Read input from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_CAST, 1));
-
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_N, 1)
-INT_VALUE(IFM_C, 3)
-INT_VALUE(IFM_H, 320)
-INT_VALUE(IFM_W, 320)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_concat_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "concat_1.lst"
-#undef INT_VALUE
-
- // TODO Allow users to set concat axis!
- const int32_t CONCAT_COUNT = CONCAT_COUNT_Value();
-
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- int32_t OFM_C = 0;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(CONCAT_COUNT);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Randomize IFM depth
- std::default_random_engine generator(SEED);
- std::uniform_int_distribution<int> distribution(1, 8);
-
- std::vector<int32_t> depths;
-
- for (int32_t n = 0; n < CONCAT_COUNT; ++n)
- {
- const auto depth = distribution(generator);
-
- OFM_C += depth;
- depths.emplace_back(depth);
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(depths.size() + 1);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM(s)
- std::vector<int> ifm_indexes;
-
- for (uint32_t n = 0; n < depths.size(); ++n)
- {
- const auto ifm_index = 1 + n;
- const auto IFM_C = depths.at(n);
-
- interp.SetTensorParametersReadWrite(ifm_index, kTfLiteFloat32 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- ifm_indexes.emplace_back(ifm_index);
- }
-
- // Add Concat Node
- //
-    // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteConcatenationParams>();
-
- param->activation = kTfLiteActNone;
- param->axis = 3;
-
-    // Run Concatenation and store its result into Tensor #0
-    // - Read IFM(s) from the tensors listed in ifm_indexes
- interp.AddNodeWithParameters(ifm_indexes, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_CONCATENATION, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs(ifm_indexes);
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(CONCAT_COUNT, 3)
-
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_concat_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "concat_quan_1.lst"
-#undef INT_VALUE
-
- // TODO Allow users to set concat axis!
- const int32_t CONCAT_COUNT = CONCAT_COUNT_Value();
-
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- int32_t OFM_C = 0;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(CONCAT_COUNT);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Randomize IFM depth
- std::default_random_engine generator(SEED);
- std::uniform_int_distribution<int> distribution(1, 8);
-
- std::vector<int32_t> depths;
-
- for (int32_t n = 0; n < CONCAT_COUNT; ++n)
- {
- const auto depth = distribution(generator);
-
- OFM_C += depth;
- depths.emplace_back(depth);
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
- quantization.scale = 1.0f;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(depths.size() + 1);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM(s)
- std::vector<int> ifm_indexes;
-
- for (uint32_t n = 0; n < depths.size(); ++n)
- {
- const auto ifm_index = 1 + n;
- const auto IFM_C = depths.at(n);
-
- interp.SetTensorParametersReadWrite(ifm_index, kTfLiteUInt8 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- ifm_indexes.emplace_back(ifm_index);
- }
-
- // Add Concat Node
- //
-    // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteConcatenationParams>();
-
- param->activation = kTfLiteActNone;
- param->axis = 3;
-
-    // Run Concatenation and store its result into Tensor #0
-    // - Read IFM(s) from the tensors listed in ifm_indexes
- interp.AddNodeWithParameters(ifm_indexes, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_CONCATENATION, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs(ifm_indexes);
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(CONCAT_COUNT, 3)
-
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_conv_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "conv_1.lst"
-#undef INT_VALUE
-
- const int32_t STRIDE_H = STRIDE_H_Value();
- const int32_t STRIDE_W = STRIDE_W_Value();
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t KER_N = KER_N_Value();
- const int32_t KER_C = IFM_C_Value();
- const int32_t KER_H = KER_H_Value();
- const int32_t KER_W = KER_W_Value();
-
- const int32_t OFM_C = KER_N;
- const int32_t OFM_H = (IFM_H - KER_H) / STRIDE_H + 1;
- const int32_t OFM_W = (IFM_W - KER_W) / STRIDE_W + 1;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(STRIDE_H);
- PRINT_VALUE(STRIDE_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(KER_N);
- PRINT_VALUE(KER_C);
- PRINT_VALUE(KER_H);
- PRINT_VALUE(KER_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Configure Kernel Data
- const uint32_t kernel_size = KER_N * KER_C * KER_H * KER_W;
- float kernel_data[kernel_size] = {
- 0.0f,
- };
-
- // Fill kernel data with random data
- {
- std::normal_distribution<float> kernel_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < kernel_size; ++off)
- {
-      kernel_data[off] = kernel_dist(random);
- }
- }
-
- // Configure Bias Data
- const auto bias_size = KER_N;
- float bias_data[bias_size] = {
- 0.0f,
- };
-
- // Fill bias data with random data
- {
- std::normal_distribution<float> bias_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < bias_size; ++off)
- {
- bias_data[off] = bias_dist(random);
- }
- }
-
- // Assumption on this example
- assert(IFM_C == KER_C);
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(5);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // NOTE kernel_data & bias_data should live longer than interpreter!
- interp.SetTensorParametersReadOnly(
- 2, kTfLiteFloat32 /* type */, "filter" /* name */, {KER_N, KER_H, KER_W, KER_C} /* dims */,
- quantization, reinterpret_cast<const char *>(kernel_data), kernel_size * sizeof(float));
-
- interp.SetTensorParametersReadOnly(
- 3, kTfLiteFloat32 /* type */, "bias" /* name */, {bias_size} /* dims */, quantization,
- reinterpret_cast<const char *>(bias_data), bias_size * sizeof(float));
-
- // Add Convolution Node
- //
-    // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteConvParams>();
-
- param->padding = kTfLitePaddingValid;
- param->stride_width = STRIDE_W;
- param->stride_height = STRIDE_H;
- param->activation = kTfLiteActRelu;
-
- // Run Convolution and store its result into Tensor #0
- // - Read IFM from Tensor #1
- // - Read Filter from Tensor #2,
- // - Read Bias from Tensor #3
- interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_CONV_2D, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(KER_N, 1)
-INT_VALUE(KER_H, 3)
-INT_VALUE(KER_W, 4)
-
-INT_VALUE(STRIDE_H, 1)
-INT_VALUE(STRIDE_W, 1)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_conv_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "conv_quan_1.lst"
-#undef INT_VALUE
-
- const int32_t STRIDE_H = STRIDE_H_Value();
- const int32_t STRIDE_W = STRIDE_W_Value();
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t KER_N = KER_N_Value();
- const int32_t KER_C = IFM_C_Value();
- const int32_t KER_H = KER_H_Value();
- const int32_t KER_W = KER_W_Value();
-
- const int32_t OFM_C = KER_N;
- const int32_t OFM_H = (IFM_H - KER_H) / STRIDE_H + 1;
- const int32_t OFM_W = (IFM_W - KER_W) / STRIDE_W + 1;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(STRIDE_H);
- PRINT_VALUE(STRIDE_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(KER_N);
- PRINT_VALUE(KER_C);
- PRINT_VALUE(KER_H);
- PRINT_VALUE(KER_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Configure Kernel Data
- const uint32_t kernel_size = KER_N * KER_C * KER_H * KER_W;
- float kernel_data[kernel_size] = {
- 0.0f,
- };
-
- // Fill kernel data with random data
- {
- std::normal_distribution<float> kernel_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < kernel_size; ++off)
- {
-      kernel_data[off] = kernel_dist(random);
- }
- }
-
- // Configure Bias Data
- const auto bias_size = KER_N;
- int32_t bias_data[bias_size] = {
- 0,
- };
-
- // Fill bias data with random data
- {
- std::normal_distribution<float> bias_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < bias_size; ++off)
- {
- bias_data[off] = static_cast<int32_t>(bias_dist(random));
- }
- }
-
- // Assumption on this example
- assert(IFM_C == KER_C);
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- TfLiteQuantizationParams quantization;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(5);
-
- // Configure OFM
-    float max_scale = static_cast<float>(KER_C * KER_H * KER_W) *
-                      std::numeric_limits<uint8_t>::max(); // * IFM_scale(1.0f) * kernel_scale(1.0f)
- quantization.scale = max_scale;
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM
- quantization.scale = 1.0f;
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // NOTE kernel_data & bias_data should live longer than interpreter!
- interp.SetTensorParametersReadOnly(
- 2, kTfLiteUInt8 /* type */, "filter" /* name */, {KER_N, KER_H, KER_W, KER_C} /* dims */,
- quantization, reinterpret_cast<const char *>(kernel_data), kernel_size * sizeof(uint8_t));
-
- quantization.scale *= quantization.scale;
- interp.SetTensorParametersReadOnly(
- 3, kTfLiteInt32 /* type */, "bias" /* name */, {bias_size} /* dims */, quantization,
- reinterpret_cast<const char *>(bias_data), bias_size * sizeof(int32_t));
-
- // Add Convolution Node
- //
-    // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free
- // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteConvParams>();
-
- param->padding = kTfLitePaddingValid;
- param->stride_width = STRIDE_W;
- param->stride_height = STRIDE_H;
- param->activation = kTfLiteActRelu;
-
- // Run Convolution and store its result into Tensor #0
- // - Read IFM from Tensor #1
- // - Read Filter from Tensor #2,
- // - Read Bias from Tensor #3
- interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_CONV_2D, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(KER_N, 1)
-INT_VALUE(KER_H, 3)
-INT_VALUE(KER_W, 4)
-
-INT_VALUE(STRIDE_H, 1)
-INT_VALUE(STRIDE_W, 1)
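For reference, a standalone sketch of the value the quantized convolution test above assigns to
the output scale; its trailing comment suggests a worst-case accumulation bound with IFM and
kernel scales of 1.0f, though that reading is an assumption and the snippet is not part of the
removed sources:

#include <cstdint>
#include <iostream>
#include <limits>

int main()
{
  // From the .lst above: KER_C = IFM_C = 2, KER_H = 3, KER_W = 4
  const int accumulations = 2 * 3 * 4; // multiply-accumulates per output element
  const float max_scale =
    accumulations * static_cast<float>(std::numeric_limits<std::uint8_t>::max());
  std::cout << max_scale << std::endl; // 24 * 255 = 6120
  return 0;
}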
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_dconv_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "dconv_1.lst"
-#undef INT_VALUE
-
- const int32_t STRIDE_H = STRIDE_H_Value();
- const int32_t STRIDE_W = STRIDE_W_Value();
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t KER_C = KER_C_Value();
- const int32_t KER_H = KER_H_Value();
- const int32_t KER_W = KER_W_Value();
-
- const int32_t OFM_C = KER_C;
- const int32_t OFM_H = (IFM_H - KER_H) / STRIDE_H + 1;
- const int32_t OFM_W = (IFM_W - KER_W) / STRIDE_W + 1;
-
- const int32_t MULTIPLIER = MULTIPLIER_Value();
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(KER_C);
- PRINT_VALUE(KER_H);
- PRINT_VALUE(KER_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(STRIDE_H);
- PRINT_VALUE(STRIDE_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(MULTIPLIER);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- assert(MULTIPLIER * IFM_C == KER_C);
-
- // Configure Kernel Data
- const uint32_t kernel_size = KER_C * KER_H * KER_W;
- float kernel_data[kernel_size] = {
- 0.0f,
- };
-
- // Fill kernel data with random data
- {
- std::normal_distribution<float> kernel_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < kernel_size; ++off)
- {
- kernel_data[off] = kernel_dist(random);
- }
- }
-
- // Configure Bias Data
- const auto bias_size = KER_C;
- float bias_data[bias_size] = {
- 0.0f,
- };
-
- // Fill bias data with random data
- {
- std::normal_distribution<float> bias_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < bias_size; ++off)
- {
- bias_data[off] = bias_dist(random);
- }
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(4);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // NOTE kernel_data & bias_data should live longer than interpreter!
- interp.SetTensorParametersReadOnly(
- 2, kTfLiteFloat32 /* type */, "filter" /* name */, {1, KER_H, KER_W, KER_C} /* dims */,
- quantization, reinterpret_cast<const char *>(kernel_data), kernel_size * sizeof(float));
-
- interp.SetTensorParametersReadOnly(
- 3, kTfLiteFloat32 /* type */, "bias" /* name */, {bias_size} /* dims */, quantization,
- reinterpret_cast<const char *>(bias_data), bias_size * sizeof(float));
-
-    // Add Depthwise Convolution Node
-    //
-    // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free
-    // So, param should be allocated with malloc
- auto param = make_alloc<TfLiteDepthwiseConvParams>();
-
- param->padding = kTfLitePaddingValid;
- param->stride_width = STRIDE_W;
- param->stride_height = STRIDE_H;
- param->depth_multiplier = MULTIPLIER;
- param->activation = kTfLiteActRelu;
-
- // Run Convolution and store its result into Tensor #0
- // - Read IFM from Tensor #1
- // - Read Filter from Tensor #2,
- // - Read Bias from Tensor #3
- interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_DEPTHWISE_CONV_2D, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
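
For reference, the OFM_H / OFM_W arithmetic and the MULTIPLIER assert in the test above follow the usual VALID-padding and depthwise-channel rules. A minimal standalone sketch (illustrative only, using the dconv_1.lst defaults; not part of the deleted sources):

#include <cassert>

int main()
{
  // Default shapes from dconv_1.lst
  const int IFM_C = 2, IFM_H = 3, IFM_W = 4;
  const int KER_C = 2, KER_H = 3, KER_W = 4;
  const int STRIDE_H = 1, STRIDE_W = 1;
  const int MULTIPLIER = 1;

  // Depthwise convolution: output channels = depth multiplier * input channels
  assert(MULTIPLIER * IFM_C == KER_C);

  // VALID padding: output extent = (input - kernel) / stride + 1
  const int OFM_H = (IFM_H - KER_H) / STRIDE_H + 1; // (3 - 3) / 1 + 1 = 1
  const int OFM_W = (IFM_W - KER_W) / STRIDE_W + 1; // (4 - 4) / 1 + 1 = 1
  assert(OFM_H == 1 && OFM_W == 1);

  return 0;
}
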
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(KER_C, 2)
-INT_VALUE(KER_H, 3)
-INT_VALUE(KER_W, 4)
-
-INT_VALUE(MULTIPLIER, 1)
-
-INT_VALUE(STRIDE_H, 1)
-INT_VALUE(STRIDE_W, 1)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_dconv_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "dconv_quan_1.lst"
-#undef INT_VALUE
-
- const int32_t STRIDE_H = STRIDE_H_Value();
- const int32_t STRIDE_W = STRIDE_W_Value();
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t KER_C = KER_C_Value();
- const int32_t KER_H = KER_H_Value();
- const int32_t KER_W = KER_W_Value();
-
- const int32_t OFM_C = KER_C;
- const int32_t OFM_H = (IFM_H - KER_H) / STRIDE_H + 1;
- const int32_t OFM_W = (IFM_W - KER_W) / STRIDE_W + 1;
-
- const int32_t MULTIPLIER = MULTIPLIER_Value();
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(KER_C);
- PRINT_VALUE(KER_H);
- PRINT_VALUE(KER_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(STRIDE_H);
- PRINT_VALUE(STRIDE_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(MULTIPLIER);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- assert(MULTIPLIER * IFM_C == KER_C);
-
- // Configure Kernel Data
- const uint32_t kernel_size = KER_C * KER_H * KER_W;
- float kernel_data[kernel_size] = {
- 0.0f,
- };
-
- // Fill kernel data with random data
- {
- std::normal_distribution<float> kernel_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < kernel_size; ++off)
- {
- kernel_data[off] = kernel_dist(random);
- }
- }
-
- // Configure Bias Data
- const auto bias_size = KER_C;
- int32_t bias_data[bias_size] = {
- 0,
- };
-
- // Fill bias data with random data
- {
- std::normal_distribution<float> bias_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < bias_size; ++off)
- {
- bias_data[off] = static_cast<int32_t>(bias_dist(random));
- }
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- TfLiteQuantizationParams quantization;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(4);
-
- // Configure OFM
- float max_scale = (1 * KER_C * KER_H * KER_W) *
- std::numeric_limits<uint8_t>::max(); // * IFM_scale(1.0f) * kernel_scale(1.0f)
- quantization.scale = max_scale;
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM
- quantization.scale = 1.0f;
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // NOTE kernel_data & bias_data should live longer than interpreter!
- interp.SetTensorParametersReadOnly(
- 2, kTfLiteUInt8 /* type */, "filter" /* name */, {1, KER_H, KER_W, KER_C} /* dims */,
- quantization, reinterpret_cast<const char *>(kernel_data), kernel_size * sizeof(uint8_t));
-
- quantization.scale *= quantization.scale;
- interp.SetTensorParametersReadOnly(
- 3, kTfLiteInt32 /* type */, "bias" /* name */, {bias_size} /* dims */, quantization,
- reinterpret_cast<const char *>(bias_data), bias_size * sizeof(int32_t));
-
- // Add Convolution Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free(),
- // so param should be allocated with malloc()
- auto param = make_alloc<TfLiteDepthwiseConvParams>();
-
- param->padding = kTfLitePaddingValid;
- param->stride_width = STRIDE_W;
- param->stride_height = STRIDE_H;
- param->depth_multiplier = MULTIPLIER;
- param->activation = kTfLiteActRelu;
-
- // Run Convolution and store its result into Tensor #0
- // - Read IFM from Tensor #1
- // - Read Filter from Tensor #2,
- // - Read Bias from Tensor #3
- interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_DEPTHWISE_CONV_2D, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
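
The quantized variant above relies on the dequantization formula quoted from context.h, plus the common convention that the int32 bias tensor uses scale = input_scale * filter_scale (which is what "quantization.scale *= quantization.scale" expresses). A small sketch of both, with assumed example values; not taken from the deleted sources:

#include <cassert>
#include <cstdint>

// Asymmetric quantization: real_value = scale * (quantized_value - zero_point)
float dequantize(std::uint8_t q, float scale, std::int32_t zero_point)
{
  return scale * static_cast<float>(static_cast<std::int32_t>(q) - zero_point);
}

int main()
{
  // Scales as set in the test above (both 1.0f), so the bias scale is also 1.0f.
  const float input_scale = 1.0f;
  const float filter_scale = 1.0f;
  const float bias_scale = input_scale * filter_scale;
  assert(bias_scale == 1.0f);

  // Worked example of the formula: scale 0.5, zero_point 0, q = 3 -> 1.5
  assert(dequantize(3, 0.5f, 0) == 1.5f);
  return 0;
}
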
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(KER_C, 2)
-INT_VALUE(KER_H, 3)
-INT_VALUE(KER_W, 4)
-
-INT_VALUE(MULTIPLIER, 1)
-
-INT_VALUE(STRIDE_H, 1)
-INT_VALUE(STRIDE_W, 1)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_dequantize_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "dequantize_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_N = IFM_N_Value();
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t OFM_N = IFM_N;
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_N);
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- TfLiteQuantizationParams quantization;
-
- quantization.scale = 1;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Add DEQUANTIZE Node
- // Run DEQUANTIZE and store the result into Tensor #0
- // - Read input from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_DEQUANTIZE, 1));
-
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_N, 1)
-INT_VALUE(IFM_C, 3)
-INT_VALUE(IFM_H, 320)
-INT_VALUE(IFM_W, 320)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_div_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "div_1.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT_N = RIGHT_N_Value();
- const int32_t RIGHT_C = RIGHT_C_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_N = std::max(LEFT_N, RIGHT_N);
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_N);
- PRINT_VALUE(RIGHT_C);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */,
- quantization);
-
- // Add Division Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free(),
- // so param should be allocated with malloc()
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Div and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_DIV, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_C, 3)
-INT_VALUE(LEFT_H, 16)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT_N, 1)
-INT_VALUE(RIGHT_C, 3)
-INT_VALUE(RIGHT_H, 16)
-INT_VALUE(RIGHT_W, 16)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_div_2, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "div_2.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT = RIGHT_Value();
-
- const int32_t OFM_N = LEFT_N;
- const int32_t OFM_C = LEFT_C;
- const int32_t OFM_H = LEFT_H;
- const int32_t OFM_W = LEFT_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT} /* dims */, quantization);
-
- // Add Division Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free(),
- // so param should be allocated with malloc()
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Div and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_DIV, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_C, 3)
-INT_VALUE(LEFT_H, 16)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT, 1)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-template <typename T> T *make_malloc(void) { return reinterpret_cast<T *>(malloc(sizeof(T))); }
-
-TEST(NNAPI_Quickcheck_fully_connected_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "conv_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t KER_H = KER_H_Value();
- const int32_t KER_W = IFM_C_Value() * IFM_H_Value() * IFM_W_Value();
-
- const int32_t OUT_LEN = KER_H;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(KER_H);
- PRINT_VALUE(KER_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OUT_LEN);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Configure Kernel Data
- const uint32_t kernel_size = KER_H * KER_W;
- float kernel_data[kernel_size] = {
- 0.0f,
- };
-
- // Fill kernel data with random data
- {
- std::normal_distribution<float> kernel_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < kernel_size; ++off)
- {
- kernel_data[off] = kernel_dist(random);
- }
- }
-
- // Configure Bias Data
- const auto bias_size = KER_H;
- float bias_data[bias_size] = {
- 0.0f,
- };
-
- // Fill bias data with random data
- {
- std::normal_distribution<float> bias_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < bias_size; ++off)
- {
- bias_data[off] = bias_dist(random);
- }
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(4);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1 /*N*/, KER_H} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // NOTE kernel_data & bias_data should live longer than interpreter!
- interp.SetTensorParametersReadOnly(
- 2, kTfLiteFloat32 /* type */, "filter" /* name */, {KER_H, KER_W} /* dims */, quantization,
- reinterpret_cast<const char *>(kernel_data), kernel_size * sizeof(float));
-
- interp.SetTensorParametersReadOnly(
- 3, kTfLiteFloat32 /* type */, "bias" /* name */, {bias_size} /* dims */, quantization,
- reinterpret_cast<const char *>(bias_data), bias_size * sizeof(float));
-
- // Add Fully Connected Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free(),
- // so param should be allocated with malloc()
- auto param = make_malloc<TfLiteFullyConnectedParams>();
-
- param->activation = kTfLiteActRelu;
-
- // Run Fully Connected and store its result into Tensor #0
- // - Read IFM from Tensor #1
- // - Read Filter from Tensor #2,
- // - Read Bias from Tensor #3
- interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_FULLY_CONNECTED, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(KER_H, 1)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-template <typename T> T *make_malloc(void) { return reinterpret_cast<T *>(malloc(sizeof(T))); }
-
-TEST(NNAPI_Quickcheck_fully_connected_quan_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "fully_connected_quan_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t KER_H = KER_H_Value();
- const int32_t KER_W = IFM_C_Value() * IFM_H_Value() * IFM_W_Value();
-
- const int32_t OUT_LEN = KER_H;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(KER_H);
- PRINT_VALUE(KER_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OUT_LEN);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Configure Kernel Data
- const uint32_t kernel_size = KER_H * KER_W;
- float kernel_data[kernel_size] = {
- 0.0f,
- };
-
- // Fill kernel data with random data
- {
- std::normal_distribution<float> kernel_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < kernel_size; ++off)
- {
- kernel_data[off] = kernel_dist(random);
- }
- }
-
- // Configure Bias Data
- const auto bias_size = KER_H;
- int32_t bias_data[bias_size] = {
- 0,
- };
-
- // Fill bias data with random data
- {
- std::normal_distribution<float> bias_dist(-1.0f, +1.0f);
-
- for (uint32_t off = 0; off < bias_size; ++off)
- {
- bias_data[off] = static_cast<int32_t>(bias_dist(random));
- }
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
- quantization.scale = FLOAT_NEAREST_TO_1;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(4);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {1 /*N*/, KER_H} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // NOTE kernel_data & bias_data should live longer than interpreter!
- interp.SetTensorParametersReadOnly(
- 2, kTfLiteUInt8 /* type */, "filter" /* name */, {KER_H, KER_W} /* dims */, quantization,
- reinterpret_cast<const char *>(kernel_data), kernel_size * sizeof(uint8_t));
-
- interp.SetTensorParametersReadOnly(
- 3, kTfLiteInt32 /* type */, "bias" /* name */, {bias_size} /* dims */, quantization,
- reinterpret_cast<const char *>(bias_data), bias_size * sizeof(int32_t));
-
- // Add Fully Connected Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free(),
- // so param should be allocated with malloc()
- auto param = make_malloc<TfLiteFullyConnectedParams>();
-
- param->activation = kTfLiteActRelu;
-
- // Run Fully Connected and store its result into Tensor #0
- // - Read IFM from Tensor #1
- // - Read Filter from Tensor #2,
- // - Read Bias from Tensor #3
- interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_FULLY_CONNECTED, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(KER_H, 1)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <iostream>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_gather_1, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Set random test parameters
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "gather_1.lst"
-#undef INT_VALUE
-
- const int32_t INPUT_DATA = INPUT_DATA_Value();
- const int32_t INDEX_DATA = INDEX_DATA_Value();
-
- const int32_t OUTPUT_DATA = INDEX_DATA;
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(INPUT_DATA);
- PRINT_VALUE(INDEX_DATA);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OUTPUT_DATA);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
-
- quantization.scale = 1;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure INPUT_DATA
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "input" /* name */,
- {INPUT_DATA} /* dims */, quantization);
-
- // Configure INDEX_DATA
- interp.SetTensorParametersReadWrite(1, kTfLiteInt32 /* type */, "index" /* name */,
- {INDEX_DATA} /* dims */, quantization);
-
- // Configure OUTPUT_VALUES
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "output_data" /* name */,
- {OUTPUT_DATA} /* dims */, quantization);
-
- auto *param = reinterpret_cast<TfLiteGatherParams *>(malloc(sizeof(TfLiteGatherParams)));
-
- param->axis = 0;
-
- // Add GATHER Node
- // Run GATHER and store its result into Tensor #2
- // - Read input data and index_data from Tensor #0 and #1, respectively
- interp.AddNodeWithParameters({0, 1}, {2}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_GATHER, 1));
-
- // Set Tensor #0 and #1 as Input, and Tensor #2 as Output
- interp.SetInputs({0, 1});
- interp.SetOutputs({2});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(INPUT_DATA, 8192)
-INT_VALUE(INDEX_DATA, 300)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <iostream>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_gather_2, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Set random test parameters
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "gather_2.lst"
-#undef INT_VALUE
-
- const int32_t INPUT_DATA_H = INPUT_DATA_H_Value();
- const int32_t INPUT_DATA_W = INPUT_DATA_W_Value();
- const int32_t INDEX_DATA = INDEX_DATA_Value();
-
- const int32_t OUTPUT_DATA_H = INPUT_DATA_H;
- const int32_t OUTPUT_DATA_W = INDEX_DATA;
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(INPUT_DATA_H);
- PRINT_VALUE(INPUT_DATA_W);
- PRINT_VALUE(INDEX_DATA);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OUTPUT_DATA_H);
- PRINT_VALUE(OUTPUT_DATA_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
-
- quantization.scale = 1;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure INPUT_DATA
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "input" /* name */,
- {INPUT_DATA_H, INPUT_DATA_W} /* dims */, quantization);
-
- // Configure INDEX_DATA
- interp.SetTensorParametersReadWrite(1, kTfLiteInt32 /* type */, "index" /* name */,
- {INDEX_DATA} /* dims */, quantization);
-
- // Configure OUTPUT_VALUES
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "output_data" /* name */,
- {OUTPUT_DATA_H, OUTPUT_DATA_W} /* dims */, quantization);
-
- auto *param = reinterpret_cast<TfLiteGatherParams *>(malloc(sizeof(TfLiteGatherParams)));
-
- param->axis = 0;
-
- // Add GATHER Node
- // Run GATHER and store its result into Tensor #2
- // - Read input data and index_data from Tensor #0 and #1, respectively
- interp.AddNodeWithParameters({0, 1}, {2}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_GATHER, 1));
-
- // Set Tensor #0 and #1 as Input, and Tensor #2 as Output
- interp.SetInputs({0, 1});
- interp.SetOutputs({2});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(INPUT_DATA_H, 128192)
-INT_VALUE(INPUT_DATA_W, 4)
-INT_VALUE(INDEX_DATA, 300)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_logistic_quan_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "logistic_quan_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_N = IFM_N_Value();
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t OFM_N = IFM_N;
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_N);
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- TfLiteQuantizationParams in_quantization;
- in_quantization.scale = 0.5f;
- in_quantization.zero_point = 0;
-
- TfLiteQuantizationParams out_quantization;
- out_quantization.scale = 1.f / 256;
- out_quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, out_quantization);
-
- // Configure input
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, in_quantization);
-
- // Add Logistic Node
- // Run Logistic and store the result into Tensor #0
- // - Read input from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_LOGISTIC, 1));
-
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
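
The scales chosen above are the conventional ones for a quantized LOGISTIC output: the sigmoid maps into (0, 1), so an output scale of 1/256 with zero_point 0 lets the uint8 range [0, 255] cover [0, 255/256]. A quick check of that arithmetic (illustrative assumption, not from the deleted sources):

#include <cassert>
#include <cstdint>

int main()
{
  const float out_scale = 1.0f / 256.0f;
  const std::int32_t zero_point = 0;

  // real = scale * (q - zero_point)
  const float mid = out_scale * (128 - zero_point); // 0.5, the sigmoid midpoint
  const float top = out_scale * (255 - zero_point); // 255/256, just under the sigmoid's upper bound of 1

  assert(mid == 0.5f);
  assert(top < 1.0f);
  return 0;
}
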
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_N, 1)
-INT_VALUE(IFM_C, 1)
-INT_VALUE(IFM_H, 2)
-INT_VALUE(IFM_W, 2)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <iostream>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_max_pool_1, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Set random test parameters
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "max_pool_1.lst"
-#undef INT_VALUE
-
- const TfLitePadding PADDING_TYPE = static_cast<TfLitePadding>(PADDING_TYPE_Value());
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t KER_H = KER_H_Value();
- const int32_t KER_W = KER_W_Value();
-
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = OFM_H_Value();
- const int32_t OFM_W = OFM_W_Value();
-
- assert((OFM_H >= (IFM_H - KER_H)));
- assert((OFM_W >= (IFM_W - KER_W)));
- assert((kTfLitePaddingSame == PADDING_TYPE) || (kTfLitePaddingValid == PADDING_TYPE));
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(PADDING_TYPE);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(KER_H);
- PRINT_VALUE(KER_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Add Max Pooling Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free(),
- // so param should be allocated with malloc()
- auto param = make_alloc<TfLitePoolParams>();
-
- param->padding = PADDING_TYPE;
- param->stride_width = 1;
- param->stride_height = 1;
- param->filter_width = KER_W;
- param->filter_height = KER_H;
- param->activation = kTfLiteActNone;
-
- // Run Max Pooling and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_MAX_POOL_2D, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(KER_N, 1)
-INT_VALUE(KER_H, 3)
-INT_VALUE(KER_W, 4)
-
-INT_VALUE(OFM_H, 1)
-INT_VALUE(OFM_W, 1)
-
-// Default is kTfLitePaddingValid (= 2)
-INT_VALUE(PADDING_TYPE, 2)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <iostream>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_max_pool_quan_1, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Set random test parameters
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "max_pool_quan_1.lst"
-#undef INT_VALUE
-
- const TfLitePadding PADDING_TYPE = static_cast<TfLitePadding>(PADDING_TYPE_Value());
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t KER_H = KER_H_Value();
- const int32_t KER_W = KER_W_Value();
-
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = OFM_H_Value();
- const int32_t OFM_W = OFM_W_Value();
-
- assert((OFM_H >= (IFM_H - KER_H)));
- assert((OFM_W >= (IFM_W - KER_W)));
- assert((kTfLitePaddingSame == PADDING_TYPE) || (kTfLitePaddingValid == PADDING_TYPE));
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(PADDING_TYPE);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(KER_H);
- PRINT_VALUE(KER_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
- quantization.scale = 1.0f;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Add Max Pooling Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free(),
- // so param should be allocated with malloc()
- auto param = make_alloc<TfLitePoolParams>();
-
- param->padding = PADDING_TYPE;
- param->stride_width = 1;
- param->stride_height = 1;
- param->filter_width = KER_W;
- param->filter_height = KER_H;
- param->activation = kTfLiteActNone;
-
- // Run Max Pooling and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_MAX_POOL_2D, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(KER_N, 1)
-INT_VALUE(KER_H, 3)
-INT_VALUE(KER_W, 4)
-
-INT_VALUE(OFM_H, 1)
-INT_VALUE(OFM_W, 1)
-
-// Default is kTfLitePaddingValid (= 2)
-INT_VALUE(PADDING_TYPE, 2)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_mul_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "mul_1.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_1D = LEFT_1D_Value();
- const int32_t LEFT_2D = LEFT_2D_Value();
- const int32_t LEFT_3D = LEFT_3D_Value();
-
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_1D = LEFT_1D_Value();
- const int32_t OFM_2D = LEFT_2D_Value();
- const int32_t OFM_3D = LEFT_3D_Value();
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_1D);
- PRINT_VALUE(LEFT_2D);
- PRINT_VALUE(LEFT_3D);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_1D);
- PRINT_VALUE(OFM_2D);
- PRINT_VALUE(OFM_3D);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
-
- quantization.scale = 1;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_1D, OFM_2D, OFM_3D} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_1D, LEFT_2D, LEFT_3D} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_W} /* dims */, quantization);
-
- // Add MUL Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free()
- // So, param should be allocated with malloc()
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run MUL and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_MUL, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
- param.tensor_logging = 1;
- param.log_path = "report/tensor_mul_1.log";
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
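The tests above repeatedly quote the asymmetric-quantization relation from 'context.h', real_value = scale * (quantized_value - zero_point). A minimal sketch of that relation follows; the helper name and values are illustrative only and not taken from the deleted sources.

#include <cstdint>

// Hypothetical helper: maps a stored uint8 value back to the real value it represents
// under asymmetric quantization.
static float dequantize(uint8_t quantized_value, float scale, int32_t zero_point)
{
  return scale * (static_cast<float>(quantized_value) - static_cast<float>(zero_point));
}

// Example: dequantize(130, 0.5f, 128) == 1.0f, and dequantize(128, 0.5f, 128) == 0.0f.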
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-// (3, 1, 4)
-INT_VALUE(LEFT_1D, 3)
-INT_VALUE(LEFT_2D, 1)
-INT_VALUE(LEFT_3D, 4)
-
-INT_VALUE(RIGHT_W, 4)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_mul_2, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "mul_2.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_D1 = LEFT_D1_Value();
- const int32_t LEFT_D2 = LEFT_D2_Value();
- const int32_t LEFT_D3 = LEFT_D3_Value();
-
- const int32_t RIGHT_D1 = RIGHT_D1_Value();
-
- const int32_t OFM_D1 = LEFT_D1;
- const int32_t OFM_D2 = LEFT_D2;
- const int32_t OFM_D3 = LEFT_D3;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_D1);
- PRINT_VALUE(LEFT_D2);
- PRINT_VALUE(LEFT_D3);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_D1);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_D1);
- PRINT_VALUE(OFM_D2);
- PRINT_VALUE(OFM_D3);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
-
- quantization.scale = 1;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_D1, OFM_D2, OFM_D3} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_D1, LEFT_D2, LEFT_D3} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_D1} /* dims */, quantization);
-
- // Add MUL Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free()
- // So, param should be allocated with malloc()
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run MUL and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_MUL, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_D1, 5)
-INT_VALUE(LEFT_D2, 3)
-INT_VALUE(LEFT_D3, 12)
-
-INT_VALUE(RIGHT_D1, 12)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_mul_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "mul_1.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_1D = LEFT_1D_Value();
- const int32_t LEFT_2D = LEFT_2D_Value();
- const int32_t LEFT_3D = LEFT_3D_Value();
-
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_1D = LEFT_1D_Value();
- const int32_t OFM_2D = LEFT_2D_Value();
- const int32_t OFM_3D = LEFT_3D_Value();
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_1D);
- PRINT_VALUE(LEFT_2D);
- PRINT_VALUE(LEFT_3D);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_1D);
- PRINT_VALUE(OFM_2D);
- PRINT_VALUE(OFM_3D);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- float max_scale =
- std::numeric_limits<uint8_t>::max(); // * input1_scale(1.0f) * input2_scale(1.0f)
- quantization.scale = max_scale;
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {OFM_1D, OFM_2D, OFM_3D} /* dims */, quantization);
-
- // Configure input(s)
- quantization.scale = 1.0f;
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "left" /* name */,
- {LEFT_1D, LEFT_2D, LEFT_3D} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteUInt8 /* type */, "right" /* name */,
- {RIGHT_W} /* dims */, quantization);
-
- // Add MUL Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free()
- // So, param should be allocated with malloc()
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run MUL and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_MUL, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
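The quantized variant above sets the output scale to std::numeric_limits<uint8_t>::max() times the (unit) input scales. A small sketch, under the assumption of zero_point == 0 on every tensor, of why that choice keeps every possible product representable; the helper name is illustrative, not from the sources.

#include <cstdint>
#include <limits>

// Hypothetical helper: the smallest output scale for a quantized MUL (zero_point == 0)
// that still covers the largest possible product of two uint8 inputs.
static float mul_output_scale(float input1_scale, float input2_scale)
{
  return std::numeric_limits<uint8_t>::max() * input1_scale * input2_scale;
}

// With input scales of 1.0f, real inputs go up to 255.0f, so the product goes up to 65025.0f;
// dividing by mul_output_scale(1.0f, 1.0f) == 255.0f quantizes that maximum back to 255.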
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-// (300, 1, 4)
-INT_VALUE(LEFT_1D, 300)
-INT_VALUE(LEFT_2D, 1)
-INT_VALUE(LEFT_3D, 4)
-
-INT_VALUE(RIGHT_W, 4)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-int main(int argc, char **argv)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "relu1_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure Output Tensor
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_H, OFM_W} /* dims */, quantization);
-
- // Configure Input Tensor
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_H, IFM_W} /* dims */, quantization);
-
- // Add ReLU1 (RELU_N1_TO_1) Node
- // Run ReLU1 and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_RELU_N1_TO_1, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- return RandomTestRunner{SEED, param}.run(builder);
-}
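BuiltinOperator_RELU_N1_TO_1 above, and the RELU6 / RELU quickchecks that follow, differ only in how the activation clamps its input. Reference clamp behaviour is sketched below as standalone functions for comparison; the names are illustrative and not taken from the sources.

#include <algorithm>

static float relu(float x) { return std::max(0.0f, x); }                          // [0, +inf)
static float relu6(float x) { return std::min(std::max(0.0f, x), 6.0f); }         // [0, 6]
static float relu_n1_to_1(float x) { return std::min(std::max(-1.0f, x), 1.0f); } // [-1, 1]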
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 16)
-INT_VALUE(IFM_W, 16)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_relu6_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "relu6_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure Output Tensor
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_H, OFM_W} /* dims */, quantization);
-
- // Configure Input Tensor
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_H, IFM_W} /* dims */, quantization);
-
- // Add ReLU6 Node
- // Run ReLU6 and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_RELU6, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 16)
-INT_VALUE(IFM_W, 16)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-int main(int argc, char **argv)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "relu6_quan_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
- quantization.scale = 1.0f;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure Output Tensor
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {OFM_H, OFM_W} /* dims */, quantization);
-
- // Configure Input Tensor
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {IFM_H, IFM_W} /* dims */, quantization);
-
- // Add ReLU6 Node
- // Run ReLU6 and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_RELU6, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- return RandomTestRunner{SEED, param}.run(builder);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 16)
-INT_VALUE(IFM_W, 16)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_relu_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "relu_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure Output Tensor
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_H, OFM_W} /* dims */, quantization);
-
- // Configure Input Tensor
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_H, IFM_W} /* dims */, quantization);
-
- // Add ReLU Node
- // Run ReLU and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_RELU, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 16)
-INT_VALUE(IFM_W, 16)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_relu_2, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "relu_2.lst"
-#undef INT_VALUE
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure Output Tensor
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure Input Tensor
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Add ReLU Node
- // Run ReLU and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_RELU, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 16)
-INT_VALUE(IFM_W, 16)
-INT_VALUE(IFM_C, 3)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_relu_3, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "relu_3.lst"
-#undef INT_VALUE
-
- const int32_t IFM_N = IFM_N_Value();
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_N);
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_N = IFM_N;
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure Output Tensor
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure Input Tensor
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Add ReLU Node
- // Run ReLU and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_RELU, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 16)
-INT_VALUE(IFM_W, 16)
-INT_VALUE(IFM_C, 3)
-INT_VALUE(IFM_N, 1)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-int main(int argc, char **argv)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "relu_quan_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
- quantization.scale = 1.0f;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure Output Tensor
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {OFM_H, OFM_W} /* dims */, quantization);
-
- // Configure Input Tensor
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {IFM_H, IFM_W} /* dims */, quantization);
-
- // Add ReLU Node
- // Run ReLU and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_RELU, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- return RandomTestRunner{SEED, param}.run(builder);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 16)
-INT_VALUE(IFM_W, 16)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <iostream>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_reshape_1, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Set random test parameters
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "max_pool_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t OUT_L = IFM_C * IFM_H * IFM_W;
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OUT_L);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t dims[2] = {1, OUT_L};
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- // A: This may be necessary, because quantization values (scale, zero_point) of TENSOR_INT32 and
- // TENSOR_QUANT8_ASYMM are passed on to the runtime.
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1 /*N*/, OUT_L} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Configure Shape
- interp.SetTensorParametersReadOnly(2, kTfLiteInt32 /* type */, "shape" /* name */,
- {2} /* dims */, quantization,
- reinterpret_cast<const char *>(dims), 2 * sizeof(int32_t));
-
- // Add Reshape Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free()
- // So, param should be allocated with malloc()
- auto param = make_alloc<TfLiteReshapeParams>();
-
- param->num_dimensions = 2;
- param->shape[0] = 1;
- param->shape[1] = OUT_L;
-
- // Run Reshape and store its result into Tensor #0
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_RESHAPE, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
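The Reshape quickcheck above passes the target shape twice: once as the read-only Tensor #2 backed by the raw bytes of dims, and once through TfLiteReshapeParams. A short sketch of how such a read-only int32 buffer is laid out; the helper is hypothetical and only mirrors the SetTensorParametersReadOnly call made above.

#include <cstdint>
#include <cstring>

// Hypothetical helper: fills a caller-provided buffer (at least 2 * sizeof(int32_t) bytes)
// with the {1, out_l} target shape, matching the bytes handed to SetTensorParametersReadOnly.
static void fill_shape_buffer(int32_t out_l, char *buffer)
{
  const int32_t dims[2] = {1, out_l};
  std::memcpy(buffer, dims, sizeof(dims));
}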
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 4)
-INT_VALUE(IFM_W, 8)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <iostream>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_reshape_1, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Set random test parameters
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "reshape_quan_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t OUT_L = IFM_C * IFM_H * IFM_W;
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OUT_L);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t dims[2] = {1, OUT_L};
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- // A: This may be necessary, because quantization values (scale, zero_point) of TENSOR_INT32 and
- // TENSOR_QUANT8_ASYMM are passed on to the runtime.
- TfLiteQuantizationParams quantization;
- quantization.scale = 1.0f;
- quantization.zero_point = 0;
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {1 /*N*/, OUT_L} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Configure Shape
- interp.SetTensorParametersReadOnly(2, kTfLiteInt32 /* type */, "shape" /* name */,
- {2} /* dims */, quantization,
- reinterpret_cast<const char *>(dims), 2 * sizeof(int32_t));
-
- // Add Reshape Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free()
- // So, param should be allocated with malloc()
- auto param = make_alloc<TfLiteReshapeParams>();
-
- param->num_dimensions = 2;
- param->shape[0] = 1;
- param->shape[1] = OUT_L;
-
- // Run Reshape and store its result into Tensor #0
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_RESHAPE, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 4)
-INT_VALUE(IFM_W, 8)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <iostream>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_resize_bilinear_1, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Set random test parameters
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "resize_bilinear_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = OFM_H_Value();
- const int32_t OFM_W = OFM_W_Value();
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- int32_t size_data[2] = {OFM_H, OFM_W};
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- // A: This may be necessary, because quantization values (scale, zero_point) of TENSOR_INT32 and
- // TENSOR_QUANT8_ASYMM are passed on to the runtime.
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure OFM
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure IFM
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Configure Size
- interp.SetTensorParametersReadOnly(
- 2, kTfLiteInt32 /* type */, "size" /* name */, {2} /* dims */, quantization,
- reinterpret_cast<const char *>(size_data), 2 * sizeof(int32_t));
-
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free()
- // So, param should be allocated with malloc()
- auto param = make_alloc<TfLiteResizeBilinearParams>();
-
- // NOTE What is this?
- param->align_corners = false;
-
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_RESIZE_BILINEAR, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
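The resize_bilinear quickcheck leaves align_corners at false and asks "What is this?". As a rough answer (an assumption about TFLite's resize behaviour, not taken from the sources): align_corners changes how an output coordinate maps back to an input coordinate, so that when it is true the corner pixels of the input and output grids coincide. A sketch of the two mappings:

// Illustrative only: the input x-coordinate sampled for a given output x-coordinate.
static float input_coordinate(int out_index, int in_size, int out_size, bool align_corners)
{
  const float scale = (align_corners && out_size > 1)
                          ? static_cast<float>(in_size - 1) / static_cast<float>(out_size - 1)
                          : static_cast<float>(in_size) / static_cast<float>(out_size);
  return out_index * scale;
}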
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_C, 2)
-INT_VALUE(IFM_H, 3)
-INT_VALUE(IFM_W, 4)
-
-INT_VALUE(OFM_H, 30)
-INT_VALUE(OFM_W, 40)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_softmax_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "softmax_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_C = 1;
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- const nnfw::misc::feature::Shape ifm_shape{IFM_C, IFM_H, IFM_W};
-
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure Output Tensor
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1, IFM_H * IFM_W} /* dims */, quantization);
-
- // Configure Input Tensor
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {1, IFM_H * IFM_W} /* batch_size, input_size */,
- quantization);
-
- // Add Softmax Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free()
- // So, param should be allocated with malloc()
- auto param = make_alloc<TfLiteSoftmaxParams>();
-
- param->beta = 1.0;
-
- // Run Softmax and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SOFTMAX, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 2)
-INT_VALUE(IFM_W, 2)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_softmax_2, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define FLOAT_VALUE(NAME, VALUE) FloatVar NAME##_Value(#NAME, VALUE);
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "softmax_2.lst"
-#undef INT_VALUE
-#undef FLOAT_VALUE
-
- const int32_t IFM_C = 1;
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
- const float BETA = BETA_Value();
-
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_VALUE(BETA);
- PRINT_NEWLINE();
-
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- const nnfw::misc::feature::Shape ifm_shape{IFM_C, IFM_H, IFM_W};
-
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure Output Tensor
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {1, IFM_H * IFM_W} /* dims */, quantization);
-
- // Configure Input Tensor
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {1, IFM_H * IFM_W} /* batch_size, input_size */,
- quantization);
-
- // Add Softmax Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free,
- // so param should be allocated with malloc
- auto param = make_alloc<TfLiteSoftmaxParams>();
-
- param->beta = BETA;
-
- // Run Softmax and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SOFTMAX, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-#ifndef FLOAT_VALUE
-#error "FLOAT_VALUE should be defined"
-#endif // FLOAT_VALUE
-
-INT_VALUE(IFM_H, 2)
-INT_VALUE(IFM_W, 2)
-FLOAT_VALUE(BETA, 0.1)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_softmax_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "softmax_quan_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_C = 1;
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- const nnfw::misc::feature::Shape ifm_shape{IFM_C, IFM_H, IFM_W};
-
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization;
- quantization.scale = 1.0f / 256;
- quantization.zero_point = 0;
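- // e.g. with scale = 1/256 and zero_point = 0, a quantized value of 128 dequantizes to 128 / 256 = 0.5f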
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure Output Tensor
- interp.SetTensorParametersReadWrite(0, kTfLiteUInt8 /* type */, "output" /* name */,
- {1, IFM_H * IFM_W} /* dims */, quantization);
-
- // Configure Input Tensor
- interp.SetTensorParametersReadWrite(1, kTfLiteUInt8 /* type */, "input" /* name */,
- {1, IFM_H * IFM_W} /* batch_size, input_size */,
- quantization);
-
- // Add Softmax Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free,
- // so param should be allocated with malloc
- auto param = make_alloc<TfLiteSoftmaxParams>();
-
- param->beta = 1.0;
-
- // Run Softmax and store its result into Tensor #0
- // - Read IFM from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SOFTMAX, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 2)
-INT_VALUE(IFM_W, 2)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_split_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "split_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_N = IFM_N_Value();
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
- const int32_t NUM_SPLIT = NUM_SPLIT_Value();
- const int32_t AXIS = AXIS_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_N);
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_VALUE(NUM_SPLIT);
- PRINT_VALUE(AXIS);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_N = IFM_N;
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
- const int32_t axis[1] = {AXIS};
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(NUM_SPLIT + 2);
-
- // Configure Input Tensor(s)
- interp.SetTensorParametersReadOnly(0, kTfLiteInt32 /* type */, "axis" /* name */,
- {1} /* dims */, quantization,
- reinterpret_cast<const char *>(axis), 1 * sizeof(int32_t));
-
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Configure Output Tensor
- std::vector<int> ofm_indexes;
-
- for (uint32_t n = 0; n < NUM_SPLIT; ++n)
- {
- const auto ofm_index = 2 + n;
-
- interp.SetTensorParametersReadWrite(ofm_index, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- ofm_indexes.emplace_back(ofm_index);
- }
-
- auto *param = reinterpret_cast<TfLiteSplitParams *>(malloc(sizeof(TfLiteSplitParams)));
-
- param->num_splits = NUM_SPLIT;
-
- // Add SPLIT Node
- // Run SPLIT and store its results into Tensor #2 ~ #NUM_SPLIT+1
- // - Read axis and IFM from Tensor #0 and #1, respectively
- interp.AddNodeWithParameters({0, 1}, ofm_indexes, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SPLIT, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #2 ~ #NUM_SPLIT+1 as Outputs #0 ~ #NUM_SPLIT-1
- interp.SetInputs({1});
- interp.SetOutputs(ofm_indexes);
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_N, 1)
-INT_VALUE(IFM_C, 1)
-INT_VALUE(IFM_H, 5)
-INT_VALUE(IFM_W, 30)
-INT_VALUE(NUM_SPLIT, 5)
-INT_VALUE(AXIS, 1)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_split_2, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "split_2.lst"
-#undef INT_VALUE
-
- const int32_t IFM_N = IFM_N_Value();
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
- const int32_t NUM_SPLIT = NUM_SPLIT_Value();
- const int32_t AXIS = AXIS_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_N);
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_VALUE(NUM_SPLIT);
- PRINT_VALUE(AXIS);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_N = IFM_N;
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
- const int32_t axis[1] = {AXIS};
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(NUM_SPLIT + 2);
-
- // Configure Input Tensor(s)
- interp.SetTensorParametersReadOnly(0, kTfLiteInt32 /* type */, "axis" /* name */,
- {1} /* dims */, quantization,
- reinterpret_cast<const char *>(axis), 1 * sizeof(int32_t));
-
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Configure Output Tensor
- std::vector<int> ofm_indexes;
-
- for (uint32_t n = 0; n < NUM_SPLIT; ++n)
- {
- const auto ofm_index = 2 + n;
-
- interp.SetTensorParametersReadWrite(ofm_index, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- ofm_indexes.emplace_back(ofm_index);
- }
-
- auto *param = reinterpret_cast<TfLiteSplitParams *>(malloc(sizeof(TfLiteSplitParams)));
-
- param->num_splits = NUM_SPLIT;
-
- // Add SPLIT Node
- // Run SPLIT and store its results into Tensor #2 ~ #NUM_SPLIT+1
- // - Read axis and IFM from Tensor #0 and #1, respectively
- interp.AddNodeWithParameters({0, 1}, ofm_indexes, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SPLIT, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #2 ~ #NUM_SPLIT+1 as Outputs #0 ~ #NUM_SPLIT-1
- interp.SetInputs({1});
- interp.SetOutputs(ofm_indexes);
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_N, 1)
-INT_VALUE(IFM_C, 1)
-INT_VALUE(IFM_H, 5)
-INT_VALUE(IFM_W, 30)
-INT_VALUE(NUM_SPLIT, 3)
-INT_VALUE(AXIS, 2)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_split_3, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "split_3.lst"
-#undef INT_VALUE
-
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
- const int32_t NUM_SPLIT = NUM_SPLIT_Value();
- const int32_t AXIS = AXIS_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_VALUE(NUM_SPLIT);
- PRINT_VALUE(AXIS);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
- const int32_t axis[1] = {AXIS};
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(NUM_SPLIT + 2);
-
- // Configure Input Tensor(s)
- interp.SetTensorParametersReadOnly(0, kTfLiteInt32 /* type */, "axis" /* name */,
- {1} /* dims */, quantization,
- reinterpret_cast<const char *>(axis), 1 * sizeof(int32_t));
-
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_H, IFM_W} /* dims */, quantization);
-
- // Configure Output Tensor
- std::vector<int> ofm_indexes;
-
- for (uint32_t n = 0; n < NUM_SPLIT; ++n)
- {
- const auto ofm_index = 2 + n;
-
- interp.SetTensorParametersReadWrite(ofm_index, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_H, OFM_W} /* dims */, quantization);
-
- ofm_indexes.emplace_back(ofm_index);
- }
-
- auto *param = reinterpret_cast<TfLiteSplitParams *>(malloc(sizeof(TfLiteSplitParams)));
-
- param->num_splits = NUM_SPLIT;
-
- // Add SPLIT Node
- // Run SPLIT and store its results into Tensor #2 ~ #NUM_SPLIT+1
- // - Read axis and IFM from Tensor #0 and #1, respectively
- interp.AddNodeWithParameters({0, 1}, ofm_indexes, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SPLIT, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #2 ~ #NUM_SPLIT+1 as Outputs #0 ~ #NUM_SPLIT-1
- interp.SetInputs({1});
- interp.SetOutputs(ofm_indexes);
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 5)
-INT_VALUE(IFM_W, 30)
-INT_VALUE(NUM_SPLIT, 3)
-INT_VALUE(AXIS, 1)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-#include "misc/feature/Shape.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <random>
-#include <iostream>
-#include <cassert>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_split_4, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "split_4.lst"
-#undef INT_VALUE
-
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
- const int32_t NUM_SPLIT = NUM_SPLIT_Value();
- const int32_t AXIS = AXIS_Value();
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_VALUE(NUM_SPLIT);
- PRINT_VALUE(AXIS);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
- const int32_t axis[1] = {AXIS};
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(NUM_SPLIT + 2);
-
- // Configure Input Tensor(s)
- interp.SetTensorParametersReadOnly(0, kTfLiteInt32 /* type */, "axis" /* name */,
- {1} /* dims */, quantization,
- reinterpret_cast<const char *>(axis), 1 * sizeof(int32_t));
-
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_H, IFM_W} /* dims */, quantization);
-
- // Configure Output Tensor
- std::vector<int> ofm_indexes;
-
- for (uint32_t n = 0; n < NUM_SPLIT; ++n)
- {
- const auto ofm_index = 2 + n;
-
- interp.SetTensorParametersReadWrite(ofm_index, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_H, OFM_W} /* dims */, quantization);
-
- ofm_indexes.emplace_back(ofm_index);
- }
-
- auto *param = reinterpret_cast<TfLiteSplitParams *>(malloc(sizeof(TfLiteSplitParams)));
-
- param->num_splits = NUM_SPLIT;
-
- // Add SPLIT Node
- // Run SPLIT and store its results into Tensor #2 ~ #NUM_SPLIT+1
- // - Read axis and IFM from Tensor #0 and #1, respectively
- interp.AddNodeWithParameters({0, 1}, ofm_indexes, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SPLIT, 1));
-
- // Set Tensor #1 as Input #0, and Tensor #2 ~ #NUM_SPLIT+1 as Outputs #0 ~ #NUM_SPLIT-1
- interp.SetInputs({1});
- interp.SetOutputs(ofm_indexes);
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_H, 5)
-INT_VALUE(IFM_W, 30)
-INT_VALUE(NUM_SPLIT, 5)
-INT_VALUE(AXIS, 0)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_sub_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "sub_1.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT_N = RIGHT_N_Value();
- const int32_t RIGHT_C = RIGHT_C_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_N = std::max(LEFT_N, RIGHT_N);
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_N);
- PRINT_VALUE(RIGHT_C);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */,
- quantization);
-
- // Add Subtraction Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free,
- // so param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Sub and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SUB, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_C, 3)
-INT_VALUE(LEFT_H, 16)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT_N, 1)
-INT_VALUE(RIGHT_C, 3)
-INT_VALUE(RIGHT_H, 16)
-INT_VALUE(RIGHT_W, 16)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_sub_2, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "sub_2.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT = RIGHT_Value();
-
- const int32_t OFM_N = LEFT_N;
- const int32_t OFM_C = LEFT_C;
- const int32_t OFM_H = LEFT_H;
- const int32_t OFM_W = LEFT_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT} /* dims */, quantization);
-
- // Add Subtraction Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free,
- // so param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Sub and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SUB, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_C, 3)
-INT_VALUE(LEFT_H, 16)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT, 1)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_sub_3, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "sub_3.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT = RIGHT_Value();
-
- const int32_t OFM_H = LEFT_H;
- const int32_t OFM_W = LEFT_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_H, OFM_W} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_H, LEFT_W} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT, LEFT_W} /* dims */, quantization);
-
- // Add Subtraction Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free,
- // so param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Sub and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SUB, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_H, 8)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT, 1)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_sub_4, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "sub_1.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_C = LEFT_C_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
-
- const int32_t RIGHT_C = RIGHT_C_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
-
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_C);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_C);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization);
-
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_H, RIGHT_W, RIGHT_C} /* dims */, quantization);
-
- // Add Subtraction Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free,
- // so param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Sub and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SUB, 1));
-
- interp.SetInputs({1, 2});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_C, 3)
-INT_VALUE(LEFT_H, 8)
-INT_VALUE(LEFT_W, 16)
-
-INT_VALUE(RIGHT_C, 3)
-INT_VALUE(RIGHT_H, 1)
-INT_VALUE(RIGHT_W, 16)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_sub_5, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "sub_5.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
- const int32_t LEFT_C = LEFT_C_Value();
-
- const int32_t RIGHT_N = RIGHT_N_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
- const int32_t RIGHT_C = RIGHT_C_Value();
-
- const int32_t OFM_N = std::max(LEFT_N, RIGHT_N);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_VALUE(LEFT_C);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_N);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_VALUE(RIGHT_C);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
- PRINT_VALUE(OFM_C);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Configure left data
- const uint32_t left_size = LEFT_N * LEFT_C * LEFT_H * LEFT_W;
- const uint32_t right_size = RIGHT_N * RIGHT_C * RIGHT_H * RIGHT_W;
- float left_data[left_size] = {
- 0.0f,
- };
- float right_data[right_size] = {
- 0.0f,
- };
-
- // Fill left data with a constant (10.0f) and right data with 1.0f, 2.0f, ... (left_dist below is unused)
- {
- std::normal_distribution<float> left_dist(-1.0f, +1.0f);
- float value = 10.0f;
- for (uint32_t off = 0; off < left_size; ++off)
- {
- left_data[off] = value;
- }
- value = 1.0f;
- for (uint32_t off = 0; off < right_size; ++off)
- {
- right_data[off] = value++;
- }
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadOnly(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_N, LEFT_H, LEFT_W, LEFT_C} /* dims */, quantization,
- reinterpret_cast<const char *>(left_data),
- left_size * sizeof(float));
-
- // Configure input(s)
- interp.SetTensorParametersReadOnly(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_W, RIGHT_C} /* dims: test with other shapes */,
- quantization, reinterpret_cast<const char *>(right_data),
- right_size * sizeof(float));
-
- // Add Subtraction Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free,
- // so param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Sub and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SUB, 1));
-
- interp.SetInputs({});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_H, 2)
-INT_VALUE(LEFT_W, 3)
-INT_VALUE(LEFT_C, 4)
-
-INT_VALUE(RIGHT_N, 1)
-INT_VALUE(RIGHT_H, 1)
-INT_VALUE(RIGHT_W, 3)
-INT_VALUE(RIGHT_C, 4)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_sub_6, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "sub_6.lst"
-#undef INT_VALUE
-
- const int32_t LEFT_N = LEFT_N_Value();
- const int32_t LEFT_H = LEFT_H_Value();
- const int32_t LEFT_W = LEFT_W_Value();
- const int32_t LEFT_C = LEFT_C_Value();
-
- const int32_t RIGHT_N = RIGHT_N_Value();
- const int32_t RIGHT_H = RIGHT_H_Value();
- const int32_t RIGHT_W = RIGHT_W_Value();
- const int32_t RIGHT_C = RIGHT_C_Value();
-
- const int32_t OFM_N = std::max(LEFT_N, RIGHT_N);
- const int32_t OFM_H = std::max(LEFT_H, RIGHT_H);
- const int32_t OFM_W = std::max(LEFT_W, RIGHT_W);
- const int32_t OFM_C = std::max(LEFT_C, RIGHT_C);
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(LEFT_N);
- PRINT_VALUE(LEFT_H);
- PRINT_VALUE(LEFT_W);
- PRINT_VALUE(LEFT_C);
- PRINT_NEWLINE();
-
- PRINT_VALUE(RIGHT_N);
- PRINT_VALUE(RIGHT_H);
- PRINT_VALUE(RIGHT_W);
- PRINT_VALUE(RIGHT_C);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
- PRINT_VALUE(OFM_C);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Configure left data
- const uint32_t left_size = LEFT_N * LEFT_C * LEFT_H * LEFT_W;
- const uint32_t right_size = RIGHT_N * RIGHT_C * RIGHT_H * RIGHT_W;
- float left_data[left_size] = {
- 0.0f,
- };
- float right_data[right_size] = {
- 0.0f,
- };
-
- // Fill left data with a constant (10.0f) and right data with 1.0f, 2.0f, ... (left_dist below is unused)
- {
- std::normal_distribution<float> left_dist(-1.0f, +1.0f);
- float value = 10.0f;
- for (uint32_t off = 0; off < left_size; ++off)
- {
- left_data[off] = value;
- }
- value = 1.0f;
- for (uint32_t off = 0; off < right_size; ++off)
- {
- right_data[off] = value++;
- }
- }
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(3);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input(s)
- interp.SetTensorParametersReadOnly(1, kTfLiteFloat32 /* type */, "left" /* name */,
- {LEFT_W, LEFT_C} /* dims: test with other shapes */,
- quantization, reinterpret_cast<const char *>(left_data),
- left_size * sizeof(float));
-
- // Configure input(s)
- interp.SetTensorParametersReadOnly(2, kTfLiteFloat32 /* type */, "right" /* name */,
- {RIGHT_N, RIGHT_H, RIGHT_W, RIGHT_C} /* dims */,
- quantization, reinterpret_cast<const char *>(right_data),
- right_size * sizeof(float));
-
- // Add Subtraction Node
- //
- // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free,
- // so param should be allocated with malloc
- auto param = make_alloc<TfLiteAddParams>();
-
- param->activation = kTfLiteActNone;
-
- // Run Sub and store the result into Tensor #0
- // - Read Left from Tensor #1
- // - Read Right from Tensor #2,
- interp.AddNodeWithParameters({1, 2}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
- BuiltinOpResolver().FindOp(BuiltinOperator_SUB, 1));
-
- interp.SetInputs({});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(LEFT_N, 1)
-INT_VALUE(LEFT_H, 1)
-INT_VALUE(LEFT_W, 3)
-INT_VALUE(LEFT_C, 4)
-
-INT_VALUE(RIGHT_N, 1)
-INT_VALUE(RIGHT_H, 2)
-INT_VALUE(RIGHT_W, 3)
-INT_VALUE(RIGHT_C, 4)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <iostream>
-#include <cassert>
-
-#include <chrono>
-#include <random>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_tanh_1, simple_test)
-{
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "tanh_1.lst"
-#undef INT_VALUE
-
- const int32_t IFM_N = IFM_N_Value();
- const int32_t IFM_C = IFM_C_Value();
- const int32_t IFM_H = IFM_H_Value();
- const int32_t IFM_W = IFM_W_Value();
-
- const int32_t OFM_N = IFM_N;
- const int32_t OFM_C = IFM_C;
- const int32_t OFM_H = IFM_H;
- const int32_t OFM_W = IFM_W;
-
- // Initialize random number generator
- std::minstd_rand random(SEED);
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(IFM_N);
- PRINT_VALUE(IFM_C);
- PRINT_VALUE(IFM_H);
- PRINT_VALUE(IFM_W);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OFM_N);
- PRINT_VALUE(OFM_C);
- PRINT_VALUE(OFM_H);
- PRINT_VALUE(OFM_W);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(2);
-
- // Configure output
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
- {OFM_N, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
-
- // Configure input
- interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
- {IFM_N, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
-
- // Add Tanh Node
- // Run Tanh and store the result into Tensor #0
- // - Read input from Tensor #1
- interp.AddNodeWithParameters({1}, {0}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_TANH, 1));
-
- interp.SetInputs({1});
- interp.SetOutputs({0});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(IFM_N, 1)
-INT_VALUE(IFM_C, 3)
-INT_VALUE(IFM_H, 320)
-INT_VALUE(IFM_W, 320)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/builtin_op_data.h"
-
-#include "env.h"
-#include "memory.h"
-#include "misc/environment.h"
-
-#include "tflite/Diff.h"
-#include "tflite/Quantization.h"
-#include "tflite/interp/FunctionBuilder.h"
-
-#include <chrono>
-#include <iostream>
-
-using namespace tflite;
-using namespace nnfw::tflite;
-
-TEST(NNAPI_Quickcheck_topk_v2_1, simple_test)
-{
- // Set random seed
- int SEED = std::chrono::system_clock::now().time_since_epoch().count();
-
- nnfw::misc::env::IntAccessor("SEED").access(SEED);
-
- // Set random test parameters
- int verbose = 0;
- int tolerance = 1;
-
- nnfw::misc::env::IntAccessor("VERBOSE").access(verbose);
- nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
-
-#define INT_VALUE(NAME, VALUE) IntVar NAME##_Value(#NAME, VALUE);
-#include "topk_v2_1.lst"
-#undef INT_VALUE
-
- const int32_t INPUT_DATA = INPUT_DATA_Value();
- const int32_t K = K_Value();
-
- const int32_t OUTPUT_VALUES = K;
- const int32_t OUTPUT_INDICES = K;
-
- std::cout << "Configurations:" << std::endl;
-#define PRINT_NEWLINE() \
- { \
- std::cout << std::endl; \
- }
-#define PRINT_VALUE(value) \
- { \
- std::cout << " " << #value << ": " << (value) << std::endl; \
- }
- PRINT_VALUE(SEED);
- PRINT_NEWLINE();
-
- PRINT_VALUE(INPUT_DATA);
- PRINT_VALUE(K);
- PRINT_NEWLINE();
-
- PRINT_VALUE(OUTPUT_VALUES);
- PRINT_VALUE(OUTPUT_INDICES);
-#undef PRINT_VALUE
-#undef PRINT_NEWLINE
-
- // Fill the K data
- int32_t k_data[1] = {K};
-
- auto setup = [&](Interpreter &interp) {
- // Comment from 'context.h'
- //
- // Parameters for asymmetric quantization. Quantized values can be converted
- // back to float using:
- // real_value = scale * (quantized_value - zero_point);
- //
- // Q: Is this necessary?
- // A: This may be necessary, because quantization values(scale, zero_point) of TENSOR_INT32 and
- // TENSOR_QUANT8_ASYMM are passed on to the runtime.
- TfLiteQuantizationParams quantization = make_default_quantization();
-
- // On AddTensors(N) call, T/F Lite interpreter creates N tensors whose index is [0 ~ N)
- interp.AddTensors(4);
-
- // Configure INPUT_DATA
- interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "input" /* name */,
- {INPUT_DATA} /* dims */, quantization);
-
- // Configure K
- interp.SetTensorParametersReadOnly(1, kTfLiteInt32 /* type */, "k" /* name */, {1} /* dims */,
- quantization, reinterpret_cast<const char *>(k_data),
- sizeof(k_data));
-
- // Configure OUTPUT_VALUES
- interp.SetTensorParametersReadWrite(2, kTfLiteFloat32 /* type */, "output_values" /* name */,
- {OUTPUT_VALUES} /* dims */, quantization);
-
- // Configure OUTPUT_INDICES
- interp.SetTensorParametersReadWrite(3, kTfLiteInt32 /* type */, "output_indices" /* name */,
- {OUTPUT_INDICES} /* dims */, quantization);
-
- // Add TopK_V2 Node
- // Run TopK_V2 and store its result into Tensor #2 and #3
- // - Read input data and K from Tensor #0 and #1, respectively
- interp.AddNodeWithParameters({0, 1}, {2, 3}, nullptr, 0, nullptr,
- BuiltinOpResolver().FindOp(BuiltinOperator_TOPK_V2, 1));
-
- // Set Tensor #0 as Input, and Tensor #2 and #3 as Output
- interp.SetInputs({0});
- interp.SetOutputs({2, 3});
- };
-
- const nnfw::tflite::FunctionBuilder builder(setup);
-
- RandomTestParam param;
-
- param.verbose = verbose;
- param.tolerance = tolerance;
-
- int res = RandomTestRunner{SEED, param}.run(builder);
-
- EXPECT_EQ(res, 0);
-}
+++ /dev/null
-#ifndef INT_VALUE
-#error "INT_VALUE should be defined"
-#endif // INT_VALUE
-
-INT_VALUE(INPUT_DATA, 8192)
-INT_VALUE(K, 16)
progname=$(basename "${BASH_SOURCE[0]}")
outdir="."
name=""
+config=""
+config_src=""
usage() {
echo "Usage: $progname [options] modelfile"
echo " -h show this help"
echo " -o set nnpackage output directory (default=$outdir)"
echo " -p set nnpackage output name (default=[modelfile name])"
+ echo " -c provide configuration file"
echo ""
echo "Examples:"
echo " $progname add.tflite => create nnpackage 'add' in $outdir/"
echo " $progname -o out add.tflite => create nnpackage 'add' in out/"
echo " $progname -o out -p addpkg add.tflite => create nnpackage 'addpkg' in out/"
+ echo " $progname -c add.cfg add.tflite => create nnpackage 'add' with add.cfg"
exit 1
}
exit 1
fi
-while getopts "ho:p:" OPTION; do
+while getopts "ho:p:c:" OPTION; do
case "${OPTION}" in
h) usage;;
o) outdir=$OPTARG;;
p) name=$OPTARG;;
+ c) config_src=$OPTARG;;
?) exit 1;;
esac
done
echo "Generating nnpackage "$name" in "$outdir""
mkdir -p "$outdir"/"$name"/metadata
+
+if [ -s "$config_src" ]; then
+ config=$(basename "$config_src")
+ cp "$config_src" "$outdir/$name/metadata/$config"
+fi
+
cat > "$outdir"/"$name"/metadata/MANIFEST <<-EOF
{
"major-version" : "1",
- "minor-version" : "0",
+ "minor-version" : "1",
"patch-version" : "0",
+ "configs" : [ "$config" ],
"models" : [ "$modelfile" ],
"model-types" : [ "$extension" ]
}
template <typename DataType> struct BaseLabelData
{
explicit BaseLabelData(int label = -1, DataType confidence = 0)
- : label(label), confidence(confidence)
+ : label(label), confidence(confidence)
{
}
Runner(std::unique_ptr<tflite::Interpreter> interpreter,
std::unique_ptr<tflite::FlatBufferModel> model,
std::unique_ptr<::nnfw::tflite::NNAPIDelegate> delegate, unsigned img_size)
- : interpreter(std::move(interpreter)), model(std::move(model)), delegate(std::move(delegate)),
- interrupted(false), kInputSize(1 * img_size * img_size * 3 * sizeof(DataType))
+ : interpreter(std::move(interpreter)), model(std::move(model)), delegate(std::move(delegate)),
+ interrupted(false), kInputSize(1 * img_size * img_size * 3 * sizeof(DataType))
{
inference_times.reserve(500);
top1.reserve(500);
FloatRunner(std::unique_ptr<tflite::Interpreter> interpreter,
std::unique_ptr<tflite::FlatBufferModel> model,
std::unique_ptr<::nnfw::tflite::NNAPIDelegate> delegate, unsigned img_size)
- : Runner<float>(std::move(interpreter), std::move(model), std::move(delegate), img_size)
+ : Runner<float>(std::move(interpreter), std::move(model), std::move(delegate), img_size)
{
}
QuantizedRunner(std::unique_ptr<tflite::Interpreter> interpreter,
std::unique_ptr<tflite::FlatBufferModel> model,
std::unique_ptr<::nnfw::tflite::NNAPIDelegate> delegate, unsigned img_size)
- : Runner<uint8_t>(std::move(interpreter), std::move(model), std::move(delegate), img_size)
+ : Runner<uint8_t>(std::move(interpreter), std::move(model), std::move(delegate), img_size)
{
}
if (interpreter->tensor(input_index)->type == kTfLiteFloat32)
{
return std::unique_ptr<FloatRunner>(
- new FloatRunner(std::move(interpreter), std::move(model), std::move(delegate), img_size));
+ new FloatRunner(std::move(interpreter), std::move(model), std::move(delegate), img_size));
}
else if (interpreter->tensor(input_index)->type == kTfLiteUInt8)
{
- return std::unique_ptr<QuantizedRunner>(new QuantizedRunner(
- std::move(interpreter), std::move(model), std::move(delegate), img_size));
+ return std::unique_ptr<QuantizedRunner>(
+ new QuantizedRunner(std::move(interpreter), std::move(model), std::move(delegate), img_size));
}
throw std::invalid_argument("data type of model's input tensor is not supported.");
}
Target GetTarget(const std::string &str)
{
static const std::map<std::string, Target> target_names{
- {"tflite-cpu", Target::TfLiteCpu},
- {"tflite-delegate", Target::TfLiteDelegate},
- {"nnfw-delegate", Target::NnfwDelegate}};
+ {"tflite-cpu", Target::TfLiteCpu},
+ {"tflite-delegate", Target::TfLiteDelegate},
+ {"nnfw-delegate", Target::NnfwDelegate}};
if (target_names.find(str) == target_names.end())
{
throw std::invalid_argument(
- str + ": invalid target. Run with --help for a list of available targets.");
+ str + ": invalid target. Run with --help for a list of available targets.");
}
return target_names.at(str);
}
}
}
-int main(int argc, char *argv[]) try
+int main(int argc, char *argv[])
+try
{
namespace po = boost::program_options;
po::options_description desc("Run a model on multiple binary images and print"
" statistics");
- desc.add_options()("help", "print this message and quit")(
- "model", po::value<std::string>()->default_value(kDefaultModelFile), "tflite file")(
- "input", po::value<std::string>()->default_value(kDefaultImagesDir),
- "directory with input images")("offset", po::value<int>()->default_value(1), "labels offset")(
- "target", po::value<std::string>()->default_value("nnfw-delegate"),
- "how the model will be run (available targets: tflite-cpu, "
- "tflite-delegate, nnfw-delegate)")("imgsize", po::value<unsigned>()->default_value(224),
- "the width and height of the image");
+ // clang-format off
+ desc.add_options()
+ ("help", "print this message and quit")
+ ("model", po::value<std::string>()->default_value(kDefaultModelFile), "tflite file")
+ ("input", po::value<std::string>()->default_value(kDefaultImagesDir), "directory with input images")
+ ("offset", po::value<int>()->default_value(1), "labels offset")
+ ("target", po::value<std::string>()->default_value("nnfw-delegate"),
+ "how the model will be run (available targets: tflite-cpu, tflite-delegate, nnfw-delegate)")
+ ("imgsize", po::value<unsigned>()->default_value(224), "the width and height of the image");
+ // clang-format on
po::variables_map vm;
po::store(po::parse_command_line(argc, argv, desc), vm);
if (vm.count("help"))
-#!/usr/bin/python
+#!/usr/bin/env python
# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
#