tvm_option(USE_TENSORFLOW_PATH "TensorFlow root path when use TFLite" none)
tvm_option(USE_COREML "Build with coreml support" OFF)
tvm_option(USE_TARGET_ONNX "Build with ONNX Codegen support" OFF)
+tvm_option(USE_ARM_COMPUTE_LIB "Build with Arm Compute Library" OFF)
+tvm_option(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME "Build with Arm Compute Library graph runtime" OFF)
if(USE_CPP_RPC AND UNIX)
message(FATAL_ERROR "USE_CPP_RPC is only supported with WIN32. Use the Makefile for non-Windows.")
include(cmake/modules/contrib/TF_TVMDSOOP.cmake)
include(cmake/modules/contrib/CoreML.cmake)
include(cmake/modules/contrib/ONNX.cmake)
+include(cmake/modules/contrib/ArmComputeLib.cmake)
include(CheckCXXCompilerFlag)
if(NOT MSVC)
# Whether use MKL-DNN (DNNL) codegen
set(USE_DNNL_CODEGEN OFF)
+# Whether to use Arm Compute Library (ACL) codegen
+# We provide 2 separate flags since we cannot build the ACL runtime on x86.
+# This is useful for cases where you want to cross-compile a relay graph
+# on x86 then run on AArch.
+#
+# An example of how to use this can be found here: docs/deploy/arm_compute_lib.rst.
+#
+# USE_ARM_COMPUTE_LIB - Support for compiling a relay graph offloading supported
+# operators to Arm Compute Library. OFF/ON
+# USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME - Run Arm Compute Library annotated functions via the ACL
+# runtime. OFF/ON/"path/to/ACL"
+set(USE_ARM_COMPUTE_LIB OFF)
+set(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME OFF)
+
# Build ANTLR parser for Relay text format
# Possible values:
# - ON: enable ANTLR by searching default locations (cmake find_program for antlr4 and /usr/local for jar)
--- /dev/null
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# We separate the codegen and runtime build since ACL can only be built
+# for AArch. In the world where we take the cross compilation approach,
+# which is common with arm devices, we need to be able to cross-compile
+# a relay graph on x86 for AArch and then run the graph on AArch.
+if(USE_ARM_COMPUTE_LIB)
+  # Codegen-only support: the Relay -> ACL JSON serializer plus the runtime
+  # module source are compiled into the compiler sources.
+  file(GLOB ACL_RELAY_CONTRIB_SRC src/relay/backend/contrib/arm_compute_lib/*.cc)
+  file(GLOB ACL_RUNTIME_MODULE src/runtime/contrib/arm_compute_lib/acl_runtime.cc)
+  list(APPEND COMPILER_SRCS
+    ${ACL_RELAY_CONTRIB_SRC}
+    ${ACL_RUNTIME_MODULE}
+  )
+  message(STATUS "Build with Arm Compute Library support...")
+endif()
+
+if(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME)
+  # Default location of Arm Compute Library: <current source dir>/acl.
+  set(ACL_PATH ${CMAKE_CURRENT_SOURCE_DIR}/acl)
+  # Detect custom ACL path. Any value that is not one of CMake's boolean "true"
+  # constants (ON/TRUE/YES/Y/1, case-insensitive) is taken to be the ACL root
+  # directory. Comparing only against the literal "ON" would turn e.g.
+  # -DUSE_ARM_COMPUTE_LIB_GRAPH_RUNTIME=TRUE into the path "TRUE".
+  string(TOUPPER "${USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME}" ACL_GRAPH_RUNTIME_FLAG)
+  if(NOT "${ACL_GRAPH_RUNTIME_FLAG}" MATCHES "^(ON|TRUE|YES|Y|1)$")
+    set(ACL_PATH "${USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME}")
+  endif()
+
+  # Every file in the ACL runtime contrib directory is added to the runtime sources.
+  file(GLOB ACL_CONTRIB_SRC src/runtime/contrib/arm_compute_lib/*)
+
+  set(ACL_INCLUDE_DIRS ${ACL_PATH}/include ${ACL_PATH})
+  include_directories(${ACL_INCLUDE_DIRS})
+
+  find_library(EXTERN_ACL_COMPUTE_LIB
+    NAMES arm_compute libarm_compute
+    HINTS "${ACL_PATH}" "${ACL_PATH}/lib" "${ACL_PATH}/build"
+  )
+  find_library(EXTERN_ACL_COMPUTE_CORE_LIB
+    NAMES arm_compute_core libarm_compute_core
+    HINTS "${ACL_PATH}" "${ACL_PATH}/lib" "${ACL_PATH}/build"
+  )
+  find_library(EXTERN_ACL_COMPUTE_GRAPH_LIB
+    NAMES arm_compute_graph libarm_compute_graph
+    HINTS "${ACL_PATH}" "${ACL_PATH}/lib" "${ACL_PATH}/build"
+  )
+
+  # Fail early with an actionable message instead of appending a
+  # "<VAR>-NOTFOUND" entry to the linker libraries.
+  foreach(ACL_LIB_VAR
+      EXTERN_ACL_COMPUTE_LIB
+      EXTERN_ACL_COMPUTE_CORE_LIB
+      EXTERN_ACL_COMPUTE_GRAPH_LIB)
+    if(NOT ${ACL_LIB_VAR})
+      message(FATAL_ERROR
+        "Arm Compute Library graph runtime requested but ${ACL_LIB_VAR} was not found "
+        "under '${ACL_PATH}'. Set USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME to the ACL root path.")
+    endif()
+  endforeach()
+
+  list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_ACL_COMPUTE_LIB})
+  list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_ACL_COMPUTE_CORE_LIB})
+  list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_ACL_COMPUTE_GRAPH_LIB})
+  list(APPEND RUNTIME_SRCS ${ACL_CONTRIB_SRC})
+  message(STATUS "Build with Arm Compute Library graph runtime support: "
+          ${EXTERN_ACL_COMPUTE_LIB} ", \n"
+          ${EXTERN_ACL_COMPUTE_CORE_LIB} ", \n"
+          ${EXTERN_ACL_COMPUTE_GRAPH_LIB})
+
+  # Set flag to detect ACL graph runtime support.
+  add_definitions(-DTVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB)
+endif()
--- /dev/null
+.. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+Relay Arm|reg| Compute Library Integration
+==========================================
+
+Introduction
+------------
+
+Arm Compute Library (ACL) is an open source project that provides accelerated kernels for Arm CPUs
+and GPUs. Currently the integration offloads operators to ACL to use hand-crafted assembler
+routines in the library. By offloading select operators from a relay graph to ACL we can achieve
+a performance boost on such devices.
+
+Building with ACL support
+-------------------------
+
+The current implementation has two separate build options in cmake. The reason for this split is
+that ACL cannot be used on an x86 machine. However, we still want to be able to compile an ACL
+runtime module on an x86 machine.
+
+* USE_ARM_COMPUTE_LIB=ON/OFF - Enabling this flag will add support for compiling an ACL runtime module.
+* USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME=ON/OFF/path-to-acl - Enabling this flag will allow the graph runtime to
+ compute the ACL offloaded functions.
+
+These flags can be used in different scenarios depending on your setup. For example, if you want
+to compile an ACL module on an x86 machine and then run the module on a remote Arm device via RPC, you will
+need to use USE_ARM_COMPUTE_LIB=ON on the x86 machine and USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME=ON on the remote
+AArch64 device.
+
+Usage
+-----
+
+.. note::
+
+ This section may not stay up-to-date with changes to the API.
+
+Create a relay graph. This may be a single operator or a whole graph. The intention is that any
+relay graph can be input. The ACL integration will only pick supported operators to be offloaded
+whilst the rest will be computed via TVM. (For this example we will use a single
+max_pool2d operator).
+
+.. code:: python
+
+ import tvm
+ from tvm import relay
+
+ data_type = "float32"
+ data_shape = (1, 14, 14, 512)
+ strides = (2, 2)
+ padding = (0, 0, 0, 0)
+ pool_size = (2, 2)
+ layout = "NHWC"
+ output_shape = (1, 7, 7, 512)
+
+ data = relay.var('data', shape=data_shape, dtype=data_type)
+ out = relay.nn.max_pool2d(data, pool_size=pool_size, strides=strides, layout=layout, padding=padding)
+ module = tvm.IRModule.from_expr(out)
+
+
+Annotate and partition the graph for ACL.
+
+.. code:: python
+
+ from tvm.relay.op.contrib.arm_compute_lib import partition_for_arm_compute_lib
+ module = partition_for_arm_compute_lib(module)
+
+
+Build the Relay graph.
+
+.. code:: python
+
+ target = "llvm -mtriple=aarch64-linux-gnu -mattr=+neon"
+ with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]):
+ lib = relay.build(module, target=target)
+
+
+Export the module.
+
+.. code:: python
+
+ lib_path = '~/lib_acl.so'
+ cross_compile = 'aarch64-linux-gnu-c++'
+ lib.export_library(lib_path, cc=cross_compile)
+
+
+Run Inference. This must be on an Arm device. If compiling on x86 device and running on AArch64,
+consider using the RPC mechanism. Tutorials for using the RPC mechanism:
+https://tvm.apache.org/docs/tutorials/cross_compilation_and_rpc.html#sphx-glr-tutorials-cross-compilation-and-rpc-py
+
+.. code:: python
+
+ ctx = tvm.cpu(0)
+ loaded_lib = tvm.runtime.load_module('lib_acl.so')
+ gen_module = tvm.contrib.graph_runtime.GraphModule(loaded_lib['default'](ctx))
+ d_data = np.random.uniform(0, 1, data_shape).astype(data_type)
+ map_inputs = {'data': d_data}
+ gen_module.set_input(**map_inputs)
+ gen_module.run()
+
+
+More examples
+-------------
+The example above only shows a basic example of how ACL can be used for offloading a single
+Maxpool2D. If you would like to see more examples for each implemented operator and for
+networks refer to the tests: `tests/python/contrib/test_arm_compute_lib`. Here you can modify
+`infrastructure.py` to use the remote device you have setup.
+
+
+Adding a new operator
+---------------------
+Adding a new operator requires changes to a series of places. This section will give a hint on
+what needs to be changed and where, it will not however dive into the complexities for an
+individual operator. This is left to the developer.
+
+There are a series of files we need to make changes to:
+
+* `python/tvm/relay/op/contrib/arm_compute_lib.py` In this file we define the operators we wish to offload using the
+  `tvm.ir.register_op_attr` decorator. This will mean the annotation pass recognizes this operator as ACL
+  offloadable.
+* `src/relay/backend/contrib/arm_compute_lib/codegen.cc` Implement `Create[OpName]JSONNode` method. This is where we
+  declare how the operator should be represented by JSON. This will be used to create the ACL module.
+* `src/runtime/contrib/arm_compute_lib/acl_kernel.h` Implement `Create[OpName]Layer` method. This is where we
+  define how the JSON representation can be used to create an ACL function. We simply define how to
+  translate from the JSON representation to ACL API.
+* `tests/python/contrib/test_arm_compute_lib` Add unit tests for the given operator.
android
integrate
hls
+ arm_compute_lib
def get_json(self):
return self.graph_json
+ def get_lib(self):
+ """Return the object stored in ``self.lib``."""
+ return self.lib
+
def __getitem__(self, item):
return self.module.__getitem__(item)
"""Contrib modules."""
from .register import get_pattern_table, register_pattern_table
+from .arm_compute_lib import *
from .dnnl import *
from .coreml import *
--- /dev/null
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument
+"""Arm Compute Library supported operators."""
+import tvm
+from tvm.relay import transform
+from tvm.relay.build_module import bind_params_by_name
+
+from ...dataflow_pattern import wildcard, is_op, is_constant
+from .register import register_pattern_table
+
+
+def is_arm_compute_runtime_enabled():
+ """Check if the ACL graph runtime is present.
+
+ Returns
+ -------
+ ret: bool
+ True if present, False if not.
+ """
+ check_enabled = tvm.get_global_func("relay.op.is_arm_compute_runtime_enabled", True)
+ if check_enabled:
+ return check_enabled()
+ return False
+
+
+def partition_for_arm_compute_lib(mod, params=None):
+ """Partition the graph greedily offloading supported
+ operators to Arm Compute Library.
+
+ Parameters
+ ----------
+ mod : Module
+ The module to run passes on.
+ params : Optional[Dict[str, NDArray]]
+ Constant input parameters.
+
+ Returns
+ -------
+ ret : annotated and partitioned module.
+ """
+ if params:
+ mod['main'] = bind_params_by_name(mod['main'], params)
+
+ seq = tvm.transform.Sequential([transform.MergeComposite(arm_compute_lib_pattern_table()),
+ transform.AnnotateTarget('arm_compute_lib'),
+ transform.PartitionGraph()])
+
+ return seq(mod)
+
+
+@register_pattern_table("arm_compute_lib")
+def arm_compute_lib_pattern_table():
+ """Get the ACL pattern table."""
+
+ def conv_pattern():
+ """Create a convolution pattern.
+
+ Returns
+ -------
+ pattern : dataflow_pattern.AltPattern
+ Denotes the convolution pattern.
+ """
+ pattern = is_op('nn.pad')(wildcard()) | wildcard()
+ pattern = is_op('nn.conv2d')(pattern, is_constant())
+ pattern = pattern.optional(lambda x: is_op('nn.bias_add')(x, is_constant()))
+ pattern = pattern.optional(is_op('nn.relu'))
+ return pattern
+
+ def check_conv(extract):
+ """Check conv pattern is supported by ACL."""
+ call = extract
+ while call.op.name != "nn.conv2d":
+ call = call.args[0]
+ return conv2d(call.attrs, call.args)
+
+ return [('arm_compute_lib.conv2d', conv_pattern(), check_conv)]
+
+
+def _register_external_op_helper(op_name, supported=True):
+ @tvm.ir.register_op_attr(op_name, "target.arm_compute_lib")
+ def _func_wrapper(attrs, args):
+ return supported
+
+ return _func_wrapper
+
+
+_register_external_op_helper("reshape")
+
+
+@tvm.ir.register_op_attr("nn.conv2d", "target.arm_compute_lib")
+def conv2d(attrs, args):
+ """Check if the external ACL codegen for conv2d should be used."""
+ if attrs.groups != 1:
+ return False
+ if attrs.data_layout != "NHWC":
+ return False
+ if attrs.out_dtype != "float32" and attrs.out_dtype != "":
+ return False
+ data_typ = args[0].checked_type
+ if len(data_typ.shape) != 4 or data_typ.shape[0] != 1 or data_typ.dtype != "float32":
+ return False
+ kernel_typ = args[1].checked_type
+ if kernel_typ.dtype != "float32":
+ return False
+ return True
+
+
+@tvm.ir.register_op_attr("nn.max_pool2d", "target.arm_compute_lib")
+def max_pool2d(attrs, args):
+ """Check if the external ACL codegen for maxpool2d should be used."""
+ if attrs.layout != "NHWC":
+ return False
+ typ = args[0].checked_type
+ if typ.dtype != "float32":
+ return False
+ return True
--- /dev/null
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/backend/contrib/arm_compute_lib/codegen.cc
+ * \brief Implementation of the Relay -> ACL JSON serializer.
+ */
+#include <tvm/ir/module.h>
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/type.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "../../utils.h"
+#include "../codegen_json/codegen_json.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+
+/*!
+ * \brief Generates an ACLModule from a relay expression. This "compilation"
+ * does not require ACL since the actual conversion using ACL APIs is
+ * deferred until creation of the runtime. This step simply serializes the
+ * relay program into a JSON string.
+ */
+class ACLJSONSerializer : public backend::contrib::JSONSerializer {
+  using JSONGraphNode = tvm::runtime::json::JSONGraphNode;
+  using JSONGraphNodeEntry = tvm::runtime::json::JSONGraphNodeEntry;
+
+ public:
+  ACLJSONSerializer(const std::string& symbol, const Expr& expr) : JSONSerializer(symbol, expr) {}
+
+  /*!
+   * \brief Visit call nodes and generate appropriate JSON node.
+   *
+   * Calls to plain operators are delegated to the base serializer. Calls to
+   * functions must carry the composite attribute and name a known ACL pattern.
+   *
+   * \param cn The current call node.
+   * \return A list of graph entry nodes.
+   */
+  std::vector<JSONGraphNodeEntry> VisitExpr_(const CallNode* cn) override {
+    if (cn->op.as<OpNode>()) {
+      return JSONSerializer::VisitExpr_(cn);
+    }
+    if (!cn->op.as<FunctionNode>()) {
+      LOG(FATAL) << "Arm Compute Library JSON runtime does not support calls to "
+                 << cn->op->GetTypeKey();
+    }
+    auto fn = cn->op.as<FunctionNode>();
+    auto comp = fn->GetAttr<String>(attr::kComposite);
+    CHECK(comp.defined()) << "Arm Compute Library JSON runtime only supports composite functions.";
+    const std::string name = comp.value();
+    std::shared_ptr<JSONGraphNode> json_node;
+    if (name == "arm_compute_lib.conv2d") {
+      json_node = CreateCompositeConvJSONNode(cn);
+    } else {
+      LOG(FATAL) << "Unrecognized Arm Compute Library pattern: " << name;
+    }
+    return AddNode(json_node, GetRef<Expr>(cn));
+  }
+
+ private:
+  /*!
+   * \brief Create a JSON representation of a composite convolution.
+   *
+   * The composite body is unpacked from the outside in: optional nn.relu,
+   * optional nn.bias_add, the nn.conv2d itself and an optional nn.pad feeding it.
+   *
+   * \param cn The call to the composite function to be represented.
+   * \return A JSON representation of a specific operator.
+   */
+  std::shared_ptr<JSONGraphNode> CreateCompositeConvJSONNode(const CallNode* cn) {
+    const std::string name = "nn.conv2d";
+    const CallNode* pad = nullptr;
+    const CallNode* conv = nullptr;
+    const CallNode* bias = nullptr;
+    bool has_activation = false;
+
+    // Unpack composite function
+    const auto* fn = cn->op.as<FunctionNode>();
+    CHECK(fn);
+    const auto* current_call = fn->body.as<CallNode>();
+    // as<CallNode>() yields nullptr for non-call expressions; fail with a message
+    // rather than handing a null pointer to the operator checks below.
+    CHECK(current_call) << "Expected the body of the composite function to be a call.";
+    if (backend::IsOp(current_call, "nn.relu")) {
+      has_activation = true;
+      current_call = current_call->args[0].as<CallNode>();
+      CHECK(current_call) << "Expected the input of nn.relu to be a call.";
+    }
+    if (backend::IsOp(current_call, "nn.bias_add")) {
+      bias = current_call;
+      current_call = current_call->args[0].as<CallNode>();
+      CHECK(current_call) << "Expected the input of nn.bias_add to be a call.";
+    }
+    CHECK(backend::IsOp(current_call, "nn.conv2d"));
+    conv = current_call;
+    if (!current_call->args.empty() && current_call->args[0]->IsInstance<CallNode>()) {
+      current_call = current_call->args[0].as<CallNode>();
+      if (backend::IsOp(current_call, "nn.pad")) {
+        pad = current_call;
+      }
+    }
+
+    const auto* conv_attr = conv->attrs.as<Conv2DAttrs>();
+    CHECK(conv_attr);
+    CHECK(conv_attr->kernel_layout == "OHWI")
+        << "Kernel layout must be OHWI, has the module been pre-processed correctly?";
+
+    // Inputs: data (first argument of the composite call), weights and optional bias.
+    std::vector<JSONGraphNodeEntry> inputs;
+    inputs.push_back(VisitExpr(cn->args[0])[0]);
+    inputs.push_back(VisitExpr(conv->args[1])[0]);
+    if (bias) {
+      inputs.push_back(VisitExpr(bias->args[1])[0]);
+    }
+
+    auto json_node = std::make_shared<JSONGraphNode>(name, "kernel", inputs, 1);
+    SetCallNodeAttribute(json_node, conv);
+
+    // Override attributes
+    if (pad) {
+      const auto* pad_attr = pad->attrs.as<PadAttrs>();
+      CHECK(pad_attr);
+      auto p = pad_attr->pad_width;
+      // Convert to TVM layout for now, conversion to ACL layout takes place in runtime.
+      // Standard convolution pad layout for TVM: top, left, bottom, right.
+      // Each entry is {dimension, before(0)/after(1)} into pad_width.
+      const size_t pad_order[4][2] = {{1, 0}, {2, 0}, {1, 1}, {2, 1}};
+      std::vector<std::string> padding;
+      for (const auto& index : pad_order) {
+        const auto* width = p[index[0]][index[1]].as<IntImmNode>();
+        CHECK(width) << "Padding widths must be constant integers.";
+        padding.push_back(std::to_string(width->value));
+      }
+      std::vector<dmlc::any> padding_attr;
+      padding_attr.emplace_back(padding);
+      json_node->SetAttr("padding", padding_attr);
+    }
+    if (has_activation) {
+      std::vector<std::string> activation_type = {"relu"};
+      std::vector<dmlc::any> act_attr;
+      act_attr.emplace_back(activation_type);
+      json_node->SetAttr("activation_type", act_attr);
+    }
+    return json_node;
+  }
+};
+
+/*!
+ * \brief Pre-process a module containing functions ready for ACL codegen.
+ *
+ * For now we enforce OHWI kernel layout and fold the transforms away.
+ *
+ * \param mod The module to be pre-processed.
+ * \return The processed module.
+ */
+IRModule PreProcessModule(const IRModule& mod) {
+  // Request NHWC data / OHWI kernel layouts for conv2d, then fold constants.
+  tvm::Map<String, Array<String>> conv_layouts = {{"nn.conv2d", {"NHWC", "OHWI"}}};
+  IRModule converted = transform::ConvertLayout(conv_layouts)(mod);
+  IRModule folded = transform::FoldConstant()(converted);
+  return folded;
+}
+
+TVM_REGISTER_GLOBAL("relay.ext.arm_compute_lib.optimize").set_body_typed(PreProcessModule);
+
+/*!
+ * \brief Create a runtime module for ACL.
+ *
+ * This consists of a series of "serialized functions" which each represent a
+ * sub-graph to be computed by ACL and will each be executed independently from
+ * one another. Each function consists of serialized JSON describing the sub-graph
+ * and serialized constant tensors.
+ *
+ * \note The ACL runtime module only supports a single operator per
+ * sub-graph currently.
+ *
+ * \param ref The ext_func Relay expression/module to be executed using extern ops.
+ * \return A runtime module.
+ */
+runtime::Module ACLCompiler(const ObjectRef& ref) {
+  CHECK(ref->IsInstance<FunctionNode>()) << "The input ref is expected to be a Relay function.";
+  Function relay_func = Downcast<Function>(ref);
+  std::string symbol = backend::GetExtSymbol(relay_func);
+
+  // Serialize the function to JSON and collect the names of its constants.
+  ACLJSONSerializer json_serializer(symbol, relay_func);
+  json_serializer.serialize();
+  std::string serialized_graph = json_serializer.GetJSON();
+  auto constant_names = json_serializer.GetParams();
+
+  // The runtime module is created through the registered factory function.
+  const auto* create_runtime = runtime::Registry::Get("runtime.arm_compute_lib_runtime_create");
+  CHECK(create_runtime != nullptr) << "Cannot find JSON runtime module to create";
+  runtime::Module acl_module = (*create_runtime)(symbol, serialized_graph, constant_names);
+  return acl_module;
+}
+
+TVM_REGISTER_GLOBAL("relay.ext.arm_compute_lib").set_body_typed(ACLCompiler);
+
+/*!
+ * \brief Check whether ACL graph runtime is used.
+ *
+ * \return True if ACL graph runtime is enabled, False if not.
+ */
+inline constexpr bool IsACLRuntimeEnabled() {
+// Use #ifdef so the check agrees with the guards in the ACL runtime sources,
+// which test whether TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB is defined.
+#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB
+  return true;
+#else
+  return false;
+#endif
+}
+
+TVM_REGISTER_GLOBAL("relay.op.is_arm_compute_runtime_enabled").set_body_typed(IsACLRuntimeEnabled);
+
+} // namespace contrib
+} // namespace relay
+} // namespace tvm
* \return A runtime module.
*/
virtual runtime::Module CreateCSourceModule(const ObjectRef& ref) = 0;
-
- /*!
- * \brief Get the external symbol of the Relay function name.
- *
- * \param func The provided function.
- *
- * \return An external symbol.
- */
- std::string GetExtSymbol(const Function& func) const {
- const auto name_node = func->GetAttr<String>(tvm::attr::kGlobalSymbol);
- CHECK(name_node.defined()) << "Fail to retrieve external symbol.";
- return std::string(name_node.value());
- }
};
// The base class to generate the declaration functions in C.
return AddNode(node, GetRef<Expr>(cn));
}
};
-
-/*!
- * \brief Get the external symbol of the Relay function name.
- *
- * \param func The provided function.
- *
- * \return An external symbol.
- */
-std::string GetExtSymbol(const Function& func) {
- const auto name_node = func->GetAttr<String>(tvm::attr::kGlobalSymbol);
- CHECK(name_node.defined()) << "Fail to retrieve external symbol.";
- return std::string(name_node.value());
-}
#endif
/*!
return GetRootCall(next_call, depth - 1, expected_op_names);
}
+/*!
+ * \brief Get the external symbol of the Relay function name.
+ *
+ * \param func The provided function.
+ * \return An external symbol.
+ */
+inline std::string GetExtSymbol(const Function& func) {
+  // The external symbol is stored in the function's global symbol attribute.
+  const auto symbol_attr = func->GetAttr<String>(tvm::attr::kGlobalSymbol);
+  CHECK(symbol_attr.defined()) << "Fail to retrieve external symbol.";
+  return std::string(symbol_attr.value());
+}
+
} // namespace backend
} // namespace relay
} // namespace tvm
--- /dev/null
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/runtime/contrib/arm_compute_lib/acl_allocator.cc
+ * \brief ACL Allocator implementation that requests memory from TVM.
+ */
+
+#include "acl_allocator.h"
+
+namespace tvm {
+namespace runtime {
+namespace contrib {
+
+void* ACLAllocator::allocate(size_t size, size_t alignment) {
+  CHECK_GT(size, 0) << "Cannot allocate size less than or equal to zero";
+  // The request is served from the TVM workspace; alignment is not forwarded.
+  void* workspace = this->device_api_->AllocWorkspace(this->ctx_, size, {});
+  return workspace;
+}
+
+void ACLAllocator::free(void* ptr) {
+  // Hand the workspace back to the TVM device API.
+  this->device_api_->FreeWorkspace(this->ctx_, ptr);
+}
+
+std::unique_ptr<arm_compute::IMemoryRegion> ACLAllocator::make_region(size_t size,
+                                                                      size_t alignment) {
+  // The region requests its own memory from TVM on construction.
+  std::unique_ptr<arm_compute::IMemoryRegion> region =
+      std::make_unique<ACLMemoryRegion>(size, alignment);
+  return region;
+}
+
+ACLMemoryRegion::ACLMemoryRegion(size_t size, size_t alignment)
+    : IMemoryRegion(size), ptr_(nullptr) {
+  // A zero-sized region owns no memory.
+  if (size == 0) {
+    return;
+  }
+  this->ptr_ = this->device_api_->AllocDataSpace(this->ctx_, size, alignment, {});
+}
+
+ACLMemoryRegion::ACLMemoryRegion(void* ptr, size_t size)
+    // Wrap existing memory as a sub-region; a zero-sized region keeps a null pointer.
+    : IMemoryRegion(size), ptr_(size != 0 ? ptr : nullptr), is_subregion_(true) {}
+
+ACLMemoryRegion::~ACLMemoryRegion() {
+  // Sub-regions and empty regions have nothing to release.
+  if (this->ptr_ == nullptr || is_subregion_) {
+    return;
+  }
+  this->device_api_->FreeDataSpace(this->ctx_, this->ptr_);
+}
+
+std::unique_ptr<arm_compute::IMemoryRegion> ACLMemoryRegion::extract_subregion(size_t offset,
+                                                                               size_t size) {
+  // The requested window must lie entirely inside this (non-empty) region.
+  const bool within_bounds =
+      (this->ptr_ != nullptr) && (offset < _size) && (_size - offset >= size);
+  if (!within_bounds) {
+    return nullptr;
+  }
+  uint8_t* subregion_start = static_cast<uint8_t*>(this->ptr_) + offset;
+  return std::make_unique<ACLMemoryRegion>(subregion_start, size);
+}
+
+} // namespace contrib
+} // namespace runtime
+} // namespace tvm
--- /dev/null
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/runtime/contrib/arm_compute_lib/acl_allocator.h
+ * \brief ACL Allocator implementation that requests memory from TVM.
+ */
+
+#ifndef TVM_RUNTIME_CONTRIB_ARM_COMPUTE_LIB_ACL_ALLOCATOR_H_
+#define TVM_RUNTIME_CONTRIB_ARM_COMPUTE_LIB_ACL_ALLOCATOR_H_
+
+#include <arm_compute/runtime/IAllocator.h>
+#include <arm_compute/runtime/IMemoryRegion.h>
+#include <arm_compute/runtime/MemoryRegion.h>
+#include <tvm/runtime/data_type.h>
+#include <tvm/runtime/device_api.h>
+#include <tvm/runtime/registry.h>
+
+#include <memory>
+
+namespace tvm {
+namespace runtime {
+namespace contrib {
+
+/*!
+ * \brief Override ACL memory allocator and replace with TVM workspace based allocation.
+ */
+class ACLAllocator : public arm_compute::IAllocator {
+ public:
+ ACLAllocator() = default;
+
+ /*!
+ * \brief Allocate bytes to ACL runtime.
+ *
+ * Specific implementation requests memory from TVM using their device api.
+ *
+ * \param size Size to allocate.
+ * \param alignment Alignment that the returned pointer should comply with.
+ * \return A pointer to the allocated memory.
+ */
+ void* allocate(size_t size, size_t alignment) override;
+
+ /*!
+ * \brief Free memory from ACL runtime.
+ *
+ * \param ptr Pointer to workspace to free.
+ */
+ void free(void* ptr) override;
+
+ /*!
+ * \brief Create self-managed memory region.
+ *
+ * \param size Size of the memory region.
+ * \param alignment Alignment of the memory region.
+ * \return The memory region object.
+ */
+ std::unique_ptr<arm_compute::IMemoryRegion> make_region(size_t size, size_t alignment) override;
+
+ private:
+ // Note: ctx_ must stay declared before device_api_, whose initializer reads ctx_.
+ /*! \brief Always allocate data in the context of the current CPU. */
+ const TVMContext ctx_{kDLCPU, 0};
+ /*! \brief Device API which allows requests for memory from TVM. */
+ runtime::DeviceAPI* device_api_ = runtime::DeviceAPI::Get(ctx_);
+};
+
+/*!
+ * \brief Memory region that can request TVM memory for ACL to use.
+ */
+class ACLMemoryRegion : public arm_compute::IMemoryRegion {
+ public:
+  /*!
+   * \brief Create a region that owns memory requested from TVM.
+   *
+   * \param size Size of the region in bytes. No memory is requested when zero.
+   * \param alignment Alignment passed to the TVM allocation request.
+   */
+  ACLMemoryRegion(size_t size, size_t alignment);
+  /*!
+   * \brief Wrap existing memory as a non-owning sub-region.
+   *
+   * \param ptr Start of the wrapped memory.
+   * \param size Size of the wrapped memory in bytes.
+   */
+  ACLMemoryRegion(void* ptr, size_t size);
+
+  /*! \brief Release the memory unless this is a sub-region or holds no memory. */
+  ~ACLMemoryRegion() override;
+
+  /*! \brief Prevent instances of this class from being copied (As this class contains
+   * pointers). */
+  ACLMemoryRegion(const ACLMemoryRegion&) = delete;
+  /*!
+   * \brief Move constructor.
+   *
+   * Ownership of the raw pointer is transferred: the moved-from region is left
+   * with a null pointer so that its destructor does not free the same memory a
+   * second time (a defaulted move would leave both objects pointing at it).
+   */
+  ACLMemoryRegion(ACLMemoryRegion&& other)
+      : IMemoryRegion(other._size), ptr_(other.ptr_), is_subregion_(other.is_subregion_) {
+    other.ptr_ = nullptr;
+  }
+  /*! \brief Prevent instances of this class from being copied (As this class
+   * contains pointers) */
+  ACLMemoryRegion& operator=(const ACLMemoryRegion&) = delete;
+  /*!
+   * \brief Default move assignment operator.
+   *
+   * NOTE(review): the const member ctx_ makes this defaulted operator implicitly
+   * deleted, so regions cannot actually be move-assigned.
+   */
+  ACLMemoryRegion& operator=(ACLMemoryRegion&&) = default;
+
+  /*! \brief Pointer to the start of the region (may be null). */
+  void* buffer() override { return this->ptr_; }
+
+  /*! \brief Pointer to the start of the region (may be null). */
+  const void* buffer() const override { return this->ptr_; }
+
+  /*!
+   * \brief Extract a sub-region from the memory.
+   *
+   * \warning Ownership is maintained by the parent memory,
+   * while a wrapped raw memory region is returned by this function.
+   * Thus parent memory should not be released before this.
+   *
+   * \param offset Offset to the region.
+   * \param size Size of the region.
+   * \return A wrapped memory sub-region with no ownership of the
+   * underlying memory.
+   */
+  std::unique_ptr<arm_compute::IMemoryRegion> extract_subregion(size_t offset,
+                                                                size_t size) override;
+
+ private:
+  /*! \brief Points to a region of memory allocated by TVM. */
+  void* ptr_;
+  /*! \brief A subregion doesn't manage TVM memory so we don't need to free it. */
+  bool is_subregion_ = false;
+  /*! \brief Always allocate data in the context of the current CPU. */
+  const TVMContext ctx_{kDLCPU, 0};
+  /*! \brief Device API which allows requests for memory from TVM. */
+  runtime::DeviceAPI* device_api_ = runtime::DeviceAPI::Get(ctx_);
+};
+
+} // namespace contrib
+} // namespace runtime
+} // namespace tvm
+
+#endif // TVM_RUNTIME_CONTRIB_ARM_COMPUTE_LIB_ACL_ALLOCATOR_H_
--- /dev/null
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/runtime/contrib/arm_compute_lib/acl_runtime.cc
+ * \brief A simple JSON runtime for Arm Compute Library.
+ */
+
+#include <tvm/runtime/ndarray.h>
+#include <tvm/runtime/registry.h>
+
+#include "../../file_util.h"
+#include "../json/json_node.h"
+#include "../json/json_runtime.h"
+
+#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB
+#include <arm_compute/core/Types.h>
+#include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEPoolingLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>
+
+#include "acl_allocator.h"
+#include "acl_utils.h"
+#endif
+
+namespace tvm {
+namespace runtime {
+namespace contrib {
+
+using namespace tvm::runtime::json;
+
+/*!
+ * \brief JSON runtime module that executes an offloaded function with
+ * Arm Compute Library (ACL).
+ *
+ * When TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB is defined the JSON graph is turned
+ * into a single cached ACL function. Otherwise the module can still be
+ * constructed and initialized (BuildEngine only logs a warning), but Run()
+ * aborts.
+ */
+class ACLRuntime : public JSONRuntimeBase {
+ public:
+  /*!
+   * \brief The ACL runtime module. Deserialize the provided functions
+   * on creation and store in the layer cache.
+   *
+   * \param symbol_name The name of the function.
+   * \param graph_json serialized JSON representation of a sub-graph.
+   * \param const_names The names of each constant in the sub-graph.
+   */
+  explicit ACLRuntime(const std::string& symbol_name, const std::string& graph_json,
+                      const Array<String>& const_names)
+      : JSONRuntimeBase(symbol_name, graph_json, const_names) {}
+
+  /*!
+   * \brief The type key of the module.
+   *
+   * \return module type key.
+   */
+  const char* type_key() const override { return "arm_compute_lib"; }
+
+  /*!
+   * \brief Initialize runtime. Create ACL layer from JSON
+   * representation.
+   *
+   * \param consts The constant params from compiled model.
+   */
+  void Init(const Array<NDArray>& consts) override {
+    CHECK_EQ(consts.size(), const_idx_.size())
+        << "The number of input constants must match the number of required.";
+    SetupConstants(consts);
+    BuildEngine();
+  }
+
+#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB
+  /*!
+   * \brief Bind the current input and output buffers to the cached ACL
+   * tensors and run the cached ACL function.
+   */
+  void Run() override {
+    // layer_.inputs only holds tensors for "input" nodes (constants live in
+    // layer_.const_inputs), so it needs its own running index rather than the
+    // position within input_nodes_.
+    size_t input_idx = 0;
+    for (size_t i = 0; i < input_nodes_.size(); ++i) {
+      auto nid = input_nodes_[i];
+      if (nodes_[nid].GetOpType() == "input") {
+        uint32_t eid = EntryID(nid, 0);
+        void* data = data_entry_[eid]->data;
+        CheckACLError(layer_.inputs[input_idx].allocator()->import_memory(data));
+        ++input_idx;
+      }
+    }
+
+    for (size_t i = 0; i < outputs_.size(); ++i) {
+      uint32_t eid = EntryID(outputs_[i]);
+      void* data = data_entry_[eid]->data;
+      CheckACLError(layer_.outputs[i].allocator()->import_memory(data));
+    }
+
+    this->layer_.function->run();
+  }
+
+ private:
+  /*!
+   * \brief Build ACL layer from JSON representation and cache.
+   *
+   * \note For the time being only one layer or operator is supported
+   * per engine.
+   */
+  void BuildEngine() {
+    std::shared_ptr<arm_compute::MemoryManagerOnDemand> mm = MakeMemoryManager();
+    int num_pools = 0;
+
+    // Create one ACL tensor per graph input; constants are bound to their data now,
+    // regular inputs get their memory imported on every Run().
+    for (size_t i = 0; i < input_nodes_.size(); ++i) {
+      uint32_t nid = input_nodes_[i];
+      const auto& node = nodes_[nid];
+      if (node.GetOpType() == "input") {
+        layer_.inputs.push_back(MakeTensor(node));
+      } else if (node.GetOpType() == "const") {
+        uint32_t eid = EntryID(nid, 0);
+        void* data = data_entry_[eid]->data;
+        layer_.const_inputs.push_back(MakeTensor(node, data));
+      }
+    }
+
+    bool found_kernel_node = false;
+    for (size_t nid = 0; nid < nodes_.size(); ++nid) {
+      const auto& node = nodes_[nid];
+      if (node.GetOpType() != "kernel") {
+        continue;
+      }
+      // Only a second *kernel* node is an error; other node kinds may follow.
+      if (found_kernel_node) {
+        LOG(FATAL)
+            << "Arm Compute Library runtime module only supports one kernel node per function.";
+      }
+      found_kernel_node = true;
+      auto op_name = node.GetOpName();
+      if ("nn.conv2d" == op_name) {
+        CreateConvolution2DLayer(&layer_, node, mm);
+        // Only the convolution is handed the memory manager, so only it needs a pool.
+        num_pools++;
+      } else if ("nn.max_pool2d" == op_name) {
+        CreatePoolingLayer(&layer_, node);
+      } else if ("reshape" == op_name) {
+        CreateReshapeLayer(&layer_, node);
+      } else {
+        LOG(FATAL) << "Unsupported op: " << op_name;
+      }
+    }
+
+    // Without a kernel node there is no function to prepare or run.
+    CHECK(this->layer_.function != nullptr)
+        << "Arm Compute Library runtime module requires a kernel node.";
+    this->layer_.function->prepare();
+    if (num_pools > 0) mm->populate(this->allocator_, num_pools);
+  }
+
+  /*!
+   * \brief ACL objects we cache in order to avoid needing to construct
+   * a new layer each time.
+   */
+  struct CachedLayer {
+    std::shared_ptr<arm_compute::IFunction> function;
+    std::vector<arm_compute::Tensor> inputs;
+    std::vector<arm_compute::Tensor> const_inputs;
+    std::vector<arm_compute::Tensor> outputs;
+  };
+
+  /*!
+   * \brief Create a 2D convolution layer.
+   *
+   * \param layer The ACL layer to build. Containing inputs, outputs and the ACL function.
+   * \param node The JSON representation of the operator.
+   * \param mm The ACL conv2d layer can request auxiliary memory from TVM.
+   */
+  static void CreateConvolution2DLayer(
+      CachedLayer* layer, const JSONGraphNode& node,
+      const std::shared_ptr<arm_compute::MemoryManagerOnDemand>& mm) {
+    std::vector<std::string> padding = node.GetAttr<std::vector<std::string>>("padding");
+    std::vector<std::string> strides = node.GetAttr<std::vector<std::string>>("strides");
+    std::vector<std::string> dilation = node.GetAttr<std::vector<std::string>>("dilation");
+    arm_compute::PadStrideInfo pad_stride_info = ToACLPadStride(padding, strides);
+
+    int groups = std::stoi(node.GetAttr<std::vector<std::string>>("groups")[0]);
+    CHECK(groups == 1) << "Arm Compute Library NEON convolution only supports group size of 1.";
+
+    // Default-constructed info means "no fused activation".
+    arm_compute::ActivationLayerInfo act_info;
+    if (node.HasAttr("activation_type")) {
+      std::string activation_type = node.GetAttr<std::vector<std::string>>("activation_type")[0];
+      if (activation_type == "relu") {
+        act_info = arm_compute::ActivationLayerInfo(
+            arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
+      } else {
+        LOG(FATAL) << "Unsupported activation function";
+      }
+    }
+
+    // TVM: height, width -> ACL Size2D: width, height
+    arm_compute::Size2D dilation_2d(std::stoi(dilation[1]), std::stoi(dilation[0]));
+
+    layer->outputs.push_back(MakeOutputTensor(node.GetOpShape()[0]));
+
+    // const_inputs[0] is the weights; an optional second constant is the bias.
+    auto function = std::make_shared<arm_compute::NEConvolutionLayer>(mm);
+    function->configure(&layer->inputs[0], &layer->const_inputs[0],
+                        layer->const_inputs.size() > 1 ? &layer->const_inputs[1] : nullptr,
+                        &layer->outputs[0], pad_stride_info, arm_compute::WeightsInfo(),
+                        dilation_2d, act_info);
+    layer->function = function;
+  }
+
+  /*!
+   * \brief Create a pooling layer.
+   *
+   * \note Currently only maxpool is supported.
+   *
+   * \param layer The ACL layer to build. Containing inputs, outputs and the ACL function.
+   * \param node The JSON representation of the operator.
+   */
+  static void CreatePoolingLayer(CachedLayer* layer, const JSONGraphNode& node) {
+    std::vector<std::string> padding = node.GetAttr<std::vector<std::string>>("padding");
+    std::vector<std::string> strides = node.GetAttr<std::vector<std::string>>("strides");
+    arm_compute::PadStrideInfo pad_stride_info = ToACLPadStride(padding, strides);
+
+    auto attr_pool_size = node.GetAttr<std::vector<std::string>>("pool_size");
+    int pool_size_h = std::stoi(attr_pool_size[0]);
+    int pool_size_w = std::stoi(attr_pool_size[1]);
+
+    // Initialized up front so the value is never indeterminate on the error path.
+    arm_compute::PoolingType pool_type = arm_compute::PoolingType::MAX;
+    if (node.GetOpName() != "nn.max_pool2d") {
+      LOG(FATAL) << "Pooling type not supported";
+    }
+
+    // TVM: height, width -> ACL Size2D: width, height
+    arm_compute::PoolingLayerInfo pool_info =
+        arm_compute::PoolingLayerInfo(pool_type, arm_compute::Size2D(pool_size_w, pool_size_h),
+                                      arm_compute::DataLayout::NHWC, pad_stride_info);
+
+    layer->outputs.push_back(MakeOutputTensor(node.GetOpShape()[0]));
+
+    auto function = std::make_shared<arm_compute::NEPoolingLayer>();
+    function->configure(&layer->inputs[0], &layer->outputs[0], pool_info);
+    layer->function = function;
+  }
+
+  /*!
+   * \brief Create a reshape layer.
+   *
+   * \param layer The ACL layer to build. Containing inputs, outputs and the ACL function.
+   * \param node The JSON representation of the operator.
+   */
+  static void CreateReshapeLayer(CachedLayer* layer, const JSONGraphNode& node) {
+    layer->outputs.push_back(MakeOutputTensor(node.GetOpShape()[0]));
+    auto function = std::make_shared<arm_compute::NEReshapeLayer>();
+    function->configure(&layer->inputs[0], &layer->outputs[0]);
+    layer->function = function;
+  }
+
+  /*! \brief Allow ACL functions to request auxiliary memory from TVM. */
+  ACLAllocator allocator_;
+  /*!
+   * \brief The network layers represented by acl functions.
+   * \note Currently only supports a single layer.
+   */
+  CachedLayer layer_;
+#else
+  /*! \brief Abort: the module was built without the ACL graph runtime. */
+  void Run() override {
+    LOG(FATAL) << "Cannot call run on Arm Compute Library module without runtime enabled. "
+               << "Please build with USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME.";
+  }
+
+  /*! \brief No engine can be built without the ACL graph runtime; only warn. */
+  void BuildEngine() {
+    LOG(WARNING) << "Arm Compute Library engine is not initialized. "
+                 << "Please build with USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME.";
+  }
+#endif
+};
+
+/*!
+ * \brief Construct an ACLRuntime and wrap it in a runtime::Module.
+ *
+ * \param symbol_name The name of the function.
+ * \param graph_json Serialized JSON representation of a sub-graph.
+ * \param const_names The names of each constant in the sub-graph.
+ * \return The module owning the newly created runtime.
+ */
+runtime::Module ACLRuntimeCreate(const String& symbol_name, const String& graph_json,
+                                 const Array<String>& const_names) {
+  return runtime::Module(make_object<ACLRuntime>(symbol_name, graph_json, const_names));
+}
+
+// Expose the factory function under the global name
+// "runtime.arm_compute_lib_runtime_create".
+TVM_REGISTER_GLOBAL("runtime.arm_compute_lib_runtime_create").set_body_typed(ACLRuntimeCreate);
+
+// Register the binary loader for ACLRuntime modules.
+// NOTE(review): the "loadbinary_<type_key>" suffix presumably has to match type_key()
+// ("arm_compute_lib") for deserialization to find it - confirm against the module loader.
+TVM_REGISTER_GLOBAL("runtime.module.loadbinary_arm_compute_lib")
+ .set_body_typed(JSONRuntimeBase::LoadFromBinary<ACLRuntime>);
+
+} // namespace contrib
+} // namespace runtime
+} // namespace tvm
--- /dev/null
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/runtime/contrib/arm_compute_lib/acl_utils.cc
+ * \brief Utils and common functions for the interface.
+ */
+
+#include "acl_utils.h"
+
+#include <arm_compute/runtime/OffsetLifetimeManager.h>
+#include <arm_compute/runtime/PoolManager.h>
+#include <tvm/runtime/data_type.h>
+
+namespace tvm {
+namespace runtime {
+namespace contrib {
+
+using JSONGraphNode = tvm::runtime::json::JSONGraphNode;
+
+// Fail a CHECK, reporting the ACL error description prefixed with "ACL: ",
+// unless the status carries ErrorCode::OK.
+void CheckACLError(const arm_compute::Status& status) {
+ CHECK(status.error_code() == arm_compute::ErrorCode::OK) << "ACL: " << status.error_description();
+}
+
+// Build an ACL tensor described by an "input" or "const" JSON node. When `data`
+// is non-null the tensor imports that memory instead of being left unbacked.
+arm_compute::Tensor MakeTensor(const JSONGraphNode& tensor_rep, void* data) {
+  CHECK(tensor_rep.GetOpType() == "input" || tensor_rep.GetOpType() == "const");
+  arm_compute::Tensor acl_tensor;
+  acl_tensor.allocator()->init(MakeTensorInfo(tensor_rep.GetOpShape()[0]));
+  if (data == nullptr) {
+    return acl_tensor;
+  }
+  CheckACLError(acl_tensor.allocator()->import_memory(data));
+  return acl_tensor;
+}
+
+// Build an ACL tensor for an output of the given shape; no memory is attached here.
+arm_compute::Tensor MakeOutputTensor(const std::vector<int64_t>& shape) {
+  arm_compute::TensorInfo output_info = MakeTensorInfo(shape);
+  arm_compute::Tensor output;
+  output.allocator()->init(output_info);
+  return output;
+}
+
+// Describe a single-channel F32 tensor in NHWC layout with the given shape.
+arm_compute::TensorInfo MakeTensorInfo(const std::vector<int64_t>& shape) {
+  return arm_compute::TensorInfo(MakeTensorShape(shape), 1, arm_compute::DataType::F32,
+                                 arm_compute::DataLayout::NHWC);
+}
+
+// Convert a shape vector to an ACL TensorShape by reversing the dimension order:
+// the last entry of `shape` becomes ACL dimension 0.
+arm_compute::TensorShape MakeTensorShape(const std::vector<int64_t>& shape) {
+  arm_compute::TensorShape acl_shape;
+  const size_t rank = shape.size();
+  for (size_t dim = 0; dim < rank; ++dim) {
+    acl_shape.set(dim, shape[rank - 1 - dim]);
+  }
+  return acl_shape;
+}
+
+// Create an on-demand memory manager backed by an offset lifetime manager and a
+// pool manager.
+std::shared_ptr<arm_compute::MemoryManagerOnDemand> MakeMemoryManager() {
+  const auto lifetime_manager = std::make_shared<arm_compute::OffsetLifetimeManager>();
+  const auto pool_manager = std::make_shared<arm_compute::PoolManager>();
+  return std::make_shared<arm_compute::MemoryManagerOnDemand>(lifetime_manager, pool_manager);
+}
+
+// Convert TVM padding and strides (given as decimal strings) into an ACL
+// PadStrideInfo using FLOOR rounding.
+//
+// `stride` must hold exactly (height, width). `pad` may hold 1 value (all sides),
+// 2 values (height, width) or 4 values (top, left, bottom, right); any other
+// size is a fatal error.
+arm_compute::PadStrideInfo ToACLPadStride(const std::vector<std::string>& pad,
+                                          const std::vector<std::string>& stride) {
+  // Both entries are indexed below; reject anything else instead of reading out of bounds.
+  CHECK(stride.size() == 2) << "Unsupported stride dimensions";
+  // TVM: height, width -> ACL: stride_x (width), stride_y (height)
+  const int stride_x = std::stoi(stride[1]);
+  const int stride_y = std::stoi(stride[0]);
+
+  int pad_left = 0, pad_right = 0, pad_top = 0, pad_bottom = 0;
+  const size_t size = pad.size();
+  if (size == 1) {
+    const int pad_v = std::stoi(pad[0]);
+    pad_left = pad_v;
+    pad_right = pad_v;
+    pad_top = pad_v;
+    pad_bottom = pad_v;
+  } else if (size == 2) {
+    // TVM: height, width -> ACL: left, right, top, bottom
+    const int pad_h = std::stoi(pad[0]);
+    const int pad_w = std::stoi(pad[1]);
+    pad_left = pad_w;
+    pad_right = pad_w;
+    pad_top = pad_h;
+    pad_bottom = pad_h;
+  } else if (size == 4) {
+    // TVM: top, left, bottom, right -> ACL: left, right, top, bottom
+    pad_left = std::stoi(pad[1]);
+    pad_right = std::stoi(pad[3]);
+    pad_top = std::stoi(pad[0]);
+    pad_bottom = std::stoi(pad[2]);
+  } else {
+    LOG(FATAL) << "Unsupported padding dimensions";
+  }
+
+  return arm_compute::PadStrideInfo(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
+                                    arm_compute::DimensionRoundingType::FLOOR);
+}
+
+} // namespace contrib
+} // namespace runtime
+} // namespace tvm
--- /dev/null
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/runtime/contrib/arm_compute_lib/acl_utils.h
+ * \brief Utils and common functions for the interface.
+ */
+
+#ifndef TVM_RUNTIME_CONTRIB_ARM_COMPUTE_LIB_ACL_UTILS_H_
+#define TVM_RUNTIME_CONTRIB_ARM_COMPUTE_LIB_ACL_UTILS_H_
+
+#include <arm_compute/core/Types.h>
+#include <arm_compute/runtime/MemoryManagerOnDemand.h>
+#include <arm_compute/runtime/Tensor.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "../json/json_node.h"
+
+namespace tvm {
+namespace runtime {
+namespace contrib {
+
+using JSONGraphNode = tvm::runtime::json::JSONGraphNode;
+
+/*!
+ * \brief Check if there are any errors from acl and forward them to TVM.
+ *
+ * Status values:
+ * - 0 => OK
+ * - 1 => RUNTIME_ERROR
+ * - 2 => UNSUPPORTED_EXTENSION_USE
+ *
+ * \param status status of called function.
+ */
+void CheckACLError(const arm_compute::Status& status);
+
+/*!
+ * \brief Make an acl tensor from JSON tensor representation.
+ *
+ * \param tensor_rep A JSON tensor representation.
+ * \param data (optional) Memory for the tensor to import; when null no memory
+ * is attached to the tensor.
+ * \return arm_compute::Tensor.
+ */
+arm_compute::Tensor MakeTensor(const JSONGraphNode& tensor_rep, void* data = nullptr);
+
+/*!
+ * \brief Make an acl tensor from a shape alone, without having a JSON representation.
+ *
+ * \param shape The shape of the tensor to create.
+ * \return arm_compute::Tensor.
+ */
+arm_compute::Tensor MakeOutputTensor(const std::vector<int64_t>& shape);
+
+/*!
+ * \brief Make an acl tensor info object from a shape.
+ *
+ * \param shape The shape of the tensor to create.
+ * \return arm_compute::TensorInfo.
+ */
+arm_compute::TensorInfo MakeTensorInfo(const std::vector<int64_t>& shape);
+
+/*!
+ * \brief Convert vector object to acl TensorShape.
+ * \note This requires reversing the given vector.
+ *
+ * \param shape The shape of the tensor as a vector.
+ * \return arm_compute::TensorShape.
+ */
+arm_compute::TensorShape MakeTensorShape(const std::vector<int64_t>& shape);
+
+/*!
+ * \brief Create a memory manager for use with a layer that
+ * requires working memory.
+ *
+ * \return reference counted memory manager.
+ */
+std::shared_ptr<arm_compute::MemoryManagerOnDemand> MakeMemoryManager();
+
+/*!
+ * \brief Convert TVM padding and stride format to acl PadStrideInfo.
+ *
+ * \param pad The pad vector, as decimal strings.
+ * \param stride The stride vector, as decimal strings.
+ * \return arm_compute::PadStrideInfo
+ */
+arm_compute::PadStrideInfo ToACLPadStride(const std::vector<std::string>& pad,
+ const std::vector<std::string>& stride);
+
+} // namespace contrib
+} // namespace runtime
+} // namespace tvm
+
+#endif // TVM_RUNTIME_CONTRIB_ARM_COMPUTE_LIB_ACL_UTILS_H_
attrs_[key] = value;
}
+  /*!
+   * \brief Report whether an attribute is stored under the given key.
+   *
+   * \param key The key of the attribute.
+   *
+   * \return True if attribute exists, false otherwise.
+   */
+  bool HasAttr(const std::string& key) const {
+    return attrs_.count(key) > 0;
+  }
+
virtual ~JSONGraphNode() {}
private:
return Module(n);
}
+  /*!
+   * \brief Return the graph JSON held by this module.
+   *
+   * \param format The requested format; it is not consulted, the stored JSON
+   * is returned regardless.
+   * \return A string of JSON.
+   */
+  std::string GetSource(const std::string& format = "json") override {
+    return graph_json_;
+  }
+
protected:
/*!
* \brief Set up the input and output buffers by binding their DLTensor pointers to the
--- /dev/null
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Infrastructure and tests for Arm Compute Library"""
--- /dev/null
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from itertools import zip_longest, combinations
+import json
+
+import tvm
+from tvm import relay
+from tvm import rpc
+from tvm.contrib import graph_runtime
+from tvm.relay.op.contrib import arm_compute_lib
+from tvm.contrib import util
+
+
+class Device:
+ """Adjust the following settings to connect to and use a remote device for tests."""
+ use_remote = False
+ target = "llvm -mtriple=aarch64-linux-gnu -mattr=+neon"
+ # Enable cross compilation when connecting a remote device from a non-arm platform.
+ cross_compile = None
+ # cross_compile = "aarch64-linux-gnu-g++"
+
+ def __init__(self):
+ """Keep remote device for lifetime of object."""
+ self.device = self._get_remote()
+
+ @classmethod
+ def _get_remote(cls):
+ """Get a remote (or local) device to use for testing."""
+ if cls.use_remote:
+ # Here you may adjust settings to run the ACL unit tests via a remote
+ # device using the RPC mechanism. Use this in the case you want to compile
+ # an ACL module on a different machine to what you run the module on i.e.
+ # x86 -> AArch64.
+ #
+ # Use the following to connect directly to a remote device:
+ # device = rpc.connect(
+ # hostname="0.0.0.0",
+ # port=9090)
+ #
+ # Or connect via a tracker:
+ # device = tvm.autotvm.measure.request_remote(
+ # host="0.0.0.0",
+ # port=9090,
+ # device_key="device_key",
+ # timeout=1000)
+ #
+ # return device
+ raise NotImplementedError(
+ "Please adjust these settings to connect to your remote device.")
+ else:
+ device = rpc.LocalSession()
+ return device
+
+
+def get_cpu_op_count(mod):
+ """Traverse graph counting ops offloaded to TVM."""
+ class Counter(tvm.relay.ExprVisitor):
+ def __init__(self):
+ super().__init__()
+ self.count = 0
+
+ def visit_call(self, call):
+ if isinstance(call.op, tvm.ir.Op):
+ self.count += 1
+
+ super().visit_call(call)
+
+ c = Counter()
+ c.visit(mod["main"])
+ return c.count
+
+
+def skip_runtime_test():
+ """Skip test if it requires the runtime and it's not present."""
+ # ACL codegen not present.
+ if not tvm.get_global_func("relay.ext.arm_compute_lib", True):
+ print("Skip because Arm Compute Library codegen is not available.")
+ return True
+
+ # Remote device is in use or ACL runtime not present
+ if not Device.use_remote and not arm_compute_lib.is_arm_compute_runtime_enabled():
+ print("Skip because runtime isn't present or a remote device isn't being used.")
+ return True
+
+
+def skip_codegen_test():
+ """Skip test if it requires the ACL codegen and it's not present."""
+ if not tvm.get_global_func("relay.ext.arm_compute_lib", True):
+ print("Skip because Arm Compute Library codegen is not available.")
+ return True
+
+
+def build_module(mod, target, params=None, enable_acl=True, tvm_ops=0, acl_partitions=1):
+ """Build module with option to build for ACL."""
+ if isinstance(mod, tvm.relay.expr.Call):
+ mod = tvm.IRModule.from_expr(mod)
+ with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]):
+ if enable_acl:
+ mod = arm_compute_lib.partition_for_arm_compute_lib(mod, params)
+ tvm_op_count = get_cpu_op_count(mod)
+ assert tvm_op_count == tvm_ops, \
+ "Got {} TVM operators, expected {}".format(tvm_op_count, tvm_ops)
+ partition_count = 0
+ for global_var in mod.get_global_vars():
+ if "arm_compute_lib" in global_var.name_hint:
+ partition_count += 1
+
+ assert acl_partitions == partition_count, \
+ "Got {} Arm Compute Library partitions, expected {}".format(
+ partition_count, acl_partitions)
+ relay.backend.compile_engine.get().clear()
+ return relay.build(mod, target=target, params=params)
+
+
+def build_and_run(mod, inputs, outputs, params, device, enable_acl=True, no_runs=1,
+ tvm_ops=0, acl_partitions=1):
+ """Build and run the relay module."""
+ lib = build_module(mod, device.target, params, enable_acl, tvm_ops, acl_partitions)
+ lib = update_lib(lib, device.device, device.cross_compile)
+ gen_module = graph_runtime.GraphModule(lib['default'](device.device.cpu(0)))
+ gen_module.set_input(**inputs)
+ out = []
+ for _ in range(no_runs):
+ gen_module.run()
+ out.append([gen_module.get_output(i) for i in range(outputs)])
+ return out
+
+
+def update_lib(lib, device, cross_compile):
+ """Export the library to the remote/local device."""
+ lib_name = "mod.so"
+ temp = util.tempdir()
+ lib_path = temp.relpath(lib_name)
+ if cross_compile:
+ lib.export_library(lib_path, cc=cross_compile)
+ else:
+ lib.export_library(lib_path)
+ device.upload(lib_path)
+ lib = device.load_module(lib_name)
+ return lib
+
+
+def verify(answers, atol, rtol):
+ """Compare the array of answers. Each entry is a list of outputs."""
+ if len(answers) < 2:
+ raise RuntimeError(
+ f"No results to compare: expected at least two, found {len(answers)}")
+ for answer in zip_longest(*answers):
+ for outs in combinations(answer, 2):
+ tvm.testing.assert_allclose(
+ outs[0].asnumpy(), outs[1].asnumpy(), rtol=rtol, atol=atol)
+
+
+def extract_acl_modules(module):
+ """Get the ACL module(s) from llvm module."""
+ return list(filter(lambda mod: mod.type_key == "arm_compute_lib",
+ module.get_lib().imported_modules))
+
+
+def verify_codegen(module, known_good_codegen, num_acl_modules,
+ target="llvm -mtriple=aarch64-linux-gnu -mattr=+neon"):
+ """Check acl codegen against a known good output."""
+ module = build_module(module, target)
+ acl_modules = extract_acl_modules(module)
+
+ assert len(acl_modules) == num_acl_modules, \
+ f"The number of Arm Compute Library modules produced ({len(acl_modules)}) does not " \
+ f"match the expected value ({num_acl_modules})."
+
+ for mod in acl_modules:
+ source = mod.get_source("json")
+ codegen = json.loads(source)["nodes"]
+ # remove input and const names as these cannot be predetermined
+ for node in range(len(codegen)):
+ if codegen[node]["op"] == "input" or codegen[node]["op"] == "const":
+ codegen[node]["name"] = ""
+ codegen_str = json.dumps(codegen, sort_keys=True, indent=2)
+ known_good_codegen_str = json.dumps(known_good_codegen, sort_keys=True, indent=2)
+
+ assert codegen_str == known_good_codegen_str, \
+ f"The JSON produced by codegen does not match the expected result. \n" \
+ f"Actual={codegen_str} \n" \
+ f"Expected={known_good_codegen_str}"
--- /dev/null
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Arm Compute Library integration conv2d tests."""
+
+import numpy as np
+
+import tvm
+from tvm import relay
+
+from .infrastructure import skip_runtime_test, skip_codegen_test, build_and_run, \
+ verify, verify_codegen
+from .infrastructure import Device
+
+
+def _get_model(shape, kernel_size, padding, strides,
+ dilation, groups, dtype, channels,
+ var_names, has_bias=False, has_activation=False, has_pad=False):
+ """Return a model and any parameters it may have"""
+ a = relay.var(next(var_names), shape=shape, dtype=dtype)
+ if has_pad:
+ p = ((0, 0), (padding[0], padding[0]), (padding[1], padding[1]), (0, 0))
+ a = relay.nn.pad(a, pad_width=p)
+ padding = (0, 0, 0, 0)
+ else:
+ if len(padding) == 2:
+ padding = (padding[0], padding[1], padding[0], padding[1])
+ shape = (shape[0], shape[1] + padding[0] * 2,
+ shape[2] + padding[1] * 2, shape[3])
+ weight_shape = (kernel_size, kernel_size, shape[3] // groups, channels)
+ w = tvm.nd.array(np.random.uniform(-128, 127, weight_shape).astype(dtype))
+ weights = relay.const(w, dtype)
+ out = relay.nn.conv2d(
+ a,
+ weights,
+ kernel_size=(kernel_size, kernel_size),
+ data_layout="NHWC",
+ kernel_layout="HWIO",
+ dilation=(1, 1),
+ strides=strides,
+ padding=padding,
+ groups=groups,
+ channels=channels
+ )
+ params = {"w": w}
+ if has_bias:
+ b = tvm.nd.array(np.random.uniform(-128, 127, weight_shape[3]).astype(dtype))
+ biasc = relay.const(b, dtype)
+ out = relay.nn.bias_add(out, biasc, axis=3)
+ params["b"] = b
+ if has_activation:
+ out = relay.nn.relu(out)
+ return out, params
+
+
+def _get_expected_codegen(shape, kernel_size, padding, strides,
+ dilation, groups, dtype, channels,
+ has_bias=False, has_activation=False):
+ if len(padding) == 2:
+ padding = (padding[0], padding[1], padding[0], padding[1])
+ weight_shape = (channels, kernel_size, kernel_size, shape[3] // groups)
+ output_height = ((shape[1] - kernel_size + padding[0] + padding[2]) / strides[0]) + 1
+ output_width = ((shape[2] - kernel_size + padding[1] + padding[3]) / strides[1]) + 1
+ output_shape = (1, int(output_height), int(output_width), channels)
+
+ node = {
+ "op": "kernel",
+ "name": "nn.conv2d",
+ "inputs": [[0, 0, 0], [1, 0, 0]],
+ "attrs": {
+ "groups": [["1"]],
+ "num_inputs": str(3 if has_bias else 2),
+ "num_outputs": "1",
+ "data_layout": [["NHWC"]],
+ "kernel_layout": [["OHWI"]],
+ "channels": [["1"]],
+ "dilation": [["1", "1"]],
+ "out_layout": [[""]],
+ "out_dtype": [[""]],
+ "kernel_size": [[str(kernel_size), str(kernel_size)]],
+ "shape": [[list(output_shape)]],
+ "dtype": [[dtype]],
+ "padding": [[str(p) for p in padding]],
+ "strides": [[str(s) for s in strides]]
+ },
+ }
+
+ if has_activation:
+ node["attrs"]["activation_type"] = [["relu"]]
+
+ input = {
+ "op": "input",
+ "name": "",
+ "attrs": {"shape": [[list(shape)]], "dtype": [["float32"]]}}
+ kernel = {
+ "op": "const",
+ "name": "",
+ "attrs": {"shape": [[list(weight_shape)]], "dtype": [["float32"]]}}
+
+ if has_bias:
+ bias = {
+ "op": "const",
+ "name": "",
+ "attrs": {"shape": [[[weight_shape[0]]]], "dtype": [["float32"]]}}
+ node["inputs"].append([2, 0, 0])
+ return [input, kernel, bias, node]
+ else:
+ return [input, kernel, node]
+
+
+def test_conv2d():
+ if skip_runtime_test():
+ return
+
+ device = Device()
+ np.random.seed(0)
+
+ shape = (1, 14, 14, 32)
+ dtype = "float32"
+
+ inputs = {
+ "a": tvm.nd.array(np.random.uniform(-128, 127, shape).astype(dtype)),
+ }
+
+ for kernel_size in [1, 2, 3]:
+ outputs = []
+ func, params = _get_model(shape, kernel_size,
+ (0, 0), (1, 1), 1, 1,
+ dtype, 1, iter(inputs))
+ for acl in [False, True]:
+ outputs.append(build_and_run(func, inputs, 1,
+ params, device,
+ enable_acl=acl)[0])
+ verify(outputs, atol=0.002, rtol=0.01)
+
+ for pad_ksize in [((1, 1), 3), ((2, 2), 5), ((2, 1), 3)]:
+ outputs = []
+ func, params = _get_model(shape, pad_ksize[1], pad_ksize[0],
+ (1, 1), 1, 1, dtype, 1, iter(inputs))
+ for acl in [False, True]:
+ outputs.append(build_and_run(func, inputs, 1,
+ params, device,
+ enable_acl=acl)[0])
+ verify(outputs, atol=0.002, rtol=0.01)
+
+ for strides in [(1, 1), (2, 2)]:
+ outputs = []
+ func, params = _get_model(shape, 2, (0, 0), strides,
+ 1, 1, dtype, 1, iter(inputs))
+ for acl in [False, True]:
+ outputs.append(build_and_run(func, inputs, 1,
+ params, device,
+ enable_acl=acl)[0])
+ verify(outputs, atol=0.002, rtol=0.01)
+
+ # Test composite convolution: (has_pad, has_bias, has_activation).
+ for composite in [(False, True, False), (False, False, True), (False, True, True),
+ (True, False, False)]:
+ outputs = []
+ func, params = _get_model(shape, 2, (1, 1), (1, 1),
+ 1, 1, dtype, 1, iter(inputs),
+ has_pad=composite[0],
+ has_bias=composite[1],
+ has_activation=composite[2])
+ for acl in [False, True]:
+ outputs.append(build_and_run(func, inputs, 1,
+ params, device,
+ enable_acl=acl)[0])
+ verify(outputs, atol=0.002, rtol=0.01)
+
+
+def test_codegen_conv2d():
+ if skip_codegen_test():
+ return
+
+ shape = (1, 25, 25, 1)
+ dtype = "float32"
+ inputs = {"a"}
+
+ for pad_ksize in [((1, 1), 3), ((2, 1), 3)]:
+ args = (shape, pad_ksize[1], pad_ksize[0], (1, 1), 1, 1, dtype, 1)
+ func, params = _get_model(*args, var_names=iter(inputs))
+ exp_codegen = _get_expected_codegen(*args)
+ verify_codegen(func, exp_codegen, 1)
+ # Test composite convolution: (has_pad, has_bias, has_activation).
+ for composite in [(False, True, False), (False, False, True), (False, True, True),
+ (True, False, False)]:
+ args = (shape, 2, (1, 1), (1, 1), 1, 1, dtype, 1)
+ func, params = _get_model(*args, var_names=iter(inputs),
+ has_pad=composite[0],
+ has_bias=composite[1],
+ has_activation=composite[2])
+ exp_codegen = _get_expected_codegen(*args,
+ has_bias=composite[1],
+ has_activation=composite[2])
+ verify_codegen(func, exp_codegen, 1)
+
+
+# Allow running both tests by executing this file directly.
+if __name__ == "__main__":
+ test_conv2d()
+ test_codegen_conv2d()
--- /dev/null
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Arm Compute Library network tests."""
+
+import numpy as np
+
+from tvm import relay
+
+from .infrastructure import skip_runtime_test, build_and_run, verify
+from .infrastructure import Device
+
+
+def _build_and_run_keras_network(mod, params, inputs, device, tvm_ops, acl_partitions):
+    """Build and run a Keras-imported network with and without ACL, then compare.
+
+    `inputs` maps each input name to its shape; float32 data drawn uniformly
+    from [-128, 127) with a fixed seed is generated for every entry.
+    """
+    np.random.seed(0)
+    data = {name: np.random.uniform(-128, 127, shape).astype("float32")
+            for name, shape in inputs.items()}
+
+    # First result is produced with ACL disabled, second with ACL enabled.
+    results = [build_and_run(mod, data, 1, params,
+                             device, enable_acl=use_acl,
+                             tvm_ops=tvm_ops,
+                             acl_partitions=acl_partitions)[0]
+               for use_acl in (False, True)]
+    verify(results, atol=0.002, rtol=0.01)
+
+
+def test_vgg16():
+    """Run Keras VGG16 (ImageNet weights) with and without ACL and compare."""
+    if skip_runtime_test():
+        return
+
+    device = Device()
+
+    def get_model():
+        # Keras is imported here so it is only required when the test runs.
+        from keras.applications import VGG16
+        network = VGG16(include_top=True, weights='imagenet',
+                        input_shape=(224, 224, 3), classes=1000)
+        input_shapes = {network.input_names[0]: (1, 224, 224, 3)}
+        module, parameters = relay.frontend.from_keras(network, input_shapes, layout="NHWC")
+        return module, parameters, input_shapes
+
+    mod, params, inputs = get_model()
+    _build_and_run_keras_network(mod, params, inputs, device=device,
+                                 tvm_ops=10, acl_partitions=18)
+
+
+def test_mobilenet():
+    """Run Keras MobileNet (ImageNet weights) with and without ACL and compare."""
+    if skip_runtime_test():
+        return
+
+    device = Device()
+
+    def get_model():
+        # Keras is imported here so it is only required when the test runs.
+        from keras.applications import MobileNet
+        network = MobileNet(include_top=True, weights='imagenet',
+                            input_shape=(224, 224, 3), classes=1000)
+        input_shapes = {network.input_names[0]: (1, 224, 224, 3)}
+        module, parameters = relay.frontend.from_keras(network, input_shapes, layout="NHWC")
+        return module, parameters, input_shapes
+
+    mod, params, inputs = get_model()
+    _build_and_run_keras_network(mod, params, inputs, device=device,
+                                 tvm_ops=74, acl_partitions=17)
+
+
+if __name__ == "__main__":
+    # Run the tests in order when this file is executed directly.
+    for run in (test_vgg16, test_mobilenet):
+        run()
--- /dev/null
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Arm Compute Library integration pooling tests."""
+
+import numpy as np
+
+import tvm
+from tvm import relay
+
+from .infrastructure import skip_runtime_test, skip_codegen_test, build_and_run, \
+ verify, verify_codegen
+from .infrastructure import Device
+
+
+def _get_model(shape, typef, sizes, strides, padding,
+               ceil_mode, var_names):
+    """Return a pooling model built by applying `typef` to a float32 NHWC input.
+
+    The input variable takes its name from the next item of `var_names`.
+    """
+    data = relay.var(next(var_names), shape=shape, dtype="float32")
+    return typef(data, pool_size=sizes, strides=strides, padding=padding,
+                 ceil_mode=ceil_mode, layout="NHWC")
+
+
+def _get_expected_codegen(shape, typef, sizes, strides, padding,
+                          ceil_mode):
+    """Build the node list expected from the codegen for a max_pool2d model.
+
+    Parameters
+    ----------
+    shape : tuple of int
+        Input shape, indexed as (batch, height, width, channels).
+    typef : callable
+        Pooling operator. Unused: the kernel node is always "nn.max_pool2d".
+    sizes : tuple of int
+        Pooling window as (height, width).
+    strides : tuple of int
+        Strides as (height, width).
+    padding : tuple of int
+        Either (pad_h, pad_w) or (top, left, bottom, right).
+    ceil_mode : bool
+        When true the output extent is rounded up instead of down.
+
+    Returns
+    -------
+    list of dict
+        The input node followed by the pooling kernel node.
+    """
+    if len(padding) == 2:
+        # Expand (pad_h, pad_w) to (top, left, bottom, right).
+        padding = (padding[0], padding[1], padding[0], padding[1])
+
+    def _output_extent(extent, window, pad_total, stride):
+        """Pooled size along one spatial axis."""
+        span = extent - window + pad_total
+        if ceil_mode:
+            # Adding stride - 1 before flooring rounds the division up.
+            span += stride - 1
+        return span // stride + 1
+
+    output_shape = (
+        shape[0],
+        _output_extent(shape[1], sizes[0], padding[0] + padding[2], strides[0]),
+        _output_extent(shape[2], sizes[1], padding[1] + padding[3], strides[1]),
+        shape[3],
+    )
+
+    node = {
+        "op": "kernel",
+        "name": "nn.max_pool2d",
+        "inputs": [[0, 0, 0]],
+        "attrs": {
+            "num_inputs": "1",
+            "num_outputs": "1",
+            "layout": [["NHWC"]],
+            "shape": [[list(output_shape)]],
+            "dtype": [["float32"]],
+            "padding": [[str(p) for p in padding]],
+            "strides": [[str(s) for s in strides]],
+            "pool_size": [[str(s) for s in sizes]],
+            "ceil_mode": [[str(1 if ceil_mode else 0)]]
+        },
+    }
+
+    # Named input_node so the builtin input() is not shadowed.
+    input_node = {
+        "op": "input",
+        "name": "",
+        "attrs": {"shape": [[list(shape)]], "dtype": [["float32"]]}}
+    return [input_node, node]
+
+
+def test_pooling():
+    """Compare max_pool2d results produced with and without ACL."""
+    if skip_runtime_test():
+        return
+
+    device = Device()
+    np.random.seed(0)
+
+    for size in [(2, 2), (3, 3)]:
+        for stride in [(2, 2)]:
+            height = size[0] + stride[0] * 5
+            width = size[1] + stride[1] * 5
+            shape = (1, height, width, 16)
+
+            sample = np.random.uniform(-1, 1, shape).astype("float32")
+            inputs = {"a": tvm.nd.array(sample)}
+
+            func = _get_model(shape, relay.nn.max_pool2d, size,
+                              stride, (0, 0), True, iter(inputs))
+            # First result has ACL disabled, second has ACL enabled.
+            outputs = [build_and_run(func, inputs, 1, None, device,
+                                     enable_acl=use_acl)[0]
+                       for use_acl in (False, True)]
+            verify(outputs, atol=0.001, rtol=0.001)
+
+
+def test_codegen_pooling():
+    """Check the codegen output for max_pool2d against the expected nodes."""
+    if skip_codegen_test():
+        return
+
+    inputs = {"a"}
+
+    for size in [(2, 2), (3, 3)]:
+        for stride in [(2, 2)]:
+            height = size[0] + stride[0] * 5
+            width = size[1] + stride[1] * 5
+            args = ((1, height, width, 16), relay.nn.max_pool2d, size,
+                    stride, (0, 0), True)
+            func = _get_model(*args, iter(inputs))
+            verify_codegen(func, _get_expected_codegen(*args), 1)
+
+
+if __name__ == "__main__":
+    # Run the tests in order when this file is executed directly.
+    for run in (test_pooling, test_codegen_pooling):
+        run()
--- /dev/null
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Arm Compute Library integration reshape tests."""
+
+import numpy as np
+
+import tvm
+from tvm import relay
+
+from .infrastructure import skip_runtime_test, skip_codegen_test, build_and_run, \
+ verify, verify_codegen
+from .infrastructure import Device
+
+
+def _get_model(input_shape, output_shape, var_names):
+    """Return a model reshaping a float32 input of `input_shape` to `output_shape`.
+
+    The input variable takes its name from the next item of `var_names`.
+    """
+    data = relay.var(next(var_names), shape=input_shape, dtype="float32")
+    return relay.reshape(data, output_shape)
+
+
+def _get_expected_codegen(input_shape, output_shape):
+    """Return the [input, kernel] node list expected for a reshape to `output_shape`."""
+    kernel_attrs = {
+        "num_inputs": "1",
+        "num_outputs": "1",
+        # The new shape is stored as strings, the resulting shape as integers.
+        "newshape": [list(map(str, output_shape))],
+        "shape": [[list(output_shape)]],
+        "dtype": [["float32"]],
+        "reverse": [["0"]]
+    }
+    kernel_node = {
+        "op": "kernel",
+        "name": "reshape",
+        "inputs": [[0, 0, 0]],
+        "attrs": kernel_attrs,
+    }
+    input_node = {
+        "op": "input",
+        "name": "",
+        "attrs": {"shape": [[list(input_shape)]], "dtype": [["float32"]]},
+    }
+    return [input_node, kernel_node]
+
+
+def test_reshape():
+    """Compare reshape results produced with and without ACL."""
+    if skip_runtime_test():
+        return
+
+    device = Device()
+    np.random.seed(0)
+
+    sample = np.random.uniform(-128, 127, (1, 1, 1, 1000)).astype("float32")
+    inputs = {"a": tvm.nd.array(sample)}
+
+    for new_shape in [(1, 1000), (10, 10, 10)]:
+        func = _get_model(inputs["a"].shape, new_shape, iter(inputs))
+        # First result has ACL disabled, second has ACL enabled.
+        outputs = [build_and_run(func, inputs, 1, None, device,
+                                 enable_acl=use_acl)[0]
+                   for use_acl in (False, True)]
+        verify(outputs, atol=1e-7, rtol=1e-7)
+
+
+def test_codegen_reshape():
+    """Check the codegen output for reshape against the expected nodes."""
+    if skip_codegen_test():
+        return
+
+    input_shape = (1, 1, 1, 1000)
+    inputs = {"a"}
+
+    for new_shape in [(1, 1000), (10, 10, 10)]:
+        func = _get_model(input_shape, new_shape, iter(inputs))
+        expected = _get_expected_codegen(input_shape, new_shape)
+        verify_codegen(func, expected, 1)
+
+
+if __name__ == "__main__":
+    # Run the tests in order when this file is executed directly.
+    for run in (test_reshape, test_codegen_reshape):
+        run()
--- /dev/null
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Arm Compute Library runtime tests."""
+
+import numpy as np
+
+import tvm
+from tvm import relay
+
+from .infrastructure import skip_runtime_test, build_and_run, verify
+from .infrastructure import Device
+
+
+def test_multiple_ops():
+    """
+    Test multiple operators destined for ACL.
+    For the time being the ACL runtime expects these ops as
+    2 separate functions.
+    """
+    if skip_runtime_test():
+        return
+
+    device = Device()
+    np.random.seed(0)
+
+    def get_model(input_shape, var_names):
+        """Return two chained reshapes applied to a float32 input."""
+        data = relay.var(next(var_names), shape=input_shape, dtype="float32")
+        squeezed = relay.reshape(data, (1, 1, 1000))
+        return relay.reshape(squeezed, (1, 1000))
+
+    sample = np.random.uniform(0, 1, (1, 1, 1, 1000)).astype("float32")
+    inputs = {"a": tvm.nd.array(sample)}
+
+    # A fresh model is built for each run: first without ACL, then with it.
+    outputs = [build_and_run(get_model(inputs["a"].shape, iter(inputs)),
+                             inputs, 1, None, device,
+                             enable_acl=use_acl, acl_partitions=2)[0]
+               for use_acl in (False, True)]
+    verify(outputs, atol=0.002, rtol=0.01)
+
+
+def test_heterogeneous():
+    """
+    Test that offloading only the supported operators works while
+    the unsupported operators are still computed via tvm.
+    """
+    if skip_runtime_test():
+        return
+
+    device = Device()
+    np.random.seed(0)
+
+    def get_model(input_shape, var_names):
+        """Return reshape -> sigmoid -> reshape applied to a float32 input."""
+        data = relay.var(next(var_names), shape=input_shape, dtype="float32")
+        squeezed = relay.reshape(data, (1, 1, 1000))
+        activated = relay.sigmoid(squeezed)
+        return relay.reshape(activated, (1, 1000))
+
+    sample = np.random.uniform(-127, 128, (1, 1, 1, 1000)).astype("float32")
+    inputs = {"a": tvm.nd.array(sample)}
+
+    # A fresh model is built for each run: first without ACL, then with it.
+    outputs = [build_and_run(get_model(inputs["a"].shape, iter(inputs)),
+                             inputs, 1, None, device,
+                             enable_acl=use_acl, tvm_ops=1,
+                             acl_partitions=2)[0]
+               for use_acl in (False, True)]
+    verify(outputs, atol=0.002, rtol=0.01)
+
+
+def test_multiple_runs():
+    """
+    Test that multiple runs of an operator work.
+
+    A 1x1 conv2d with all-ones weights is built once with ACL enabled and
+    executed with no_runs=3; the returned outputs are then compared with
+    each other.
+    """
+    if skip_runtime_test():
+        return
+
+    device = Device()
+    # Seed the generator so a failing input can be reproduced, as the other
+    # tests in this suite do.
+    np.random.seed(0)
+
+    def get_model():
+        """Return a 1x1 NHWC conv2d model and its parameters."""
+        a = relay.var("a", shape=(1, 28, 28, 512), dtype="float32")
+        w = tvm.nd.array(np.ones((256, 1, 1, 512), dtype="float32"))
+        weights = relay.const(w, "float32")
+        conv = relay.nn.conv2d(
+            a,
+            weights,
+            kernel_size=(1, 1),
+            data_layout="NHWC",
+            kernel_layout="OHWI",
+            strides=(1, 1),
+            padding=(0, 0),
+            dilation=(1, 1)
+        )
+        params = {"w": w}
+        return conv, params
+
+    inputs = {
+        "a": tvm.nd.array(np.random.uniform(-127, 128, (1, 28, 28, 512)).astype("float32")),
+    }
+
+    func, params = get_model()
+    # No [0] indexing here: every output returned by build_and_run is verified.
+    outputs = build_and_run(func, inputs, 1,
+                            params, device,
+                            enable_acl=True,
+                            no_runs=3)
+    verify(outputs, atol=0.002, rtol=0.01)
+
+
+if __name__ == "__main__":
+    # Run the tests in order when this file is executed directly.
+    for run in (test_multiple_ops, test_heterogeneous, test_multiple_runs):
+        run()
echo set\(USE_VM_PROFILER ON\) >> config.cmake
echo set\(USE_EXAMPLE_EXT_RUNTIME ON\) >> config.cmake
echo set\(USE_DNNL_CODEGEN ON\) >> config.cmake
+echo set\(USE_ARM_COMPUTE_LIB ON\) >> config.cmake
echo set\(USE_LLVM llvm-config-10\) >> config.cmake
echo set\(USE_NNPACK ON\) >> config.cmake
echo set\(NNPACK_PATH /NNPACK/build/\) >> config.cmake