return {output};
}
+ std::vector<Output> VisitExpr_(const TupleNode* node) final {
+ std::vector<Output> outs;
+ // Flatten the tuple: every field must lower to exactly one Output.
+ // Nested tuples are rejected by the CHECK below.
+ for (auto field : node->fields) {
+ auto res = VisitExpr(field);
+ CHECK_EQ(res.size(), 1U) << "Do not support tuple nest";
+ outs.push_back(res[0]);
+ }
+ return outs;
+ }
+
+ // Lower tuple_get_item by visiting the tuple and selecting one field.
+ std::vector<Output> VisitExpr_(const TupleGetItemNode* op) final {
+ auto res = VisitExpr(op->tuple);
+ CHECK_GT(res.size(), static_cast<size_t>(op->index));
+
+ // Only keep the item we want for the child node.
+ // FIXME(@comaniac): The other items should still be required for the primary outputs.
+ return {res[op->index]};
+ }
+
std::vector<Output> VisitExpr_(const ConstantNode* cn) final {
// Note this is for demonstration purpose. ConstantNode doesn't necessarily
// belong to calls. We need to revisit this when tuples come into play.
* \endcode
*/
void GenerateBackendCFunc(const std::string& func_name, const Array<Var>& args,
- const Output& out) {
+ const std::vector<Output>& outs) {
// Print signature
code_stream_ << "\n";
code_stream_ << "extern \"C\" int " << func_name << "_wrapper_(";
code_stream_ << "DLTensor* arg" << i << ",\n";
code_stream_ << "\t";
}
- if (args.size() > 0) {
- code_stream_ << "DLTensor* arg" << args.size() << ") {\n";
+ for (size_t i = 0; i < outs.size() - 1; i++) {
+ code_stream_ << "DLTensor* out" << i << ",\n";
+ code_stream_ << "\t";
}
+ code_stream_ << "DLTensor* out" << outs.size() - 1 << ") {\n";
EnterScope();
code_stream_ << "static_cast<" << dtype_str << "*>(arg" << i << "->data),\n";
PrintIndents();
}
- if (args.size() > 0) {
- code_stream_ << "static_cast<" << out.dtype << "*>(arg" << args.size() << "->data)";
+ for (size_t i = 0; i < outs.size() - 1; i++) {
+ code_stream_ << "static_cast<" << outs[i].dtype << "*>(out" << i << "->data),\n";
+ PrintIndents();
}
- code_stream_ << ");\n";
+ code_stream_ << "static_cast<" << outs.back().dtype << "*>(out" << outs.size() - 1
+ << "->data));\n";
PrintIndents();
code_stream_ << "return 0;\n";
ExitScope();
*/
std::string JitImpl(const std::string& ext_func_id, const Array<Var>& args,
const std::vector<std::string>& buf_decl,
- const std::vector<std::string>& body, const std::vector<Output>& out) {
+ const std::vector<std::string>& body, const std::vector<Output>& outs) {
// Create the signature. For example, it could be:
- // extern "C" void dnnl_0_(float* input0, float* input1, float* out, int M, int N) {}
+ // extern "C" void dnnl_0_(float* in0, float* in1, float* out0, float* out1) {}
code_stream_ << "extern \"C\" void " << ext_func_id << "_(";
- CHECK_EQ(out.size(), 1U) << "Internal error: only single output is support.";
-
for (const auto& arg : args) {
const auto& dtype_str = GetDtypeString(arg);
code_stream_ << dtype_str << "* " << arg->name_hint() << ", ";
}
- code_stream_ << out[0].dtype << "* out) {\n";
+ for (size_t i = 0; i < outs.size() - 1; ++i) {
+ code_stream_ << outs[i].dtype << "* out" << i << ", ";
+ }
+ code_stream_ << outs.back().dtype << "* out" << outs.size() - 1 << ") {\n";
this->EnterScope();
// Function body
}
// Copy output
- if (out[0].need_copy) {
+ for (size_t i = 0; i < outs.size(); ++i) {
+ if (!outs[i].need_copy) {
+ continue;
+ }
this->PrintIndents();
- code_stream_ << "std::memcpy(out, " << out[0].name << ", 4 * " << out[0].size << ");\n";
+ code_stream_ << "std::memcpy(out" << i << ", " << outs[i].name << ", 4 * " << outs[i].size
+ << ");\n";
+ }
- // Free buffers
- for (size_t i = 0; i < buf_decl.size(); i++) {
- this->PrintIndents();
- code_stream_ << "std::free(buf_" << i << ");\n";
- }
+ // Free buffers
+ for (size_t i = 0; i < buf_decl.size(); i++) {
+ this->PrintIndents();
+ code_stream_ << "std::free(buf_" << i << ");\n";
}
this->ExitScope();
code_stream_ << "}\n";
// Create the wrapper to call the ext_func
- this->GenerateBackendCFunc(ext_func_id, args, out[0]);
+ this->GenerateBackendCFunc(ext_func_id, args, outs);
return code_stream_.str();
}
return {output};
}
+ std::vector<Output> VisitExpr_(const TupleNode* node) final {
+ std::vector<Output> outs;
+ // Flatten the tuple: every field must lower to exactly one Output.
+ // Nested tuples are rejected by the CHECK below.
+ for (auto field : node->fields) {
+ auto res = VisitExpr(field);
+ CHECK_EQ(res.size(), 1U) << "Do not support tuple nest";
+ outs.push_back(res[0]);
+ }
+ return outs;
+ }
+
std::vector<Output> VisitExpr_(const TupleGetItemNode* op) final {
auto res = VisitExpr(op->tuple);
CHECK_GT(res.size(), static_cast<size_t>(op->index));
// Create a corresponding DNNL function for the given relay Function.
void GenDNNLFunc(const Function& func) {
CHECK(func.defined()) << "Input error: expect a Relay function.";
- const auto* call = func->body.as<CallNode>();
- CHECK(call) << "DNNL expects a single convolution or dense op";
// Record the external symbol for runtime lookup.
auto sid = GetExtSymbol(func);
# specific language governing permissions and limitations
# under the License.
"""Unit tests for graph partitioning."""
+# pylint: disable=not-callable
import os
import sys
exe = runtime.vm.Executable.load_exec(code, lib)
vm = runtime.vm.VirtualMachine(exe)
vm.init(ctx)
- out = vm.run(**map_inputs)
- tvm.testing.assert_allclose(out.asnumpy(), result, rtol=tol, atol=tol)
+ outs = vm.run(**map_inputs)
+ outs = outs if isinstance(outs, runtime.container.ADT) else [outs]
+ results = result if isinstance(result, list) else [result]
+ for out, ref in zip(outs, results):
+ tvm.testing.assert_allclose(out.asnumpy(), ref, rtol=tol, atol=tol)
def check_graph_runtime_result():
compile_engine.get().clear()
rt_mod.set_input(name, data)
rt_mod.set_input(**param)
rt_mod.run()
- out = tvm.nd.empty(out_shape, ctx=ctx)
- out = rt_mod.get_output(0, out)
- tvm.testing.assert_allclose(out.asnumpy(), result, rtol=tol, atol=tol)
+ out_shapes = out_shape if isinstance(out_shape, list) else [out_shape]
+ results = result if isinstance(result, list) else [result]
+
+ for idx, shape in enumerate(out_shapes):
+ out = tvm.nd.empty(shape, ctx=ctx)
+ out = rt_mod.get_output(idx, out)
+ tvm.testing.assert_allclose(out.asnumpy(), results[idx], rtol=tol, atol=tol)
check_vm_result()
check_graph_runtime_result()
target = "test_duplicate_merge_and_tuplegetitem"
@reg.register("nn.batch_norm", "target." + target)
- def abs(attrs, args): # pylint: disable=unused-variable
+ def batch_norm(attrs, args): # pylint: disable=unused-variable
return True
@reg.register("nn.relu", "target." + target)
- def abs(attrs, args): # pylint: disable=unused-variable
+ def relu(attrs, args): # pylint: disable=unused-variable
return True
def create_graph():
target = "test_flatten_tuple_output"
@reg.register("split", "target." + target)
- def foo(attrs, args): # pylint: disable=unused-variable
+ def split(attrs, args): # pylint: disable=unused-variable
return True
@reg.register("abs", "target." + target)
- def foo(attrs, args): # pylint: disable=unused-variable
+ def abs(attrs, args): # pylint: disable=unused-variable
return True
def create_graph():
partitioned = seq(create_graph())
assert tvm.ir.structural_equal(partitioned, expected(), map_free_vars=True)
+def test_tuple_output_exec():
+    """Test C codegen and runtime for a subgraph with a tuple output."""
+    a = relay.var('a', shape=(10, 10), dtype='float32')
+    b = relay.var('b', shape=(10, 10), dtype='float32')
+    # Annotate both inputs so the (add, sub) region is offloaded to 'ccompiler'.
+    ba = relay.annotation.compiler_begin(a, 'ccompiler')
+    bb = relay.annotation.compiler_begin(b, 'ccompiler')
+    add = relay.add(ba, bb)
+    sub = relay.subtract(ba, bb)
+    out = relay.Tuple((add, sub))
+    eout = relay.annotation.compiler_end(out, 'ccompiler')
+    func = relay.Function([a, b], eout)
+    mod = tvm.IRModule()
+    mod["main"] = func
+    mod = transform.PartitionGraph()(mod)
+
+    a_data = np.random.rand(10, 10).astype('float32')
+    b_data = np.random.rand(10, 10).astype('float32')
+
+    # Two output tensors are expected, matching the tuple (add, sub).
+    check_result(mod, {'a': a_data, 'b': b_data},
+                 [(10, 10), (10, 10)],
+                 [(a_data + b_data), (a_data - b_data)])
if __name__ == "__main__":
test_multi_node_compiler()
test_duplicate_merge_and_tuplegetitem()
test_constant_tuples()
test_flatten_tuple_output()
+ test_tuple_output_exec()