From b577171d666c82aed158f02ce5f84325e9e3ee09 Mon Sep 17 00:00:00 2001
From: Yao Wang
Date: Mon, 16 Sep 2019 11:22:00 -0700
Subject: [PATCH] [Graph Tuner] Fix benchmark layout in graph tuner (#3926)

* Fix graph tuner benchmarking layout transform

* Add test
---
 python/tvm/autotvm/graph_tuner/base_graph_tuner.py |   4 +
 tests/python/unittest/test_graph_tuner_core.py     | 116 +++++++++++++++++++++
 2 files changed, 120 insertions(+)

diff --git a/python/tvm/autotvm/graph_tuner/base_graph_tuner.py b/python/tvm/autotvm/graph_tuner/base_graph_tuner.py
index dca4148..1cb76d4 100644
--- a/python/tvm/autotvm/graph_tuner/base_graph_tuner.py
+++ b/python/tvm/autotvm/graph_tuner/base_graph_tuner.py
@@ -243,6 +243,7 @@ class BaseGraphTuner(object):
             transformation and is_valid showing whether this is a valid layout transformation.
         """
         input_names = self._input_shapes.keys()
+        pair_tracker = set()  # (input node idx, output node idx) pairs already benchmarked
         for key, val in self._in_nodes_dict.items():
             node_entry = self._node_list[key]
             target_input_idx = -1
@@ -282,6 +283,9 @@ class BaseGraphTuner(object):
                     i_infer_layout_func = OP2LAYOUT[node_entry["topi_op"][i]]
                     i_wkl = node_entry["workloads"][i]
 
+                if (i_idx, o_idx) in pair_tracker:  # benchmark each layout transform pair only once
+                    continue
+                pair_tracker.add((i_idx, o_idx))
                 for m, i_record in enumerate(in_node_entry["record_candidates"]):
                     for n, o_record in enumerate(node_entry["record_candidates"]):
 
diff --git a/tests/python/unittest/test_graph_tuner_core.py b/tests/python/unittest/test_graph_tuner_core.py
index c26b4b8..7dc2e3d 100644
--- a/tests/python/unittest/test_graph_tuner_core.py
+++ b/tests/python/unittest/test_graph_tuner_core.py
@@ -467,9 +467,125 @@ def test_tuple():
            % (str(expected_out), str(out))
 
 
+def test_triangle_block():
+    target = "llvm"
+    dtype = "float32"
+    dshape = (1, 3, 8, 8)
+    layout = "NCHW"
+    target_ops = [relay.nn.conv2d]
+
+    data = relay.var("data", shape=dshape, dtype=dtype)
+    w0 = relay.var("w0_weight")
+    conv0 = relay.nn.conv2d(data, w0, channels=16, kernel_size=(3, 3), padding=(1, 1))
+    w1 = relay.var("w1_weight")
+    conv1 = relay.nn.conv2d(conv0, w1, channels=32, kernel_size=(1, 1))
+    w2 = relay.var("w2_weight")
+    conv2 = relay.nn.conv2d(data, w2, channels=32, kernel_size=(3, 3), padding=(1, 1))
+    out = relay.concatenate([conv0, conv1, conv2], axis=1)  # conv0 feeds both conv1 and the concat
+    net = relay.Function(relay.analysis.free_vars(out), out)
+    net, params = relay.testing.create_workload(net)
+
+    tasks = autotvm.task.extract_from_program(net["main"],
+                                              target=target,
+                                              params=params,
+                                              ops=(relay.op.nn.conv2d,))
+    wkl_list = [
+        create_workload((1, 3, 8, 8), (16, 3, 3, 3), (1, 1), (1, 1), (1, 1), layout, layout, dtype, dtype),
+        create_workload((1, 16, 8, 8), (32, 16, 1, 1), (1, 1), (0, 0), (1, 1), layout, layout, dtype, dtype),
+        create_workload((1, 3, 8, 8), (32, 3, 3, 3), (1, 1), (1, 1), (1, 1), layout, layout, dtype, dtype),
+    ]
+    costs = [0.04, 0.012, 0.03, 0.02, 0.02, 0.045]
+    config_list = []
+    cfg_dict = {"i": -1,
+                "c": None,
+                "e": [["tile_ic", "sp", [3, 1]],
+                      ["tile_oc", "sp", [4, 4]],
+                      ["tile_ow", "sp", [4, 2]],
+                      ["unroll_kw", "ot", True]],
+                "t": ""}
+    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
+    cfg_dict = {"i": -1,
+                "c": None,
+                "e": [["tile_ic", "sp", [2, 8]],
+                      ["tile_oc", "sp", [1, 32]],
+                      ["tile_oh", "ot", 1],
+                      ["tile_ow", "sp", [4, 2]]],
+                "t": ""}
+    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
+    cfg_dict = {"i": -1,
+                "c": None,
+                "e": [["tile_ic", "sp", [8, 4]],
+                      ["tile_oc", "sp", [4, 8]],
+                      ["tile_ow", "sp", [2, 4]],
+                      ["unroll_kw", "ot", False]],
+                "t": ""}
+    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
+    cfg_dict = {"i": -1,
+                "c": None,
+                "e": [["tile_ic", "sp", [1, 3]],
+                      ["tile_oc", "sp", [2, 8]],
+                      ["tile_ow", "sp", [4, 2]],
+                      ["unroll_kw", "ot", True]],
+                "t": ""}
+    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
+    cfg_dict = {"i": -1,
+                "c": None,
+                "e": [["tile_ic", "sp", [4, 4]],
+                      ["tile_oc", "sp", [2, 16]],
+                      ["tile_oh", "ot", 1],
+                      ["tile_ow", "sp", [4, 2]]],
+                "t": ""}
+    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
+    cfg_dict = {"i": -1,
+                "c": None,
+                "e": [["tile_ic", "sp", [16, 2]],
+                      ["tile_oc", "sp", [8, 4]],
+                      ["tile_ow", "sp", [2, 4]],
+                      ["unroll_kw", "ot", False]],
+                "t": ""}
+    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
+
+    records = []
+
+    wkl_list = wkl_list + wkl_list
+    tasks = tasks + tasks
+    for wkl, cost, config, task in zip(wkl_list, costs, config_list, tasks):
+        task.workload = wkl
+        ms_input = MeasureInput(target=target, task=task, config=config)
+        ms_output = MeasureResult(costs=(cost,), error_no=0, all_cost=-1, timestamp=-1)
+        records.append((ms_input, ms_output))
+
+    ltf_records = []
+    ltf_arg = [tvm.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"]
+    ltf_arg = autotvm.task.topi_integration.serialize_args(ltf_arg)
+    ltf_wkl = ('layout_transform',) + autotvm.task.args_to_workload(ltf_arg)
+    ltf_task = copy.deepcopy(tasks[0])
+    ltf_task.workload = ltf_wkl
+    ms_input = MeasureInput(target=target, task=ltf_task, config=None)
+    ms_output = MeasureResult(costs=(1.91224744e-05,), error_no=0, all_cost=-1, timestamp=-1)
+    ltf_records.append((ms_input, ms_output))
+
+    executor = DPTuner(net, {"data": dshape}, records, target_ops, target)
+    executor.benchmark_layout_transform(layout_records=ltf_records, infer_layout=True)
+    executor.run()
+    out = [record[0].config for record in executor.get_optimal_records()]
+    expected_out = [records[3][0].config, records[1][0].config, records[2][0].config]
+    assert expected_out == out, "Output mismatch: expecting %s but got %s" \
+                                % (str(expected_out), str(out))
+
+    executor = PBQPTuner(net, {"data": dshape}, records, target_ops, target)
+    executor.benchmark_layout_transform(layout_records=ltf_records, infer_layout=True)
+    executor.run()
+    out = [record[0].config for record in executor.get_optimal_records()]
+    expected_out = [records[3][0].config, records[1][0].config, records[2][0].config]
+    assert expected_out == out, "Output mismatch: expecting %s but got %s" \
+                                % (str(expected_out), str(out))
+
+
 if __name__=="__main__":
     test_graph_tuner_layout_transform()
     test_DPTuner_run()
     test_PBQPTuner_run()
     test_many_sub_graphs()
     test_tuple()
+    test_triangle_block()
-- 
2.7.4