From b6ee52bff90acb868412b764a93fbdeb483859b9 Mon Sep 17 00:00:00 2001
From: Giuseppe Rossini <giuseppe.rossini@arm.com>
Date: Fri, 10 Jul 2020 18:58:22 +0100
Subject: [PATCH] [Bug fix] Fix in arm_cpu/conv2d_alter_op for NHWC quantized
 (#6027)

* [Bug fix] Fix in arm_cpu/conv2d_alter_op for NHWC quantized

Fix a few minor typos in topi/arm_cpu/conv2d_alter_op.py on the
NHWC quantized route:
- Kernel shape was misread (CO, IC, KH, KW) -> (KH, KW, IC, OC)
- Pad along the K dimension was misspelled: pad_k -> pad_K
- Workload name was wrong: "conv2d_NHWC_int8_without_tranform.arm_cpu"
  -> "conv2d_NHWC_quantized_without_transform.arm_cpu"

This submission fixes those errors and adds a further test for conv2d_alter_op.py

Change-Id: I0622df05f1d4d15311946f6e75f1840a34815a5b

* Move -target to -mtriple

Change-Id: Ieff80c774e8ab0fa7f48d83d50a79f3a62e8fe13

* Retrigger tests

Change-Id: I5541bed54eacc5063bf4a4fda725209cc23f621e
---
 python/tvm/relay/op/strategy/arm_cpu.py         |  2 +-
 tests/python/relay/test_pass_alter_op_layout.py | 65 +++++++++++++++++++++++++
 topi/python/topi/arm_cpu/conv2d_alter_op.py     | 11 +++--
 3 files changed, 72 insertions(+), 6 deletions(-)

diff --git a/python/tvm/relay/op/strategy/arm_cpu.py b/python/tvm/relay/op/strategy/arm_cpu.py
index d682aad..e639e22 100644
--- a/python/tvm/relay/op/strategy/arm_cpu.py
+++ b/python/tvm/relay/op/strategy/arm_cpu.py
@@ -284,7 +284,7 @@ def conv2d_gemm_without_weight_transform_strategy_arm_cpu(attrs, inputs, out_typ
             name="conv2d_NHWC_quantized_without_transform.arm_cpu")
     else:
         raise RuntimeError(
-            "Unsupported conv2d_gemm_without_weight_transform layout {0} with datatype {1}".
+            "Unsupported conv2d_NHWC_quantized_without_transform layout {0} with datatype {1}".
             format(layout, data.dtype))
     return strategy
 
diff --git a/tests/python/relay/test_pass_alter_op_layout.py b/tests/python/relay/test_pass_alter_op_layout.py
index bbe10c7..77105f0 100644
--- a/tests/python/relay/test_pass_alter_op_layout.py
+++ b/tests/python/relay/test_pass_alter_op_layout.py
@@ -1053,6 +1053,70 @@ def test_alter_layout_nhwc_arm():
 
     assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a)
 
+def test_alter_layout_nhwc_int8_aarch64():
+    """ Check that AlterOplayout does not alter NHWC data layout. """
+    from tvm import autotvm
+    expected_workload_shape = (20, 42, 4, 16)
+
+    # We use Int8Fallback to disable the fallback flag on the returned configs
+    # and to check (in update) the new workload produced during the pass
+    class Int8Fallback(autotvm.FallbackContext):
+        def _query_inside(self, target, workload):
+            key = (target, workload)
+            if key in self.memory:
+                return self.memory[key]
+            cfg = autotvm.task.space.FallbackConfigEntity()
+            cfg.is_fallback = False
+            cfg.cost = 0
+            self.memory[key] = cfg
+            return cfg
+        def update(self, target, workload, cfg):
+            key = (str(target), workload)
+            assert workload[2][1] == expected_workload_shape
+            assert workload[0] == "conv2d_NHWC_quantized_without_transform.arm_cpu"
+            self.memory[key] = cfg
+
+    def alter_conv2d(attrs, inputs, tinfos, out_type):
+        import topi
+        with tvm.target.create("llvm -device=arm_cpu -mtriple=aarch64-linux-gnu"):
+            with Int8Fallback():
+                tmp =  topi.nn.conv2d_alter_layout(attrs, inputs, tinfos, out_type)
+                return tmp
+
+    # Check NHWC conversion.
+    def before_nhwc_int8():
+        x = relay.var("x", shape=(1, 56, 56, 73), dtype='int8')
+        weight = relay.var('weight1', shape=(3, 3, 73, 79), dtype='int8')
+        y = relay.nn.conv2d(x, weight,
+                            channels=79,
+                            kernel_size=(3, 3),
+                            data_layout='NHWC',
+                            kernel_layout='HWIO',
+                            out_dtype='int32')
+        y = relay.Function(analysis.free_vars(y), y)
+        return y
+
+    def expected_nhwc_int8():
+        x = relay.var("x", shape=(1, 56, 56, 73), dtype='int8')
+        weight = relay.var('weight1', shape=(3, 3, 73, 79), dtype='int8')
+        tile_rows = 4
+        tile_cols = 16
+        weight_transformed = relay.nn.contrib_conv2d_gemm_weight_transform(weight, tile_rows, tile_cols)
+        y = relay.nn.contrib_conv2d_gemm_without_weight_transform(x, weight_transformed,
+                            channels=79,
+                            kernel_size=(3, 3),
+                            data_layout='NHWC',
+                            kernel_layout='HWIO',
+                            out_dtype='int32')
+        y = relay.Function(analysis.free_vars(y), y)
+        return y
+    with TempOpAttr("nn.conv2d", "FTVMAlterOpLayout", alter_conv2d):
+        a = before_nhwc_int8()
+        a = run_opt_pass(a, transform.AlterOpLayout())
+        b = run_opt_pass(expected_nhwc_int8(), transform.InferType())
+
+    assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a)
+
 def test_alter_op_with_global_var():
     """Test directly replacing an operator with a new one"""
     def before():
@@ -1114,4 +1178,5 @@ if __name__ == "__main__":
     test_alter_layout_pool()
     test_alter_layout_sum()
     test_alter_layout_nhwc_arm()
+    test_alter_layout_nhwc_int8_aarch64()
     test_alter_op_with_global_var()
diff --git a/topi/python/topi/arm_cpu/conv2d_alter_op.py b/topi/python/topi/arm_cpu/conv2d_alter_op.py
index 99fdf21..f37ae57 100644
--- a/topi/python/topi/arm_cpu/conv2d_alter_op.py
+++ b/topi/python/topi/arm_cpu/conv2d_alter_op.py
@@ -245,9 +245,9 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type):
         assert (data.dtype == 'int8' and kernel.dtype == 'int8' or
                 data.dtype == 'uint8' and kernel.dtype == 'uint8')
         assert data_layout == "NHWC" and kernel_layout == "HWIO"
-        CO, IC, KH, KW = get_const_tuple(kernel.shape)
+        KH, KW, IC, OC = get_const_tuple(kernel.shape)
         K = KH * KW * IC
-        N = CO
+        N = OC
 
         tile_rows = 4
         tile_cols = 16
@@ -257,7 +257,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type):
         if N % tile_rows != 0:
             pad_N = tile_rows - (N % tile_rows)
         if K % tile_cols != 0:
-            pad_k = tile_cols - (K % tile_cols)
+            pad_K = tile_cols - (K % tile_cols)
 
         N_padded = N + pad_N
         K_padded = K + pad_K
@@ -267,10 +267,11 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type):
                                      tile_rows,
                                      tile_cols), kernel.dtype)
 
+        new_workload_name = "conv2d_NHWC_quantized_without_transform.arm_cpu"
         new_workload = autotvm.task.args_to_workload([data, new_kernel,
                                                       strides, padding, dilation,
-                                                      out_dtype, (KH, KW), CO],
-                                                     "conv2d_NHWC_int8_without_tranform.arm_cpu")
+                                                      out_dtype, (KH, KW), OC],
+                                                     new_workload_name)
         dispatch_ctx.update(target, new_workload, cfg)
 
         return relay.nn.contrib_conv2d_gemm_without_weight_transform(inputs[0],
-- 
2.7.4