From bfa9e94a43cbde3672284be12a47aa5efda8644b Mon Sep 17 00:00:00 2001
From: =?utf8?q?=EA=B9=80=EC=9A=A9=EC=84=AD/=EB=8F=99=EC=9E=91=EC=A0=9C?=
 =?utf8?q?=EC=96=B4Lab=28SR=29/Engineer/=EC=82=BC=EC=84=B1=EC=A0=84?=
 =?utf8?q?=EC=9E=90?= <yons.kim@samsung.com>
Date: Thu, 5 Jul 2018 13:06:03 +0900
Subject: [PATCH] Introduce OperationCount and PerfPredictor (#1859)

* Introduce OperationCount and PerfPredictor

- For #1836
- OperationCount class holds per-kind operation counts (add, mul, nonlinear),
which PerfPredictor uses to calculate the total cycles.
- PerfPredictor class calculates the total cycles from an OperationCount.
The performance prediction is computed from the cycle cost that is set
for each kind of operation.

Signed-off-by: Yongseop Kim <yons.kim@samsung.com>

* Add more description for how to calc in detail
---
 tools/tflitefile_tool/operator_counter.py | 181 ++++++++++++++++++++++++++++++
 tools/tflitefile_tool/perf_predictor.py   |  15 +++
 2 files changed, 196 insertions(+)
 create mode 100755 tools/tflitefile_tool/operator_counter.py
 create mode 100755 tools/tflitefile_tool/perf_predictor.py

diff --git a/tools/tflitefile_tool/operator_counter.py b/tools/tflitefile_tool/operator_counter.py
new file mode 100755
index 0000000..1126720
--- /dev/null
+++ b/tools/tflitefile_tool/operator_counter.py
@@ -0,0 +1,181 @@
+#!/usr/bin/python
+
+import tflite.Conv2DOptions
+import tflite.Pool2DOptions
+import tflite.BuiltinOptions
+import tflite.Tensor
+from tensor_wrapping import Tensor
+import math
+
+
+class OperationCount(object):
+    """Running tally of add, multiply and nonlinear operation counts."""
+    def __init__(self, add_count=0, mul_count=0, nonlinear_count=0):
+        self.add_count, self.mul_count = add_count, mul_count
+        self.nonlinear_count = nonlinear_count
+
+    def Increase(self, op_count):
+        self.IncreaseAddCount(op_count.GetAddCount())
+        self.IncreaseMulCount(op_count.GetMulCount())
+        self.IncreaseNonlinearCount(op_count.GetNonlinearCount())
+
+    def IncreaseAddCount(self, add_count):
+        self.add_count += add_count
+
+    def IncreaseMulCount(self, mul_count):
+        self.mul_count += mul_count
+
+    def IncreaseNonlinearCount(self, nonlinear_count):
+        self.nonlinear_count += nonlinear_count
+
+    def GetAddCount(self):
+        return self.add_count
+
+    def GetMulCount(self):
+        return self.mul_count
+
+    def GetNonlinearCount(self):
+        return self.nonlinear_count
+
+    def TotalCount(self):
+        return sum((self.add_count, self.mul_count, self.nonlinear_count))
+
+
+# NOTE: How to count operations of convolution(and also pooling)?
+#
+# If we know operations of output's one element, we can calculate total output's operations.
+# For example, consider output Shape[3,3]
+# [ e11 e12 e13 ]
+# [ e21 e22 e23 ]
+# [ e31 e32 e33 ]
+# If we know operations for calculation of e11, we can know total operations of output(e11, e12, ... e33)
+# by operations of e11 * 9(total number of elements)
+#
+# So we only need to know how to count the operations of e11. For this, consider how conv computes a single output element.
+# If input_channel is 1, we only need to consider the kernel size (kernel_w and kernel_h).
+# For example, consider input Shape[3,3] and kernel Shape[2,2]
+# [ i11 i12 i13 ]   [ k11 k12 ]   [ o11 o12 o13 ]
+# [ i21 i22 i23 ] * [ k21 k22 ] = [ o21 o22 o23 ]
+# [ i31 i32 i33 ]                 [ o31 o32 o33 ]
+#
+# Conv operation: for o11, i11 * k11 + i21 * k21 + i12 * k12 + i22 * k22 = o11
+# In the conv operation above, 4 multiplications are done (== kernel_w * kernel_h)
+# and 3 additions are done (== kernel_w * kernel_h - 1).
+# Adding the bias costs one more addition, which is also counted as an add operation.
+#
+# Anyway, we can calculate total operations on this way. This can apply to the way of pooling.
+def CountOpsConv2D(tf_operator, inputs, outputs):
+    """Count the adds and multiplies of a CONV_2D operator (bias included).
+
+    inputs is assumed to be [input, weight, bias], outputs [output], shapes [N,H,W,C].
+    """
+    assert (tf_operator.BuiltinOptionsType() == tflite.BuiltinOptions.BuiltinOptions()
+            .Conv2DOptions)
+    input_tensor = inputs[0].tf_tensor
+    weight_tensor = inputs[1].tf_tensor
+    output_tensor = outputs[0].tf_tensor
+
+    # Multiplications per output element: kernel_h * kernel_w * input_channel.
+    kernel_ops = (weight_tensor.Shape(1) * weight_tensor.Shape(2) * input_tensor.Shape(3))
+
+    # Number of output elements: batch * height * width * output_channel.
+    out_elems = (output_tensor.Shape(0) * output_tensor.Shape(1) * output_tensor.Shape(2)
+                 * output_tensor.Shape(3))
+
+    # Summing kernel_ops products takes (kernel_ops - 1) additions and the bias adds
+    # one more, so the add count equals the multiply count; no extra "+ 1" is needed
+    # on top of kernel_ops.
+    mac_count = out_elems * kernel_ops
+    return OperationCount(mac_count, mac_count)
+
+
+# NOTE: Reference the comment 'NOTE' of CountOpsConv2D
+def CountOpsPooling(tf_operator, inputs, outputs):
+    """Count the operations of a 2D pooling operator from its filter and output size."""
+    assert (tf_operator.BuiltinOptionsType() == tflite.BuiltinOptions.BuiltinOptions()
+            .Pool2DOptions)
+    output_tensor = outputs[0].tf_tensor
+
+    pool2d_options = tflite.Pool2DOptions.Pool2DOptions()
+    pool2d_options.Init(tf_operator.BuiltinOptions().Bytes,
+                        tf_operator.BuiltinOptions().Pos)
+
+    # Elements covered by one pooling window: kernel_w * kernel_h.
+    kernel_ops = (pool2d_options.FilterWidth() * pool2d_options.FilterHeight())
+
+    # Number of output elements: batch * height * width * channel.
+    out_elems = (output_tensor.Shape(0) * output_tensor.Shape(1) * output_tensor.Shape(2)
+                 * output_tensor.Shape(3))
+
+    # Combining kernel_ops values takes (kernel_ops - 1) additions per output element;
+    # the parentheses matter: "out_elems * kernel_ops - 1" subtracts only once overall.
+    return OperationCount(out_elems * (kernel_ops - 1), out_elems * kernel_ops)
+
+
+def CountOpsSoftmax(tf_operator, inputs, outputs):
+    """Count the operations of SOFTMAX over an input assumed to be [batch, dim]."""
+    assert (tf_operator.BuiltinOptionsType() == tflite.BuiltinOptions.BuiltinOptions()
+            .SoftmaxOptions)
+    input_tensor = inputs[0].tf_tensor
+    batch_size = input_tensor.Shape(0)
+    input_dim = input_tensor.Shape(1)
+
+    # Softmax(x_i) = exp(x_i) / sum of exp(x), evaluated independently per batch row,
+    # so every per-row count is scaled by batch_size.
+    add_count = batch_size * (input_dim - 1)  # sum of exp(x)
+    mul_count = batch_size * input_dim  # /
+    nonlinear_count = batch_size * (input_dim + input_dim)  # sum of exp(x) and exp(x_i)
+
+    return OperationCount(add_count, mul_count, nonlinear_count)
+
+
+def CountOpsFullyConnected(tf_operator, inputs, outputs):
+    """Count the adds and multiplies of a FULLY_CONNECTED operator (bias included).
+
+    Assumes three input tensors, an input X shaped [1, 1, 1, input_dim] and an
+    output Y shaped [1, output_dim].
+    """
+    assert (tf_operator.BuiltinOptionsType() == tflite.BuiltinOptions.BuiltinOptions()
+            .FullyConnectedOptions)
+
+    input_dim = inputs[0].tf_tensor.Shape(3)
+    output_dim = outputs[0].tf_tensor.Shape(1)
+
+    # Each output element takes input_dim multiplications and input_dim additions:
+    # (input_dim - 1) to sum the products plus 1 for the bias.
+    mac_count = input_dim * output_dim
+
+    return OperationCount(mac_count, mac_count)
+
+
+def CountOpsNothing(tf_operator, inputs, outputs):
+    return OperationCount()  # all three counters keep their default of 0
+
+
+def CountOpsDummy(tf_operator, inputs, outputs):
+    return OperationCount()  # zero counts; NOTE(review): presumably a placeholder - confirm
+
+
+ops_counters = {  # operator name -> counter(tf_operator, inputs, outputs)
+    # Inceptionv3 operators
+    "CONV_2D": CountOpsConv2D,
+    "AVERAGE_POOL_2D": CountOpsPooling,
+    "MAX_POOL_2D": CountOpsPooling,
+    "SOFTMAX": CountOpsSoftmax,
+    "FULLY_CONNECTED": CountOpsFullyConnected,
+    "CONCATENATION": CountOpsNothing,
+
+    # ADAS operators: all map to CountOpsDummy, which returns all-zero counts
+    "TOPK_V2": CountOpsDummy,
+    "SUB": CountOpsDummy,
+    "STRIDED_SLICE": CountOpsDummy,
+    "RESHAPE": CountOpsDummy,
+    "GATHER": CountOpsDummy,
+    "RESIZE_BILINEAR": CountOpsDummy,
+    "CAST": CountOpsDummy,
+    "ADD": CountOpsDummy,
+    "MUL": CountOpsDummy,
+    "DIV": CountOpsDummy,
+    "CUSTOM(TensorFlowMax)": CountOpsDummy,
+    "CUSTOM": CountOpsDummy,
+}
diff --git a/tools/tflitefile_tool/perf_predictor.py b/tools/tflitefile_tool/perf_predictor.py
new file mode 100755
index 0000000..49df993
--- /dev/null
+++ b/tools/tflitefile_tool/perf_predictor.py
@@ -0,0 +1,15 @@
+#!/usr/bin/python
+
+from operator_counter import OperationCount
+
+
+class PerfPredictor(object):
+    """Estimate total cycles from an OperationCount and per-operation cycle costs."""
+    def __init__(self, add_cycle=1, mul_cycle=1, nonlinear_cycle=1):
+        self.add_cycle, self.mul_cycle = add_cycle, mul_cycle
+        self.nonlinear_cycle = nonlinear_cycle
+
+    def PredictCycles(self, op_count):
+        adds = op_count.GetAddCount() * self.add_cycle
+        muls = op_count.GetMulCount() * self.mul_cycle
+        return adds + muls + op_count.GetNonlinearCount() * self.nonlinear_cycle
-- 
2.7.4