From bfa9e94a43cbde3672284be12a47aa5efda8644b Mon Sep 17 00:00:00 2001
From: =?utf8?q?=EA=B9=80=EC=9A=A9=EC=84=AD/=EB=8F=99=EC=9E=91=EC=A0=9C?=
 =?utf8?q?=EC=96=B4Lab=28SR=29/Engineer/=EC=82=BC=EC=84=B1=EC=A0=84?=
 =?utf8?q?=EC=9E=90?=
Date: Thu, 5 Jul 2018 13:06:03 +0900
Subject: [PATCH] Introduce OperationCount and PerfPredictor (#1859)

* Introduce OperationCount and PerfPredictor

- For #1836
- OperationCount class has count units and these will be calculated for total
  cycle by PerfPredictor
- PerfPredictor class will calculate total cycles with OperationCount. By
  setting each cycle's value, performance prediction will be calculated.

Signed-off-by: Yongseop Kim

* Add more description for how to calc in detail
---
 tools/tflitefile_tool/operator_counter.py | 208 ++++++++++++++++++++++++++++++
 tools/tflitefile_tool/perf_predictor.py   |  18 +++
 2 files changed, 226 insertions(+)
 create mode 100755 tools/tflitefile_tool/operator_counter.py
 create mode 100755 tools/tflitefile_tool/perf_predictor.py

diff --git a/tools/tflitefile_tool/operator_counter.py b/tools/tflitefile_tool/operator_counter.py
new file mode 100755
index 0000000..1126720
--- /dev/null
+++ b/tools/tflitefile_tool/operator_counter.py
@@ -0,0 +1,208 @@
+#!/usr/bin/python
+
+import tflite.Conv2DOptions
+import tflite.Pool2DOptions
+import tflite.BuiltinOptions
+import tflite.Tensor
+from tensor_wrapping import Tensor
+import math
+
+
+class OperationCount(object):
+    """Tally of the add, mul and nonlinear operations of one or more operators."""
+
+    def __init__(self, add_count=0, mul_count=0, nonlinear_count=0):
+        self.add_count = add_count
+        self.mul_count = mul_count
+        self.nonlinear_count = nonlinear_count
+
+    def Increase(self, op_count):
+        """Accumulate every counter of another OperationCount into this one."""
+        self.IncreaseAddCount(op_count.GetAddCount())
+        self.IncreaseMulCount(op_count.GetMulCount())
+        self.IncreaseNonlinearCount(op_count.GetNonlinearCount())
+
+    def IncreaseAddCount(self, add_count):
+        self.add_count = self.add_count + add_count
+
+    def IncreaseMulCount(self, mul_count):
+        self.mul_count = self.mul_count + mul_count
+
+    def IncreaseNonlinearCount(self, nonlinear_count):
+        self.nonlinear_count = self.nonlinear_count + nonlinear_count
+
+    def GetAddCount(self):
+        return self.add_count
+
+    def GetMulCount(self):
+        return self.mul_count
+
+    def GetNonlinearCount(self):
+        return self.nonlinear_count
+
+    def TotalCount(self):
+        """Return the sum of the add, mul and nonlinear counters."""
+        return self.add_count + self.mul_count + self.nonlinear_count
+
+
+# NOTE: How to count operations of convolution (and also pooling)?
+#
+# If we know the operations needed for one output element, we can calculate the
+# operations of the whole output.
+# For example, consider output Shape[3,3]
+# [ e11 e12 e13 ]
+# [ e21 e22 e23 ]
+# [ e31 e32 e33 ]
+# If we know the operations needed to calculate e11, the total for the output
+# (e11, e12, ... e33) is the operations of e11 * 9 (total number of elements).
+#
+# So we only need to know how to count the operations of e11, i.e. how the conv
+# operation produces one output element.
+# If input_channel is 1, we only need to consider kernel_size (kernel_w and kernel_h).
+# For example, consider input Shape[3,3] and kernel Shape[2,2]
+# [ i11 i12 i13 ]   [ k11 k12 ]   [ o11 o12 o13 ]
+# [ i21 i22 i23 ] * [ k21 k22 ] = [ o21 o22 o23 ]
+# [ i31 i32 i33 ]                 [ o31 o32 o33 ]
+#
+# Conv operation: for o11, i11 * k11 + i21 * k21 + i12 * k12 + i22 * k22 = o11
+# In the conv operation above, mul is done 4 times (== kernel_w * kernel_h)
+# and add is done 3 times (== kernel_w * kernel_h - 1);
+# the bias is also applied and is counted as an add operation.
+#
+# The total operations are calculated this way. The same applies to pooling.
+def CountOpsConv2D(tf_operator, inputs, outputs):
+    """Count the add and mul operations of a CONV_2D operator.
+
+    inputs and outputs are sequences whose items expose a tflite tensor as
+    .tf_tensor. Returns an OperationCount with the nonlinear count left at 0.
+    """
+    assert (tf_operator.BuiltinOptionsType() == tflite.BuiltinOptions.BuiltinOptions()
+            .Conv2DOptions)
+    # NOTE: Assume that conv2d operator always take 3 tensors as inputs
+    #       and both width and height are the same.
+    # operator_inputs[]: [input_tensor, weight_tensor, bias_tensor]
+    # operator_outputs[]: [output_tensor]
+    # tflite's tensor shape: [N,H,W,C]
+    input_tensor = inputs[0].tf_tensor
+    weight_tensor = inputs[1].tf_tensor
+    output_tensor = outputs[0].tf_tensor
+
+    # kernel_ops = kernel_w * kernel_h * input_channel
+    # (multiplications needed for one output element)
+    kernel_ops = (weight_tensor.Shape(2) * weight_tensor.Shape(1) * input_tensor.Shape(3))
+
+    # total_ops (number of output elements)
+    #     = batch_size * output_channel * output_width * output_height
+    total_ops = (output_tensor.Shape(0) * output_tensor.Shape(3) * output_tensor.Shape(2)
+                 * output_tensor.Shape(1))
+
+    # NOTE(review): the derivation above gives kernel_ops - 1 adds plus one bias
+    # add per output element; the original kernel_ops + 1 is kept - confirm intent.
+    return OperationCount(
+        (total_ops * (kernel_ops + 1)),  # bias
+        (total_ops * (kernel_ops)))
+
+
+# NOTE: Reference the comment 'NOTE' of CountOpsConv2D
+def CountOpsPooling(tf_operator, inputs, outputs):
+    """Count the operations of an AVERAGE_POOL_2D or MAX_POOL_2D operator.
+
+    The filter size is read from the operator's Pool2DOptions. Each output
+    element is counted as kernel_ops - 1 adds and kernel_ops muls.
+    """
+    assert (tf_operator.BuiltinOptionsType() == tflite.BuiltinOptions.BuiltinOptions()
+            .Pool2DOptions)
+    output_tensor = outputs[0].tf_tensor
+
+    pool2d_options = tflite.Pool2DOptions.Pool2DOptions()
+    pool2d_options.Init(tf_operator.BuiltinOptions().Bytes,
+                        tf_operator.BuiltinOptions().Pos)
+
+    # kernel_ops = kernel_w * kernel_h
+    kernel_ops = (pool2d_options.FilterWidth() * pool2d_options.FilterHeight())
+
+    # total_ops (number of output elements)
+    #     = batch_size * output_channel * output_width * output_height
+    total_ops = (output_tensor.Shape(0) * output_tensor.Shape(3) * output_tensor.Shape(2)
+                 * output_tensor.Shape(1))
+
+    # (kernel_ops - 1) is a per-element count, so it is parenthesized and scaled
+    # by total_ops instead of subtracting 1 once from the grand total.
+    return OperationCount((total_ops * (kernel_ops - 1)), (total_ops * kernel_ops))
+
+
+def CountOpsSoftmax(tf_operator, inputs, outputs):
+    """Count the operations of a SOFTMAX operator.
+
+    The input tensor is read as [batch_size, input_dim]; the per-row counts are
+    scaled by batch_size.
+    """
+    assert (tf_operator.BuiltinOptionsType() == tflite.BuiltinOptions.BuiltinOptions()
+            .SoftmaxOptions)
+
+    input_tensor = inputs[0].tf_tensor
+
+    batch_size = input_tensor.Shape(0)
+    input_dim = input_tensor.Shape(1)
+
+    # Softmax(x_i) = exp(x_i) / sum of exp(x), evaluated once per batch row
+    add_count = batch_size * (input_dim - 1)  # sum of exp(x)
+    mul_count = batch_size * input_dim  # /
+    nonlinear_count = batch_size * (input_dim + input_dim)  # sum of exp(x) and exp(x_i)
+
+    return OperationCount(add_count, mul_count, nonlinear_count)
+
+
+def CountOpsFullyConnected(tf_operator, inputs, outputs):
+    """Count the add and mul operations of a FULLY_CONNECTED operator."""
+    assert (tf_operator.BuiltinOptionsType() == tflite.BuiltinOptions.BuiltinOptions()
+            .FullyConnectedOptions)
+
+    # NOTE: Assume that fully_connected operator always take 3 tensors as inputs
+    #       and its X tensor's shape is [1, 1, 1, input_dim] with
+    #       its output Y [1, output_dim]
+    input_tensor = inputs[0].tf_tensor
+    output_tensor = outputs[0].tf_tensor
+
+    # ops_per_element
+    #     = input_dim(multiplication) + input_dim-1(addition) + 1(bias)
+    # total_ops
+    #     = ops_per_elem * output_dim
+    add_count = mul_count = input_tensor.Shape(3) * output_tensor.Shape(1)
+
+    return OperationCount(add_count, mul_count)
+
+
+def CountOpsNothing(tf_operator, inputs, outputs):
+    """Return an empty OperationCount (all counters zero)."""
+    return OperationCount()
+
+
+def CountOpsDummy(tf_operator, inputs, outputs):
+    """Return an empty OperationCount (all counters zero)."""
+    return OperationCount()
+
+
+ops_counters = {
+    # Inceptionv3
+    "CONV_2D": CountOpsConv2D,
+    "AVERAGE_POOL_2D": CountOpsPooling,
+    "MAX_POOL_2D": CountOpsPooling,
+    "SOFTMAX": CountOpsSoftmax,
+    "FULLY_CONNECTED": CountOpsFullyConnected,
+    "CONCATENATION": CountOpsNothing,
+
+    # ADAS
+    "TOPK_V2": CountOpsDummy,
+    "SUB": CountOpsDummy,
+    "STRIDED_SLICE": CountOpsDummy,
+    "RESHAPE": CountOpsDummy,
+    "GATHER": CountOpsDummy,
+    "RESIZE_BILINEAR": CountOpsDummy,
+    "CAST": CountOpsDummy,
+    "ADD": CountOpsDummy,
+    "MUL": CountOpsDummy,
+    "DIV": CountOpsDummy,
+    "CUSTOM(TensorFlowMax)": CountOpsDummy,
+    "CUSTOM": CountOpsDummy,
+}
diff --git a/tools/tflitefile_tool/perf_predictor.py b/tools/tflitefile_tool/perf_predictor.py
new file mode 100755
index 0000000..49df993
--- /dev/null
+++ b/tools/tflitefile_tool/perf_predictor.py
@@ -0,0 +1,18 @@
+#!/usr/bin/python
+
+from operator_counter import OperationCount
+
+
+class PerfPredictor(object):
+    """Predict total cycles from an OperationCount and per-operation cycle costs."""
+
+    def __init__(self, add_cycle=1, mul_cycle=1, nonlinear_cycle=1):
+        self.add_cycle = add_cycle
+        self.mul_cycle = mul_cycle
+        self.nonlinear_cycle = nonlinear_cycle
+
+    def PredictCycles(self, op_count):
+        """Return the sum of each counter of op_count times its cycle cost."""
+        return (op_count.GetAddCount() * self.add_cycle +
+                op_count.GetMulCount() * self.mul_cycle +
+                op_count.GetNonlinearCount() * self.nonlinear_cycle)
-- 
2.7.4