From da96d8f8219abf6b5b2d59905c1cee1b0a0fbc81 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=D0=90=D0=BD=D0=B4=D1=80=D0=B5=D0=B9=20=D0=A8=D0=B5=D0=B4?= =?utf8?q?=D1=8C=D0=BA=D0=BE/AI=20Tools=20Lab=20/SRR/Engineer/=EC=82=BC?= =?utf8?q?=EC=84=B1=EC=A0=84=EC=9E=90?= Date: Wed, 10 Apr 2019 06:54:57 +0300 Subject: [PATCH] [NNFW] optimized Conv on CPU (#4964) Moved the bounds check and offset calculation out of the innermost loop of the CPU convolution kernel. Achieved a 2x+ speedup on MobileNet on neurun. Signed-off-by: Andrei Shedko --- libs/cker/include/cker/operation/Conv.h | 40 ++++++++++++++++----------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/libs/cker/include/cker/operation/Conv.h b/libs/cker/include/cker/operation/Conv.h index e494f0e..d232dbc 100644 --- a/libs/cker/include/cker/operation/Conv.h +++ b/libs/cker/include/cker/operation/Conv.h @@ -95,18 +95,18 @@ inline void Conv(const ConvParams &params, const Shape &input_shape, const float { for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - for (int in_channel = 0; in_channel < input_depth; ++in_channel) + const int in_x = in_x_origin + dilation_width_factor * filter_x; + const int in_y = in_y_origin + dilation_height_factor * filter_y; + // If the location is outside the bounds of the input image, + // use zero as a default value. + if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) { - const int in_x = in_x_origin + dilation_width_factor * filter_x; - const int in_y = in_y_origin + dilation_height_factor * filter_y; - // If the location is outside the bounds of the input image, - // use zero as a default value. 
- if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) + const int in_offset = Offset(input_shape, batch, in_y, in_x, 0); + const int filter_offset = Offset(filter_shape, out_channel, filter_y, filter_x, 0); + for (int in_channel = 0; in_channel < input_depth; ++in_channel) { - float input_value = - input_data[Offset(input_shape, batch, in_y, in_x, in_channel)]; - float filter_value = filter_data[Offset(filter_shape, out_channel, filter_y, - filter_x, in_channel)]; + float input_value = input_data[in_offset + in_channel]; + float filter_value = filter_data[filter_offset + in_channel]; total += (input_value * filter_value); } } @@ -176,18 +176,18 @@ inline void Conv(const ConvParams &params, const Shape &input_shape, const uint8 { for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - for (int in_channel = 0; in_channel < input_depth; ++in_channel) + const int in_x = in_x_origin + dilation_width_factor * filter_x; + const int in_y = in_y_origin + dilation_height_factor * filter_y; + // If the location is outside the bounds of the input image, + // use zero as a default value. + const int in_base = Offset(input_shape, batch, in_y, in_x, 0); + const int filter_base = Offset(filter_shape, out_channel, filter_y, filter_x, 0); + if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) { - const int in_x = in_x_origin + dilation_width_factor * filter_x; - const int in_y = in_y_origin + dilation_height_factor * filter_y; - // If the location is outside the bounds of the input image, - // use zero as a default value. 
- if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) + for (int in_channel = 0; in_channel < input_depth; in_channel++) { - int32_t input_val = - input_data[Offset(input_shape, batch, in_y, in_x, in_channel)]; - int32_t filter_val = filter_data[Offset(filter_shape, out_channel, filter_y, - filter_x, in_channel)]; + int32_t input_val = input_data[in_channel + in_base]; + int32_t filter_val = filter_data[in_channel + filter_base]; acc += (filter_val + filter_offset) * (input_val + input_offset); } } -- 2.7.4