From: Андрей Шедько/AI Tools Lab /SRR/Engineer/삼성전자 Date: Wed, 10 Apr 2019 03:54:57 +0000 (+0300) Subject: [NNFW] optimized Conv on CPU (#4964) X-Git-Tag: accepted/tizen/unified/20190430.113441~43 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=da96d8f8219abf6b5b2d59905c1cee1b0a0fbc81;p=platform%2Fcore%2Fml%2Fnnfw.git [NNFW] optimized Conv on CPU (#4964) Moved condition and offset calculation out of the innermost loop in cpu convolution kernel. Achieved 2+x speed up on mobilenet on neurun. Signed-off-by: Andrei Shedko --- diff --git a/libs/cker/include/cker/operation/Conv.h b/libs/cker/include/cker/operation/Conv.h index e494f0e..d232dbc 100644 --- a/libs/cker/include/cker/operation/Conv.h +++ b/libs/cker/include/cker/operation/Conv.h @@ -95,18 +95,18 @@ inline void Conv(const ConvParams &params, const Shape &input_shape, const float { for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - for (int in_channel = 0; in_channel < input_depth; ++in_channel) + const int in_x = in_x_origin + dilation_width_factor * filter_x; + const int in_y = in_y_origin + dilation_height_factor * filter_y; + // If the location is outside the bounds of the input image, + // use zero as a default value. + if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) { - const int in_x = in_x_origin + dilation_width_factor * filter_x; - const int in_y = in_y_origin + dilation_height_factor * filter_y; - // If the location is outside the bounds of the input image, - // use zero as a default value. 
- if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) + const int in_offset = Offset(input_shape, batch, in_y, in_x, 0); + const int filter_offset = Offset(filter_shape, out_channel, filter_y, filter_x, 0); + for (int in_channel = 0; in_channel < input_depth; ++in_channel) { - float input_value = - input_data[Offset(input_shape, batch, in_y, in_x, in_channel)]; - float filter_value = filter_data[Offset(filter_shape, out_channel, filter_y, - filter_x, in_channel)]; + float input_value = input_data[in_offset + in_channel]; + float filter_value = filter_data[filter_offset + in_channel]; total += (input_value * filter_value); } } @@ -176,18 +176,18 @@ inline void Conv(const ConvParams &params, const Shape &input_shape, const uint8 { for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - for (int in_channel = 0; in_channel < input_depth; ++in_channel) + const int in_x = in_x_origin + dilation_width_factor * filter_x; + const int in_y = in_y_origin + dilation_height_factor * filter_y; + // If the location is outside the bounds of the input image, + // use zero as a default value. + const int in_base = Offset(input_shape, batch, in_y, in_x, 0); + const int filter_base = Offset(filter_shape, out_channel, filter_y, filter_x, 0); + if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) { - const int in_x = in_x_origin + dilation_width_factor * filter_x; - const int in_y = in_y_origin + dilation_height_factor * filter_y; - // If the location is outside the bounds of the input image, - // use zero as a default value. 
- if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) + for (int in_channel = 0; in_channel < input_depth; in_channel++) { - int32_t input_val = - input_data[Offset(input_shape, batch, in_y, in_x, in_channel)]; - int32_t filter_val = filter_data[Offset(filter_shape, out_channel, filter_y, - filter_x, in_channel)]; + int32_t input_val = input_data[in_channel + in_base]; + int32_t filter_val = filter_data[in_channel + filter_base]; acc += (filter_val + filter_offset) * (input_val + input_offset); } }