[NNFW] optimized Conv on CPU (#4964)
author Андрей Шедько/AI Tools Lab /SRR/Engineer/삼성전자 <a.shedko@samsung.com>
Wed, 10 Apr 2019 03:54:57 +0000 (06:54 +0300)
committer 박세희/On-Device Lab(SR)/Principal Engineer/삼성전자 <saehie.park@samsung.com>
Wed, 10 Apr 2019 03:54:57 +0000 (12:54 +0900)
Moved the bounds check and offset calculation out of the innermost loop
in the CPU convolution kernels (float and uint8). Achieved a more than 2x
speedup on MobileNet on neurun.

Signed-off-by: Andrei Shedko <a.shedko@samsung.com>
libs/cker/include/cker/operation/Conv.h

index e494f0e..d232dbc 100644 (file)
@@ -95,18 +95,18 @@ inline void Conv(const ConvParams &params, const Shape &input_shape, const float
           {
             for (int filter_x = 0; filter_x < filter_width; ++filter_x)
             {
-              for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+              const int in_x = in_x_origin + dilation_width_factor * filter_x;
+              const int in_y = in_y_origin + dilation_height_factor * filter_y;
+              // If the location is outside the bounds of the input image,
+              // use zero as a default value.
+              if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
               {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y = in_y_origin + dilation_height_factor * filter_y;
-                // If the location is outside the bounds of the input image,
-                // use zero as a default value.
-                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
+                const int in_offset = Offset(input_shape, batch, in_y, in_x, 0);
+                const int filter_offset = Offset(filter_shape, out_channel, filter_y, filter_x, 0);
+                for (int in_channel = 0; in_channel < input_depth; ++in_channel)
                 {
-                  float input_value =
-                      input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
-                  float filter_value = filter_data[Offset(filter_shape, out_channel, filter_y,
-                                                          filter_x, in_channel)];
+                  float input_value = input_data[in_offset + in_channel];
+                  float filter_value = filter_data[filter_offset + in_channel];
                   total += (input_value * filter_value);
                 }
               }
@@ -176,18 +176,18 @@ inline void Conv(const ConvParams &params, const Shape &input_shape, const uint8
           {
             for (int filter_x = 0; filter_x < filter_width; ++filter_x)
             {
-              for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+              const int in_x = in_x_origin + dilation_width_factor * filter_x;
+              const int in_y = in_y_origin + dilation_height_factor * filter_y;
+              // If the location is outside the bounds of the input image,
+              // use zero as a default value.
+              const int in_base = Offset(input_shape, batch, in_y, in_x, 0);
+              const int filter_base = Offset(filter_shape, out_channel, filter_y, filter_x, 0);
+              if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
               {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y = in_y_origin + dilation_height_factor * filter_y;
-                // If the location is outside the bounds of the input image,
-                // use zero as a default value.
-                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
+                for (int in_channel = 0; in_channel < input_depth; in_channel++)
                 {
-                  int32_t input_val =
-                      input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
-                  int32_t filter_val = filter_data[Offset(filter_shape, out_channel, filter_y,
-                                                          filter_x, in_channel)];
+                  int32_t input_val = input_data[in_channel + in_base];
+                  int32_t filter_val = filter_data[in_channel + filter_base];
                   acc += (filter_val + filter_offset) * (input_val + input_offset);
                 }
               }