[IE CLDNN] Fix for some unwanted behaviour in layout optimizer (#2669)
author Mikołaj Życzyński <mikolaj.zyczynski@intel.com>
Fri, 30 Oct 2020 12:18:14 +0000 (13:18 +0100)
committer GitHub <noreply@github.com>
Fri, 30 Oct 2020 12:18:14 +0000 (15:18 +0300)
inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp
inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp

diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp
index 789b548..46efaa5 100644
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2018-2019 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -264,8 +264,26 @@ void minimize_local_reorders(program_impl& p, std::map<program_node*, format::ty
         if (!node->is_in_data_flow())
             continue;
 
-        if (lo.get_preferred_format(*node) != format::any)
-            continue;
+        auto preferred_format = lo.get_preferred_format(*node);
+
+        if (preferred_format != format::any) {
+            if (preferred_format == format::b_fs_yx_fsv4 &&
+                (node->get_output_layout().data_type == data_types::i8 || node->get_output_layout().data_type == data_types::u8)) {
+                std::set<format::type> io_formats;
+                for (auto user : node->get_users()) {
+                    io_formats.insert(fmt_map.at(user));
+                }
+                for (auto dep : node->get_dependencies()) {
+                    if (!dep->is_in_data_flow())
+                        continue;
+                    io_formats.insert(fmt_map.at(dep));
+                }
+                if (!(io_formats.size() == 1 && io_formats.count(preferred_format) == 0))
+                    continue;
+            } else {
+                continue;
+            }
+        }
 
         if (fmt_map.at(node) == format::any) {
             auto out_fmt = node->get_output_layout().format;
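Note on the reorder_inputs.cpp change: previously minimize_local_reorders skipped every node that already had a concrete preferred format. The patch adds one exception: when the preferred format is b_fs_yx_fsv4 and the node produces i8/u8 data, and every data-flow user and dependency agrees on a single format different from the preferred one, the node now falls through so it can adopt its neighbours' format instead of forcing extra reorders around itself. Below is a minimal standalone sketch of that neighbour test, using a plain enum and a vector of neighbour formats in place of the clDNN program_node/fmt_map machinery (hypothetical names, not the real API):

#include <iostream>
#include <set>
#include <vector>

enum class fmt { any, bfyx, b_fs_yx_fsv4, b_fs_yx_fsv16 };

// True when every data-flow neighbour uses one single format and that format
// differs from the node's preferred one; only then is the preference dropped
// so the node can follow its neighbours and avoid redundant reorders.
bool should_follow_neighbours(fmt preferred, const std::vector<fmt>& neighbour_fmts) {
    std::set<fmt> io_formats(neighbour_fmts.begin(), neighbour_fmts.end());
    return io_formats.size() == 1 && io_formats.count(preferred) == 0;
}

int main() {
    // All neighbours are b_fs_yx_fsv16, preferred is b_fs_yx_fsv4 -> follow them.
    std::cout << should_follow_neighbours(fmt::b_fs_yx_fsv4,
                                          {fmt::b_fs_yx_fsv16, fmt::b_fs_yx_fsv16}) << "\n";  // 1
    // Neighbours disagree -> keep the preferred format (patch continues the loop).
    std::cout << should_follow_neighbours(fmt::b_fs_yx_fsv4,
                                          {fmt::bfyx, fmt::b_fs_yx_fsv16}) << "\n";           // 0
    return 0;
}

In the actual diff the same condition appears negated (`continue` unless all neighbours agree on one non-preferred format), which is equivalent to the predicate above.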
diff --git a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp
index 9fc8cea..56fe153 100644
@@ -209,6 +209,7 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next,
         return true;
 
     if (next.is_type<convolution>() &&
+        !(prev.is_type<quantize>() && (prev_dt == data_types::i8 || prev_dt == data_types::u8)) &&
         (fmt_prev == format::b_fs_yx_fsv4 || fmt_prev == format::bfyx)  && prev_output_layout.size.feature[0] == 3 &&
         (fmt_next == format::b_fs_yx_fsv4 ||
          fmt_next == format::bs_fs_yx_bsv16_fsv16))
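Note on the can_fuse_reorder change: the first-convolution fast path (3 input feature maps, bfyx or b_fs_yx_fsv4 going to b_fs_yx_fsv4 or bs_fs_yx_bsv16_fsv16) no longer claims the reorder when the producer is a quantize node that already outputs i8/u8 data. A rough sketch of the updated predicate, with plain bools standing in for the real type and format queries (hypothetical names, not the clDNN API):

#include <iostream>

bool can_fuse_into_first_conv(bool next_is_conv,
                              bool prev_is_int8_quantize,   // new guard added by this patch
                              bool prev_fmt_ok,             // bfyx or b_fs_yx_fsv4
                              int prev_feature_count,
                              bool next_fmt_ok) {           // b_fs_yx_fsv4 or bs_fs_yx_bsv16_fsv16
    return next_is_conv &&
           !prev_is_int8_quantize &&
           prev_fmt_ok && prev_feature_count == 3 &&
           next_fmt_ok;
}

int main() {
    // Same 3-channel first convolution, but the producer is an int8 quantize:
    // the reorder is no longer fused, so the quantize output keeps its layout.
    std::cout << can_fuse_into_first_conv(true, true, true, 3, true) << "\n";   // 0
    std::cout << can_fuse_into_first_conv(true, false, true, 3, true) << "\n";  // 1
    return 0;
}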
@@ -360,7 +361,7 @@ bool layout_optimizer::convolution_b_fs_yx_fsv16_opt(layout const &input_layout,
         }
 
         // Check for non-grouped or depthwise convolution
-        if (input_layout.size.spatial[2] == 1 &&
+        if (input_layout.format.dimension() == 4 &&
             ((ks_x == 7 && ks_y == 7) || (ks_x == 3 && ks_y == 3) || (ks_x == 1 && ks_y == 1) || (ks_x == 5 && ks_y == 5)) &&
             weights_layout.size.batch[0] >= 16 &&
             ((conv->groups == 1 && conv->split() == 1) ||
@@ -370,7 +371,7 @@ bool layout_optimizer::convolution_b_fs_yx_fsv16_opt(layout const &input_layout,
              (input_layout.size.feature[0] <= 4)))  // only bfyx -> fsv16 kernel supports asymmetric quantization in fsv16 format
             return true;
         // Check for grouped convolution
-        else if (input_layout.size.spatial[2] == 1 && input_layout.size.batch[0] < 16 &&
+        else if (input_layout.format.dimension() == 4 && input_layout.size.batch[0] < 16 &&
                  out_features_per_group >= 16 &&
                  // Need to extend imad fsv4 kernel to handle e.g. 3 input features per group
                  (in_features_per_group % 4 == 0) &&
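Note on the convolution_b_fs_yx_fsv16_opt change: both branches used spatial[2] == 1 as a proxy for "this is a 4D tensor", but a 5D (bfzyx-like) layout whose z extent happens to be 1 also satisfies that test. Checking the format's dimensionality directly appears to be the intent here, restricting the fsv16 path to genuinely 4D layouts. A toy comparison of the two checks (hypothetical struct, not the clDNN layout class):

#include <iostream>

struct toy_layout {
    int rank;       // 4 for bfyx-like formats, 5 for bfzyx-like formats
    int spatial_z;  // third spatial dimension
};

bool old_check(const toy_layout& l) { return l.spatial_z == 1; }  // pre-patch proxy
bool new_check(const toy_layout& l) { return l.rank == 4; }       // post-patch check

int main() {
    toy_layout bfzyx_flat{5, 1};  // 5D layout whose z extent happens to be 1
    toy_layout bfyx{4, 1};

    std::cout << old_check(bfzyx_flat) << " " << new_check(bfzyx_flat) << "\n";  // 1 0
    std::cout << old_check(bfyx) << " " << new_check(bfyx) << "\n";              // 1 1
    return 0;
}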