/*
-// Copyright (c) 2018-2019 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
if (!node->is_in_data_flow())
continue;
- if (lo.get_preferred_format(*node) != format::any)
- continue;
+ auto preferred_format = lo.get_preferred_format(*node);
+
+ if (preferred_format != format::any) {
+ if (preferred_format == format::b_fs_yx_fsv4 &&
+ (node->get_output_layout().data_type == data_types::i8 || node->get_output_layout().data_type == data_types::u8)) {
+ std::set<format::type> io_formats;
+ for (auto user : node->get_users()) {
+ io_formats.insert(fmt_map.at(user));
+ }
+ for (auto dep : node->get_dependencies()) {
+ if (!dep->is_in_data_flow())
+ continue;
+ io_formats.insert(fmt_map.at(dep));
+ }
+ if (!(io_formats.size() == 1 && io_formats.count(preferred_format) == 0))
+ continue;
+ } else {
+ continue;
+ }
+ }
if (fmt_map.at(node) == format::any) {
auto out_fmt = node->get_output_layout().format;
return true;
if (next.is_type<convolution>() &&
+ !(prev.is_type<quantize>() && (prev_dt == data_types::i8 || prev_dt == data_types::u8)) &&
(fmt_prev == format::b_fs_yx_fsv4 || fmt_prev == format::bfyx) && prev_output_layout.size.feature[0] == 3 &&
(fmt_next == format::b_fs_yx_fsv4 ||
fmt_next == format::bs_fs_yx_bsv16_fsv16))
}
// Check for non-grouped or depthwise convolution
- if (input_layout.size.spatial[2] == 1 &&
+ if (input_layout.format.dimension() == 4 &&
((ks_x == 7 && ks_y == 7) || (ks_x == 3 && ks_y == 3) || (ks_x == 1 && ks_y == 1) || (ks_x == 5 && ks_y == 5)) &&
weights_layout.size.batch[0] >= 16 &&
((conv->groups == 1 && conv->split() == 1) ||
(input_layout.size.feature[0] <= 4))) // only bfyx -> fsv16 kernel supports asymmetric quantization in fsv16 format
return true;
// Check for grouped convolution
- else if (input_layout.size.spatial[2] == 1 && input_layout.size.batch[0] < 16 &&
+ else if (input_layout.format.dimension() == 4 && input_layout.size.batch[0] < 16 &&
out_features_per_group >= 16 &&
// Need to extend imad fsv4 kernel to handle e.g. 3 input features per group
(in_features_per_group % 4 == 0) &&