[IE CLDNN] Extend resample int8 packing optimization (#1662)
author Konrad Dobros <konrad.dobros@intel.com>
Fri, 7 Aug 2020 13:08:40 +0000 (15:08 +0200)
committer GitHub <noreply@github.com>
Fri, 7 Aug 2020 13:08:40 +0000 (16:08 +0300)
This extends the resample optimization for 8-bit types, which uses a
feature-packed mode to process multiple features in one work-item, to
feature counts that are not a multiple of the packing factor.

For nearest resampling it is safe to copy the extra feature padding for
blocked formats, so this change only removes the condition requiring the
feature count to be a multiple of the packing factor.

inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_ref.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/resample_gpu_test.cpp

index 5f3a423..2ee687f 100644 (file)
@@ -72,7 +72,9 @@ static size_t packing_factor(const resample_params& params) {
     size_t input_factor = get_layout_packing_factor(params.inputs[0].GetLayout());
     size_t output_factor = get_layout_packing_factor(params.output.GetLayout());
 
-    return std::min(input_factor, output_factor);
+    if (input_factor % output_factor == 0 || output_factor % input_factor == 0)
+        return std::min(input_factor, output_factor);
+    return 1;
 }
 
 static bool use_packing(const resample_params& params) {
@@ -83,8 +85,7 @@ static bool use_packing(const resample_params& params) {
     if (pack == 1)
         return false;
 
-    if (params.inputs[0].Feature().v % pack != 0 || params.output.Feature().v % pack != 0 ||
-        params.inputs[0].Feature().pad.before % pack != 0 || params.output.Feature().pad.before % pack != 0)
+    if (params.inputs[0].Feature().pad.before % pack != 0 || params.output.Feature().pad.before % pack != 0)
         return false;
 
     auto packed_work_items = params.output.X().v * params.output.Y().v * params.output.Z().v
index dfd7091..d06f938 100644 (file)
@@ -775,4 +775,6 @@ INSTANTIATE_TEST_CASE_P(smoke,
 
                             .smoke_params(data_types::f32, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
                             .smoke_params(data_types::f16, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
+                            .smoke_params(data_types::i8, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
+                            .smoke_params(data_types::u8, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
                         ), );