Publishing 2019 R1 content
[platform/upstream/dldt.git] inference-engine/thirdparty/clDNN/src/gpu/convolution_gpu.cpp
index dd5a004..54e63a7 100644
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2018 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -32,13 +32,17 @@ struct convolution_gpu : typed_primitive_gpu_impl<convolution>
 
 protected:
 
-    virtual bool validate(typed_primitive_inst<convolution>& instance) const override
+    virtual bool validate_impl(const typed_primitive_inst<convolution>& instance) const override
     {
-        bool res = parent::validate(instance);
+        bool res = true;
+
+        auto outer_id = _outer.id();
+        auto data_type = instance.node.input().get_output_layout().data_type;
 
         // Check whether all memory elements use the same unit type (FP16 or FP32).
-        CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Input memory", instance.node.input().get_output_layout().data_type, "output memory", instance.node.get_output_layout().data_type, "");
-        CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Input memory", instance.node.input().get_output_layout().data_type, "filter memory", instance.weights_memory(0).get_layout().data_type, "");
+        CLDNN_ERROR_DATA_TYPES_MISMATCH(outer_id, "Input memory", data_type, "output memory", instance.node.get_output_layout().data_type, "");
+        // Integer signed/unsigned is ok for convolution
+        CLDNN_ERROR_DATA_TYPES_MISMATCH_IGNORE_SIGN(outer_id, "Input memory", data_type, "filter memory", instance.weights_memory(0).get_layout().data_type, "");
 
         return res;
     }
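
Note: the validate_impl() rewrite above relaxes the input/weights comparison so that 8-bit signed and unsigned tensors may be mixed, while any other mismatch (e.g. f16 input with f32 weights) still fails. As a rough illustration only — the real check is the CLDNN_ERROR_DATA_TYPES_MISMATCH_IGNORE_SIGN macro, whose definition is not part of this diff — a sign-ignoring comparison on cldnn::data_types could look like this hypothetical helper:

    // Hypothetical helper, not part of clDNN: i8 and u8 are treated as
    // compatible; every other pairing must match exactly.
    static bool types_match_ignoring_sign(cldnn::data_types a, cldnn::data_types b)
    {
        auto is_8bit_int = [](cldnn::data_types t)
        {
            return t == cldnn::data_types::i8 || t == cldnn::data_types::u8;
        };
        return a == b || (is_8bit_int(a) && is_8bit_int(b));
    }
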
@@ -59,6 +63,11 @@ protected:
         return _outer.get_split(); 
     }
 
+    virtual uint32_t get_groups() const override
+    {
+        return _outer.get_groups();
+    }
+
 public:
 
     static primitive_impl* create(const convolution_node &arg)
@@ -72,6 +81,7 @@ public:
         const auto& stride          = primitive->stride;
         const auto& dilation        = primitive->dilation;
         const auto& input_offset    = primitive->input_offset;
+        const auto& groups          = primitive->groups;
 
         const auto depthwise_separable_opt = arg.get_depthwise_sep_opt();
         const auto actual_split = depthwise_separable_opt ? (decltype(split))1 : split;
@@ -80,22 +90,24 @@ public:
 
         assert(arg.get_output_layout().size.feature[0] / primitive->split() == weights_layout.size.batch[0]);
 
-        auto conv_params = get_weights_bias_default_params<kernel_selector::convolution_params>(arg, actual_split);
+        auto conv_params = get_weights_bias_default_params<kernel_selector::convolution_params>(arg, (groups > 1 && !depthwise_separable_opt) ? groups : actual_split, groups);
         auto conv_optional_params = get_default_weights_bias_optional_params<kernel_selector::convolution_optional_params>(arg.get_program());
 
         const auto additional_offset = tensor::max(input_offset, 0);
         if (additional_offset != 0)
         {
-            conv_params.inputs[0] = convert_data_tensor(input_layout, actual_split, additional_offset);
+            conv_params.inputs[0] = convert_data_tensor(input_layout, (groups > 1 && !depthwise_separable_opt) ? groups : actual_split, additional_offset);
         }
 
         if(primitive->with_activation)
-            convert_activation_func_params(primitive, conv_params);
+            convert_activation_func_params(primitive, conv_params.activation);
 
-        conv_params.depthwiseSeparableOpt = depthwise_separable_opt;
+        conv_params.depthwise_separable_opt = depthwise_separable_opt;
         conv_params.transposed = transposed;
 
+        conv_params.local_convolution = weights_size.local[0] > 1 || weights_size.local[1] > 1;
         conv_params.split = split;
+        conv_params.groups = groups;
         conv_params.filterSize = {
             (uint32_t)weights_size.spatial[0],
             (uint32_t)weights_size.spatial[1],
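
Note: in the hunk above, the same selector — (groups > 1 && !depthwise_separable_opt) ? groups : actual_split — is passed both to get_weights_bias_default_params() and to convert_data_tensor(). Purely as an illustration (not part of the patch), naming the repeated ternary once makes the two call sites read identically:

    // Illustrative refactor only: give the repeated ternary from the hunk above a name.
    const auto groups_or_split = (groups > 1 && !depthwise_separable_opt)
                                     ? groups        // grouped convolution: parameters are built per group
                                     : actual_split; // legacy split path (forced to 1 by the depthwise-separable opt)
    // conv_params = get_weights_bias_default_params<kernel_selector::convolution_params>(arg, groups_or_split, groups);
    // conv_params.inputs[0] = convert_data_tensor(input_layout, groups_or_split, additional_offset);
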
@@ -141,8 +153,7 @@ public:
 
         kernel_selector::KernelsData best_kernels = kernel_selector.GetBestKernels(conv_params, conv_optional_params);
                
-        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
-
+        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with these arguments");
         auto conv = new convolution_gpu(arg, best_kernels[0]);
 
         return conv;
@@ -165,7 +176,12 @@ namespace{
             implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), convolution_gpu::create);
             // MMAD
             implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32), convolution_gpu::create);
+            implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byx8_f4), convolution_gpu::create);
+
             implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::fs_bs_yx_bsv4_fsv32), convolution_gpu::create);
+            implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), convolution_gpu::create);
+            implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), convolution_gpu::create);
+            implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), convolution_gpu::create);
         }
         ~attach() {}
     };
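
Note: the attach() registrations added above wire the new int8/uint8 layouts (byx8_f4, byxf, b_fs_yx_fsv4) to convolution_gpu::create through implementation_map<convolution>, which is defined elsewhere in clDNN and keyed by engine type, data type and format. A simplified, hypothetical stand-in for that table — not the real implementation_map<> — shows the idea:

    #include <map>
    #include <tuple>

    // Simplified stand-in for implementation_map<> (the real template lives
    // elsewhere in clDNN): a factory table keyed by engine, data type and format.
    using conv_factory = primitive_impl* (*)(const convolution_node&);
    using impl_key     = std::tuple<engine_types, data_types, format::type>;

    static std::map<impl_key, conv_factory>& conv_impls()
    {
        static std::map<impl_key, conv_factory> table;
        return table;
    }

    // Mirroring one of the registrations above:
    //   conv_impls()[{engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4}] = convolution_gpu::create;
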