Publishing 2019 R1 content

[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / src / gpu / deconvolution_gpu.cpp
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp

index 68ffdbe..7ec6291 100644 (file)
--- a/inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp
@@ -32,9 +32,9 @@ struct deconvolution_gpu : typed_primitive_gpu_impl<deconvolution>
  protected:
  
      // TODO: share it with convolution and fully connected
-    virtual bool validate(typed_primitive_inst<deconvolution>& instance) const override
+    virtual bool validate_impl(const typed_primitive_inst<deconvolution>& instance) const override
      {
-        bool res = parent::validate(instance);
+        bool res = true;
  
          CLDNN_ERROR_NOT_EQUAL(_outer.id(), "deconvolution filling value", _outer.get_output_layout().data_padding.filling_value(), "padding mode", 0.0f, "Unknown padding mode in deconvolution.");
          // Check whether all memory elements use the same unit type (FP16 or FP32).
@@ -64,6 +64,11 @@ protected:
          return _outer.get_split(); 
      }
  
+    virtual uint32_t get_groups() const override
+    {
+        return _outer.get_groups();
+    }
+
  public:
  
      static primitive_impl* create(const deconvolution_node& arg)
@@ -93,18 +98,21 @@ public:
          const tensor dilation = {0,0,1,1};
  #endif
          const auto depthwise_separable_opt = arg.get_depthwise_sep_opt();
+        const auto actual_split = depthwise_separable_opt ? (decltype(split))1 : split;
  
          const auto& input_offset = primitive->input_offset;
+        const auto& groups = primitive->groups;
  
-        auto deconv_params = get_weights_bias_default_params<kernel_selector::deconvolution_params>(arg, depthwise_separable_opt ? 1 : split);
+        auto deconv_params = get_weights_bias_default_params<kernel_selector::deconvolution_params>(arg, (groups > 1 && !depthwise_separable_opt) ? groups : actual_split, groups);
          auto deconv_optional_params = get_default_weights_bias_optional_params<kernel_selector::deconvolution_optional_params>(arg.get_program());
  
          if(primitive->with_activation)
-            convert_activation_func_params(primitive, deconv_params);
+            convert_activation_func_params(primitive, deconv_params.activation);
  
-        deconv_params.depthwiseSeparableOpt = depthwise_separable_opt;
+        deconv_params.depthwise_separable_opt = depthwise_separable_opt;
  
          deconv_params.split = split;
+        deconv_params.groups = groups;
          deconv_params.filterSize = {
              (uint32_t)weights_size.spatial[0],
              (uint32_t)weights_size.spatial[1],
@@ -136,8 +144,7 @@ public:
          auto& kernel_selector = kernel_selector::deconvolution_kernel_selector::Instance();
          auto best_kernels = kernel_selector.GetBestKernels(deconv_params, deconv_optional_params);
  
-        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
-
+        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with these arguments");
          auto deconv = new deconvolution_gpu(arg, best_kernels[0]);
  
          return deconv;