inference-engine/thirdparty/clDNN/src/gpu/fully_connected_grad_input_gpu.cpp

   1 /*
   2 // Copyright (c) 2016 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 #include "fully_connected_grad_input_inst.h"
  18 #include "primitive_gpu_base.h"
  19 #include "implementation_map.h"
  20 #include "error_handler.h"
  21 #include "kernel_selector_helper.h"
  22 #include "fully_connected_grad_input/fully_connected_grad_input_kernel_selector.h"
  23 #include "fully_connected_grad_input/fully_connected_grad_input_kernel_base.h"
  24 #include "api/CPP/fully_connected_grad_input.hpp"
  25
  26 namespace cldnn {
  27 namespace gpu {
  28
  29 struct fully_connected_grad_input_gpu : typed_primitive_gpu_impl<fully_connected_grad_input> {
  30     using parent = typed_primitive_gpu_impl<fully_connected_grad_input>;
  31     using parent::parent;
  32
  33 protected:
  34     kernel::kernel_arguments_data get_arguments(typed_primitive_inst<fully_connected_grad_input>& instance,
  35                                                         int32_t) const override {
  36         kernel::kernel_arguments_data args = parent::get_arguments(instance, 1);
  37         args.weights = (memory_impl::cptr) &instance.weights_memory();
  38
  39         return args;
  40     }
  41
  42 public:
  43     static primitive_impl* create(const fully_connected_grad_input_node& arg) {
  44         auto fully_connected_grad_input_params =
  45             get_default_params<kernel_selector::fully_connected_grad_input_params>(arg);
  46         auto fully_connected_grad_input_optional_params =
  47             get_default_optional_params<kernel_selector::fully_connected_grad_input_optional_params>(arg.get_program());
  48
  49         const auto& weights_layout = arg.weights().get_output_layout();
  50         fully_connected_grad_input_params.weights = convert_weights_tensor(weights_layout);
  51         fully_connected_grad_input_params.gradient = true;
  52         fully_connected_grad_input_params.inputs.push_back(
  53             convert_data_tensor(arg.get_dependency(1).get_output_layout()));
  54
  55         auto& kernel_selector = kernel_selector::fully_connected_grad_input_kernel_selector::Instance();
  56         auto best_kernels = kernel_selector.GetBestKernels(fully_connected_grad_input_params,
  57                                                            fully_connected_grad_input_optional_params);
  58         CLDNN_ERROR_BOOL(arg.id(),
  59                          "Best_kernel.empty()",
  60                          best_kernels.empty(),
  61                          "Cannot find a proper kernel with this arguments");
  62
  63         auto fully_connected_grad_input = new fully_connected_grad_input_gpu(arg, best_kernels[0]);
  64
  65         return fully_connected_grad_input;
  66     }
  67 };
  68
  69 namespace {
  70 struct attach {
  71     attach() {
  72         auto val_fw = fully_connected_grad_input_gpu::create;
  73
  74         implementation_map<fully_connected_grad_input>::add({
  75             {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw},
  76             {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw},
  77             {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
  78             {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
  79             {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw},
  80             {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw},
  81         });
  82     }
  83     ~attach() {}
  84 };
  85 attach attach_impl;
  86 }  // namespace
  87 }  // namespace gpu
  88 }  // namespace cldnn