ed20ed24e709e7271f9950b3c24bba0c3d840392
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / src / gpu / fully_connected_grad_input_gpu.cpp
1 /*
2 // Copyright (c) 2016 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include "fully_connected_grad_input_inst.h"
18 #include "primitive_gpu_base.h"
19 #include "implementation_map.h"
20 #include "error_handler.h"
21 #include "kernel_selector_helper.h"
22 #include "fully_connected_grad_input/fully_connected_grad_input_kernel_selector.h"
23 #include "fully_connected_grad_input/fully_connected_grad_input_kernel_base.h"
24 #include "api/CPP/fully_connected_grad_input.hpp"
25
26 namespace cldnn {
27 namespace gpu {
28
29 struct fully_connected_grad_input_gpu : typed_primitive_gpu_impl<fully_connected_grad_input> {
30     using parent = typed_primitive_gpu_impl<fully_connected_grad_input>;
31     using parent::parent;
32
33 protected:
34     kernel::kernel_arguments_data get_arguments(typed_primitive_inst<fully_connected_grad_input>& instance,
35                                                         int32_t) const override {
36         kernel::kernel_arguments_data args = parent::get_arguments(instance, 1);
37         args.weights = (memory_impl::cptr) &instance.weights_memory();
38
39         return args;
40     }
41
42 public:
43     static primitive_impl* create(const fully_connected_grad_input_node& arg) {
44         auto fully_connected_grad_input_params =
45             get_default_params<kernel_selector::fully_connected_grad_input_params>(arg);
46         auto fully_connected_grad_input_optional_params =
47             get_default_optional_params<kernel_selector::fully_connected_grad_input_optional_params>(arg.get_program());
48
49         const auto& weights_layout = arg.weights().get_output_layout();
50         fully_connected_grad_input_params.weights = convert_weights_tensor(weights_layout);
51         fully_connected_grad_input_params.gradient = true;
52         fully_connected_grad_input_params.inputs.push_back(
53             convert_data_tensor(arg.get_dependency(1).get_output_layout()));
54
55         auto& kernel_selector = kernel_selector::fully_connected_grad_input_kernel_selector::Instance();
56         auto best_kernels = kernel_selector.GetBestKernels(fully_connected_grad_input_params,
57                                                            fully_connected_grad_input_optional_params);
58         CLDNN_ERROR_BOOL(arg.id(),
59                          "Best_kernel.empty()",
60                          best_kernels.empty(),
61                          "Cannot find a proper kernel with this arguments");
62
63         auto fully_connected_grad_input = new fully_connected_grad_input_gpu(arg, best_kernels[0]);
64
65         return fully_connected_grad_input;
66     }
67 };
68
69 namespace {
70 struct attach {
71     attach() {
72         auto val_fw = fully_connected_grad_input_gpu::create;
73
74         implementation_map<fully_connected_grad_input>::add({
75             {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw},
76             {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw},
77             {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
78             {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
79             {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw},
80             {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw},
81         });
82     }
83     ~attach() {}
84 };
85 attach attach_impl;
86 }  // namespace
87 }  // namespace gpu
88 }  // namespace cldnn