inference-engine/thirdparty/clDNN/src/gpu/scale_gpu.cpp

   1 /*
   2 // Copyright (c) 2016 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 #include "scale_inst.h"
  18 #include "primitive_gpu_base.h"
  19 #include "implementation_map.h"
  20 #include "kernel_selector_helper.h"
  21 #include "eltwise/eltwise_kernel_selector.h"
  22 #include "eltwise/eltwise_kernel_base.h"
  23 #include "error_handler.h"
  24
  25 using namespace cldnn;
  26
  27 namespace cldnn { namespace gpu {
  28
  29
  30 struct scale_gpu : typed_primitive_gpu_impl<scale>
  31 {
  32     using parent = typed_primitive_gpu_impl<scale>;
  33     using parent::parent;
  34
  35 protected:
  36
  37     virtual kernel::kernel_arguments_data get_arguments(typed_primitive_inst<scale>& instance, int32_t) const override
  38     {
  39         kernel::kernel_arguments_data args;
  40         args.inputs = { &instance.input_memory(), &instance.scale_memory() };
  41         args.output = &instance.output_memory();
  42
  43         if (_outer.bias_term())
  44         {
  45             args.inputs.push_back(&instance.bias_memory());
  46         }
  47         return args;
  48     }
  49
  50 public:
  51
  52     static primitive_impl* create(const scale_node& arg)
  53     {
  54         auto ew_params = get_default_params<kernel_selector::eltwise_params>(arg);
  55         auto ew_optional_params = get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());
  56
  57         ew_params.inputs.push_back(convert_data_tensor(arg.scale_in().get_output_layout()));
  58
  59         ew_params.eltwiseParams.operations.push_back({
  60             { kernel_selector::eltwise_params::InputType::Buffer(0), kernel_selector::eltwise_params::InputType::Buffer(1) },
  61             kernel_selector::eltwise_mode::MUL });
  62
  63         if (arg.bias_term())
  64         {
  65             ew_params.inputs.push_back(convert_data_tensor(arg.bias().get_output_layout()));
  66             ew_params.eltwiseParams.operations.push_back({
  67                 { kernel_selector::eltwise_params::InputType::Intermediate(0), kernel_selector::eltwise_params::InputType::Buffer(2) },
  68                 kernel_selector::eltwise_mode::ADD });
  69         }
  70
  71         ew_params.eltwiseParams.layoutBased = true;
  72
  73         auto& kernel_selector = kernel_selector::eltwise_kernel_selector::Instance();
  74         auto best_kernels = kernel_selector.GetBestKernels(ew_params, ew_optional_params);
  75
  76         CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
  77
  78         auto scale = new scale_gpu(arg, best_kernels[0]);
  79
  80         return scale;
  81     }
  82 };
  83
  84 namespace {
  85     struct attach {
  86         attach() {
  87             auto val_fw = scale_gpu::create;
  88
  89             implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
  90             implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
  91             implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
  92             implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
  93             implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
  94             implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
  95         }
  96         ~attach() {}
  97     };
  98     attach attach_impl;
  99 }
 100 } }