1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2017, Intel Corporation, all rights reserved.
14 // Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #ifndef _OPENCV_LIBDNN_HPP_
44 #define _OPENCV_LIBDNN_HPP_
45 #include "../../precomp.hpp"
53 namespace cv { namespace dnn { namespace ocl4dnn {
// Configuration record for OCL4DNNConvSpatial: that class's explicit
// constructor takes an OCL4DNNConvConfig by value.
56 struct OCL4DNNConvConfig
// The trailing note records the intended default (false).
// NOTE(review): presumably selects whether a bias is applied — confirm in the implementation.
73 bool bias_term; // = false;
// Class template parameterized on Dtype, constructed explicitly from an
// OCL4DNNConvConfig passed by value.
// NOTE(review): Dtype is presumably the element type of the UMat data — confirm.
77 template<typename Dtype>
78 class OCL4DNNConvSpatial
81 explicit OCL4DNNConvSpatial(OCL4DNNConvConfig config);
82 ~OCL4DNNConvSpatial();
// Forward takes bottom_data and weight as const references and top_data as a
// non-const reference, plus an int32_t batch_size.
// NOTE(review): the bool result presumably reports success — confirm in the implementation.
83 bool Forward(const UMat& bottom_data, const UMat& weight,
85 UMat& top_data, int32_t batch_size);
// Kernel configuration fields: a kernel name and three-element arrays for the
// local work size, the global work size and the per-work-item output.
90 std::string kernelName;
92 size_t local_work_size[3];
93 size_t global_work_size[3];
94 int32_t workItem_output[3];
// Constructor. For each of the three indices it copies global_size[x] and
// workItem[x] into the member arrays; both pointers are dereferenced
// unconditionally, so they must be non-null and address at least 3 elements.
// local_size may be NULL: every local_work_size entry is then set to 1 and
// use_null_local becomes true. swizzle_weights is set from the swizzle argument.
104 kernelConfig(const std::string& name, const size_t* global_size, const size_t* local_size,
105 const int32_t* workItem,
111 for (int32_t x = 0; x < 3; x++)
113 local_work_size[x] = local_size ? local_size[x] : 1;
114 global_work_size[x] = global_size[x];
115 workItem_output[x] = workItem[x];
117 swizzle_weights = swizzle;
118 use_null_local = local_size == NULL;
// Constructor of tunerParam taking a type and three further integers.
// NOTE(review): w, h, d presumably are block width / height / depth — confirm against the initializers.
132 tunerParam(int type, int w, int h, int d)
// addDef overloads: each appends one " -D ..." definition to options_.
// NOTE(review): options_ presumably accumulates OpenCL build options — confirm at the use site.
// Appends " -D <name>" (definition without a value).
141 inline void addDef(const char* name)
143 options_ << " -D " << name;
// Appends " -D <name>=<value>" for an integer value.
146 inline void addDef(const char* name, const int value)
148 options_ << " -D " << name << "=" << value;
// Appends " -D <name>=(float)<value>". The value is written with the stream's
// current floating-point formatting, so the emitted digits depend on the
// stream's precision setting.
151 inline void addDef(const char* name, const float value)
153 options_ << " -D " << name << "=(float)" << value;
// Appends " -D <name>=(double)<value>"; same formatting caveat as the float overload.
156 inline void addDef(const char* name, const double value)
158 options_ << " -D " << name << "=(double)" << value;
// Appends " -D <name>=<value>" for a C-string value, written verbatim (no quoting or escaping).
161 inline void addDef(const char* name, const char* value)
163 options_ << " -D " << name << "=" << value;
// Helper declarations of the convolution class. Only the signatures are
// declared here; the notes below marked NOTE(review) are inferred from names
// and must be confirmed against the implementation.
166 void useFirstAvailable(const UMat &bottom,
173 void collectCommonInformation();
174 void setupKernelDetails(int32_t kernelType,
// Returns an ocl::Program by value.
179 ocl::Program compileKernel();
// Map type from a string key to an ocl::Program.
180 typedef std::map<std::string, ocl::Program> phash_t;
182 void calculateBenchmark(const UMat &bottom, UMat &verifyTop,
183 const UMat &weight, const UMat &bias,
187 void setupConvolution(const UMat &bottom,
// Kernel construction helpers; each returns bool.
// NOTE(review): presumably true on success — confirm.
193 bool createConvolutionKernel(int32_t kernelType,
197 bool setupIDLF(int32_t blockWidth,
200 bool createBasicKernel(int32_t blockWidth,
203 bool createGEMMLikeConvKernel(int32_t blockWidth,
// Takes the source buffer by const reference and the sub-buffer by non-const
// reference; offset and size are int32_t. NOTE(review): units (bytes vs elements) are not visible here.
206 void CreateSubBuffer(const UMat& buffer, UMat& sub_buffer,
207 int32_t offset, int32_t size, bool write_only);
// convolve receives a kernelConfig pointer and an explicit cv::ocl::Queue.
208 bool convolve(const UMat &bottom, UMat &top,
209 const UMat &weight, const UMat &bias,
211 kernelConfig* config,
212 const cv::ocl::Queue& queue);
// NOTE(review): the float result is presumably an elapsed time; the unit is not visible here.
213 float timedConvolve(const UMat &bottom, UMat &top,
214 const UMat &weight, const UMat &bias,
215 int32_t numImages, kernelConfig* config);
217 bool verifyResult(const UMat &bottom,
222 kernelConfig* config,
// interleave defaults to false when omitted.
225 bool swizzleWeight(const UMat &weight,
226 int32_t swizzled_factor,
227 bool interleave = false);
230 std::string generateSpecificKey(int32_t type, int32_t blockWidth,
// Tuned-configuration helpers: cache/save return void, the load* functions return bool.
// NOTE(review): the difference between "tuned" and "cached" configs is not visible here.
233 void cacheTunedConfig();
234 bool loadTunedConfig();
236 void saveTunedConfig();
237 bool loadCachedConfig();
239 void unloadProgram(const std::string& kernelName);
240 void prepareKernel(const UMat &bottom, UMat &top,
241 const UMat &weight, const UMat &bias,
// NOTE(review): x/y/z and lx/ly/lz presumably correspond to the work-item output
// and local work size triples of kernelConfig — confirm.
243 bool setupKernelByConfig(int x, int y, int z, int type,
244 int lx, int ly, int lz,
245 bool swizzle, bool nullLocal);
// Receives the vector of ref-counted tunerParam entries by non-const reference.
246 void generateTunerItems(std::vector< cv::Ptr<tunerParam> > &tunerItems);
// Data members of the convolution class.
250 UMat swizzled_weights_umat;
252 int32_t bottom_index_;
266 /// M_ is the channel dimension of the output for a single group, which is the
267 /// leading dimension of the filter matrix.
// Key and name strings.
// NOTE(review): presumably identify a configuration for tuning/caching — confirm.
271 std::string key_, key_sanitized_;
272 std::string short_key_;
273 std::string kernel_name_;
274 std::string cache_path_;
275 bool use_cache_path_; // true if cache_path_ directory exists
276 bool force_auto_tuning_;
277 int32_t kernel_index_;
// kernelQueue holds ref-counted kernelConfig entries; bestKernelConfig refers to a single one.
// NOTE(review): presumably the tuning candidates and the selected winner — confirm.
278 std::vector< cv::Ptr<kernelConfig> > kernelQueue;
279 cv::Ptr<kernelConfig> bestKernelConfig;
// Stream that the addDef() overloads append " -D ..." definitions to.
291 std::stringstream options_;
292 cv::ocl::ProgramSource src_;
293 int32_t prev_kernel_type_;
// Pooling method selector with explicit values 0, 1 and 2.
// NOTE(review): MAX / AVE / STO presumably mean max, average and stochastic pooling — confirm.
297 LIBDNN_POOLING_METHOD_MAX = 0,
298 LIBDNN_POOLING_METHOD_AVE = 1,
299 LIBDNN_POOLING_METHOD_STO = 2
300 } ocl4dnnPoolingMethod_t;
// Configuration record for OCL4DNNPool: that class's explicit constructor
// takes an OCL4DNNPoolConfig by value. The default constructor sets pool_method
// to LIBDNN_POOLING_METHOD_MAX and global_pooling to false.
302 struct OCL4DNNPoolConfig
304 OCL4DNNPoolConfig() :
310 pool_method(LIBDNN_POOLING_METHOD_MAX),
311 global_pooling(false)
// Fields; the trailing notes repeat the constructor defaults.
321 ocl4dnnPoolingMethod_t pool_method; // = LIBDNN_POOLING_METHOD_MAX;
322 bool global_pooling; // = false;
// Pooling class template parameterized on Dtype, constructed explicitly from
// an OCL4DNNPoolConfig passed by value.
325 template<typename Dtype>
329 explicit OCL4DNNPool(OCL4DNNPoolConfig config);
// NOTE(review): the bool result presumably reports success — confirm in the implementation.
331 bool Forward(const UMat& bottom_data,
337 // Pooling parameters
338 std::vector<int32_t> pad_;
339 std::vector<int32_t> stride_;
340 std::vector<int32_t> kernel_shape_;
341 std::vector<int32_t> im_in_shape_;
342 std::vector<int32_t> im_out_shape_;
// Selected pooling method (one of the ocl4dnnPoolingMethod_t enumerators).
344 ocl4dnnPoolingMethod_t pool_method_;
// NOTE(review): presumably the output height/width after pooling — confirm where they are computed.
356 int32_t pooled_height_;
357 int32_t pooled_width_;
// Configuration record for OCL4DNNInnerProduct: that class's explicit
// constructor takes an OCL4DNNInnerProductConfig by value. The default
// constructor sets num_output, M and K to 0, bias_term and transpose to false,
// and phase_test to true.
360 struct OCL4DNNInnerProductConfig
362 OCL4DNNInnerProductConfig() :
363 num_output(0), M(0), K(0),
364 bias_term(false), transpose(false), phase_test(true)
// Fields; the trailing notes repeat the constructor defaults.
370 bool transpose; // = false;
371 bool phase_test; // = true;
// Inner-product class template parameterized on Dtype, constructed explicitly
// from an OCL4DNNInnerProductConfig passed by value.
374 template<typename Dtype>
375 class OCL4DNNInnerProduct
378 explicit OCL4DNNInnerProduct(OCL4DNNInnerProductConfig config);
379 ~OCL4DNNInnerProduct();
// NOTE(review): the bool result presumably reports success — confirm in the implementation.
380 bool Forward(const UMat& bottom_data,
// Configuration held as a member.
// NOTE(review): presumably a copy of the constructor argument — confirm.
385 OCL4DNNInnerProductConfig config_;
// Normalization-region selector with explicit values 0 and 1.
// NOTE(review): the typedef name below mirrors the WITHIN_CHANNEL enumerator
// although the type covers both enumerators; it is used as the type of
// lrn_type / lrn_type_ elsewhere in this header, so renaming needs care.
398 LRNParameter_NormRegion_ACROSS_CHANNELS = 0,
399 LRNParameter_NormRegion_WITHIN_CHANNEL = 1
400 } LRNParameter_NormRegion_WITHIN_CHANNEL_t;
// Configuration record for OCL4DNNLRN: that class's explicit constructor takes
// an OCL4DNNLRNConfig by value. The visible initializers set lrn_type to
// LRNParameter_NormRegion_ACROSS_CHANNELS, the numeric fields (local_size,
// alpha, beta, k, batch_size, channels, height, width) to zero and
// norm_by_size to false.
402 struct OCL4DNNLRNConfig
405 lrn_type(LRNParameter_NormRegion_ACROSS_CHANNELS),
407 local_size(0), alpha(0.f), beta(0.f), k(0.f), norm_by_size(false),
408 batch_size(0), channels(0), height(0), width(0)
// Fields. Despite the "_WITHIN_CHANNEL_t" suffix, the type of lrn_type is the
// two-valued norm-region enum.
411 LRNParameter_NormRegion_WITHIN_CHANNEL_t lrn_type;
412 bool phase_test; // = true;
// LRN class template parameterized on Dtype, constructed explicitly from an
// OCL4DNNLRNConfig passed by value.
424 template<typename Dtype>
428 explicit OCL4DNNLRN(OCL4DNNLRNConfig config);
// Both functions take the input by const reference and the output by non-const reference.
// NOTE(review): Forward presumably dispatches to crossChannelForward for the
// ACROSS_CHANNELS region — confirm in the implementation.
429 bool Forward(const UMat& bottom_data, UMat& top_data);
432 bool crossChannelForward(const UMat& bottom_data, UMat& top_data);
// Stored normalization region (type is the two-valued norm-region enum).
433 LRNParameter_NormRegion_WITHIN_CHANNEL_t lrn_type_;
// Configuration record for OCL4DNNSoftmax: that class's explicit constructor
// takes an OCL4DNNSoftmaxConfig by value. The default constructor sets axis
// and channels to 0 and logsoftmax to false.
446 struct OCL4DNNSoftmaxConfig
448 OCL4DNNSoftmaxConfig() : axis(0), channels(0), logsoftmax(false)
// Softmax class template parameterized on Dtype, constructed explicitly from
// an OCL4DNNSoftmaxConfig passed by value.
456 template<typename Dtype>
460 explicit OCL4DNNSoftmax(OCL4DNNSoftmaxConfig config);
// Takes the input by const reference and the output by non-const reference.
// NOTE(review): the bool result presumably reports success — confirm in the implementation.
462 bool Forward(const UMat& bottom_data, UMat& top_data);
// NOTE(review): presumably initialized from OCL4DNNSoftmaxConfig::axis — confirm in the constructor.
465 int32_t softmax_axis_;
474 #endif // HAVE_OPENCL
475 } // namespace ocl4dnn