/*M///////////////////////////////////////////////////////////////////////////////////////
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// For Open Source Computer Vision Library
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
42 #include "../../precomp.hpp"
45 #include "ocl4dnn.hpp"
46 #include "opencl_kernels_dnn.hpp"
49 namespace cv { namespace dnn { namespace ocl4dnn {
50 template<typename Dtype>
// Constructor: caches the softmax geometry (outer x channels x inner) derived
// from the configured input shape, decides whether the shared-local-memory
// (SLM) kernel variant can be used, and allocates the intermediate "scale"
// buffer consumed by the OpenCL kernels in Forward().
// NOTE(review): this is a partial listing — the opening brace and the
// initializations of inner_num_/outer_num_/count_/scale_sz (presumably to 1)
// fall in elided lines; the leading integer on each line is a listing
// artifact, not code.
51 OCL4DNNSoftmax<Dtype>::OCL4DNNSoftmax(OCL4DNNSoftmaxConfig config)
53     softmax_axis_ = config.axis;
54     channels_ = config.channels;
55     log_softmax_ = config.logsoftmax;
// inner_num_ = product of all dims after the softmax axis (per-channel extent).
61     for (int32_t i = softmax_axis_ + 1; i < config.in_shape.size(); i++)
62       inner_num_ *= config.in_shape[i];
// Enable the SLM kernel only when the per-work-group working set
// (channels*inner + 17*inner elements) stays within the 8192 threshold —
// presumably the SLM capacity in elements; confirm against the kernel source.
63     use_slm_ = (config.in_shape[softmax_axis_] * inner_num_ + inner_num_ * 17) <= 8192;
// outer_num_ = product of all dims before the softmax axis (e.g. batch).
64     for (int32_t i = 0; i < softmax_axis_; i++)
65       outer_num_ *= config.in_shape[i];
// NOTE(review): count_ is the *sum* of inner and outer, not the usual
// total-element product (outer*channels*inner), and it is not used in the
// visible Forward() below — confirm the intended meaning upstream.
66     count_ = inner_num_ + outer_num_;
// Scale buffer shape: 1 plane along the softmax axis for the SLM path,
// otherwise 17 planes (matches the 16*inner + 1*inner scratch sizes passed
// to the non-SLM kernel in Forward()).
68     std::vector<int32_t> scale_dims = config.in_shape;
69     scale_dims[softmax_axis_] = use_slm_ ? 1 : 17;
70     for (int32_t i = 0; i < scale_dims.size(); i++)
71       scale_sz *= scale_dims[i];
// Single-row 32-bit float UMat holding scale_sz scratch values on the device.
73     scale_data_.create(1, scale_sz, CV_32FC1);
76 template<typename Dtype>
// Destructor: releases the device-side scale scratch buffer allocated in the
// constructor. (Braces elided in this partial listing.)
77 OCL4DNNSoftmax<Dtype>::~OCL4DNNSoftmax()
79     scale_data_.release();
82 template<typename Dtype>
// Forward pass: runs (log-)softmax on `bottom` into `top` via an OpenCL
// kernel. Returns the kernel-run result; the fallback path for devices
// without Intel subgroup support (or large inner_num_) is in elided lines —
// presumably it reports failure so the caller can use a CPU/other path.
// NOTE(review): partial listing — `String kname;`, `bool ret`, the
// `if (use_slm_)` / `else` wrappers around the two argument blocks, the
// return statements, and most braces fall in elided lines; the leading
// integer on each line is a listing artifact, not code.
83 bool OCL4DNNSoftmax<Dtype>::Forward(const UMat& bottom, UMat& top)
// `queue` is fetched but not visibly used in this excerpt — possibly consumed
// by elided code; verify before removing.
86     ocl::Queue queue = ocl::Queue::getDefault();
87     bool intel_subgroup = ocl::Device::getDefault().intelSubgroupsSupport();
// GPU path requires Intel subgroups and a small inner dimension (< 128).
88     if (intel_subgroup && inner_num_ < 128)
// Disable subgroup independent forward progress when the driver accepts the
// option (workaround flag).
90       String opts = clOptionSupport("-cl-no-subgroup-ifp") ? " -cl-no-subgroup-ifp " : "";
92       ocl::Kernel oclk_softmax_forward_kernel;
// LOG_SOFTMAX compiles the kernel to emit log-probabilities instead.
94       if (log_softmax_) opts += " -DLOG_SOFTMAX ";
// Kernel selection: SLM variant vs. generic — the condition (presumably
// `use_slm_`) is in an elided line.
96         kname = CL_KERNEL_SELECT("softmax_forward_slm");
98         kname = CL_KERNEL_SELECT("softmax_forward");
100       if (!oclk_softmax_forward_kernel.create(kname.c_str(), ocl::dnn::softmax_loss_oclsrc, opts))
// Launch geometry: 256-wide work-groups, one group row per outer slice.
103       size_t global_size[] = { 256, (size_t)outer_num_, 1 };
104       size_t local_size[] = { 256, 1, 1 };
// Argument block for the SLM kernel: geometry, scale scratch, in/out buffers,
// plus three local-memory allocations (NULL pointer + byte size) sized
// channels*inner, inner, and 16*inner elements of Dtype.
109       oclk_softmax_forward_kernel.set(argIdx++, outer_num_);
110       oclk_softmax_forward_kernel.set(argIdx++, channels_);
111       oclk_softmax_forward_kernel.set(argIdx++, inner_num_);
112       oclk_softmax_forward_kernel.set(argIdx++, ocl::KernelArg::PtrWriteOnly(scale_data_));
113       oclk_softmax_forward_kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom));
114       oclk_softmax_forward_kernel.set(argIdx++, ocl::KernelArg::PtrWriteOnly(top));
115       oclk_softmax_forward_kernel.set(argIdx++, NULL, channels_ * inner_num_* sizeof(Dtype));
116       oclk_softmax_forward_kernel.set(argIdx++, NULL, inner_num_* sizeof(Dtype));
117       oclk_softmax_forward_kernel.set(argIdx++, NULL, 16 * inner_num_* sizeof(Dtype));
// Argument block for the generic kernel: same arguments minus the local
// scratch; it uses the 17-plane scale_data_ buffer instead.
121       oclk_softmax_forward_kernel.set(argIdx++, outer_num_);
122       oclk_softmax_forward_kernel.set(argIdx++, channels_);
123       oclk_softmax_forward_kernel.set(argIdx++, inner_num_);
124       oclk_softmax_forward_kernel.set(argIdx++, ocl::KernelArg::PtrWriteOnly(scale_data_));
125       oclk_softmax_forward_kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom));
126       oclk_softmax_forward_kernel.set(argIdx++, ocl::KernelArg::PtrWriteOnly(top));
// Asynchronous enqueue (sync=false); success/failure propagates via `ret`.
128       ret = oclk_softmax_forward_kernel.run(3, global_size, local_size, false);
// Explicit instantiation: only the float specialization is built.
133 template class OCL4DNNSoftmax<float>;
// NOTE(review): only one closing brace is visible for the three namespaces
// (cv, dnn, ocl4dnn) opened above, and the `#ifdef HAVE_OPENCL` matching the
// `#endif` below is not visible — both presumably sit in elided lines of
// this partial listing.
134 } // namespace ocl4dnn
137 #endif // HAVE_OPENCL