// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_LRN_HPP
#define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_LRN_HPP

#include "../../op_cuda.hpp"

#include "../csl/cudnn.hpp"
#include "../csl/tensor_ops.hpp"

#include <cstddef>
#include <utility>
#include <vector>

namespace cv { namespace dnn { namespace cuda4dnn {

    enum class LRNType {
        ACROSS_CHANNELS,
        WITHIN_CHANNEL
    };
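
    /* LRN divides each element by the energy of its local neighbourhood.
     * With window size n (local_size), the computation is roughly
     *
     *   output = input / (bias + alpha * mean(input^2 over the window))^beta
     *
     * where the exact alpha scaling convention varies between frameworks.
     * ACROSS_CHANNELS sums over n adjacent channels at a fixed spatial
     * position; WITHIN_CHANNEL sums over an n x n spatial window inside
     * each channel.
     */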

    template <class T>
    class LRNOp final : public CUDABackendNode {
    public:
        using wrapper_type = GetCUDABackendWrapperType<T>;

        LRNOp(csl::cudnn::Handle handle, LRNType type_, std::size_t local_size, T alpha, T beta, T bias, std::size_t largestInputSize)
            : scratch_mem_in_bytes { 0 }
        {
            typename csl::LRN<T>::LRNType type{};
            switch (type_) {
            case LRNType::ACROSS_CHANNELS: type = csl::LRN<T>::LRNType::ACROSS_CHANNELS; break;
            case LRNType::WITHIN_CHANNEL: type = csl::LRN<T>::LRNType::WITHIN_CHANNEL; break;
            }
            lrn = csl::LRN<T>(std::move(handle), local_size, alpha, beta, bias, type);

            csl::WorkspaceBuilder builder;
            if (type_ == LRNType::WITHIN_CHANNEL) {
                /* this is not a bug: the within-channel mode is backed by
                 * cuDNN's divisive-normalization path, which requires two
                 * temporary tensors, each as large as the largest input
                 */
                builder.require<T>(largestInputSize);
                builder.require<T>(largestInputSize);
            }

            scratch_mem_in_bytes = builder.required_workspace_size();
        }

        void forward(
            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
            csl::Workspace& workspace) override
        {
            for (std::size_t i = 0; i < inputs.size(); i++)
            {
                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
                auto input = input_wrapper->getView();

                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
                auto output = output_wrapper->getSpan();

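                /* constructing a fresh allocator for every input reuses the
                 * same underlying workspace region across iterations
                 */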
                csl::WorkspaceAllocator allocator(workspace);
                lrn.normalize(input, output, allocator.get_instance());
            }
        }

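        /* reports the scratch requirement computed in the constructor; the
         * backend is expected to hand forward() a workspace of at least
         * this size
         */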
        std::size_t get_workspace_memory_in_bytes() const noexcept override { return scratch_mem_in_bytes; }

    private:
        csl::LRN<T> lrn;
        std::size_t scratch_mem_in_bytes;
    };
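
    /* Usage sketch (hypothetical caller: the cuDNN handle, the backend
     * wrappers and the workspace all come from the DNN runtime and are
     * merely assumed here):
     *
     *   auto node = cv::makePtr<LRNOp<float>>(
     *       std::move(cudnn_handle), LRNType::ACROSS_CHANNELS,
     *       5, 0.0001f, 0.75f, 1.0f, largest_input_numel);
     *
     *   // the runtime allocates >= node->get_workspace_memory_in_bytes()
     *   // bytes of scratch memory before invoking the node:
     *   node->forward(inputs, outputs, workspace);
     */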

}}} /* namespace cv::dnn::cuda4dnn */

#endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_LRN_HPP */