1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 #ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_LRN_HPP
6 #define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_LRN_HPP
8 #include "../../op_cuda.hpp"
10 #include "../csl/cudnn.hpp"
11 #include "../csl/tensor_ops.hpp"
16 namespace cv { namespace dnn { namespace cuda4dnn {
24 class LRNOp final : public CUDABackendNode {
26 using wrapper_type = GetCUDABackendWrapperType<T>;
28 LRNOp(csl::cudnn::Handle handle, LRNType type_, std::size_t local_size, T alpha, T beta, T bias, std::size_t largestInputSize)
29 : scratch_mem_in_bytes { 0 }
31 typename csl::LRN<T>::LRNType type{};
33 case LRNType::ACROSS_CHANNELS: type = csl::LRN<T>::LRNType::ACROSS_CHANNELS; break;
34 case LRNType::WITHIN_CHANNEL: type = csl::LRN<T>::LRNType::WITHIN_CHANNEL; break;
36 lrn = csl::LRN<T>(std::move(handle), local_size, alpha, beta, bias, type);
38 csl::WorkspaceBuilder builder;
39 if (type_ == LRNType::WITHIN_CHANNEL) {
40 /* this is not a bug; we require two of these */
41 builder.require<T>(largestInputSize);
42 builder.require<T>(largestInputSize);
45 scratch_mem_in_bytes = builder.required_workspace_size();
49 const std::vector<cv::Ptr<BackendWrapper>>& inputs,
50 const std::vector<cv::Ptr<BackendWrapper>>& outputs,
51 csl::Workspace& workspace) override
53 for (int i = 0; i < inputs.size(); i++)
55 auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
56 auto input = input_wrapper->getView();
58 auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
59 auto output = output_wrapper->getSpan();
61 csl::WorkspaceAllocator allocator(workspace);
62 lrn.normalize(input, output, allocator.get_instance());
66 std::size_t get_workspace_memory_in_bytes() const noexcept override { return scratch_mem_in_bytes; }
70 std::size_t scratch_mem_in_bytes;
73 }}} /* namespace cv::dnn::cuda4dnn */
75 #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_LRN_HPP */