// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_LRN_HPP
#define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_LRN_HPP

#include "../../op_cuda.hpp"

#include "../csl/cudnn.hpp"
#include "../csl/tensor_ops.hpp"

#include <cstddef>
#include <utility>
#include <vector>

namespace cv { namespace dnn { namespace cuda4dnn {

    enum class LRNType {
        ACROSS_CHANNELS,
        WITHIN_CHANNEL
    };
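
    /* LRN divides each element by the energy of its local neighbourhood.
     * With window size n (local_size), the computation is roughly
     *
     *   output = input / (bias + alpha * mean(input^2 over the window))^beta
     *
     * where the exact alpha scaling convention varies between frameworks.
     * ACROSS_CHANNELS sums over n adjacent channels at a fixed spatial
     * position; WITHIN_CHANNEL sums over an n x n spatial window inside
     * each channel.
     */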

    template <class T>
    class LRNOp final : public CUDABackendNode {
    public:
        using wrapper_type = GetCUDABackendWrapperType<T>;

        LRNOp(csl::cudnn::Handle handle, LRNType type_, std::size_t local_size, T alpha, T beta, T bias, std::size_t largestInputSize)
            : scratch_mem_in_bytes { 0 }
        {
            typename csl::LRN<T>::LRNType type{};
            switch (type_) {
            case LRNType::ACROSS_CHANNELS: type = csl::LRN<T>::LRNType::ACROSS_CHANNELS; break;
            case LRNType::WITHIN_CHANNEL: type = csl::LRN<T>::LRNType::WITHIN_CHANNEL; break;
            }
            lrn = csl::LRN<T>(std::move(handle), local_size, alpha, beta, bias, type);

            csl::WorkspaceBuilder builder;
            if (type_ == LRNType::WITHIN_CHANNEL) {
                /* this is not a bug: the within-channel mode is backed by
                 * cuDNN's divisive-normalization path, which requires two
                 * temporary tensors, each as large as the largest input
                 */
                builder.require<T>(largestInputSize);
                builder.require<T>(largestInputSize);
            }

            scratch_mem_in_bytes = builder.required_workspace_size();
        }

        void forward(
            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
            csl::Workspace& workspace) override
        {
            for (std::size_t i = 0; i < inputs.size(); i++)
            {
                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
                auto input = input_wrapper->getView();

                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
                auto output = output_wrapper->getSpan();

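                /* constructing a fresh allocator for every input reuses the
                 * same underlying workspace region across iterations
                 */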
                csl::WorkspaceAllocator allocator(workspace);
                lrn.normalize(input, output, allocator.get_instance());
            }
        }

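        /* reports the scratch requirement computed in the constructor; the
         * backend is expected to hand forward() a workspace of at least
         * this size
         */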
        std::size_t get_workspace_memory_in_bytes() const noexcept override { return scratch_mem_in_bytes; }

    private:
        csl::LRN<T> lrn;
        std::size_t scratch_mem_in_bytes;
    };
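
    /* Usage sketch (hypothetical caller: the cuDNN handle, the backend
     * wrappers and the workspace all come from the DNN runtime and are
     * merely assumed here):
     *
     *   auto node = cv::makePtr<LRNOp<float>>(
     *       std::move(cudnn_handle), LRNType::ACROSS_CHANNELS,
     *       5, 0.0001f, 0.75f, 1.0f, largest_input_numel);
     *
     *   // the runtime allocates >= node->get_workspace_memory_in_bytes()
     *   // bytes of scratch memory before invoking the node:
     *   node->forward(inputs, outputs, workspace);
     */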

}}} /* namespace cv::dnn::cuda4dnn */

#endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_LRN_HPP */