1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 #include <cuda_runtime.h>
10 #include "grid_stride_range.hpp"
11 #include "execution.hpp"
13 #include "../cuda4dnn/csl/stream.hpp"
14 #include "../cuda4dnn/csl/tensor.hpp"
15 #include "../cuda4dnn/csl/span.hpp"
17 #include <cuda_runtime.h>
19 using namespace cv::dnn::cuda4dnn::csl;
20 using namespace cv::dnn::cuda4dnn::csl::device;
22 namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
/* device kernels; the host launchers in this file refer to them as raw::<name> */
namespace raw {
    /* Nearest-neighbour resize.
     *
     * Think of the output and input as collections of 2d images where the last axis
     * is the width and the last but one axis is the height; the remaining axes
     * together enumerate the images.
     *
     * Every output element (n, y, x) copies the input element (n, in_y, in_x) of the
     * same image, where in_y/in_x are the output coordinates scaled by the
     * input/output size ratio and truncated towards zero.
     *
     * output, out_height, out_width: destination buffer and its image dimensions
     * input, in_height, in_width:    source buffer and its image dimensions
     *
     * The loop runs over `grid_stride_range(output.size())`, so the kernel does not
     * require one thread per output element.
     */
    template <class T>
    __global__ void resize_nn(
        Span<T> output, size_type out_height, size_type out_width,
        View<T> input, size_type in_height, size_type in_width)
    {
        const auto in_image_size = in_height * in_width;
        const auto out_image_size = out_height * out_width;

        /* o2i = output to input */
        const auto o2i_fx = static_cast<float>(in_width) / out_width;
        const auto o2i_fy = static_cast<float>(in_height) / out_height;

        for (auto idx : grid_stride_range(output.size())) {
            /* split the flat output index into (image, row, column) */
            const index_type n = idx / out_image_size;
            const index_type pixel = idx % out_image_size;
            const index_type x = pixel % out_width;
            const index_type y = pixel / out_width;

            /* truncate to the source pixel; the clamp keeps the index on the last
             * valid column/row even if float rounding pushes the product up to the
             * input extent
             */
            const auto in_x = min<index_type>(static_cast<index_type>(x * o2i_fx), in_width - 1);
            const auto in_y = min<index_type>(static_cast<index_type>(y * o2i_fy), in_height - 1);

            const index_type in_idx = n * in_image_size + in_y * in_width + in_x;
            output[idx] = input[in_idx];
        }
    }
}
/* device kernels; the host launchers in this file refer to them as raw::<name> */
namespace raw {
    /* Bilinear resize.
     *
     * Think of the output and input as collections of 2d images where the last axis
     * is the width and the last but one axis is the height; the remaining axes
     * together enumerate the images.
     *
     * For every output element (n, y, x) the source position is
     * (y * o2i_fy, x * o2i_fx). The four surrounding input samples of image n are
     * blended with the fractional parts of that position as weights.
     *
     * output, out_height, out_width: destination buffer and its image dimensions
     * input, in_height, in_width:    source buffer and its image dimensions
     * o2i_fy, o2i_fx:                output-to-input coordinate factors (rows, columns)
     *
     * The loop runs over `grid_stride_range(output.size())`, so the kernel does not
     * require one thread per output element.
     */
    template <class T>
    __global__ void resize_bilinear(
        Span<T> output, size_type out_height, size_type out_width,
        View<T> input, size_type in_height, size_type in_width,
        float o2i_fy, float o2i_fx)
    {
        const auto in_image_size = in_height * in_width;
        const auto out_image_size = out_height * out_width;

        for (auto idx : grid_stride_range(output.size())) {
            /* split the flat output index into (image, row, column) */
            const index_type n = idx / out_image_size;
            const index_type pixel = idx % out_image_size;
            const index_type x = pixel % out_width;
            const index_type y = pixel / out_width;

            /* fractional source position */
            const auto in_x = x * o2i_fx;
            const auto in_y = y * o2i_fy;

            /* top-left neighbour; the factors come from the caller, so the truncated
             * coordinate is clamped to the last valid column/row before it is used
             * as an index
             */
            const auto in_x0 = min<index_type>(static_cast<index_type>(in_x), in_width - 1);
            const auto in_y0 = min<index_type>(static_cast<index_type>(in_y), in_height - 1);

            /* bottom-right neighbour, replicated at the image border */
            const auto in_x1 = min<index_type>(in_x0 + 1, in_width - 1);
            const auto in_y1 = min<index_type>(in_y0 + 1, in_height - 1);

            const index_type in_offset_r0 = n * in_image_size + in_y0 * in_width;
            const index_type in_offset_r1 = n * in_image_size + in_y1 * in_width;

            /* v_<row><column> */
            const auto v_00 = input[in_offset_r0 + in_x0],
                       v_01 = input[in_offset_r0 + in_x1],
                       v_10 = input[in_offset_r1 + in_x0],
                       v_11 = input[in_offset_r1 + in_x1];

            /* interpolation weights: distance from the top-left neighbour */
            const T dy = T(in_y - in_y0);
            const T dx = T(in_x - in_x0);

            output[idx] =
                v_00 +
                dy * T(v_10 - v_00) +
                dx * T(v_01 - v_00) +
                dy * dx * T(v_11 - v_01 - v_10 + v_00);
        }
    }
}
/* Launches the nearest-neighbour resize kernel on `stream`.
 *
 * The last two axes of both tensors are read as (height, width); every output
 * element is produced by raw::resize_nn from `input`.
 *
 * stream: stream handed to the execution policy
 * output: destination tensor; its element count bounds the launch
 * input:  source tensor
 */
template <class T>
void resize_nn(const Stream& stream, TensorSpan<T> output, TensorView<T> input) {
    auto out_height = output.get_axis_size(-2);
    auto out_width = output.get_axis_size(-1);

    auto in_height = input.get_axis_size(-2);
    auto in_width = input.get_axis_size(-1);

    auto kernel = raw::resize_nn<T>;
    /* policy is derived from the kernel, the number of output elements and the stream */
    auto policy = make_policy(kernel, output.size(), 0, stream);
    launch_kernel(kernel, policy, output, out_height, out_width, input, in_height, in_width);
}

/* explicit instantiations for the supported element types */
template void resize_nn<__half>(const Stream&, TensorSpan<__half>, TensorView<__half>);
template void resize_nn<float>(const Stream&, TensorSpan<float>, TensorView<float>);
/* Launches the bilinear resize kernel on `stream`.
 *
 * The last two axes of both tensors are read as (height, width); every output
 * element is produced by raw::resize_bilinear from `input`.
 *
 * stream:  stream handed to the execution policy
 * output:  destination tensor; its element count bounds the launch
 * input:   source tensor
 * scale_y: forwarded to the kernel as its output-to-input row factor (o2i_fy)
 * scale_x: forwarded to the kernel as its output-to-input column factor (o2i_fx)
 */
template <class T>
void resize_bilinear(const Stream& stream, TensorSpan<T> output, TensorView<T> input, float scale_y, float scale_x) {
    auto out_height = output.get_axis_size(-2);
    auto out_width = output.get_axis_size(-1);

    auto in_height = input.get_axis_size(-2);
    auto in_width = input.get_axis_size(-1);

    auto kernel = raw::resize_bilinear<T>;
    /* policy is derived from the kernel, the number of output elements and the stream */
    auto policy = make_policy(kernel, output.size(), 0, stream);
    launch_kernel(kernel, policy, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x);
}

/* explicit instantiations for the supported element types */
template void resize_bilinear<__half>(const Stream&, TensorSpan<__half>, TensorView<__half>, float, float);
template void resize_bilinear<float>(const Stream&, TensorSpan<float>, TensorView<float>, float, float);
133 }}}} /* namespace cv::dnn::cuda4dnn::kernels */