modules/gpu/src/cuda/bilateral_filter.cu

   1 /*M///////////////////////////////////////////////////////////////////////////////////////\r
   2 //\r
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
   4 //\r
   5 //  By downloading, copying, installing or using the software you agree to this license.\r
   6 //  If you do not agree to this license, do not download, install,\r
   7 //  copy or use the software.\r
   8 //\r
   9 //\r
  10 //                           License Agreement\r
  11 //                For Open Source Computer Vision Library\r
  12 //\r
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
  15 // Third party copyrights are property of their respective owners.\r
  16 //\r
  17 // Redistribution and use in source and binary forms, with or without modification,\r
  18 // are permitted provided that the following conditions are met:\r
  19 //\r
  20 //   * Redistribution's of source code must retain the above copyright notice,\r
  21 //     this list of conditions and the following disclaimer.\r
  22 //\r
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,\r
  24 //     this list of conditions and the following disclaimer in the documentation\r
  25 //     and/or other materials provided with the distribution.\r
  26 //\r
  27 //   * The name of the copyright holders may not be used to endorse or promote products\r
  28 //     derived from this software without specific prior written permission.\r
  29 //\r
  30 // This software is provided by the copyright holders and contributors "as is" and\r
  31 // any express or implied warranties, including, but not limited to, the implied\r
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.\r
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,\r
  34 // indirect, incidental, special, exemplary, or consequential damages\r
  35 // (including, but not limited to, procurement of substitute goods or services;\r
  36 // loss of use, data, or profits; or business interruption) however caused\r
  37 // and on any theory of liability, whether in contract, strict liability,\r
  38 // or tort (including negligence or otherwise) arising in any way out of\r
  39 // the use of this software, even if advised of the possibility of such damage.\r
  40 //\r
  41 //M*/\r
  42 \r
  43 #include "internal_shared.hpp"\r
  44 #include "opencv2/gpu/device/limits.hpp"\r
  45 \r
  46 namespace cv { namespace gpu { namespace device\r
  47 {\r
  48     namespace bilateral_filter\r
  49     {\r
  50         __constant__ float* ctable_color;\r
  51         __constant__ float* ctable_space;\r
  52         __constant__ size_t ctable_space_step;\r
  53 \r
  54         __constant__ int cndisp;\r
  55         __constant__ int cradius;\r
  56 \r
  57         __constant__ short cedge_disc;\r
  58         __constant__ short cmax_disc;\r
  59 \r
  60         void load_constants(float* table_color, PtrStepSzf table_space, int ndisp, int radius, short edge_disc, short max_disc)\r
  61         {\r
  62             cudaSafeCall( cudaMemcpyToSymbol(ctable_color, &table_color, sizeof(table_color)) );\r
  63             cudaSafeCall( cudaMemcpyToSymbol(ctable_space, &table_space.data, sizeof(table_space.data)) );\r
  64             size_t table_space_step = table_space.step / sizeof(float);\r
  65             cudaSafeCall( cudaMemcpyToSymbol(ctable_space_step, &table_space_step, sizeof(size_t)) );\r
  66 \r
  67             cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) );\r
  68             cudaSafeCall( cudaMemcpyToSymbol(cradius, &radius, sizeof(int)) );\r
  69 \r
  70             cudaSafeCall( cudaMemcpyToSymbol(cedge_disc, &edge_disc, sizeof(short)) );\r
  71             cudaSafeCall( cudaMemcpyToSymbol(cmax_disc, &max_disc, sizeof(short)) );\r
  72         }\r
  73 \r
  74         template <int channels>\r
  75         struct DistRgbMax\r
  76         {\r
  77             static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)\r
  78             {\r
  79                 uchar x = ::abs(a[0] - b[0]);\r
  80                 uchar y = ::abs(a[1] - b[1]);\r
  81                 uchar z = ::abs(a[2] - b[2]);\r
  82                 return (::max(::max(x, y), z));\r
  83             }\r
  84         };\r
  85 \r
  86         template <>\r
  87         struct DistRgbMax<1>\r
  88         {\r
  89             static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)\r
  90             {\r
  91                 return ::abs(a[0] - b[0]);\r
  92             }\r
  93         };\r
  94 \r
  95         template <int channels, typename T>\r
  96         __global__ void bilateral_filter(int t, T* disp, size_t disp_step, const uchar* img, size_t img_step, int h, int w)\r
  97         {\r
  98             const int y = blockIdx.y * blockDim.y + threadIdx.y;\r
  99             const int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + t) & 1);\r
 100 \r
 101             T dp[5];\r
 102 \r
 103             if (y > 0 && y < h - 1 && x > 0 && x < w - 1)\r
 104             {\r
 105                 dp[0] = *(disp + (y  ) * disp_step + x + 0);\r
 106                 dp[1] = *(disp + (y-1) * disp_step + x + 0);\r
 107                 dp[2] = *(disp + (y  ) * disp_step + x - 1);\r
 108                 dp[3] = *(disp + (y+1) * disp_step + x + 0);\r
 109                 dp[4] = *(disp + (y  ) * disp_step + x + 1);\r
 110 \r
 111                 if(::abs(dp[1] - dp[0]) >= cedge_disc || ::abs(dp[2] - dp[0]) >= cedge_disc || ::abs(dp[3] - dp[0]) >= cedge_disc || ::abs(dp[4] - dp[0]) >= cedge_disc)\r
 112                 {\r
 113                     const int ymin = ::max(0, y - cradius);\r
 114                     const int xmin = ::max(0, x - cradius);\r
 115                     const int ymax = ::min(h - 1, y + cradius);\r
 116                     const int xmax = ::min(w - 1, x + cradius);\r
 117 \r
 118                     float cost[] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f};\r
 119 \r
 120                     const uchar* ic = img + y * img_step + channels * x;\r
 121 \r
 122                     for(int yi = ymin; yi <= ymax; yi++)\r
 123                     {\r
 124                         const T* disp_y = disp + yi * disp_step;\r
 125 \r
 126                         for(int xi = xmin; xi <= xmax; xi++)\r
 127                         {\r
 128                             const uchar* in = img + yi * img_step + channels * xi;\r
 129 \r
 130                             uchar dist_rgb = DistRgbMax<channels>::calc(in, ic);\r
 131 \r
 132                             const float weight = ctable_color[dist_rgb] * (ctable_space + ::abs(y-yi)* ctable_space_step)[::abs(x-xi)];\r
 133 \r
 134                             const T disp_reg = disp_y[xi];\r
 135 \r
 136                             cost[0] += ::min(cmax_disc, ::abs(disp_reg - dp[0])) * weight;\r
 137                             cost[1] += ::min(cmax_disc, ::abs(disp_reg - dp[1])) * weight;\r
 138                             cost[2] += ::min(cmax_disc, ::abs(disp_reg - dp[2])) * weight;\r
 139                             cost[3] += ::min(cmax_disc, ::abs(disp_reg - dp[3])) * weight;\r
 140                             cost[4] += ::min(cmax_disc, ::abs(disp_reg - dp[4])) * weight;\r
 141                         }\r
 142                     }\r
 143 \r
 144                     float minimum = numeric_limits<float>::max();\r
 145                     int id = 0;\r
 146 \r
 147                     if (cost[0] < minimum)\r
 148                     {\r
 149                         minimum = cost[0];\r
 150                         id = 0;\r
 151                     }\r
 152                     if (cost[1] < minimum)\r
 153                     {\r
 154                         minimum = cost[1];\r
 155                         id = 1;\r
 156                     }\r
 157                     if (cost[2] < minimum)\r
 158                     {\r
 159                         minimum = cost[2];\r
 160                         id = 2;\r
 161                     }\r
 162                     if (cost[3] < minimum)\r
 163                     {\r
 164                         minimum = cost[3];\r
 165                         id = 3;\r
 166                     }\r
 167                     if (cost[4] < minimum)\r
 168                     {\r
 169                         minimum = cost[4];\r
 170                         id = 4;\r
 171                     }\r
 172 \r
 173                     *(disp + y * disp_step + x) = dp[id];\r
 174                 }\r
 175             }\r
 176         }\r
 177 \r
 178         template <typename T>\r
 179         void bilateral_filter_caller(PtrStepSz<T> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream)\r
 180         {\r
 181             dim3 threads(32, 8, 1);\r
 182             dim3 grid(1, 1, 1);\r
 183             grid.x = divUp(disp.cols, threads.x << 1);\r
 184             grid.y = divUp(disp.rows, threads.y);\r
 185 \r
 186             switch (channels)\r
 187             {\r
 188             case 1:\r
 189                 for (int i = 0; i < iters; ++i)\r
 190                 {\r
 191                     bilateral_filter<1><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);\r
 192                     cudaSafeCall( cudaGetLastError() );\r
 193 \r
 194                     bilateral_filter<1><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);\r
 195                     cudaSafeCall( cudaGetLastError() );\r
 196                 }\r
 197                 break;\r
 198             case 3:\r
 199                 for (int i = 0; i < iters; ++i)\r
 200                 {\r
 201                     bilateral_filter<3><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);\r
 202                     cudaSafeCall( cudaGetLastError() );\r
 203 \r
 204                     bilateral_filter<3><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);\r
 205                     cudaSafeCall( cudaGetLastError() );\r
 206                 }\r
 207                 break;\r
 208             default:\r
 209                 cv::gpu::error("Unsupported channels count", __FILE__, __LINE__, "bilateral_filter_caller");\r
 210             }\r
 211 \r
 212             if (stream == 0)\r
 213                 cudaSafeCall( cudaDeviceSynchronize() );\r
 214         }\r
 215 \r
 216         void bilateral_filter_gpu(PtrStepSzb disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream)\r
 217         {\r
 218             bilateral_filter_caller(disp, img, channels, iters, stream);\r
 219         }\r
 220 \r
 221         void bilateral_filter_gpu(PtrStepSz<short> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream)\r
 222         {\r
 223             bilateral_filter_caller(disp, img, channels, iters, stream);\r
 224         }\r
 225     } // namespace bilateral_filter\r
 226 }}} // namespace cv { namespace gpu { namespace device\r