modules/ocl/src/opencl/stereobp.cl

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
  13 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
  14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // @Authors
  18 //    Jia Haipeng, jiahaipeng95@gmail.com
  19 //    Peng Xiao,   pengxiao@outlook.com
  20 //
  21 // Redistribution and use in source and binary forms, with or without modification,
  22 // are permitted provided that the following conditions are met:
  23 //
  24 //   * Redistribution's of source code must retain the above copyright notice,
  25 //     this list of conditions and the following disclaimer.
  26 //
  27 //   * Redistribution's in binary form must reproduce the above copyright notice,
  28 //     this list of conditions and the following disclaimer in the documentation
  29 //     and/or other GpuMaterials provided with the distribution.
  30 //
  31 //   * The name of the copyright holders may not be used to endorse or promote products
  32 //     derived from this software without specific prior written permission.
  33 //
  34 // This software is provided by the copyright holders and contributors as is and
  35 // any express or implied warranties, including, but not limited to, the implied
  36 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  37 // In no event shall the Intel Corporation or contributors be liable for any direct,
  38 // indirect, incidental, special, exemplary, or consequential damages
  39 // (including, but not limited to, procurement of substitute goods or services;
  40 // loss of use, data, or profits; or business interruption) however caused
  41 // and on any theory of liability, whether in contract, strict liability,
  42 // or tort (including negligence or otherwise) arising in any way out of
  43 // the use of this software, even if advised of the possibility of such damage.
  44 //
  45 //M*/
  46
  47 #if defined (DOUBLE_SUPPORT)
  48
  49 #ifdef cl_khr_fp64
  50 #pragma OPENCL EXTENSION cl_khr_fp64:enable
  51 #elif defined (cl_amd_fp64)
  52 #pragma OPENCL EXTENSION cl_amd_fp64:enable
  53 #endif
  54
  55 #endif
  56
  57 #ifdef T_FLOAT
  58 #define T float
  59 #else
  60 #define T short
  61 #endif
  62
  63 ///////////////////////////////////////////////////////////////
  64 /////////////////common///////////////////////////////////////
  65 /////////////////////////////////////////////////////////////
  66 T saturate_cast(float v){
  67 #ifdef T_SHORT
  68     return convert_short_sat_rte(v);
  69 #else
  70     return v;
  71 #endif
  72 }
  73
  74 #define FLOAT_MAX 3.402823466e+38f
  75 typedef struct
  76 {
  77     int   cndisp;
  78     float cmax_data_term;
  79     float cdata_weight;
  80     float cmax_disc_term;
  81     float cdisc_single_jump;
  82 }con_srtuct_t;
  83 ///////////////////////////////////////////////////////////////
  84 ////////////////////////// comp data //////////////////////////
  85 ///////////////////////////////////////////////////////////////
  86
  87 float pix_diff_1(__global const uchar *ls, __global const uchar *rs)
  88 {
  89     return abs((int)(*ls) - *rs);
  90 }
  91
  92 float pix_diff_3(__global const uchar *ls, __global const uchar *rs)
  93 {
  94     const float tr = 0.299f;
  95     const float tg = 0.587f;
  96     const float tb = 0.114f;
  97
  98     float val;
  99
 100     val =  tb * abs((int)ls[0] - rs[0]);
 101     val += tg * abs((int)ls[1] - rs[1]);
 102     val += tr * abs((int)ls[2] - rs[2]);
 103
 104     return val;
 105 }
 106 float pix_diff_4(__global const uchar *ls, __global const uchar *rs)
 107 {
 108     uchar4 l, r;
 109     l = *((__global uchar4 *)ls);
 110     r = *((__global uchar4 *)rs);
 111
 112     const float tr = 0.299f;
 113     const float tg = 0.587f;
 114     const float tb = 0.114f;
 115
 116     float val;
 117
 118     val  = tb * abs((int)l.x - r.x);
 119     val += tg * abs((int)l.y - r.y);
 120     val += tr * abs((int)l.z - r.z);
 121
 122     return val;
 123 }
 124
 125
 126 #ifndef CN
 127 #define CN 4
 128 #endif
 129
 130 #define CAT(X,Y) X##Y
 131 #define CAT2(X,Y) CAT(X,Y)
 132
 133 #define PIX_DIFF CAT2(pix_diff_, CN)
 134
 135 __kernel void comp_data(__global uchar *left,  int left_rows,  int left_cols,  int left_step,
 136                         __global uchar *right, int right_step,
 137                         __global T *data, int data_step,
 138                         __constant con_srtuct_t *con_st)
 139 {
 140     int x = get_global_id(0);
 141     int y = get_global_id(1);
 142
 143     if (y > 0 && y < (left_rows - 1) && x > 0 && x < (left_cols - 1))
 144     {
 145         data_step /= sizeof(T);
 146         const __global uchar* ls = left  + y * left_step  + x * CN;
 147         const __global uchar* rs = right + y * right_step + x * CN;
 148
 149         __global T *ds = data + y * data_step + x;
 150
 151         const unsigned int disp_step = data_step * left_rows;
 152
 153         for (int disp = 0; disp < con_st -> cndisp; disp++)
 154         {
 155             if (x - disp >= 1)
 156             {
 157                 float val = 0;
 158                 val = PIX_DIFF(ls, rs - disp * CN);
 159                 ds[disp * disp_step] =  saturate_cast(fmin(con_st -> cdata_weight * val,
 160                     con_st -> cdata_weight * con_st -> cmax_data_term));
 161             }
 162             else
 163             {
 164                 ds[disp * disp_step] =  saturate_cast(con_st -> cdata_weight * con_st -> cmax_data_term);
 165             }
 166         }
 167     }
 168 }
 169
 170 ///////////////////////////////////////////////////////////////
 171 //////////////////////// data step down ///////////////////////
 172 ///////////////////////////////////////////////////////////////
 173 __kernel void data_step_down(__global T *src, int src_rows,
 174                              __global T *dst, int dst_rows, int dst_cols,
 175                              int src_step, int dst_step,
 176                              int cndisp)
 177 {
 178     const int x = get_global_id(0);
 179     const int y = get_global_id(1);
 180
 181     if (x < dst_cols && y < dst_rows)
 182     {
 183         src_step /= sizeof(T);
 184         dst_step /= sizeof(T);
 185         for (int d = 0; d < cndisp; ++d)
 186         {
 187             float dst_reg;
 188             dst_reg  = src[(d * src_rows + min(2*y+0, src_rows-1)) * src_step + 2*x+0];
 189             dst_reg += src[(d * src_rows + min(2*y+1, src_rows-1)) * src_step + 2*x+0];
 190             dst_reg += src[(d * src_rows + min(2*y+0, src_rows-1)) * src_step + 2*x+1];
 191             dst_reg += src[(d * src_rows + min(2*y+1, src_rows-1)) * src_step + 2*x+1];
 192
 193             dst[(d * dst_rows + y) * dst_step + x] = saturate_cast(dst_reg);
 194         }
 195     }
 196 }
 197
 198 ///////////////////////////////////////////////////////////////
 199 /////////////////// level up messages  ////////////////////////
 200 ///////////////////////////////////////////////////////////////
 201 __kernel void level_up_message(__global T *src, int src_rows, int src_step,
 202                                __global T *dst, int dst_rows, int dst_cols, int dst_step,
 203                                int cndisp)
 204 {
 205     const int x = get_global_id(0);
 206     const int y = get_global_id(1);
 207
 208     if (x < dst_cols && y < dst_rows)
 209     {
 210         src_step /= sizeof(T);
 211         dst_step /= sizeof(T);
 212
 213         const int dst_disp_step = dst_step * dst_rows;
 214         const int src_disp_step = src_step * src_rows;
 215
 216         __global T       *dstr = dst + y * dst_step + x;
 217         __global const T *srcr = src + (y / 2 * src_step) + (x / 2);
 218
 219         for (int d = 0; d < cndisp; ++d)
 220             dstr[d * dst_disp_step] = srcr[d * src_disp_step];
 221     }
 222 }
 223
 224 ///////////////////////////////////////////////////////////////
 225 ////////////////////  calc all iterations /////////////////////
 226 ///////////////////////////////////////////////////////////////
 227 void calc_min_linear_penalty(__global T * dst, int disp_step,
 228                              int cndisp, float cdisc_single_jump)
 229 {
 230     float prev = dst[0];
 231     float cur;
 232
 233     for (int disp = 1; disp < cndisp; ++disp)
 234     {
 235         prev += cdisc_single_jump;
 236         cur = dst[disp_step * disp];
 237
 238         if (prev < cur)
 239         {
 240             cur = prev;
 241             dst[disp_step * disp] = saturate_cast(prev);
 242         }
 243
 244         prev = cur;
 245     }
 246
 247     prev = dst[(cndisp - 1) * disp_step];
 248     for (int disp = cndisp - 2; disp >= 0; disp--)
 249     {
 250         prev += cdisc_single_jump;
 251         cur = dst[disp_step * disp];
 252
 253         if (prev < cur)
 254         {
 255             cur = prev;
 256             dst[disp_step * disp] = saturate_cast(prev);
 257         }
 258         prev = cur;
 259     }
 260 }
 261 void message(const __global T *msg1, const __global T *msg2,
 262              const __global T *msg3, const __global T *data, __global T *dst,
 263              int msg_disp_step, int data_disp_step, int cndisp, float cmax_disc_term, float cdisc_single_jump)
 264 {
 265     float minimum = FLOAT_MAX;
 266
 267     for(int i = 0; i < cndisp; ++i)
 268     {
 269         float dst_reg;
 270         dst_reg  = msg1[msg_disp_step * i];
 271         dst_reg += msg2[msg_disp_step * i];
 272         dst_reg += msg3[msg_disp_step * i];
 273         dst_reg += data[data_disp_step * i];
 274
 275         if (dst_reg < minimum)
 276             minimum = dst_reg;
 277
 278         dst[msg_disp_step * i] = saturate_cast(dst_reg);
 279     }
 280
 281     calc_min_linear_penalty(dst, msg_disp_step, cndisp, cdisc_single_jump);
 282
 283     minimum += cmax_disc_term;
 284
 285     float sum = 0;
 286     for(int i = 0; i < cndisp; ++i)
 287     {
 288         float dst_reg = dst[msg_disp_step * i];
 289         if (dst_reg > minimum)
 290         {
 291             dst_reg = minimum;
 292             dst[msg_disp_step * i] = saturate_cast(minimum);
 293         }
 294         sum += dst_reg;
 295     }
 296     sum /= cndisp;
 297
 298     for(int i = 0; i < cndisp; ++i)
 299         dst[msg_disp_step * i] -= sum;
 300 }
 301 __kernel void one_iteration(__global T *u,    int u_step,
 302                             __global T *data, int data_step,
 303                             __global T *d,    __global T *l, __global T *r,
 304                             int t, int cols, int rows,
 305                             int cndisp, float cmax_disc_term, float cdisc_single_jump)
 306 {
 307     const int y = get_global_id(1);
 308     const int x = ((get_global_id(0)) << 1) + ((y + t) & 1);
 309
 310     if ((y > 0) && (y < rows - 1) && (x > 0) && (x < cols - 1))
 311     {
 312         u_step    /= sizeof(T);
 313         data_step /= sizeof(T);
 314
 315         __global T *us = u + y * u_step + x;
 316         __global T *ds = d + y * u_step + x;
 317         __global T *ls = l + y * u_step + x;
 318         __global T *rs = r + y * u_step + x;
 319         const __global  T *dt = data + y * data_step + x;
 320
 321         int msg_disp_step = u_step * rows;
 322         int data_disp_step = data_step * rows;
 323
 324         message(us + u_step, ls      + 1, rs - 1, dt, us, msg_disp_step, data_disp_step, cndisp,
 325             cmax_disc_term, cdisc_single_jump);
 326         message(ds - u_step, ls      + 1, rs - 1, dt, ds, msg_disp_step, data_disp_step, cndisp,
 327             cmax_disc_term, cdisc_single_jump);
 328
 329         message(us + u_step, ds - u_step, rs - 1, dt, rs, msg_disp_step, data_disp_step, cndisp,
 330             cmax_disc_term, cdisc_single_jump);
 331         message(us + u_step, ds - u_step, ls + 1, dt, ls, msg_disp_step, data_disp_step, cndisp,
 332             cmax_disc_term, cdisc_single_jump);
 333     }
 334 }
 335
 336 ///////////////////////////////////////////////////////////////
 337 /////////////////////////// output ////////////////////////////
 338 ///////////////////////////////////////////////////////////////
 339 __kernel void output(const __global T *u, int u_step,
 340                      const __global T *d, const __global T *l,
 341                      const __global T *r, const __global T *data,
 342                      __global T *disp, int disp_rows, int disp_cols, int disp_step,
 343                      int cndisp)
 344 {
 345     const int x = get_global_id(0);
 346     const int y = get_global_id(1);
 347
 348     if (y > 0 && y < disp_rows - 1 && x > 0 && x < disp_cols - 1)
 349     {
 350         u_step    /= sizeof(T);
 351         disp_step /= sizeof(T);
 352         const __global T *us = u + (y + 1) * u_step + x;
 353         const __global T *ds = d + (y - 1) * u_step + x;
 354         const __global T *ls = l + y * u_step + (x + 1);
 355         const __global T *rs = r + y * u_step + (x - 1);
 356         const __global T *dt = data + y * u_step + x;
 357
 358         int disp_steps = disp_rows * u_step;
 359
 360         int best = 0;
 361         float best_val = FLOAT_MAX;
 362         for (int d = 0; d < cndisp; ++d)
 363         {
 364             float val;
 365             val  = us[d * disp_steps];
 366             val += ds[d * disp_steps];
 367             val += ls[d * disp_steps];
 368             val += rs[d * disp_steps];
 369             val += dt[d * disp_steps];
 370
 371             if (val < best_val)
 372             {
 373                 best_val = val;
 374                 best = d;
 375             }
 376         }
 377
 378         (disp + y * disp_step)[x] = convert_short_sat(best);
 379     }
 380 }