1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
12 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
13 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
18 // Jia Haipeng, jiahaipeng95@gmail.com
19 // Peng Xiao, pengxiao@outlook.com
21 // Redistribution and use in source and binary forms, with or without modification,
22 // are permitted provided that the following conditions are met:
24 // * Redistribution's of source code must retain the above copyright notice,
25 // this list of conditions and the following disclaimer.
27 // * Redistribution's in binary form must reproduce the above copyright notice,
28 // this list of conditions and the following disclaimer in the documentation
29 // and/or other GpuMaterials provided with the distribution.
31 // * The name of the copyright holders may not be used to endorse or promote products
32 // derived from this software without specific prior written permission.
34 // This software is provided by the copyright holders and contributors as is and
35 // any express or implied warranties, including, but not limited to, the implied
36 // warranties of merchantability and fitness for a particular purpose are disclaimed.
37 // In no event shall the Intel Corporation or contributors be liable for any direct,
38 // indirect, incidental, special, exemplary, or consequential damages
39 // (including, but not limited to, procurement of substitute goods or services;
40 // loss of use, data, or profits; or business interruption) however caused
41 // and on any theory of liability, whether in contract, strict liability,
42 // or tort (including negligence or otherwise) arising in any way out of
43 // the use of this software, even if advised of the possibility of such damage.
47 #if defined (DOUBLE_SUPPORT)
50 #pragma OPENCL EXTENSION cl_khr_fp64:enable
51 #elif defined (cl_amd_fp64)
52 #pragma OPENCL EXTENSION cl_amd_fp64:enable
63 ///////////////////////////////////////////////////////////////
64 /////////////////common///////////////////////////////////////
65 /////////////////////////////////////////////////////////////
66 T saturate_cast(float v){
68 return convert_short_sat_rte(v);
74 #define FLOAT_MAX 3.402823466e+38f
81 float cdisc_single_jump;
83 ///////////////////////////////////////////////////////////////
84 ////////////////////////// comp data //////////////////////////
85 ///////////////////////////////////////////////////////////////
87 float pix_diff_1(__global const uchar *ls, __global const uchar *rs)
89 return abs((int)(*ls) - *rs);
92 float pix_diff_3(__global const uchar *ls, __global const uchar *rs)
94 const float tr = 0.299f;
95 const float tg = 0.587f;
96 const float tb = 0.114f;
100 val = tb * abs((int)ls[0] - rs[0]);
101 val += tg * abs((int)ls[1] - rs[1]);
102 val += tr * abs((int)ls[2] - rs[2]);
106 float pix_diff_4(__global const uchar *ls, __global const uchar *rs)
109 l = *((__global uchar4 *)ls);
110 r = *((__global uchar4 *)rs);
112 const float tr = 0.299f;
113 const float tg = 0.587f;
114 const float tb = 0.114f;
118 val = tb * abs((int)l.x - r.x);
119 val += tg * abs((int)l.y - r.y);
120 val += tr * abs((int)l.z - r.z);
130 #define CAT(X,Y) X##Y
131 #define CAT2(X,Y) CAT(X,Y)
133 #define PIX_DIFF CAT2(pix_diff_, CN)
135 __kernel void comp_data(__global uchar *left, int left_rows, int left_cols, int left_step,
136 __global uchar *right, int right_step,
137 __global T *data, int data_step,
138 __constant con_srtuct_t *con_st)
140 int x = get_global_id(0);
141 int y = get_global_id(1);
143 if (y > 0 && y < (left_rows - 1) && x > 0 && x < (left_cols - 1))
145 data_step /= sizeof(T);
146 const __global uchar* ls = left + y * left_step + x * CN;
147 const __global uchar* rs = right + y * right_step + x * CN;
149 __global T *ds = data + y * data_step + x;
151 const unsigned int disp_step = data_step * left_rows;
153 for (int disp = 0; disp < con_st -> cndisp; disp++)
158 val = PIX_DIFF(ls, rs - disp * CN);
159 ds[disp * disp_step] = saturate_cast(fmin(con_st -> cdata_weight * val,
160 con_st -> cdata_weight * con_st -> cmax_data_term));
164 ds[disp * disp_step] = saturate_cast(con_st -> cdata_weight * con_st -> cmax_data_term);
170 ///////////////////////////////////////////////////////////////
171 //////////////////////// data step down ///////////////////////
172 ///////////////////////////////////////////////////////////////
173 __kernel void data_step_down(__global T *src, int src_rows,
174 __global T *dst, int dst_rows, int dst_cols,
175 int src_step, int dst_step,
178 const int x = get_global_id(0);
179 const int y = get_global_id(1);
181 if (x < dst_cols && y < dst_rows)
183 src_step /= sizeof(T);
184 dst_step /= sizeof(T);
185 for (int d = 0; d < cndisp; ++d)
188 dst_reg = src[(d * src_rows + min(2*y+0, src_rows-1)) * src_step + 2*x+0];
189 dst_reg += src[(d * src_rows + min(2*y+1, src_rows-1)) * src_step + 2*x+0];
190 dst_reg += src[(d * src_rows + min(2*y+0, src_rows-1)) * src_step + 2*x+1];
191 dst_reg += src[(d * src_rows + min(2*y+1, src_rows-1)) * src_step + 2*x+1];
193 dst[(d * dst_rows + y) * dst_step + x] = saturate_cast(dst_reg);
198 ///////////////////////////////////////////////////////////////
199 /////////////////// level up messages ////////////////////////
200 ///////////////////////////////////////////////////////////////
201 __kernel void level_up_message(__global T *src, int src_rows, int src_step,
202 __global T *dst, int dst_rows, int dst_cols, int dst_step,
205 const int x = get_global_id(0);
206 const int y = get_global_id(1);
208 if (x < dst_cols && y < dst_rows)
210 src_step /= sizeof(T);
211 dst_step /= sizeof(T);
213 const int dst_disp_step = dst_step * dst_rows;
214 const int src_disp_step = src_step * src_rows;
216 __global T *dstr = dst + y * dst_step + x;
217 __global const T *srcr = src + (y / 2 * src_step) + (x / 2);
219 for (int d = 0; d < cndisp; ++d)
220 dstr[d * dst_disp_step] = srcr[d * src_disp_step];
224 ///////////////////////////////////////////////////////////////
225 //////////////////// calc all iterations /////////////////////
226 ///////////////////////////////////////////////////////////////
227 void calc_min_linear_penalty(__global T * dst, int disp_step,
228 int cndisp, float cdisc_single_jump)
233 for (int disp = 1; disp < cndisp; ++disp)
235 prev += cdisc_single_jump;
236 cur = dst[disp_step * disp];
241 dst[disp_step * disp] = saturate_cast(prev);
247 prev = dst[(cndisp - 1) * disp_step];
248 for (int disp = cndisp - 2; disp >= 0; disp--)
250 prev += cdisc_single_jump;
251 cur = dst[disp_step * disp];
256 dst[disp_step * disp] = saturate_cast(prev);
261 void message(const __global T *msg1, const __global T *msg2,
262 const __global T *msg3, const __global T *data, __global T *dst,
263 int msg_disp_step, int data_disp_step, int cndisp, float cmax_disc_term, float cdisc_single_jump)
265 float minimum = FLOAT_MAX;
267 for(int i = 0; i < cndisp; ++i)
270 dst_reg = msg1[msg_disp_step * i];
271 dst_reg += msg2[msg_disp_step * i];
272 dst_reg += msg3[msg_disp_step * i];
273 dst_reg += data[data_disp_step * i];
275 if (dst_reg < minimum)
278 dst[msg_disp_step * i] = saturate_cast(dst_reg);
281 calc_min_linear_penalty(dst, msg_disp_step, cndisp, cdisc_single_jump);
283 minimum += cmax_disc_term;
286 for(int i = 0; i < cndisp; ++i)
288 float dst_reg = dst[msg_disp_step * i];
289 if (dst_reg > minimum)
292 dst[msg_disp_step * i] = saturate_cast(minimum);
298 for(int i = 0; i < cndisp; ++i)
299 dst[msg_disp_step * i] -= sum;
301 __kernel void one_iteration(__global T *u, int u_step,
302 __global T *data, int data_step,
303 __global T *d, __global T *l, __global T *r,
304 int t, int cols, int rows,
305 int cndisp, float cmax_disc_term, float cdisc_single_jump)
307 const int y = get_global_id(1);
308 const int x = ((get_global_id(0)) << 1) + ((y + t) & 1);
310 if ((y > 0) && (y < rows - 1) && (x > 0) && (x < cols - 1))
313 data_step /= sizeof(T);
315 __global T *us = u + y * u_step + x;
316 __global T *ds = d + y * u_step + x;
317 __global T *ls = l + y * u_step + x;
318 __global T *rs = r + y * u_step + x;
319 const __global T *dt = data + y * data_step + x;
321 int msg_disp_step = u_step * rows;
322 int data_disp_step = data_step * rows;
324 message(us + u_step, ls + 1, rs - 1, dt, us, msg_disp_step, data_disp_step, cndisp,
325 cmax_disc_term, cdisc_single_jump);
326 message(ds - u_step, ls + 1, rs - 1, dt, ds, msg_disp_step, data_disp_step, cndisp,
327 cmax_disc_term, cdisc_single_jump);
329 message(us + u_step, ds - u_step, rs - 1, dt, rs, msg_disp_step, data_disp_step, cndisp,
330 cmax_disc_term, cdisc_single_jump);
331 message(us + u_step, ds - u_step, ls + 1, dt, ls, msg_disp_step, data_disp_step, cndisp,
332 cmax_disc_term, cdisc_single_jump);
336 ///////////////////////////////////////////////////////////////
337 /////////////////////////// output ////////////////////////////
338 ///////////////////////////////////////////////////////////////
339 __kernel void output(const __global T *u, int u_step,
340 const __global T *d, const __global T *l,
341 const __global T *r, const __global T *data,
342 __global T *disp, int disp_rows, int disp_cols, int disp_step,
345 const int x = get_global_id(0);
346 const int y = get_global_id(1);
348 if (y > 0 && y < disp_rows - 1 && x > 0 && x < disp_cols - 1)
351 disp_step /= sizeof(T);
352 const __global T *us = u + (y + 1) * u_step + x;
353 const __global T *ds = d + (y - 1) * u_step + x;
354 const __global T *ls = l + y * u_step + (x + 1);
355 const __global T *rs = r + y * u_step + (x - 1);
356 const __global T *dt = data + y * u_step + x;
358 int disp_steps = disp_rows * u_step;
361 float best_val = FLOAT_MAX;
362 for (int d = 0; d < cndisp; ++d)
365 val = us[d * disp_steps];
366 val += ds[d * disp_steps];
367 val += ls[d * disp_steps];
368 val += rs[d * disp_steps];
369 val += dt[d * disp_steps];
378 (disp + y * disp_step)[x] = convert_short_sat(best);