1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
16 // Third party copyrights are property of their respective owners.
18 // Redistribution and use in source and binary forms, with or without modification,
19 // are permitted provided that the following conditions are met:
21 // * Redistribution's of source code must retain the above copyright notice,
22 // this list of conditions and the following disclaimer.
24 // * Redistribution's in binary form must reproduce the above copyright notice,
25 // this list of conditions and the following disclaimer in the documentation
26 // and/or other materials provided with the distribution.
28 // * The name of the copyright holders may not be used to endorse or promote products
29 // derived from this software without specific prior written permission.
31 // This software is provided by the copyright holders and contributors "as is" and
32 // any express or implied warranties, including, but not limited to, the implied
33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
34 // In no event shall the Intel Corporation or contributors be liable for any direct,
35 // indirect, incidental, special, exemplary, or consequential damages
36 // (including, but not limited to, procurement of substitute goods or services;
37 // loss of use, data, or profits; or business interruption) however caused
38 // and on any theory of liability, whether in contract, strict liability,
39 // or tort (including negligence or otherwise) arising in any way out of
40 // the use of this software, even if advised of the possibility of such damage.
45 This is a variation of
46 "Stereo Processing by Semiglobal Matching and Mutual Information"
47 by Heiko Hirschmuller.
49 We match blocks rather than individual pixels, thus the algorithm is called
50 SGBM (Semi-global block matching)
53 #include "precomp.hpp"
59 typedef uchar PixType;
60 typedef short CostType;
61 typedef short DispType;
63 enum { NR = 16, NR2 = NR/2 };
66 struct StereoSGBMParams
70 minDisparity = numDisparities = 0;
76 speckleWindowSize = 0;
78 mode = StereoSGBM::MODE_SGBM;
81 StereoSGBMParams( int _minDisparity, int _numDisparities, int _SADWindowSize,
82 int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap,
83 int _uniquenessRatio, int _speckleWindowSize, int _speckleRange,
86 minDisparity = _minDisparity;
87 numDisparities = _numDisparities;
88 SADWindowSize = _SADWindowSize;
91 disp12MaxDiff = _disp12MaxDiff;
92 preFilterCap = _preFilterCap;
93 uniquenessRatio = _uniquenessRatio;
94 speckleWindowSize = _speckleWindowSize;
95 speckleRange = _speckleRange;
106 int speckleWindowSize;
113 For each pixel row1[x], max(-maxD, 0) <= minX <= x < maxX <= width - max(0, -minD),
114 and for each disparity minD<=d<maxD the function
115 computes the cost (cost[(x-minX)*(maxD - minD) + (d - minD)]), depending on the difference between
116 row1[x] and row2[x-d]. The subpixel algorithm from
117 "Depth Discontinuities by Pixel-to-Pixel Stereo" by Stan Birchfield and C. Tomasi
118 is used, hence the suffix BT.
120 the temporary buffer should contain width2*2 elements
122 static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
123 int minD, int maxD, CostType* cost,
124 PixType* buffer, const PixType* tab,
127 int x, c, width = img1.cols, cn = img1.channels();
128 int minX1 = std::max(-maxD, 0), maxX1 = width + std::min(minD, 0);
129 int minX2 = std::max(minX1 - maxD, 0), maxX2 = std::min(maxX1 - minD, width);
130 int D = maxD - minD, width1 = maxX1 - minX1, width2 = maxX2 - minX2;
131 const PixType *row1 = img1.ptr<PixType>(y), *row2 = img2.ptr<PixType>(y);
132 PixType *prow1 = buffer + width2*2, *prow2 = prow1 + width*cn*2;
136 for( c = 0; c < cn*2; c++ )
138 prow1[width*c] = prow1[width*c + width-1] =
139 prow2[width*c] = prow2[width*c + width-1] = tab[0];
142 int n1 = y > 0 ? -(int)img1.step : 0, s1 = y < img1.rows-1 ? (int)img1.step : 0;
143 int n2 = y > 0 ? -(int)img2.step : 0, s2 = y < img2.rows-1 ? (int)img2.step : 0;
147 for( x = 1; x < width-1; x++ )
149 prow1[x] = tab[(row1[x+1] - row1[x-1])*2 + row1[x+n1+1] - row1[x+n1-1] + row1[x+s1+1] - row1[x+s1-1]];
150 prow2[width-1-x] = tab[(row2[x+1] - row2[x-1])*2 + row2[x+n2+1] - row2[x+n2-1] + row2[x+s2+1] - row2[x+s2-1]];
152 prow1[x+width] = row1[x];
153 prow2[width-1-x+width] = row2[x];
158 for( x = 1; x < width-1; x++ )
160 prow1[x] = tab[(row1[x*3+3] - row1[x*3-3])*2 + row1[x*3+n1+3] - row1[x*3+n1-3] + row1[x*3+s1+3] - row1[x*3+s1-3]];
161 prow1[x+width] = tab[(row1[x*3+4] - row1[x*3-2])*2 + row1[x*3+n1+4] - row1[x*3+n1-2] + row1[x*3+s1+4] - row1[x*3+s1-2]];
162 prow1[x+width*2] = tab[(row1[x*3+5] - row1[x*3-1])*2 + row1[x*3+n1+5] - row1[x*3+n1-1] + row1[x*3+s1+5] - row1[x*3+s1-1]];
164 prow2[width-1-x] = tab[(row2[x*3+3] - row2[x*3-3])*2 + row2[x*3+n2+3] - row2[x*3+n2-3] + row2[x*3+s2+3] - row2[x*3+s2-3]];
165 prow2[width-1-x+width] = tab[(row2[x*3+4] - row2[x*3-2])*2 + row2[x*3+n2+4] - row2[x*3+n2-2] + row2[x*3+s2+4] - row2[x*3+s2-2]];
166 prow2[width-1-x+width*2] = tab[(row2[x*3+5] - row2[x*3-1])*2 + row2[x*3+n2+5] - row2[x*3+n2-1] + row2[x*3+s2+5] - row2[x*3+s2-1]];
168 prow1[x+width*3] = row1[x*3];
169 prow1[x+width*4] = row1[x*3+1];
170 prow1[x+width*5] = row1[x*3+2];
172 prow2[width-1-x+width*3] = row2[x*3];
173 prow2[width-1-x+width*4] = row2[x*3+1];
174 prow2[width-1-x+width*5] = row2[x*3+2];
178 memset( cost, 0, width1*D*sizeof(cost[0]) );
181 cost -= minX1*D + minD; // simplify the cost indices inside the loop
184 volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
188 for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
190 int diff_scale = c < cn ? 0 : 2;
193 // v0 = min(row2[x-1/2], row2[x], row2[x+1/2]) and
194 // v1 = max(row2[x-1/2], row2[x], row2[x+1/2]) and
195 for( x = minX2; x < maxX2; x++ )
198 int vl = x > 0 ? (v + prow2[x-1])/2 : v;
199 int vr = x < width-1 ? (v + prow2[x+1])/2 : v;
200 int v0 = std::min(vl, vr); v0 = std::min(v0, v);
201 int v1 = std::max(vl, vr); v1 = std::max(v1, v);
202 buffer[x] = (PixType)v0;
203 buffer[x + width2] = (PixType)v1;
206 for( x = minX1; x < maxX1; x++ )
209 int ul = x > 0 ? (u + prow1[x-1])/2 : u;
210 int ur = x < width-1 ? (u + prow1[x+1])/2 : u;
211 int u0 = std::min(ul, ur); u0 = std::min(u0, u);
212 int u1 = std::max(ul, ur); u1 = std::max(u1, u);
217 __m128i _u = _mm_set1_epi8((char)u), _u0 = _mm_set1_epi8((char)u0);
218 __m128i _u1 = _mm_set1_epi8((char)u1), z = _mm_setzero_si128();
219 __m128i ds = _mm_cvtsi32_si128(diff_scale);
221 for( int d = minD; d < maxD; d += 16 )
223 __m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-x-1 + d));
224 __m128i _v0 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d));
225 __m128i _v1 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d + width2));
226 __m128i c0 = _mm_max_epu8(_mm_subs_epu8(_u, _v1), _mm_subs_epu8(_v0, _u));
227 __m128i c1 = _mm_max_epu8(_mm_subs_epu8(_v, _u1), _mm_subs_epu8(_u0, _v));
228 __m128i diff = _mm_min_epu8(c0, c1);
230 c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
231 c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));
233 _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_srl_epi16(_mm_unpacklo_epi8(diff,z), ds)));
234 _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_srl_epi16(_mm_unpackhi_epi8(diff,z), ds)));
240 for( int d = minD; d < maxD; d++ )
242 int v = prow2[width-x-1 + d];
243 int v0 = buffer[width-x-1 + d];
244 int v1 = buffer[width-x-1 + d + width2];
245 int c0 = std::max(0, u - v1); c0 = std::max(c0, v0 - u);
246 int c1 = std::max(0, v - u1); c1 = std::max(c1, u0 - v);
248 cost[x*D + d] = (CostType)(cost[x*D+d] + (std::min(c0, c1) >> diff_scale));
254 for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
256 for( x = minX1; x < maxX1; x++ )
262 __m128i _u = _mm_set1_epi8(u), z = _mm_setzero_si128();
264 for( int d = minD; d < maxD; d += 16 )
266 __m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-1-x + d));
267 __m128i diff = _mm_adds_epu8(_mm_subs_epu8(_u,_v), _mm_subs_epu8(_v,_u));
268 __m128i c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
269 __m128i c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));
271 _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_unpacklo_epi8(diff,z)));
272 _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_unpackhi_epi8(diff,z)));
278 for( int d = minD; d < maxD; d++ )
280 int v = prow2[width-1-x + d];
281 cost[x*D + d] = (CostType)(cost[x*D + d] + (CostType)std::abs(u - v));
291 computes disparity for "roi" in img1 w.r.t. img2 and write it to disp1buf.
292 that is, disp1buf(x, y)=d means that img1(x+roi.x, y+roi.y) ~ img2(x+roi.x-d, y+roi.y).
294 disp2full is the reverse disparity map, that is:
295 disp2full(x+roi.x,y+roi.y)=d means that img2(x+roi.x, y+roi.y) ~ img1(x+roi.x+d, y+roi.y)
297 note that disp1buf will have the same size as the roi and
298 disp2full will have the same size as img1 (or img2).
299 On exit disp2buf is not the final disparity, it is an intermediate result that becomes
300 final after all the tiles are processed.
302 the disparity in disp1buf is written with sub-pixel accuracy
303 (4 fractional bits, see StereoSGBM::DISP_SCALE),
304 using quadratic interpolation, while the disparity in disp2buf
305 is written as is, without interpolation.
307 disp2cost also has the same size as img1 (or img2).
308 It contains the minimum current cost, used to find the best disparity, corresponding to the minimal cost.
310 static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
311 Mat& disp1, const StereoSGBMParams& params,
315 static const uchar LSBTab[] =
317 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
318 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
319 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
320 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
321 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
322 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
323 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
324 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
327 volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
330 const int ALIGN = 16;
331 const int DISP_SHIFT = StereoMatcher::DISP_SHIFT;
332 const int DISP_SCALE = (1 << DISP_SHIFT);
333 const CostType MAX_COST = SHRT_MAX;
335 int minD = params.minDisparity, maxD = minD + params.numDisparities;
337 SADWindowSize.width = SADWindowSize.height = params.SADWindowSize > 0 ? params.SADWindowSize : 5;
338 int ftzero = std::max(params.preFilterCap, 15) | 1;
339 int uniquenessRatio = params.uniquenessRatio >= 0 ? params.uniquenessRatio : 10;
340 int disp12MaxDiff = params.disp12MaxDiff > 0 ? params.disp12MaxDiff : 1;
341 int P1 = params.P1 > 0 ? params.P1 : 2, P2 = std::max(params.P2 > 0 ? params.P2 : 5, P1+1);
342 int k, width = disp1.cols, height = disp1.rows;
343 int minX1 = std::max(-maxD, 0), maxX1 = width + std::min(minD, 0);
344 int D = maxD - minD, width1 = maxX1 - minX1;
345 int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;
346 int SW2 = SADWindowSize.width/2, SH2 = SADWindowSize.height/2;
347 bool fullDP = params.mode == StereoSGBM::MODE_HH;
348 int npasses = fullDP ? 2 : 1;
349 const int TAB_OFS = 256*4, TAB_SIZE = 256 + TAB_OFS*2;
350 PixType clipTab[TAB_SIZE];
352 for( k = 0; k < TAB_SIZE; k++ )
353 clipTab[k] = (PixType)(std::min(std::max(k - TAB_OFS, -ftzero), ftzero) + ftzero);
357 disp1 = Scalar::all(INVALID_DISP_SCALED);
361 CV_Assert( D % 16 == 0 );
363 // NR - the number of directions. the loop on x below that computes Lr assumes that NR == 8.
364 // if you change NR, please, modify the loop as well.
365 int D2 = D+16, NRD2 = NR2*D2;
367 // the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer:
368 // for 8-way dynamic programming we need the current row and
369 // the previous row, i.e. 2 rows in total
371 const int LrBorder = NLR - 1;
373 // for each possible stereo match (img1(x,y) <=> img2(x-d,y))
374 // we keep pixel difference cost (C) and the summary cost over NR directions (S).
375 // we also keep all the partial costs for the previous line L_r(x,d) and also min_k L_r(x, k)
376 size_t costBufSize = width1*D;
377 size_t CSBufSize = costBufSize*(fullDP ? height : 1);
378 size_t minLrSize = (width1 + LrBorder*2)*NR2, LrSize = minLrSize*D2;
379 int hsumBufNRows = SH2*2 + 2;
380 size_t totalBufSize = (LrSize + minLrSize)*NLR*sizeof(CostType) + // minLr[] and Lr[]
381 costBufSize*(hsumBufNRows + 1)*sizeof(CostType) + // hsumBuf, pixdiff
382 CSBufSize*2*sizeof(CostType) + // C, S
383 width*16*img1.channels()*sizeof(PixType) + // temp buffer for computing per-pixel cost
384 width*(sizeof(CostType) + sizeof(DispType)) + 1024; // disp2cost + disp2
386 if( !buffer.data || !buffer.isContinuous() ||
387 buffer.cols*buffer.rows*buffer.elemSize() < totalBufSize )
388 buffer.create(1, (int)totalBufSize, CV_8U);
390 // summary cost over different (nDirs) directions
391 CostType* Cbuf = (CostType*)alignPtr(buffer.data, ALIGN);
392 CostType* Sbuf = Cbuf + CSBufSize;
393 CostType* hsumBuf = Sbuf + CSBufSize;
394 CostType* pixDiff = hsumBuf + costBufSize*hsumBufNRows;
396 CostType* disp2cost = pixDiff + costBufSize + (LrSize + minLrSize)*NLR;
397 DispType* disp2ptr = (DispType*)(disp2cost + width);
398 PixType* tempBuf = (PixType*)(disp2ptr + width);
400 // add P2 to every C(x,y). it saves a few operations in the inner loops
401 for( k = 0; k < width1*D; k++ )
402 Cbuf[k] = (CostType)P2;
404 for( int pass = 1; pass <= npasses; pass++ )
406 int x1, y1, x2, y2, dx, dy;
410 y1 = 0; y2 = height; dy = 1;
411 x1 = 0; x2 = width1; dx = 1;
415 y1 = height-1; y2 = -1; dy = -1;
416 x1 = width1-1; x2 = -1; dx = -1;
419 CostType *Lr[NLR]={0}, *minLr[NLR]={0};
421 for( k = 0; k < NLR; k++ )
423 // shift Lr[k] and minLr[k] pointers, because we allocated them with the borders,
424 // and will occasionally use negative indices with the arrays
425 // we need to shift Lr[k] pointers by 1, to give the space for d=-1.
426 // however, then the alignment will be imperfect, i.e. bad for SSE,
427 // thus we shift the pointers by 8 (8*sizeof(short) == 16 - ideal alignment)
428 Lr[k] = pixDiff + costBufSize + LrSize*k + NRD2*LrBorder + 8;
429 memset( Lr[k] - LrBorder*NRD2 - 8, 0, LrSize*sizeof(CostType) );
430 minLr[k] = pixDiff + costBufSize + LrSize*NLR + minLrSize*k + NR2*LrBorder;
431 memset( minLr[k] - LrBorder*NR2, 0, minLrSize*sizeof(CostType) );
434 for( int y = y1; y != y2; y += dy )
437 DispType* disp1ptr = disp1.ptr<DispType>(y);
438 CostType* C = Cbuf + (!fullDP ? 0 : y*costBufSize);
439 CostType* S = Sbuf + (!fullDP ? 0 : y*costBufSize);
441 if( pass == 1 ) // compute C on the first pass, and reuse it on the second pass, if any.
443 int dy1 = y == 0 ? 0 : y + SH2, dy2 = y == 0 ? SH2 : dy1;
445 for( k = dy1; k <= dy2; k++ )
447 CostType* hsumAdd = hsumBuf + (std::min(k, height-1) % hsumBufNRows)*costBufSize;
451 calcPixelCostBT( img1, img2, k, minD, maxD, pixDiff, tempBuf, clipTab, TAB_OFS, ftzero );
453 memset(hsumAdd, 0, D*sizeof(CostType));
454 for( x = 0; x <= SW2*D; x += D )
456 int scale = x == 0 ? SW2 + 1 : 1;
457 for( d = 0; d < D; d++ )
458 hsumAdd[d] = (CostType)(hsumAdd[d] + pixDiff[x + d]*scale);
463 const CostType* hsumSub = hsumBuf + (std::max(y - SH2 - 1, 0) % hsumBufNRows)*costBufSize;
464 const CostType* Cprev = !fullDP || y == 0 ? C : C - costBufSize;
466 for( x = D; x < width1*D; x += D )
468 const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D);
469 const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
474 for( d = 0; d < D; d += 8 )
476 __m128i hv = _mm_load_si128((const __m128i*)(hsumAdd + x - D + d));
477 __m128i Cx = _mm_load_si128((__m128i*)(Cprev + x + d));
478 hv = _mm_adds_epi16(_mm_subs_epi16(hv,
479 _mm_load_si128((const __m128i*)(pixSub + d))),
480 _mm_load_si128((const __m128i*)(pixAdd + d)));
481 Cx = _mm_adds_epi16(_mm_subs_epi16(Cx,
482 _mm_load_si128((const __m128i*)(hsumSub + x + d))),
484 _mm_store_si128((__m128i*)(hsumAdd + x + d), hv);
485 _mm_store_si128((__m128i*)(C + x + d), Cx);
491 for( d = 0; d < D; d++ )
493 int hv = hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
494 C[x + d] = (CostType)(Cprev[x + d] + hv - hsumSub[x + d]);
501 for( x = D; x < width1*D; x += D )
503 const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D);
504 const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
506 for( d = 0; d < D; d++ )
507 hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
514 int scale = k == 0 ? SH2 + 1 : 1;
515 for( x = 0; x < width1*D; x++ )
516 C[x] = (CostType)(C[x] + hsumAdd[x]*scale);
520 // also, clear the S buffer
521 for( k = 0; k < width1*D; k++ )
525 // clear the left and the right borders
526 memset( Lr[0] - NRD2*LrBorder - 8, 0, NRD2*LrBorder*sizeof(CostType) );
527 memset( Lr[0] + width1*NRD2 - 8, 0, NRD2*LrBorder*sizeof(CostType) );
528 memset( minLr[0] - NR2*LrBorder, 0, NR2*LrBorder*sizeof(CostType) );
529 memset( minLr[0] + width1*NR2, 0, NR2*LrBorder*sizeof(CostType) );
532 [formula 13 in the paper]
533 compute L_r(p, d) = C(p, d) +
537 min_k L_r(p-r, k) + P2) - min_k L_r(p-r, k)
538 where p = (x,y), r is one of the directions.
539 we process all the directions at once:
549 for( x = x1; x != x2; x += dx )
551 int xm = x*NR2, xd = xm*D2;
553 int delta0 = minLr[0][xm - dx*NR2] + P2, delta1 = minLr[1][xm - NR2 + 1] + P2;
554 int delta2 = minLr[1][xm + 2] + P2, delta3 = minLr[1][xm + NR2 + 3] + P2;
556 CostType* Lr_p0 = Lr[0] + xd - dx*NRD2;
557 CostType* Lr_p1 = Lr[1] + xd - NRD2 + D2;
558 CostType* Lr_p2 = Lr[1] + xd + D2*2;
559 CostType* Lr_p3 = Lr[1] + xd + NRD2 + D2*3;
561 Lr_p0[-1] = Lr_p0[D] = Lr_p1[-1] = Lr_p1[D] =
562 Lr_p2[-1] = Lr_p2[D] = Lr_p3[-1] = Lr_p3[D] = MAX_COST;
564 CostType* Lr_p = Lr[0] + xd;
565 const CostType* Cp = C + x*D;
566 CostType* Sp = S + x*D;
571 __m128i _P1 = _mm_set1_epi16((short)P1);
573 __m128i _delta0 = _mm_set1_epi16((short)delta0);
574 __m128i _delta1 = _mm_set1_epi16((short)delta1);
575 __m128i _delta2 = _mm_set1_epi16((short)delta2);
576 __m128i _delta3 = _mm_set1_epi16((short)delta3);
577 __m128i _minL0 = _mm_set1_epi16((short)MAX_COST);
579 for( d = 0; d < D; d += 8 )
581 __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d));
582 __m128i L0, L1, L2, L3;
584 L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
585 L1 = _mm_load_si128((const __m128i*)(Lr_p1 + d));
586 L2 = _mm_load_si128((const __m128i*)(Lr_p2 + d));
587 L3 = _mm_load_si128((const __m128i*)(Lr_p3 + d));
589 L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
590 L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));
592 L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d - 1)), _P1));
593 L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d + 1)), _P1));
595 L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d - 1)), _P1));
596 L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d + 1)), _P1));
598 L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d - 1)), _P1));
599 L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d + 1)), _P1));
601 L0 = _mm_min_epi16(L0, _delta0);
602 L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);
604 L1 = _mm_min_epi16(L1, _delta1);
605 L1 = _mm_adds_epi16(_mm_subs_epi16(L1, _delta1), Cpd);
607 L2 = _mm_min_epi16(L2, _delta2);
608 L2 = _mm_adds_epi16(_mm_subs_epi16(L2, _delta2), Cpd);
610 L3 = _mm_min_epi16(L3, _delta3);
611 L3 = _mm_adds_epi16(_mm_subs_epi16(L3, _delta3), Cpd);
613 _mm_store_si128( (__m128i*)(Lr_p + d), L0);
614 _mm_store_si128( (__m128i*)(Lr_p + d + D2), L1);
615 _mm_store_si128( (__m128i*)(Lr_p + d + D2*2), L2);
616 _mm_store_si128( (__m128i*)(Lr_p + d + D2*3), L3);
618 __m128i t0 = _mm_min_epi16(_mm_unpacklo_epi16(L0, L2), _mm_unpackhi_epi16(L0, L2));
619 __m128i t1 = _mm_min_epi16(_mm_unpacklo_epi16(L1, L3), _mm_unpackhi_epi16(L1, L3));
620 t0 = _mm_min_epi16(_mm_unpacklo_epi16(t0, t1), _mm_unpackhi_epi16(t0, t1));
621 _minL0 = _mm_min_epi16(_minL0, t0);
623 __m128i Sval = _mm_load_si128((const __m128i*)(Sp + d));
625 L0 = _mm_adds_epi16(L0, L1);
626 L2 = _mm_adds_epi16(L2, L3);
627 Sval = _mm_adds_epi16(Sval, L0);
628 Sval = _mm_adds_epi16(Sval, L2);
630 _mm_store_si128((__m128i*)(Sp + d), Sval);
633 _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
634 _mm_storel_epi64((__m128i*)&minLr[0][xm], _minL0);
639 int minL0 = MAX_COST, minL1 = MAX_COST, minL2 = MAX_COST, minL3 = MAX_COST;
641 for( d = 0; d < D; d++ )
643 int Cpd = Cp[d], L0, L1, L2, L3;
645 L0 = Cpd + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0;
646 L1 = Cpd + std::min((int)Lr_p1[d], std::min(Lr_p1[d-1] + P1, std::min(Lr_p1[d+1] + P1, delta1))) - delta1;
647 L2 = Cpd + std::min((int)Lr_p2[d], std::min(Lr_p2[d-1] + P1, std::min(Lr_p2[d+1] + P1, delta2))) - delta2;
648 L3 = Cpd + std::min((int)Lr_p3[d], std::min(Lr_p3[d-1] + P1, std::min(Lr_p3[d+1] + P1, delta3))) - delta3;
650 Lr_p[d] = (CostType)L0;
651 minL0 = std::min(minL0, L0);
653 Lr_p[d + D2] = (CostType)L1;
654 minL1 = std::min(minL1, L1);
656 Lr_p[d + D2*2] = (CostType)L2;
657 minL2 = std::min(minL2, L2);
659 Lr_p[d + D2*3] = (CostType)L3;
660 minL3 = std::min(minL3, L3);
662 Sp[d] = saturate_cast<CostType>(Sp[d] + L0 + L1 + L2 + L3);
664 minLr[0][xm] = (CostType)minL0;
665 minLr[0][xm+1] = (CostType)minL1;
666 minLr[0][xm+2] = (CostType)minL2;
667 minLr[0][xm+3] = (CostType)minL3;
671 if( pass == npasses )
673 for( x = 0; x < width; x++ )
675 disp1ptr[x] = disp2ptr[x] = (DispType)INVALID_DISP_SCALED;
676 disp2cost[x] = MAX_COST;
679 for( x = width1 - 1; x >= 0; x-- )
681 CostType* Sp = S + x*D;
682 int minS = MAX_COST, bestDisp = -1;
686 int xm = x*NR2, xd = xm*D2;
688 int minL0 = MAX_COST;
689 int delta0 = minLr[0][xm + NR2] + P2;
690 CostType* Lr_p0 = Lr[0] + xd + NRD2;
691 Lr_p0[-1] = Lr_p0[D] = MAX_COST;
692 CostType* Lr_p = Lr[0] + xd;
694 const CostType* Cp = C + x*D;
699 __m128i _P1 = _mm_set1_epi16((short)P1);
700 __m128i _delta0 = _mm_set1_epi16((short)delta0);
702 __m128i _minL0 = _mm_set1_epi16((short)minL0);
703 __m128i _minS = _mm_set1_epi16(MAX_COST), _bestDisp = _mm_set1_epi16(-1);
704 __m128i _d8 = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7), _8 = _mm_set1_epi16(8);
706 for( d = 0; d < D; d += 8 )
708 __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d)), L0;
710 L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
711 L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
712 L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));
713 L0 = _mm_min_epi16(L0, _delta0);
714 L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);
716 _mm_store_si128((__m128i*)(Lr_p + d), L0);
717 _minL0 = _mm_min_epi16(_minL0, L0);
718 L0 = _mm_adds_epi16(L0, *(__m128i*)(Sp + d));
719 _mm_store_si128((__m128i*)(Sp + d), L0);
721 __m128i mask = _mm_cmpgt_epi16(_minS, L0);
722 _minS = _mm_min_epi16(_minS, L0);
723 _bestDisp = _mm_xor_si128(_bestDisp, _mm_and_si128(_mm_xor_si128(_bestDisp,_d8), mask));
724 _d8 = _mm_adds_epi16(_d8, _8);
727 short CV_DECL_ALIGNED(16) bestDispBuf[8];
728 _mm_store_si128((__m128i*)bestDispBuf, _bestDisp);
730 _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
731 _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 4));
732 _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 2));
734 __m128i qS = _mm_min_epi16(_minS, _mm_srli_si128(_minS, 8));
735 qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 4));
736 qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 2));
738 minLr[0][xm] = (CostType)_mm_cvtsi128_si32(_minL0);
739 minS = (CostType)_mm_cvtsi128_si32(qS);
741 qS = _mm_shuffle_epi32(_mm_unpacklo_epi16(qS, qS), 0);
742 qS = _mm_cmpeq_epi16(_minS, qS);
743 int idx = _mm_movemask_epi8(_mm_packs_epi16(qS, qS)) & 255;
745 bestDisp = bestDispBuf[LSBTab[idx]];
750 for( d = 0; d < D; d++ )
752 int L0 = Cp[d] + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0;
754 Lr_p[d] = (CostType)L0;
755 minL0 = std::min(minL0, L0);
757 int Sval = Sp[d] = saturate_cast<CostType>(Sp[d] + L0);
764 minLr[0][xm] = (CostType)minL0;
769 for( d = 0; d < D; d++ )
780 for( d = 0; d < D; d++ )
782 if( Sp[d]*(100 - uniquenessRatio) < minS*100 && std::abs(bestDisp - d) > 1 )
788 int _x2 = x + minX1 - d - minD;
789 if( disp2cost[_x2] > minS )
791 disp2cost[_x2] = (CostType)minS;
792 disp2ptr[_x2] = (DispType)(d + minD);
795 if( 0 < d && d < D-1 )
797 // do subpixel quadratic interpolation:
798 // fit parabola into (x1=d-1, y1=Sp[d-1]), (x2=d, y2=Sp[d]), (x3=d+1, y3=Sp[d+1])
799 // then find minimum of the parabola.
800 int denom2 = std::max(Sp[d-1] + Sp[d+1] - 2*Sp[d], 1);
801 d = d*DISP_SCALE + ((Sp[d-1] - Sp[d+1])*DISP_SCALE + denom2)/(denom2*2);
805 disp1ptr[x + minX1] = (DispType)(d + minD*DISP_SCALE);
808 for( x = minX1; x < maxX1; x++ )
810 // we round the computed disparity both towards -inf and +inf and check
811 // if either of the corresponding disparities in disp2 is consistent.
812 // This is to give the computed disparity a chance to look valid if it is.
813 int d1 = disp1ptr[x];
814 if( d1 == INVALID_DISP_SCALED )
816 int _d = d1 >> DISP_SHIFT;
817 int d_ = (d1 + DISP_SCALE-1) >> DISP_SHIFT;
818 int _x = x - _d, x_ = x - d_;
819 if( 0 <= _x && _x < width && disp2ptr[_x] >= minD && std::abs(disp2ptr[_x] - _d) > disp12MaxDiff &&
820 0 <= x_ && x_ < width && disp2ptr[x_] >= minD && std::abs(disp2ptr[x_] - d_) > disp12MaxDiff )
821 disp1ptr[x] = (DispType)INVALID_DISP_SCALED;
825 // now shift the cyclic buffers
826 std::swap( Lr[0], Lr[1] );
827 std::swap( minLr[0], minLr[1] );
832 class StereoSGBMImpl : public StereoSGBM
837 params = StereoSGBMParams();
840 StereoSGBMImpl( int _minDisparity, int _numDisparities, int _SADWindowSize,
841 int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap,
842 int _uniquenessRatio, int _speckleWindowSize, int _speckleRange,
845 params = StereoSGBMParams( _minDisparity, _numDisparities, _SADWindowSize,
846 _P1, _P2, _disp12MaxDiff, _preFilterCap,
847 _uniquenessRatio, _speckleWindowSize, _speckleRange,
851 void compute( InputArray leftarr, InputArray rightarr, OutputArray disparr )
853 Mat left = leftarr.getMat(), right = rightarr.getMat();
854 CV_Assert( left.size() == right.size() && left.type() == right.type() &&
855 left.depth() == CV_8U );
857 disparr.create( left.size(), CV_16S );
858 Mat disp = disparr.getMat();
860 computeDisparitySGBM( left, right, disp, params, buffer );
861 medianBlur(disp, disp, 3);
863 if( params.speckleWindowSize > 0 )
864 filterSpeckles(disp, (params.minDisparity - 1)*StereoMatcher::DISP_SCALE, params.speckleWindowSize,
865 StereoMatcher::DISP_SCALE*params.speckleRange, buffer);
868 AlgorithmInfo* info() const { return 0; }
870 int getMinDisparity() const { return params.minDisparity; }
871 void setMinDisparity(int minDisparity) { params.minDisparity = minDisparity; }
873 int getNumDisparities() const { return params.numDisparities; }
874 void setNumDisparities(int numDisparities) { params.numDisparities = numDisparities; }
876 int getBlockSize() const { return params.SADWindowSize; }
877 void setBlockSize(int blockSize) { params.SADWindowSize = blockSize; }
879 int getSpeckleWindowSize() const { return params.speckleWindowSize; }
880 void setSpeckleWindowSize(int speckleWindowSize) { params.speckleWindowSize = speckleWindowSize; }
882 int getSpeckleRange() const { return params.speckleRange; }
883 void setSpeckleRange(int speckleRange) { params.speckleRange = speckleRange; }
885 int getDisp12MaxDiff() const { return params.disp12MaxDiff; }
886 void setDisp12MaxDiff(int disp12MaxDiff) { params.disp12MaxDiff = disp12MaxDiff; }
888 int getPreFilterCap() const { return params.preFilterCap; }
889 void setPreFilterCap(int preFilterCap) { params.preFilterCap = preFilterCap; }
891 int getUniquenessRatio() const { return params.uniquenessRatio; }
892 void setUniquenessRatio(int uniquenessRatio) { params.uniquenessRatio = uniquenessRatio; }
894 int getP1() const { return params.P1; }
895 void setP1(int P1) { params.P1 = P1; }
897 int getP2() const { return params.P2; }
898 void setP2(int P2) { params.P2 = P2; }
900 int getMode() const { return params.mode; }
901 void setMode(int mode) { params.mode = mode; }
903 void write(FileStorage& fs) const
905 fs << "name" << name_
906 << "minDisparity" << params.minDisparity
907 << "numDisparities" << params.numDisparities
908 << "blockSize" << params.SADWindowSize
909 << "speckleWindowSize" << params.speckleWindowSize
910 << "speckleRange" << params.speckleRange
911 << "disp12MaxDiff" << params.disp12MaxDiff
912 << "preFilterCap" << params.preFilterCap
913 << "uniquenessRatio" << params.uniquenessRatio
916 << "mode" << params.mode;
919 void read(const FileNode& fn)
921 FileNode n = fn["name"];
922 CV_Assert( n.isString() && String(n) == name_ );
923 params.minDisparity = (int)fn["minDisparity"];
924 params.numDisparities = (int)fn["numDisparities"];
925 params.SADWindowSize = (int)fn["blockSize"];
926 params.speckleWindowSize = (int)fn["speckleWindowSize"];
927 params.speckleRange = (int)fn["speckleRange"];
928 params.disp12MaxDiff = (int)fn["disp12MaxDiff"];
929 params.preFilterCap = (int)fn["preFilterCap"];
930 params.uniquenessRatio = (int)fn["uniquenessRatio"];
931 params.P1 = (int)fn["P1"];
932 params.P2 = (int)fn["P2"];
933 params.mode = (int)fn["mode"];
936 StereoSGBMParams params;
938 static const char* name_;
941 const char* StereoSGBMImpl::name_ = "StereoMatcher.SGBM";
944 Ptr<StereoSGBM> createStereoSGBM(int minDisparity, int numDisparities, int SADWindowSize,
945 int P1, int P2, int disp12MaxDiff,
946 int preFilterCap, int uniquenessRatio,
947 int speckleWindowSize, int speckleRange,
950 return Ptr<StereoSGBM>(
951 new StereoSGBMImpl(minDisparity, numDisparities, SADWindowSize,
952 P1, P2, disp12MaxDiff,
953 preFilterCap, uniquenessRatio,
954 speckleWindowSize, speckleRange,
958 Rect getValidDisparityROI( Rect roi1, Rect roi2,
960 int numberOfDisparities,
963 int SW2 = SADWindowSize/2;
964 int minD = minDisparity, maxD = minDisparity + numberOfDisparities - 1;
966 int xmin = std::max(roi1.x, roi2.x + maxD) + SW2;
967 int xmax = std::min(roi1.x + roi1.width, roi2.x + roi2.width - minD) - SW2;
968 int ymin = std::max(roi1.y, roi2.y) + SW2;
969 int ymax = std::min(roi1.y + roi1.height, roi2.y + roi2.height) - SW2;
971 Rect r(xmin, ymin, xmax - xmin, ymax - ymin);
973 return r.width > 0 && r.height > 0 ? r : Rect();
976 typedef cv::Point_<short> Point2s;
978 template <typename T>
979 void filterSpecklesImpl(cv::Mat& img, int newVal, int maxSpeckleSize, int maxDiff, cv::Mat& _buf)
983 int width = img.cols, height = img.rows, npixels = width*height;
984 size_t bufSize = npixels*(int)(sizeof(Point2s) + sizeof(int) + sizeof(uchar));
985 if( !_buf.isContinuous() || !_buf.data || _buf.cols*_buf.rows*_buf.elemSize() < bufSize )
986 _buf.create(1, (int)bufSize, CV_8U);
988 uchar* buf = _buf.data;
989 int i, j, dstep = (int)(img.step/sizeof(T));
990 int* labels = (int*)buf;
991 buf += npixels*sizeof(labels[0]);
992 Point2s* wbuf = (Point2s*)buf;
993 buf += npixels*sizeof(wbuf[0]);
994 uchar* rtype = (uchar*)buf;
997 // clear out label assignments
998 memset(labels, 0, npixels*sizeof(labels[0]));
1000 for( i = 0; i < height; i++ )
1002 T* ds = img.ptr<T>(i);
1003 int* ls = labels + width*i;
1005 for( j = 0; j < width; j++ )
1007 if( ds[j] != newVal ) // not a bad disparity
1009 if( ls[j] ) // has a label, check for bad label
1011 if( rtype[ls[j]] ) // small region, zero out disparity
1014 // no label, assign and propagate
1017 Point2s* ws = wbuf; // initialize wavefront
1018 Point2s p((short)j, (short)i); // current pixel
1019 curlabel++; // next label
1020 int count = 0; // current region size
1023 // wavefront propagation
1024 while( ws >= wbuf ) // wavefront not empty
1027 // put neighbors onto wavefront
1028 T* dpp = &img.at<T>(p.y, p.x);
1030 int* lpp = labels + width*p.y + p.x;
1032 if( p.x < width-1 && !lpp[+1] && dpp[+1] != newVal && std::abs(dp - dpp[+1]) <= maxDiff )
1035 *ws++ = Point2s(p.x+1, p.y);
1038 if( p.x > 0 && !lpp[-1] && dpp[-1] != newVal && std::abs(dp - dpp[-1]) <= maxDiff )
1041 *ws++ = Point2s(p.x-1, p.y);
1044 if( p.y < height-1 && !lpp[+width] && dpp[+dstep] != newVal && std::abs(dp - dpp[+dstep]) <= maxDiff )
1046 lpp[+width] = curlabel;
1047 *ws++ = Point2s(p.x, p.y+1);
1050 if( p.y > 0 && !lpp[-width] && dpp[-dstep] != newVal && std::abs(dp - dpp[-dstep]) <= maxDiff )
1052 lpp[-width] = curlabel;
1053 *ws++ = Point2s(p.x, p.y-1);
1056 // pop most recent and propagate
1057 // NB: could try least recent, maybe better convergence
1061 // assign label type
1062 if( count <= maxSpeckleSize ) // speckle region
1064 rtype[ls[j]] = 1; // small region label
1068 rtype[ls[j]] = 0; // large region label
1077 void cv::filterSpeckles( InputOutputArray _img, double _newval, int maxSpeckleSize,
1078 double _maxDiff, InputOutputArray __buf )
1080 Mat img = _img.getMat();
1081 int type = img.type();
1082 Mat temp, &_buf = __buf.needed() ? __buf.getMatRef() : temp;
1083 CV_Assert( type == CV_8UC1 || type == CV_16SC1 );
1085 int newVal = cvRound(_newval), maxDiff = cvRound(_maxDiff);
1087 #if IPP_VERSION_X100 >= 801 && !defined HAVE_IPP_ICV_ONLY
1089 IppiSize roisize = { img.cols, img.rows };
1090 IppDataType datatype = type == CV_8UC1 ? ipp8u : ipp16s;
1092 if (!__buf.needed() && (type == CV_8UC1 || type == CV_16SC1))
1094 IppStatus status = ippiMarkSpecklesGetBufferSize(roisize, datatype, CV_MAT_CN(type), &bufsize);
1095 Ipp8u * buffer = ippsMalloc_8u(bufsize);
1097 if ((int)status >= 0)
1099 if (type == CV_8UC1)
1100 status = ippiMarkSpeckles_8u_C1IR((Ipp8u *)img.data, (int)img.step, roisize,
1101 (Ipp8u)newVal, maxSpeckleSize, (Ipp8u)maxDiff, ippiNormL1, buffer);
1103 status = ippiMarkSpeckles_16s_C1IR((Ipp16s *)img.data, (int)img.step, roisize,
1104 (Ipp16s)newVal, maxSpeckleSize, (Ipp16s)maxDiff, ippiNormL1, buffer);
1109 setIppErrorStatus();
1113 if (type == CV_8UC1)
1114 filterSpecklesImpl<uchar>(img, newVal, maxSpeckleSize, maxDiff, _buf);
1116 filterSpecklesImpl<short>(img, newVal, maxSpeckleSize, maxDiff, _buf);
1119 void cv::validateDisparity( InputOutputArray _disp, InputArray _cost, int minDisparity,
1120 int numberOfDisparities, int disp12MaxDiff )
1122 Mat disp = _disp.getMat(), cost = _cost.getMat();
1123 int cols = disp.cols, rows = disp.rows;
1124 int minD = minDisparity, maxD = minDisparity + numberOfDisparities;
1125 int x, minX1 = std::max(maxD, 0), maxX1 = cols + std::min(minD, 0);
1126 AutoBuffer<int> _disp2buf(cols*2);
1127 int* disp2buf = _disp2buf;
1128 int* disp2cost = disp2buf + cols;
1129 const int DISP_SHIFT = 4, DISP_SCALE = 1 << DISP_SHIFT;
1130 int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;
1131 int costType = cost.type();
1133 disp12MaxDiff *= DISP_SCALE;
1135 CV_Assert( numberOfDisparities > 0 && disp.type() == CV_16S &&
1136 (costType == CV_16S || costType == CV_32S) &&
1137 disp.size() == cost.size() );
1139 for( int y = 0; y < rows; y++ )
1141 short* dptr = disp.ptr<short>(y);
1143 for( x = 0; x < cols; x++ )
1145 disp2buf[x] = INVALID_DISP_SCALED;
1146 disp2cost[x] = INT_MAX;
1149 if( costType == CV_16S )
1151 const short* cptr = cost.ptr<short>(y);
1153 for( x = minX1; x < maxX1; x++ )
1155 int d = dptr[x], c = cptr[x];
1156 int x2 = x - ((d + DISP_SCALE/2) >> DISP_SHIFT);
1158 if( disp2cost[x2] > c )
1167 const int* cptr = cost.ptr<int>(y);
1169 for( x = minX1; x < maxX1; x++ )
1171 int d = dptr[x], c = cptr[x];
1172 int x2 = x - ((d + DISP_SCALE/2) >> DISP_SHIFT);
1174 if( disp2cost[x2] < c )
1182 for( x = minX1; x < maxX1; x++ )
1184 // we round the computed disparity both towards -inf and +inf and check
1185 // if either of the corresponding disparities in disp2 is consistent.
1186 // This is to give the computed disparity a chance to look valid if it is.
1188 if( d == INVALID_DISP_SCALED )
1190 int d0 = d >> DISP_SHIFT;
1191 int d1 = (d + DISP_SCALE-1) >> DISP_SHIFT;
1192 int x0 = x - d0, x1 = x - d1;
1193 if( (0 <= x0 && x0 < cols && disp2buf[x0] > INVALID_DISP_SCALED && std::abs(disp2buf[x0] - d) > disp12MaxDiff) &&
1194 (0 <= x1 && x1 < cols && disp2buf[x1] > INVALID_DISP_SCALED && std::abs(disp2buf[x1] - d) > disp12MaxDiff) )
1195 dptr[x] = (short)INVALID_DISP_SCALED;