2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
13 #include "vpx_mem/vpx_mem.h"
14 #include "vpx_ports/config.h"
18 #include "vp8/common/findnearmv.h"
// NOTE(review): file-scope accumulators; by their names these hold motion
// vector reference counts and MV-mode counts gathered while encoding.  The
// code that updates/reads them is not visible in this chunk, so the meaning
// of the dimensions ([31][4][2] / [4][2]) is unconfirmed -- TODO confirm
// against the entropy-coding code that consumes them.
21 static int mv_ref_ct [31] [4] [2];
22 static int mv_mode_cts [4] [2];
25 int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
27 // MV costing is based on the distribution of vectors in the previous frame and as such will tend to
28 // over state the cost of vectors. In addition coding a new vector can have a knock on effect on the
29 // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks.
30 // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors.
31 return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7;
34 static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
36 return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
37 mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1])
38 * error_per_bit + 128) >> 8;
41 static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
43 /* Calculate sad error cost on full pixel basis. */
44 return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
45 mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)])
46 * error_per_bit + 128) >> 8;
49 void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
52 int search_site_count = 0;
55 // Generate offsets for 4 search sites per step.
57 x->ss[search_site_count].mv.col = 0;
58 x->ss[search_site_count].mv.row = 0;
59 x->ss[search_site_count].offset = 0;
65 // Compute offsets for search sites.
66 x->ss[search_site_count].mv.col = 0;
67 x->ss[search_site_count].mv.row = -Len;
68 x->ss[search_site_count].offset = -Len * stride;
71 // Compute offsets for search sites.
72 x->ss[search_site_count].mv.col = 0;
73 x->ss[search_site_count].mv.row = Len;
74 x->ss[search_site_count].offset = Len * stride;
77 // Compute offsets for search sites.
78 x->ss[search_site_count].mv.col = -Len;
79 x->ss[search_site_count].mv.row = 0;
80 x->ss[search_site_count].offset = -Len;
83 // Compute offsets for search sites.
84 x->ss[search_site_count].mv.col = Len;
85 x->ss[search_site_count].mv.row = 0;
86 x->ss[search_site_count].offset = Len;
93 x->ss_count = search_site_count;
94 x->searches_per_step = 4;
97 void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
100 int search_site_count = 0;
102 // Generate offsets for 8 search sites per step.
103 Len = MAX_FIRST_STEP;
104 x->ss[search_site_count].mv.col = 0;
105 x->ss[search_site_count].mv.row = 0;
106 x->ss[search_site_count].offset = 0;
112 // Compute offsets for search sites.
113 x->ss[search_site_count].mv.col = 0;
114 x->ss[search_site_count].mv.row = -Len;
115 x->ss[search_site_count].offset = -Len * stride;
118 // Compute offsets for search sites.
119 x->ss[search_site_count].mv.col = 0;
120 x->ss[search_site_count].mv.row = Len;
121 x->ss[search_site_count].offset = Len * stride;
124 // Compute offsets for search sites.
125 x->ss[search_site_count].mv.col = -Len;
126 x->ss[search_site_count].mv.row = 0;
127 x->ss[search_site_count].offset = -Len;
130 // Compute offsets for search sites.
131 x->ss[search_site_count].mv.col = Len;
132 x->ss[search_site_count].mv.row = 0;
133 x->ss[search_site_count].offset = Len;
136 // Compute offsets for search sites.
137 x->ss[search_site_count].mv.col = -Len;
138 x->ss[search_site_count].mv.row = -Len;
139 x->ss[search_site_count].offset = -Len * stride - Len;
142 // Compute offsets for search sites.
143 x->ss[search_site_count].mv.col = Len;
144 x->ss[search_site_count].mv.row = -Len;
145 x->ss[search_site_count].offset = -Len * stride + Len;
148 // Compute offsets for search sites.
149 x->ss[search_site_count].mv.col = -Len;
150 x->ss[search_site_count].mv.row = Len;
151 x->ss[search_site_count].offset = Len * stride - Len;
154 // Compute offsets for search sites.
155 x->ss[search_site_count].mv.col = Len;
156 x->ss[search_site_count].mv.row = Len;
157 x->ss[search_site_count].offset = Len * stride + Len;
165 x->ss_count = search_site_count;
166 x->searches_per_step = 8;
170 * To avoid the penalty for crossing cache-line read, preload the reference
171 * area in a small buffer, which is aligned to make sure there won't be crossing
172 * cache-line read while reading from this buffer. This reduces the CPU
173 * cycles spent on reading ref data in sub-pixel filter functions.
174 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
175 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
176 * could reduce the area.
// Helper macros for vp8_find_best_sub_pixel_step_iteratively() below.
// They deliberately read variables from the enclosing function's scope
// (mvcost, rr, rc, error_per_bit, y, y_stride, offset, vfp, z, b, sse,
// minc/maxc/minr/maxr, besterr, br, bc, distortion, sse1, thismse).
178 #define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
179 #define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) // pointer to predictor base of a motionvector
180 #define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
181 #define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
182 #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
183 #define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
// NOTE(review): CHECK_BETTER expands (r) and (c) several times through the
// nested macros -- only pass side-effect-free expressions.
184 #define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
// NOTE(review): MIN/MAX evaluate their arguments twice (classic macro
// double-evaluation hazard).
185 #define MIN(x,y) (((x)<(y))?(x):(y))
186 #define MAX(x,y) (((x)>(y))?(x):(y))
// Iterative sub-pixel motion search: refines *bestmv around its integer-pel
// position with up to 4 half-pel passes followed by up to 4 quarter-pel
// passes, each pass testing the 4 cross neighbours plus one diagonal chosen
// by which cross points scored best.
// NOTE(review): several structural lines (braces, loop headers for the
// half-pel pass, early-exit branches and the return paths) are missing from
// this chunk of the file; the comments below annotate only what is visible.
188 int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
189 int_mv *bestmv, int_mv *ref_mv,
191 const vp8_variance_fn_ptr_t *vfp,
192 int *mvcost[2], int *distortion,
// z: source block the prediction is compared against.
195 unsigned char *z = (*(b->base_src) + b->src);
// rr/rc: reference MV halved for cost-table indexing; br/bc: current best
// position in quarter-pel search units (integer MV * 4); tr/tc: trial point.
197 int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
198 int br = bestmv->as_mv.row << 2, bc = bestmv->as_mv.col << 2;
199 int tr = br, tc = bc;
200 unsigned int besterr = INT_MAX;
201 unsigned int left, right, up, down, diag;
203 unsigned int whichdir;
204 unsigned int halfiters = 4;
205 unsigned int quarteriters = 4;
// Clamp the search window to the frame border AND to what can be
// represented in (1 << mvlong_width) - 1 around the reference vector.
208 int minc = MAX(x->mv_col_min << 2, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
209 int maxc = MIN(x->mv_col_max << 2, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
210 int minr = MAX(x->mv_row_min << 2, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
211 int maxr = MIN(x->mv_row_max << 2, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
// On x86, stage the reference area into an aligned scratch buffer to avoid
// cache-line-split reads (see the comment block above the macros).
216 #if ARCH_X86 || ARCH_X86_64
217 MACROBLOCKD *xd = &x->e_mbd;
218 unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
220 int buf_r1, buf_r2, buf_c1, buf_c2;
222 // Clamping to avoid out-of-range data access
223 buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3;
224 buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3;
225 buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3;
226 buf_c2 = ((bestmv->as_mv.col + 3) > x->mv_col_max)?(x->mv_col_max - bestmv->as_mv.col):3;
229 /* Copy to intermediate buffer before searching. */
230 vfp->copymem(y0 - buf_c1 - d->pre_stride*buf_r1, d->pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
231 y = xd->y_buf + y_stride*buf_r1 +buf_c1;
// Non-x86 path: read the reference frame directly.
233 unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
234 y_stride = d->pre_stride;
237 offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
// Scale bestmv up (x8) so cost/error are computed in the MV's coded units.
240 bestmv->as_mv.row <<= 3;
241 bestmv->as_mv.col <<= 3;
243 // calculate central point error
244 besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
245 *distortion = besterr;
246 besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
248 // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
// Half-pel pass: 4 cross points (step 2 in quarter-pel units), then the
// diagonal on the side the best cross points indicate.
252 CHECK_BETTER(left, tr, tc - 2);
253 CHECK_BETTER(right, tr, tc + 2);
254 CHECK_BETTER(up, tr - 2, tc);
255 CHECK_BETTER(down, tr + 2, tc);
257 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
262 CHECK_BETTER(diag, tr - 2, tc - 2);
265 CHECK_BETTER(diag, tr - 2, tc + 2);
268 CHECK_BETTER(diag, tr + 2, tc - 2);
271 CHECK_BETTER(diag, tr + 2, tc + 2);
275 // no reason to check the same one again.
276 if (tr == br && tc == bc)
283 // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
// Quarter-pel pass: same pattern with step 1.
285 while (--quarteriters)
287 CHECK_BETTER(left, tr, tc - 1);
288 CHECK_BETTER(right, tr, tc + 1);
289 CHECK_BETTER(up, tr - 1, tc);
290 CHECK_BETTER(down, tr + 1, tc);
292 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
297 CHECK_BETTER(diag, tr - 1, tc - 1);
300 CHECK_BETTER(diag, tr - 1, tc + 1);
303 CHECK_BETTER(diag, tr + 1, tc - 1);
306 CHECK_BETTER(diag, tr + 1, tc + 1);
310 // no reason to check the same one again.
311 if (tr == br && tc == bc)
// Convert the winner from quarter-pel search units back to coded MV units.
318 bestmv->as_mv.row = br << 1;
319 bestmv->as_mv.col = bc << 1;
// Reject results that moved too far from the reference vector
// (the return value for this branch is not visible in this chunk).
321 if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
322 (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
// One-shot sub-pixel search: evaluates the four half-pel cross points, one
// half-pel diagonal (picked from which cross points won), then repeats the
// pattern at quarter-pel precision around the best half-pel position.
// NOTE(review): many brace/assignment/return lines are missing from this
// chunk (including the declarations of startmv, this_mv, thismse, sse and
// the best-so-far bookkeeping between candidates); annotations below cover
// only the visible statements.
336 int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
337 int_mv *bestmv, int_mv *ref_mv,
339 const vp8_variance_fn_ptr_t *vfp,
340 int *mvcost[2], int *distortion,
343 int bestmse = INT_MAX;
346 unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
347 unsigned char *z = (*(b->base_src) + b->src);
348 int left, right, up, down, diag;
// Scale bestmv up (x8) into coded MV units for the cost functions.
354 bestmv->as_mv.row <<= 3;
355 bestmv->as_mv.col <<= 3;
358 // calculate central point error
359 bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, sse1);
360 *distortion = bestmse;
361 bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
363 // go left then right and check error
// "(v - 8) | 4" lands exactly on the half-pel position left/above of v.
364 this_mv.as_mv.row = startmv.as_mv.row;
365 this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
366 thismse = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
367 left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
373 *distortion = thismse;
377 this_mv.as_mv.col += 8;
378 thismse = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
379 right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
385 *distortion = thismse;
389 // go up then down and check error
390 this_mv.as_mv.col = startmv.as_mv.col;
391 this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
392 thismse = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
393 up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
399 *distortion = thismse;
403 this_mv.as_mv.row += 8;
404 thismse = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
405 down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
411 *distortion = thismse;
416 // now check 1 more diagonal
417 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
418 //for(whichdir =0;whichdir<4;whichdir++)
// One diagonal case per whichdir value (switch/case lines not visible).
425 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
426 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
427 thismse = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
430 this_mv.as_mv.col += 4;
431 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
432 thismse = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
435 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
436 this_mv.as_mv.row += 4;
437 thismse = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
441 this_mv.as_mv.col += 4;
442 this_mv.as_mv.row += 4;
443 thismse = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
447 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
453 *distortion = thismse;
460 // time to check quarter pels.
// Re-anchor y on the winning half-pel position before the quarter-pel pass
// (the guarded adjustments are not visible in this chunk).
461 if (bestmv->as_mv.row < startmv.as_mv.row)
464 if (bestmv->as_mv.col < startmv.as_mv.col)
471 // go left then right and check error
472 this_mv.as_mv.row = startmv.as_mv.row;
// The "& 7" branches distinguish whether startmv already sits on a
// sub-pel position in that axis.
474 if (startmv.as_mv.col & 7)
476 this_mv.as_mv.col = startmv.as_mv.col - 2;
477 thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
481 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
482 thismse = vfp->svf(y - 1, d->pre_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
485 left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
491 *distortion = thismse;
495 this_mv.as_mv.col += 4;
496 thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
497 right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
503 *distortion = thismse;
507 // go up then down and check error
508 this_mv.as_mv.col = startmv.as_mv.col;
510 if (startmv.as_mv.row & 7)
512 this_mv.as_mv.row = startmv.as_mv.row - 2;
513 thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
517 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
518 thismse = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
521 up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
527 *distortion = thismse;
531 this_mv.as_mv.row += 4;
532 thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
533 down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
539 *distortion = thismse;
544 // now check 1 more diagonal
545 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
547 // for(whichdir=0;whichdir<4;whichdir++)
// Quarter-pel diagonal cases; each nests on whether start row/col already
// carry a sub-pel component.
555 if (startmv.as_mv.row & 7)
557 this_mv.as_mv.row -= 2;
559 if (startmv.as_mv.col & 7)
561 this_mv.as_mv.col -= 2;
562 thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
566 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
// NOTE(review): stray double semicolon below -- harmless empty statement.
567 thismse = vfp->svf(y - 1, d->pre_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);;
572 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
574 if (startmv.as_mv.col & 7)
576 this_mv.as_mv.col -= 2;
577 thismse = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
581 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
582 thismse = vfp->svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
588 this_mv.as_mv.col += 2;
590 if (startmv.as_mv.row & 7)
592 this_mv.as_mv.row -= 2;
593 thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
597 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
598 thismse = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
603 this_mv.as_mv.row += 2;
605 if (startmv.as_mv.col & 7)
607 this_mv.as_mv.col -= 2;
608 thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
612 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
613 thismse = vfp->svf(y - 1, d->pre_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
618 this_mv.as_mv.col += 2;
619 this_mv.as_mv.row += 2;
620 thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
624 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
630 *distortion = thismse;
// Half-pel-only variant of the sub-pixel search: tests the four half-pel
// cross points and the four half-pel diagonals around the integer-pel best.
// NOTE(review): braces, declarations (startmv, this_mv, thismse, sse,
// whichdir) and best-so-far updates are missing from this chunk; comments
// cover only the visible statements.
637 int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d,
638 int_mv *bestmv, int_mv *ref_mv,
640 const vp8_variance_fn_ptr_t *vfp,
641 int *mvcost[2], int *distortion,
644 int bestmse = INT_MAX;
647 unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
648 unsigned char *z = (*(b->base_src) + b->src);
649 int left, right, up, down, diag;
// Scale bestmv up (x8) into coded MV units for the cost functions.
654 bestmv->as_mv.row <<= 3;
655 bestmv->as_mv.col <<= 3;
658 // calculate central point error
659 bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, sse1);
660 *distortion = bestmse;
661 bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
663 // go left then right and check error
664 this_mv.as_mv.row = startmv.as_mv.row;
665 this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
666 thismse = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
667 left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
673 *distortion = thismse;
677 this_mv.as_mv.col += 8;
678 thismse = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
679 right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
685 *distortion = thismse;
689 // go up then down and check error
690 this_mv.as_mv.col = startmv.as_mv.col;
691 this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
692 thismse = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
693 up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
699 *distortion = thismse;
703 this_mv.as_mv.row += 8;
704 thismse = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
705 down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
711 *distortion = thismse;
715 // somewhat strangely not doing all the diagonals for half pel is slower than doing them.
717 // now check 1 more diagonal -
718 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
// NOTE(review): the next five statements access this_mv.col/.row directly
// (not this_mv.as_mv.*), unlike the rest of the file -- in upstream libvpx
// this region is disabled with #if 0; the preprocessor lines are missing
// from this chunk.
724 this_mv.col = (this_mv.col - 8) | 4;
725 this_mv.row = (this_mv.row - 8) | 4;
726 diag = vfp->svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
730 this_mv.row = (this_mv.row - 8) | 4;
731 diag = vfp->svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
734 this_mv.col = (this_mv.col - 8) | 4;
736 diag = vfp->svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
741 diag = vfp->svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
745 diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
// Active path: test all four half-pel diagonals in turn.
754 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
755 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
756 thismse = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
757 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
763 *distortion = thismse;
767 this_mv.as_mv.col += 8;
768 thismse = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
769 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
775 *distortion = thismse;
779 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
780 this_mv.as_mv.row = startmv.as_mv.row + 4;
781 thismse = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
782 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
788 *distortion = thismse;
792 this_mv.as_mv.col += 8;
793 thismse = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
794 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
800 *distortion = thismse;
// Bounds/cost helper macros for the integer-pel searches below.  As with
// the sub-pixel macros above, they reference enclosing-scope variables
// (all_in, br, bc, x, this_mv, thissad, bestsad, fcenter_mv, mvsadcost,
// sad_per_bit).
// CHECK_BOUNDS: cheap test that every point within +/-range of (br,bc)
// stays inside the MV limits, so per-point checks can be skipped.
808 #define CHECK_BOUNDS(range) \
811     all_in &= ((br-range) >= x->mv_row_min);\
812     all_in &= ((br+range) <= x->mv_row_max);\
813     all_in &= ((bc-range) >= x->mv_col_min);\
814     all_in &= ((bc+range) <= x->mv_col_max);\
// CHECK_POINT: per-candidate MV-limit guard; must expand inside a loop
// (it uses continue).
817 #define CHECK_POINT \
819     if (this_mv.as_mv.col < x->mv_col_min) continue;\
820     if (this_mv.as_mv.col > x->mv_col_max) continue;\
821     if (this_mv.as_mv.row < x->mv_row_min) continue;\
822     if (this_mv.as_mv.row > x->mv_row_max) continue;\
// CHECK_BETTER: add the MV rate cost only when the raw SAD already beats
// the best, then re-test (the update lines are not visible in this chunk).
825 #define CHECK_BETTER \
827     if (thissad < bestsad)\
829         thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);\
830         if (thissad < bestsad)\
// Hex-search neighbour table: after moving in direction k (0..5) on the
// 6-point hexagon, row k lists the 3 points not already evaluated from the
// previous position.  Values are {row, col} offsets in full pels.
838 static const MV next_chkpts[6][3] =
840     {{ -2, 0}, { -1, -2}, {1, -2}},
841     {{ -1, -2}, {1, -2}, {2, 0}},
842     {{1, -2}, {2, 0}, {1, 2}},
843     {{2, 0}, {1, 2}, { -1, 2}},
844     {{1, 2}, { -1, 2}, { -2, 0}},
845     {{ -1, 2}, { -2, 0}, { -1, -2}}
// Hexagon-pattern integer-pel search (by its structure this is
// vp8_hex_search -- the function's opening signature lines are missing from
// this chunk, as are the loop braces and the best_site selection lines).
// Stages: full hex ring around the start point, then repeated 3-point hex
// refinement via next_chkpts[], then a final 4-neighbour square refinement.
857 const vp8_variance_fn_ptr_t *vfp,
863 MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
864 MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}} ;
867 unsigned char *what = (*(b->base_src) + b->src);
868 int what_stride = b->src_stride;
869 int in_what_stride = d->pre_stride;
872 unsigned int bestsad = 0x7fffffff;
873 unsigned int thissad;
874 unsigned char *base_offset;
875 unsigned char *this_offset;
// fcenter_mv: the prediction centre converted to full-pel for SAD costing.
881 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
882 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
884 // adjust ref_mv to make sure it is within MV range
885 vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
886 br = ref_mv->as_mv.row;
887 bc = ref_mv->as_mv.col;
889 // Work out the start point for the search
890 base_offset = (unsigned char *)(*(d->base_pre) + d->pre);
891 this_offset = base_offset + (br * (d->pre_stride)) + bc;
892 this_mv.as_mv.row = br;
893 this_mv.as_mv.col = bc;
894 bestsad = vfp->sdf( what, what_stride, this_offset,
895 in_what_stride, 0x7fffffff)
896 + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
// First hex ring.  Two copies of the loop exist: the first runs when the
// whole ring is in bounds (no per-point clamp), the second re-checks each
// point (the CHECK_BOUNDS/CHECK_POINT guards are not visible here).
904 for (i = 0; i < 6; i++)
906 this_mv.as_mv.row = br + hex[i].row;
907 this_mv.as_mv.col = bc + hex[i].col;
908 this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
909 thissad=vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
914 for (i = 0; i < 6; i++)
916 this_mv.as_mv.row = br + hex[i].row;
917 this_mv.as_mv.col = bc + hex[i].col;
919 this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
920 thissad=vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
929 br += hex[best_site].row;
930 bc += hex[best_site].col;
// Hex refinement: up to 127 steps, each testing the 3 fresh points for the
// last move direction k.
934 for (j = 1; j < 127; j++)
941 for (i = 0; i < 3; i++)
943 this_mv.as_mv.row = br + next_chkpts[k][i].row;
944 this_mv.as_mv.col = bc + next_chkpts[k][i].col;
945 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
946 thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
951 for (i = 0; i < 3; i++)
953 this_mv.as_mv.row = br + next_chkpts[k][i].row;
954 this_mv.as_mv.col = bc + next_chkpts[k][i].col;
956 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
957 thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
966 br += next_chkpts[k][best_site].row;
967 bc += next_chkpts[k][best_site].col;
// Keep the direction index k in [0,6) after the += best_site update
// (the update itself is not visible in this chunk).
969 if (k >= 12) k -= 12;
970 else if (k >= 6) k -= 6;
974 // check 4 1-away neighbors
976 for (j = 0; j < 32; j++)
983 for (i = 0; i < 4; i++)
985 this_mv.as_mv.row = br + neighbors[i].row;
986 this_mv.as_mv.col = bc + neighbors[i].col;
987 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
988 thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
993 for (i = 0; i < 4; i++)
995 this_mv.as_mv.row = br + neighbors[i].row;
996 this_mv.as_mv.col = bc + neighbors[i].col;
998 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
999 thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
// No neighbour improved: converged.
1004 if (best_site == -1)
1008 br += neighbors[best_site].row;
1009 bc += neighbors[best_site].col;
1013 best_mv->as_mv.row = br;
1014 best_mv->as_mv.col = bc;
// Scalar diamond search over the precomputed site table (built by
// vp8_init_dsmotion_compensation / vp8_init3smotion_compensation).
// search_param selects how far down the table (i.e. how small an initial
// step) to start.  Returns variance of the winner plus its MV rate cost.
// NOTE(review): braces, several declarations (i, j, step, tot_steps, ss,
// best_site, last_site, thissad, this_mv, fcenter_mv) and the inner
// best-site bookkeeping are missing from this chunk.
1022 int vp8_diamond_search_sad
1032     vp8_variance_fn_ptr_t *fn_ptr,
1039     unsigned char *what = (*(b->base_src) + b->src);
1040     int what_stride = b->src_stride;
1041     unsigned char *in_what;
1042     int in_what_stride = d->pre_stride;
1043     unsigned char *best_address;
1048     int bestsad = INT_MAX;
1054     int this_row_offset;
1055     int this_col_offset;
1058     unsigned char *check_here;
1061     int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
// Convert the prediction centre to full-pel for SAD costing.
1063     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1064     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1066     vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1067     ref_row = ref_mv->as_mv.row;
1068     ref_col = ref_mv->as_mv.col;
1070     best_mv->as_mv.row = ref_row;
1071     best_mv->as_mv.col = ref_col;
1073     // Work out the start point for the search
1074     in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
1075     best_address = in_what;
1077     // Check the starting position
1078     bestsad = fn_ptr->sdf(what, what_stride, in_what,
1079                           in_what_stride, 0x7fffffff)
1080               + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1082     // search_param determines the length of the initial step and hence the number of iterations
1083     // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
1084     ss = &x->ss[search_param * x->searches_per_step];
1085     tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1089     for (step = 0; step < tot_steps ; step++)
1091         for (j = 0 ; j < x->searches_per_step ; j++)
1093             // Trap illegal vectors
1094             this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
1095             this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
1097             if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
1098             (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
1101                 check_here = ss[i].offset + best_address;
1102                 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
// Only pay for the MV rate cost when the raw SAD already beats the best.
1104                 if (thissad < bestsad)
1106                     this_mv.as_mv.row = this_row_offset;
1107                     this_mv.as_mv.col = this_col_offset;
1108                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1109                                               mvsadcost, sad_per_bit);
1111                     if (thissad < bestsad)
// Advance the centre to the winning site; repeating the same step size
// from an unchanged centre would be redundant.
1122         if (best_site != last_site)
1124             best_mv->as_mv.row += ss[best_site].mv.row;
1125             best_mv->as_mv.col += ss[best_site].mv.col;
1126             best_address += ss[best_site].offset;
1127             last_site = best_site;
1129         else if (best_address == in_what)
// Result converted back to 1/8-pel MV units for the variance+rate return.
1133     this_mv.as_mv.row = best_mv->as_mv.row << 3;
1134     this_mv.as_mv.col = best_mv->as_mv.col << 3;
1136     if (bestsad == INT_MAX)
1139     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
1140            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
// SIMD-assisted diamond search: identical strategy to vp8_diamond_search_sad
// but, when all sites of a step are in bounds, evaluates 4 candidates at a
// time through fn_ptr->sdx4df; otherwise falls back to the scalar per-point
// loop.  NOTE(review): braces, several declarations (i, j, step, t,
// tot_steps, ss, best_site, last_site, all_in, this_mv, fcenter_mv) and
// part of the best-site bookkeeping are missing from this chunk.
1143 int vp8_diamond_search_sadx4
1153     vp8_variance_fn_ptr_t *fn_ptr,
1160     unsigned char *what = (*(b->base_src) + b->src);
1161     int what_stride = b->src_stride;
1162     unsigned char *in_what;
1163     int in_what_stride = d->pre_stride;
1164     unsigned char *best_address;
// NOTE(review): bestsad is signed int but is compared against unsigned
// thissad/sad_array values -- relies on values staying below INT_MAX.
1169     int bestsad = INT_MAX;
1175     int this_row_offset;
1176     int this_col_offset;
1179     unsigned char *check_here;
1180     unsigned int thissad;
1182     int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
1184     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1185     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1187     vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1188     ref_row = ref_mv->as_mv.row;
1189     ref_col = ref_mv->as_mv.col;
1191     best_mv->as_mv.row = ref_row;
1192     best_mv->as_mv.col = ref_col;
1194     // Work out the start point for the search
1195     in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
1196     best_address = in_what;
1198     // Check the starting position
1199     bestsad = fn_ptr->sdf(what, what_stride,
1200                           in_what, in_what_stride, 0x7fffffff)
1201               + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1203     // search_param determines the length of the initial step and hence the number of iterations
1204     // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
1205     ss = &x->ss[search_param * x->searches_per_step];
1206     tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1210     for (step = 0; step < tot_steps ; step++)
1214         // To know if all neighbor points are within the bounds, 4 bounds checking are enough instead of
1215         // checking 4 bounds for each points.
1216         all_in &= ((best_mv->as_mv.row + ss[i].mv.row)> x->mv_row_min);
1217         all_in &= ((best_mv->as_mv.row + ss[i+1].mv.row) < x->mv_row_max);
1218         all_in &= ((best_mv->as_mv.col + ss[i+2].mv.col) > x->mv_col_min);
1219         all_in &= ((best_mv->as_mv.col + ss[i+3].mv.col) < x->mv_col_max);
// Fast path: 4 SADs per sdx4df call.
1223             unsigned int sad_array[4];
1225             for (j = 0 ; j < x->searches_per_step ; j += 4)
1227                 unsigned char *block_offset[4];
1229                 for (t = 0; t < 4; t++)
1230                     block_offset[t] = ss[i+t].offset + best_address;
1232                 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
1234                 for (t = 0; t < 4; t++, i++)
1236                     if (sad_array[t] < bestsad)
1238                         this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
1239                         this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
1240                         sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
1241                                                        mvsadcost, sad_per_bit);
1243                         if (sad_array[t] < bestsad)
1245                             bestsad = sad_array[t];
// Fallback scalar path with per-point bounds checks.
1254             for (j = 0 ; j < x->searches_per_step ; j++)
1256                 // Trap illegal vectors
1257                 this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
1258                 this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
1260                 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
1261                 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
1263                     check_here = ss[i].offset + best_address;
1264                     thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
1266                     if (thissad < bestsad)
1268                         this_mv.as_mv.row = this_row_offset;
1269                         this_mv.as_mv.col = this_col_offset;
1270                         thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1271                                                   mvsadcost, sad_per_bit);
1273                         if (thissad < bestsad)
1284         if (best_site != last_site)
1286             best_mv->as_mv.row += ss[best_site].mv.row;
1287             best_mv->as_mv.col += ss[best_site].mv.col;
1288             best_address += ss[best_site].offset;
1289             last_site = best_site;
1291         else if (best_address == in_what)
// Result converted back to 1/8-pel MV units for the variance+rate return.
1295     this_mv.as_mv.row = best_mv->as_mv.row << 3;
1296     this_mv.as_mv.col = best_mv->as_mv.col << 3;
1298     if (bestsad == INT_MAX)
1301     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
1302            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
// Exhaustive full search: evaluates SAD + MV rate cost at every integer-pel
// position in a (2*distance)^2 window around ref_mv, clamped to the MV
// limits.  Best result is written to d->bmi.mv and the function returns
// variance + MV rate cost of the winner.  NOTE(review): braces and the
// declarations of r, c, thissad, this_mv and fcenter_mv, plus the bestsad
// update inside the inner loop, are missing from this chunk.
1305 int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1306                         int sad_per_bit, int distance,
1307                         vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1310     unsigned char *what = (*(b->base_src) + b->src);
1311     int what_stride = b->src_stride;
1312     unsigned char *in_what;
1313     int in_what_stride = d->pre_stride;
1314     int mv_stride = d->pre_stride;
1315     unsigned char *bestaddress;
1316     int_mv *best_mv = &d->bmi.mv;
1318     int bestsad = INT_MAX;
1321     unsigned char *check_here;
1324     int ref_row = ref_mv->as_mv.row;
1325     int ref_col = ref_mv->as_mv.col;
1327     int row_min = ref_row - distance;
1328     int row_max = ref_row + distance;
1329     int col_min = ref_col - distance;
1330     int col_max = ref_col + distance;
1332     int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
1334     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1335     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1337     // Work out the mid point for the search
1338     in_what = *(d->base_pre) + d->pre;
1339     bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
1341     best_mv->as_mv.row = ref_row;
1342     best_mv->as_mv.col = ref_col;
1344     // Baseline value at the centre
1345     bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
1346                           in_what_stride, 0x7fffffff)
1347               + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1349     // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
1350     if (col_min < x->mv_col_min)
1351         col_min = x->mv_col_min;
1353     if (col_max > x->mv_col_max)
1354         col_max = x->mv_col_max;
1356     if (row_min < x->mv_row_min)
1357         row_min = x->mv_row_min;
1359     if (row_max > x->mv_row_max)
1360         row_max = x->mv_row_max;
// Raster scan of the clamped window; note the half-open bounds
// (r < row_max, c < col_max).
1362     for (r = row_min; r < row_max ; r++)
1364         this_mv.as_mv.row = r;
1365         check_here = r * mv_stride + in_what + col_min;
1367         for (c = col_min; c < col_max; c++)
// bestsad is passed as an early-termination threshold for the SAD.
1369             thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
1371             this_mv.as_mv.col = c;
1372             thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1373                                       mvsadcost, sad_per_bit);
1375             if (thissad < bestsad)
1378                 best_mv->as_mv.row = r;
1379                 best_mv->as_mv.col = c;
1380                 bestaddress = check_here;
// Result converted back to 1/8-pel MV units for the variance+rate return.
1387     this_mv.as_mv.row = best_mv->as_mv.row << 3;
1388     this_mv.as_mv.col = best_mv->as_mv.col << 3;
1390     if (bestsad < INT_MAX)
1391         return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
1392                + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
// Exhaustive full-pel motion search, SIMD-assisted variant.
// Same contract as vp8_full_search_sad, but each row is processed three
// columns at a time with fn_ptr->sdx3f (3 SADs per call); any leftover
// columns fall back to the scalar sdf() path.
// NOTE(review): this listing is elided — braces, the declarations of
// r/c/i/this_mv/fcenter_mv, the `c = col_min;` reset and the
// `bestsad = thissad;` updates are not visible here.
1397 int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1398 int sad_per_bit, int distance,
1399 vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1402 unsigned char *what = (*(b->base_src) + b->src);  // source block pixels
1403 int what_stride = b->src_stride;
1404 unsigned char *in_what;                           // reference (predictor) area
1405 int in_what_stride = d->pre_stride;
1406 int mv_stride = d->pre_stride;
1407 unsigned char *bestaddress;
1408 int_mv *best_mv = &d->bmi.mv;                     // result is written here
1410 int bestsad = INT_MAX;
1413 unsigned char *check_here;
1414 unsigned int thissad;
1416 int ref_row = ref_mv->as_mv.row;
1417 int ref_col = ref_mv->as_mv.col;
1419 int row_min = ref_row - distance;
1420 int row_max = ref_row + distance;
1421 int col_min = ref_col - distance;
1422 int col_max = ref_col + distance;
1424 unsigned int sad_array[3];                        // 3 SADs per sdx3f call
1426 int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
// Centre MV converted from 1/8-pel to full-pel units for the SAD cost tables.
1428 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1429 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1431 // Work out the mid point for the search
1432 in_what = *(d->base_pre) + d->pre;
1433 bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
1435 best_mv->as_mv.row = ref_row;
1436 best_mv->as_mv.col = ref_col;
1438 // Baseline value at the centre
1439 bestsad = fn_ptr->sdf(what, what_stride,
1440 bestaddress, in_what_stride, 0x7fffffff)
1441 + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1443 // Clamp the search window so we never examine vectors that stretch beyond the UMV border
1444 if (col_min < x->mv_col_min)
1445 col_min = x->mv_col_min;
1447 if (col_max > x->mv_col_max)
1448 col_max = x->mv_col_max;
1450 if (row_min < x->mv_row_min)
1451 row_min = x->mv_row_min;
1453 if (row_max > x->mv_row_max)
1454 row_max = x->mv_row_max;
1456 for (r = row_min; r < row_max ; r++)
1458 this_mv.as_mv.row = r;
1459 check_here = r * mv_stride + in_what + col_min;
// Vectorised pass: three consecutive column positions per sdx3f call.
1462 while ((c + 2) < col_max)
1466 fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
1468 for (i = 0; i < 3; i++)
1470 thissad = sad_array[i];
// Cheap pre-test on raw SAD before paying for the MV rate cost.
1472 if (thissad < bestsad)
1474 this_mv.as_mv.col = c;
1475 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1476 mvsadcost, sad_per_bit);
1478 if (thissad < bestsad)
1481 best_mv->as_mv.row = r;
1482 best_mv->as_mv.col = c;
1483 bestaddress = check_here;
// Scalar tail loop for the remaining (< 3) columns of this row.
1494 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
1496 if (thissad < bestsad)
1498 this_mv.as_mv.col = c;
1499 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1500 mvsadcost, sad_per_bit);
1502 if (thissad < bestsad)
1505 best_mv->as_mv.row = r;
1506 best_mv->as_mv.col = c;
1507 bestaddress = check_here;
// Convert the winning full-pel MV back to 1/8-pel units for mv_err_cost.
1517 this_mv.as_mv.row = best_mv->as_mv.row << 3;
1518 this_mv.as_mv.col = best_mv->as_mv.col << 3;
1520 if (bestsad < INT_MAX)
1521 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
1522 + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
// Exhaustive full-pel motion search, widest SIMD variant.
// Same contract as vp8_full_search_sad, but each row is processed eight
// columns at a time with fn_ptr->sdx8f, then three at a time with sdx3f,
// and finally one at a time with the scalar sdf() for the remainder.
// NOTE(review): this listing is elided — braces, the declarations of
// r/c/i/this_mv/fcenter_mv, the `c = col_min;` reset and the
// `bestsad = thissad;` updates are not visible here.
1527 int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1528 int sad_per_bit, int distance,
1529 vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1532 unsigned char *what = (*(b->base_src) + b->src);  // source block pixels
1533 int what_stride = b->src_stride;
1534 unsigned char *in_what;                           // reference (predictor) area
1535 int in_what_stride = d->pre_stride;
1536 int mv_stride = d->pre_stride;
1537 unsigned char *bestaddress;
1538 int_mv *best_mv = &d->bmi.mv;                     // result is written here
1540 int bestsad = INT_MAX;
1543 unsigned char *check_here;
1544 unsigned int thissad;
1546 int ref_row = ref_mv->as_mv.row;
1547 int ref_col = ref_mv->as_mv.col;
1549 int row_min = ref_row - distance;
1550 int row_max = ref_row + distance;
1551 int col_min = ref_col - distance;
1552 int col_max = ref_col + distance;
// 16-byte aligned output buffer required by the SIMD sdx8f implementations.
1554 DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
1555 unsigned int sad_array[3];
1557 int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
// Centre MV converted from 1/8-pel to full-pel units for the SAD cost tables.
1559 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1560 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1562 // Work out the mid point for the search
1563 in_what = *(d->base_pre) + d->pre;
1564 bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
1566 best_mv->as_mv.row = ref_row;
1567 best_mv->as_mv.col = ref_col;
1569 // Baseline value at the centre
1570 bestsad = fn_ptr->sdf(what, what_stride,
1571 bestaddress, in_what_stride, 0x7fffffff)
1572 + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1574 // Clamp the search window so we never examine vectors that stretch beyond the UMV border
1575 if (col_min < x->mv_col_min)
1576 col_min = x->mv_col_min;
1578 if (col_max > x->mv_col_max)
1579 col_max = x->mv_col_max;
1581 if (row_min < x->mv_row_min)
1582 row_min = x->mv_row_min;
1584 if (row_max > x->mv_row_max)
1585 row_max = x->mv_row_max;
1587 for (r = row_min; r < row_max ; r++)
1589 this_mv.as_mv.row = r;
1590 check_here = r * mv_stride + in_what + col_min;
// Widest pass: eight consecutive column positions per sdx8f call.
1593 while ((c + 7) < col_max)
1597 fn_ptr->sdx8f(what, what_stride, check_here , in_what_stride, sad_array8);
1599 for (i = 0; i < 8; i++)
1601 thissad = (unsigned int)sad_array8[i];
1603 if (thissad < bestsad)
1605 this_mv.as_mv.col = c;
1606 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1607 mvsadcost, sad_per_bit);
1609 if (thissad < bestsad)
1612 best_mv->as_mv.row = r;
1613 best_mv->as_mv.col = c;
1614 bestaddress = check_here;
// Middle pass: three positions per sdx3f call for the remaining columns.
1623 while ((c + 2) < col_max)
1627 fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
1629 for (i = 0; i < 3; i++)
1631 thissad = sad_array[i];
1633 if (thissad < bestsad)
1635 this_mv.as_mv.col = c;
1636 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1637 mvsadcost, sad_per_bit);
1639 if (thissad < bestsad)
1642 best_mv->as_mv.row = r;
1643 best_mv->as_mv.col = c;
1644 bestaddress = check_here;
// Scalar tail loop for the last (< 3) columns of this row.
1655 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
1657 if (thissad < bestsad)
1659 this_mv.as_mv.col = c;
1660 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1661 mvsadcost, sad_per_bit);
1663 if (thissad < bestsad)
1666 best_mv->as_mv.row = r;
1667 best_mv->as_mv.col = c;
1668 bestaddress = check_here;
// Convert the winning full-pel MV back to 1/8-pel units for mv_err_cost.
1677 this_mv.as_mv.row = best_mv->as_mv.row << 3;
1678 this_mv.as_mv.col = best_mv->as_mv.col << 3;
1680 if (bestsad < INT_MAX)
1681 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
1682 + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
// Iterative full-pel MV refinement (plain-C path).
// Starting from ref_mv, repeatedly tests the four cross neighbours
// (up/left/right/down) and moves to whichever improves SAD + MV rate cost,
// for at most search_range iterations or until no neighbour improves.
// ref_mv is updated in place; returns the variance-based cost of the final
// position plus its mv_err_cost.
// NOTE(review): this listing is elided — braces, the declarations of
// i/j/best_site/this_mv/fcenter_mv, the `bestsad = thissad; best_site = j;`
// updates and the `break` on best_site == -1 are not visible here.
1687 int vp8_refining_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1688 int error_per_bit, int search_range,
1689 vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1692 MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};  // up, left, right, down
1694 short this_row_offset, this_col_offset;
1696 int what_stride = b->src_stride;
1697 int in_what_stride = d->pre_stride;
1698 unsigned char *what = (*(b->base_src) + b->src);
// Reference pointer at the current (starting) MV position.
1699 unsigned char *best_address = (unsigned char *)(*(d->base_pre) + d->pre +
1700 (ref_mv->as_mv.row * (d->pre_stride)) + ref_mv->as_mv.col);
1701 unsigned char *check_here;
1702 unsigned int thissad;
1704 unsigned int bestsad = INT_MAX;
1706 int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
// Centre MV converted from 1/8-pel to full-pel units for the SAD cost tables.
1709 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1710 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
// Baseline cost at the starting position.
1712 bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
1714 for (i=0; i<search_range; i++)
1718 for (j = 0 ; j < 4 ; j++)
1720 this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
1721 this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
// Only consider neighbours strictly inside the UMV-clamped MV range.
1723 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
1724 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
1726 check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
1727 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
1729 if (thissad < bestsad)
1731 this_mv.as_mv.row = this_row_offset;
1732 this_mv.as_mv.col = this_col_offset;
1733 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
1735 if (thissad < bestsad)
// No neighbour improved this iteration: converged.
1744 if (best_site == -1)
// Otherwise step the current MV and reference pointer to the winner.
1748 ref_mv->as_mv.row += neighbors[best_site].row;
1749 ref_mv->as_mv.col += neighbors[best_site].col;
1750 best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
// Convert the final full-pel MV back to 1/8-pel units for mv_err_cost.
1754 this_mv.as_mv.row = ref_mv->as_mv.row << 3;
1755 this_mv.as_mv.col = ref_mv->as_mv.col << 3;
1757 if (bestsad < INT_MAX)
1758 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
1759 + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
// Iterative full-pel MV refinement, SIMD-assisted variant.
// Same contract as vp8_refining_search_sad.  When all four cross
// neighbours are safely inside the MV range (all_in), their SADs are
// computed in one fn_ptr->sdx4df call; otherwise it falls back to the
// per-neighbour scalar path with bounds checks.
// NOTE(review): this listing is elided — braces, the declarations of
// i/j/all_in/best_site/this_mv/fcenter_mv, the `all_in = 1;` seed, the
// `bestsad = .../best_site = j;` updates and the convergence `break` are
// not visible here.
1764 int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
1765 int_mv *ref_mv, int error_per_bit,
1766 int search_range, vp8_variance_fn_ptr_t *fn_ptr,
1767 int *mvcost[2], int_mv *center_mv)
1769 MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};  // up, left, right, down
1771 short this_row_offset, this_col_offset;
1773 int what_stride = b->src_stride;
1774 int in_what_stride = d->pre_stride;
1775 unsigned char *what = (*(b->base_src) + b->src);
// Reference pointer at the current (starting) MV position.
1776 unsigned char *best_address = (unsigned char *)(*(d->base_pre) + d->pre +
1777 (ref_mv->as_mv.row * (d->pre_stride)) + ref_mv->as_mv.col);
1778 unsigned char *check_here;
1779 unsigned int thissad;
1781 unsigned int bestsad = INT_MAX;
1783 int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
// Centre MV converted from 1/8-pel to full-pel units for the SAD cost tables.
1786 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1787 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
// Baseline cost at the starting position.
1789 bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
1791 for (i=0; i<search_range; i++)
// all_in: true iff all four neighbours stay strictly inside the MV range,
// making the bounds-check-free SIMD path safe.
1796 all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
1797 all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
1798 all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
1799 all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);
1803 unsigned int sad_array[4];
1804 unsigned char *block_offset[4];
// Neighbour addresses in the same order as neighbors[]: up, left, right, down.
1805 block_offset[0] = best_address - in_what_stride;
1806 block_offset[1] = best_address - 1;
1807 block_offset[2] = best_address + 1;
1808 block_offset[3] = best_address + in_what_stride;
// One call computes the SAD of all four candidates.
1810 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
1812 for (j = 0; j < 4; j++)
1814 if (sad_array[j] < bestsad)
1816 this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
1817 this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
1818 sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
1820 if (sad_array[j] < bestsad)
1822 bestsad = sad_array[j];
// Scalar fallback: test neighbours one by one with explicit bounds checks.
1830 for (j = 0 ; j < 4 ; j++)
1832 this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
1833 this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
1835 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
1836 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
1838 check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
1839 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
1841 if (thissad < bestsad)
1843 this_mv.as_mv.row = this_row_offset;
1844 this_mv.as_mv.col = this_col_offset;
1845 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
1847 if (thissad < bestsad)
// No neighbour improved this iteration: converged.
1857 if (best_site == -1)
// Otherwise step the current MV and reference pointer to the winner.
1861 ref_mv->as_mv.row += neighbors[best_site].row;
1862 ref_mv->as_mv.col += neighbors[best_site].col;
1863 best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
// Convert the final full-pel MV back to 1/8-pel units for mv_err_cost.
1867 this_mv.as_mv.row = ref_mv->as_mv.row << 3;
1868 this_mv.as_mv.col = ref_mv->as_mv.col << 3;
1870 if (bestsad < INT_MAX)
1871 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
1872 + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1877 #ifdef ENTROPY_STATS
// ENTROPY_STATS tool: dump the accumulated MV mode/reference counters as a
// C table (vp8_mode_contexts[6][4]) into "modecont.c", with each entry a
// probability scaled to 0..256.
// NOTE(review): this listing is elided — the declarations of i/j/count/
// this_prob/overal_prob, several zero-count guards and the fclose(f) are
// not visible here; fopen() failure is presumably unchecked (dev-only tool).
1878 void print_mode_context(void)
1880 FILE *f = fopen("modecont.c", "w");
1883 fprintf(f, "#include \"entropy.h\"\n");
1884 fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
1887 for (j = 0; j < 6; j++)       // one row per nearby-MV count context
1889 fprintf(f, " { // %d \n", j);
1892 for (i = 0; i < 4; i++)       // one entry per prediction-mode bucket
1896 int count; // = mv_ref_ct[j][i][0]+mv_ref_ct[j][i][1];
// Overall (context-independent) probability from the global mode counters.
1899 count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
1902 overal_prob = 256 * mv_mode_cts[i][0] / count;
1906 if (overal_prob == 0)
// Context-specific probability from the per-context counters.
1910 count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
1913 this_prob = 256 * mv_ref_ct[j][i][0] / count;
1920 fprintf(f, "%5d, ", this_prob);
1921 //fprintf(f,"%5d, %5d, %8d,", this_prob, overal_prob, (this_prob << 10)/overal_prob);
1922 //fprintf(f,"%8d, ", (this_prob << 10)/overal_prob);
1925 fprintf(f, " },\n");
1932 /* MV ref count ENTROPY_STATS stats code */
1933 #ifdef ENTROPY_STATS
// ENTROPY_STATS tool: zero the file-scope MV reference/mode counter tables
// (mv_ref_ct, mv_mode_cts) before a new accumulation run.
1934 void init_mv_ref_counts()
1936 vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
1937 vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
// ENTROPY_STATS tool: accumulate mode-decision statistics.
// For each of the four mode buckets (index 0..3), bumps mv_ref_ct indexed by
// the corresponding context count ct[k], and the matching mv_mode_cts entry;
// the [0]/[1] suffix records which branch was taken.
// NOTE(review): the conditions selecting between the paired increments
// (presumably tests on the predicted mode `m`, e.g. ZEROMV/NEARESTMV/
// NEARMV/NEWMV) are elided from this listing — confirm against the full
// source before relying on the mapping.
1940 void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
1944 ++mv_ref_ct [ct[0]] [0] [0];
1945 ++mv_mode_cts[0][0];
1949 ++mv_ref_ct [ct[0]] [0] [1];
1950 ++mv_mode_cts[0][1];
1954 ++mv_ref_ct [ct[1]] [1] [0];
1955 ++mv_mode_cts[1][0];
1959 ++mv_ref_ct [ct[1]] [1] [1];
1960 ++mv_mode_cts[1][1];
1964 ++mv_ref_ct [ct[2]] [2] [0];
1965 ++mv_mode_cts[2][0];
1969 ++mv_ref_ct [ct[2]] [2] [1];
1970 ++mv_mode_cts[2][1];
1974 ++mv_ref_ct [ct[3]] [3] [0];
1975 ++mv_mode_cts[3][0];
1979 ++mv_ref_ct [ct[3]] [3] [1];
1980 ++mv_mode_cts[3][1];
1987 #endif/* END MV ref count ENTROPY_STATS stats code */