2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
13 #include "vpx_mem/vpx_mem.h"
14 #include "vpx_ports/config.h"
// File-local accumulators for motion-vector statistics gathered while
// encoding.  NOTE(review): only the definitions are visible in this extract;
// the code that updates/reads them is elsewhere in the file.
20 static int mv_ref_ct [31] [4] [2];
21 static int mv_mode_cts [4] [2];
// Estimate the rate cost of coding motion vector 'mv' relative to predictor
// 'ref'.  The per-component costs come from the 'mvcost' tables (indexed at
// 1/4-pel resolution, hence the >>1 on the 1/8-pel MV difference); the sum is
// scaled by 'Weight' and >>7.  NOTE(review): the function's braces are not
// visible in this extract of the file.
24 int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
26 // MV costing is based on the distribution of vectors in the previous frame and as such will tend to
27 // over state the cost of vectors. In addition coding a new vector can have a knock on effect on the
28 // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks.
29 // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors.
30 return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7;
// Rate term used in RD decisions during sub-pixel search: component MV cost
// (at 1/4-pel table resolution, hence >>1) times error_per_bit, rounded
// (+128) and scaled back (>>8).
33 static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
35 return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
36 mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1])
37 * error_per_bit + 128) >> 8;
// SAD-domain analogue of mv_err_cost: MV components are already full-pel here
// (no >>1), using the dedicated mvsadcost tables.
40 static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
42 /* Calculate sad error cost on full pixel basis. */
43 return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
44 mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)])
45 * error_per_bit + 128) >> 8;
// Build the 4-point (diamond) search-site table x->ss[] for reference-buffer
// row 'stride'.  Each site stores an MV offset (col,row) and the matching
// linear buffer offset.  NOTE(review): the loop over step lengths ('Len' is
// used but its declaration/halving loop is not visible in this extract), and
// the search_site_count increments between sites, are among the missing lines.
48 void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
51 int search_site_count = 0;
54 // Generate offsets for 4 search sites per step.
// Site 0: the centre point (zero offset).
56 x->ss[search_site_count].mv.col = 0;
57 x->ss[search_site_count].mv.row = 0;
58 x->ss[search_site_count].offset = 0;
// Up.
64 // Compute offsets for search sites.
65 x->ss[search_site_count].mv.col = 0;
66 x->ss[search_site_count].mv.row = -Len;
67 x->ss[search_site_count].offset = -Len * stride;
// Down.
70 // Compute offsets for search sites.
71 x->ss[search_site_count].mv.col = 0;
72 x->ss[search_site_count].mv.row = Len;
73 x->ss[search_site_count].offset = Len * stride;
// Left.
76 // Compute offsets for search sites.
77 x->ss[search_site_count].mv.col = -Len;
78 x->ss[search_site_count].mv.row = 0;
79 x->ss[search_site_count].offset = -Len;
// Right.
82 // Compute offsets for search sites.
83 x->ss[search_site_count].mv.col = Len;
84 x->ss[search_site_count].mv.row = 0;
85 x->ss[search_site_count].offset = Len;
92 x->ss_count = search_site_count;
93 x->searches_per_step = 4;
// Build the 8-point (square) search-site table x->ss[]: 4 cardinal plus 4
// diagonal neighbours per step, starting from MAX_FIRST_STEP.  NOTE(review):
// as with the 4-point variant, the Len-halving loop and the
// search_site_count increments are not visible in this extract.
96 void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
99 int search_site_count = 0;
101 // Generate offsets for 8 search sites per step.
102 Len = MAX_FIRST_STEP;
// Site 0: the centre point (zero offset).
103 x->ss[search_site_count].mv.col = 0;
104 x->ss[search_site_count].mv.row = 0;
105 x->ss[search_site_count].offset = 0;
// Up.
111 // Compute offsets for search sites.
112 x->ss[search_site_count].mv.col = 0;
113 x->ss[search_site_count].mv.row = -Len;
114 x->ss[search_site_count].offset = -Len * stride;
// Down.
117 // Compute offsets for search sites.
118 x->ss[search_site_count].mv.col = 0;
119 x->ss[search_site_count].mv.row = Len;
120 x->ss[search_site_count].offset = Len * stride;
// Left.
123 // Compute offsets for search sites.
124 x->ss[search_site_count].mv.col = -Len;
125 x->ss[search_site_count].mv.row = 0;
126 x->ss[search_site_count].offset = -Len;
// Right.
129 // Compute offsets for search sites.
130 x->ss[search_site_count].mv.col = Len;
131 x->ss[search_site_count].mv.row = 0;
132 x->ss[search_site_count].offset = Len;
// Up-left diagonal.
135 // Compute offsets for search sites.
136 x->ss[search_site_count].mv.col = -Len;
137 x->ss[search_site_count].mv.row = -Len;
138 x->ss[search_site_count].offset = -Len * stride - Len;
// Up-right diagonal.
141 // Compute offsets for search sites.
142 x->ss[search_site_count].mv.col = Len;
143 x->ss[search_site_count].mv.row = -Len;
144 x->ss[search_site_count].offset = -Len * stride + Len;
// Down-left diagonal.
147 // Compute offsets for search sites.
148 x->ss[search_site_count].mv.col = -Len;
149 x->ss[search_site_count].mv.row = Len;
150 x->ss[search_site_count].offset = Len * stride - Len;
// Down-right diagonal.
153 // Compute offsets for search sites.
154 x->ss[search_site_count].mv.col = Len;
155 x->ss[search_site_count].mv.row = Len;
156 x->ss[search_site_count].offset = Len * stride + Len;
164 x->ss_count = search_site_count;
165 x->searches_per_step = 8;
169 * To avoid the penalty for crossing cache-line read, preload the reference
170 * area in a small buffer, which is aligned to make sure there won't be crossing
171 * cache-line read while reading from this buffer. This reduced the cpu
172 * cycles spent on reading ref data in sub-pixel filter functions.
173 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
174 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
175 * could reduce the area.
// Helper macros for the iterative sub-pixel search below.  Coordinates
// (r,c) are in 1/4-pel units; (rr,rc) is the reference MV, (br,bc) the
// current best.  IFMVCV clamps candidates to the legal MV range; on an
// out-of-range candidate CHECK_BETTER sets the score to INT_MAX instead.
177 #define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
178 #define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) // pointer to predictor base of a motionvector
179 #define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
180 #define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
181 #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
182 #define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
183 #define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
184 #define MIN(x,y) (((x)<(y))?(x):(y))
185 #define MAX(x,y) (((x)>(y))?(x):(y))
// Iterative sub-pixel refinement around the full-pel best MV: up to 4
// half-pel iterations followed by up to 4 quarter-pel iterations.  Each
// iteration probes the 4 cardinal neighbours and the one diagonal implied by
// the best cardinal pair, stopping early when the best point stops moving.
// On x86 the reference area is first copied into an aligned intermediate
// buffer (xd->y_buf) to avoid cache-line-crossing reads.
// NOTE(review): several lines (braces, #if/#else directives, some parameter
// lines, the tr/tc updates between iterations) are missing from this extract.
187 int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
188 int_mv *bestmv, int_mv *ref_mv,
190 const vp8_variance_fn_ptr_t *vfp,
191 int *mvcost[2], int *distortion,
194 unsigned char *z = (*(b->base_src) + b->src);
// Reference MV and current best, converted to 1/4-pel units.
196 int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
197 int br = bestmv->as_mv.row << 2, bc = bestmv->as_mv.col << 2;
198 int tr = br, tc = bc;
199 unsigned int besterr = INT_MAX;
200 unsigned int left, right, up, down, diag;
202 unsigned int whichdir;
203 unsigned int halfiters = 4;
204 unsigned int quarteriters = 4;
// Legal candidate range: intersection of the frame MV limits and the
// coder's representable MV magnitude.
207 int minc = MAX(x->mv_col_min << 2, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
208 int maxc = MIN(x->mv_col_max << 2, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
209 int minr = MAX(x->mv_row_min << 2, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
210 int maxr = MIN(x->mv_row_max << 2, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
215 #if ARCH_X86 || ARCH_X86_64
216 MACROBLOCKD *xd = &x->e_mbd;
217 unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
219 int buf_r1, buf_r2, buf_c1, buf_c2;
221 // Clamping to avoid out-of-range data access
222 buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3;
223 buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3;
224 buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3;
225 buf_c2 = ((bestmv->as_mv.col + 3) > x->mv_col_max)?(x->mv_col_max - bestmv->as_mv.col):3;
228 /* Copy to intermediate buffer before searching. */
229 vfp->copymem(y0 - buf_c1 - d->pre_stride*buf_r1, d->pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
230 y = xd->y_buf + y_stride*buf_r1 +buf_c1;
// Non-x86 path: read directly from the reference frame (presumably under
// an #else whose directive line is missing here — confirm against full file).
232 unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
233 y_stride = d->pre_stride;
236 offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
// Switch bestmv to 1/8-pel units for the cost/return conventions.
239 bestmv->as_mv.row <<= 3;
240 bestmv->as_mv.col <<= 3;
242 // calculate central point error
243 besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
244 *distortion = besterr;
245 besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
247 // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
// Half-pel iterations (step of 2 in 1/4-pel units).
251 CHECK_BETTER(left, tr, tc - 2);
252 CHECK_BETTER(right, tr, tc + 2);
253 CHECK_BETTER(up, tr - 2, tc);
254 CHECK_BETTER(down, tr + 2, tc);
// Pick the diagonal between the better horizontal and vertical directions.
256 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
261 CHECK_BETTER(diag, tr - 2, tc - 2);
264 CHECK_BETTER(diag, tr - 2, tc + 2);
267 CHECK_BETTER(diag, tr + 2, tc - 2);
270 CHECK_BETTER(diag, tr + 2, tc + 2);
274 // no reason to check the same one again.
275 if (tr == br && tc == bc)
282 // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
// Quarter-pel iterations (step of 1 in 1/4-pel units).
284 while (--quarteriters)
286 CHECK_BETTER(left, tr, tc - 1);
287 CHECK_BETTER(right, tr, tc + 1);
288 CHECK_BETTER(up, tr - 1, tc);
289 CHECK_BETTER(down, tr + 1, tc);
291 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
296 CHECK_BETTER(diag, tr - 1, tc - 1);
299 CHECK_BETTER(diag, tr - 1, tc + 1);
302 CHECK_BETTER(diag, tr + 1, tc - 1);
305 CHECK_BETTER(diag, tr + 1, tc + 1);
309 // no reason to check the same one again.
310 if (tr == br && tc == bc)
// Convert the 1/4-pel best (br,bc) back to 1/8-pel units.
317 bestmv->as_mv.row = br << 1;
318 bestmv->as_mv.col = bc << 1;
// Reject results that strayed beyond the maximum representable full-pel MV.
320 if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
321 (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
// Non-iterative sub-pixel search: evaluate the four half-pel cardinal
// neighbours plus the one diagonal picked from the better horizontal and
// vertical candidates, then repeat the same pattern at quarter-pel
// resolution around the half-pel winner.  MVs are held in 1/8-pel units;
// '| 4' / '| 6' force the half-/quarter-pel phase bits after a -8 step.
// NOTE(review): braces, the best-so-far update blocks after each cost
// comparison, and the whichdir switch cases are among the lines missing
// from this extract.
335 int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
336 int_mv *bestmv, int_mv *ref_mv,
338 const vp8_variance_fn_ptr_t *vfp,
339 int *mvcost[2], int *distortion,
342 int bestmse = INT_MAX;
345 unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
346 unsigned char *z = (*(b->base_src) + b->src);
347 int left, right, up, down, diag;
// Switch bestmv to 1/8-pel units.
353 bestmv->as_mv.row <<= 3;
354 bestmv->as_mv.col <<= 3;
357 // calculate central point error
358 bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, sse1);
359 *distortion = bestmse;
360 bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
362 // go left then right and check error
363 this_mv.as_mv.row = startmv.as_mv.row;
364 this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
365 thismse = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
366 left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
372 *distortion = thismse;
376 this_mv.as_mv.col += 8;
377 thismse = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
378 right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
384 *distortion = thismse;
388 // go up then down and check error
389 this_mv.as_mv.col = startmv.as_mv.col;
390 this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
391 thismse = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
392 up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
398 *distortion = thismse;
402 this_mv.as_mv.row += 8;
403 thismse = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
404 down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
410 *distortion = thismse;
415 // now check 1 more diagonal
416 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
417 //for(whichdir =0;whichdir<4;whichdir++)
// The four diagonal cases below presumably sit in a switch(whichdir)
// whose case labels are missing from this extract — confirm.
424 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
425 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
426 thismse = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
429 this_mv.as_mv.col += 4;
430 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
431 thismse = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
434 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
435 this_mv.as_mv.row += 4;
436 thismse = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
440 this_mv.as_mv.col += 4;
441 this_mv.as_mv.row += 4;
442 thismse = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
446 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
452 *distortion = thismse;
459 // time to check quarter pels.
460 if (bestmv->as_mv.row < startmv.as_mv.row)
463 if (bestmv->as_mv.col < startmv.as_mv.col)
470 // go left then right and check error
471 this_mv.as_mv.row = startmv.as_mv.row;
// If startmv already has a sub-pel phase, step by -2; otherwise cross the
// full-pel boundary with (start - 8) | 6 and read from y - 1.
473 if (startmv.as_mv.col & 7)
475 this_mv.as_mv.col = startmv.as_mv.col - 2;
476 thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
480 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
481 thismse = vfp->svf(y - 1, d->pre_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
484 left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
490 *distortion = thismse;
494 this_mv.as_mv.col += 4;
495 thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
496 right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
502 *distortion = thismse;
506 // go up then down and check error
507 this_mv.as_mv.col = startmv.as_mv.col;
509 if (startmv.as_mv.row & 7)
511 this_mv.as_mv.row = startmv.as_mv.row - 2;
512 thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
516 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
517 thismse = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
520 up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
526 *distortion = thismse;
530 this_mv.as_mv.row += 4;
531 thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
532 down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
538 *distortion = thismse;
543 // now check 1 more diagonal
544 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
546 // for(whichdir=0;whichdir<4;whichdir++)
// Quarter-pel diagonal cases: each handles the row/col sub-pel phase
// separately (again presumably switch(whichdir) cases — labels missing).
554 if (startmv.as_mv.row & 7)
556 this_mv.as_mv.row -= 2;
558 if (startmv.as_mv.col & 7)
560 this_mv.as_mv.col -= 2;
561 thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
565 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
566 thismse = vfp->svf(y - 1, d->pre_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);;
571 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
573 if (startmv.as_mv.col & 7)
575 this_mv.as_mv.col -= 2;
576 thismse = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
580 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
581 thismse = vfp->svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
587 this_mv.as_mv.col += 2;
589 if (startmv.as_mv.row & 7)
591 this_mv.as_mv.row -= 2;
592 thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
596 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
597 thismse = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
602 this_mv.as_mv.row += 2;
604 if (startmv.as_mv.col & 7)
606 this_mv.as_mv.col -= 2;
607 thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
611 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
612 thismse = vfp->svf(y - 1, d->pre_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
617 this_mv.as_mv.col += 2;
618 this_mv.as_mv.row += 2;
619 thismse = vfp->svf(y, d->pre_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
623 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
629 *distortion = thismse;
// Half-pel-only sub-pixel search: evaluates the four cardinal half-pel
// neighbours and, in this build, all four diagonals (see the comment at
// original line 714).  Same conventions as vp8_find_best_sub_pixel_step.
// NOTE(review): the lines using this_mv.col / this_mv.row without the
// .as_mv member (original lines 723-744) would not compile against int_mv —
// presumably they belong to a preprocessor-disabled (#if 0) alternative that
// picks a single diagonal; confirm against the full file.
636 int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d,
637 int_mv *bestmv, int_mv *ref_mv,
639 const vp8_variance_fn_ptr_t *vfp,
640 int *mvcost[2], int *distortion,
643 int bestmse = INT_MAX;
646 unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
647 unsigned char *z = (*(b->base_src) + b->src);
648 int left, right, up, down, diag;
// Switch bestmv to 1/8-pel units.
653 bestmv->as_mv.row <<= 3;
654 bestmv->as_mv.col <<= 3;
657 // calculate central point error
658 bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, sse1);
659 *distortion = bestmse;
660 bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
662 // go left then right and check error
663 this_mv.as_mv.row = startmv.as_mv.row;
664 this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
665 thismse = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
666 left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
672 *distortion = thismse;
676 this_mv.as_mv.col += 8;
677 thismse = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
678 right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
684 *distortion = thismse;
688 // go up then down and check error
689 this_mv.as_mv.col = startmv.as_mv.col;
690 this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
691 thismse = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
692 up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
698 *distortion = thismse;
702 this_mv.as_mv.row += 8;
703 thismse = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
704 down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
710 *distortion = thismse;
714 // somewhat strangely not doing all the diagonals for half pel is slower than doing them.
716 // now check 1 more diagonal -
717 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
// Single-diagonal variant (likely disabled — see NOTE above).
723 this_mv.col = (this_mv.col - 8) | 4;
724 this_mv.row = (this_mv.row - 8) | 4;
725 diag = vfp->svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
729 this_mv.row = (this_mv.row - 8) | 4;
730 diag = vfp->svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
733 this_mv.col = (this_mv.col - 8) | 4;
735 diag = vfp->svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
740 diag = vfp->svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
744 diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
// Active path: evaluate all four diagonals explicitly.
753 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
754 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
755 thismse = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
756 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
762 *distortion = thismse;
766 this_mv.as_mv.col += 8;
767 thismse = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
768 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
774 *distortion = thismse;
778 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
779 this_mv.as_mv.row = startmv.as_mv.row + 4;
780 thismse = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
781 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
787 *distortion = thismse;
791 this_mv.as_mv.col += 8;
792 thismse = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
793 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
799 *distortion = thismse;
// Helper macros for the integer-pel searches (hex/diamond) below.
// CHECK_BOUNDS tests once whether all candidates within 'range' of (br,bc)
// are legal, so per-point clipping (CHECK_POINT) can be skipped.
// NOTE(review): this CHECK_BETTER (no parameters, SAD-based) redefines the
// sub-pixel CHECK_BETTER(v,r,c) above — presumably there is an #undef in
// lines missing from this extract; confirm against the full file.
807 #define CHECK_BOUNDS(range) \
810 all_in &= ((br-range) >= x->mv_row_min);\
811 all_in &= ((br+range) <= x->mv_row_max);\
812 all_in &= ((bc-range) >= x->mv_col_min);\
813 all_in &= ((bc+range) <= x->mv_col_max);\
816 #define CHECK_POINT \
818 if (this_mv.as_mv.col < x->mv_col_min) continue;\
819 if (this_mv.as_mv.col > x->mv_col_max) continue;\
820 if (this_mv.as_mv.row < x->mv_row_min) continue;\
821 if (this_mv.as_mv.row > x->mv_row_max) continue;\
824 #define CHECK_BETTER \
826 if (thissad < bestsad)\
828 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);\
829 if (thissad < bestsad)\
// Hex-search neighbour table: after moving in direction k (index into the
// hex[6] pattern), row k lists the 3 new points that become candidates —
// the other 3 hex neighbours were already evaluated at the previous centre.
837 static const MV next_chkpts[6][3] =
839 {{ -2, 0}, { -1, -2}, {1, -2}},
840 {{ -1, -2}, {1, -2}, {2, 0}},
841 {{1, -2}, {2, 0}, {1, 2}},
842 {{2, 0}, {1, 2}, { -1, 2}},
843 {{1, 2}, { -1, 2}, { -2, 0}},
844 {{ -1, 2}, { -2, 0}, { -1, -2}}
// Hexagon-pattern integer-pel motion search (presumably vp8_hex_search —
// the function name/first parameter lines are not visible in this extract).
// Phase 1: probe the 6-point hex ring around the start, repeatedly re-centre
// on the best point using next_chkpts (3 new points per move, up to 127
// moves).  Phase 2: refine with the 4-point cross pattern (up to 32 moves).
// Each phase has a fast path (all candidates in bounds — presumably guarded
// by CHECK_BOUNDS/all_in in missing lines) and a clipped path (CHECK_POINT).
// NOTE(review): the if/else structure, CHECK_BETTER invocations, and best_site
// bookkeeping between the duplicated candidate loops are among the missing lines.
856 const vp8_variance_fn_ptr_t *vfp,
862 MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
863 MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}} ;
866 unsigned char *what = (*(b->base_src) + b->src);
867 int what_stride = b->src_stride;
868 int in_what_stride = d->pre_stride;
869 int br = ref_mv->as_mv.row, bc = ref_mv->as_mv.col;
871 unsigned int bestsad = 0x7fffffff;
872 unsigned int thissad;
873 unsigned char *base_offset;
874 unsigned char *this_offset;
// Centre MV is given in 1/8-pel units; search runs at full-pel.
880 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
881 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
883 // Work out the start point for the search
884 base_offset = (unsigned char *)(*(d->base_pre) + d->pre);
885 this_offset = base_offset + (br * (d->pre_stride)) + bc;
886 this_mv.as_mv.row = br;
887 this_mv.as_mv.col = bc;
888 bestsad = vfp->sdf( what, what_stride, this_offset,
889 in_what_stride, 0x7fffffff)
890 + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
// Initial hex ring — unclipped variant.
898 for (i = 0; i < 6; i++)
900 this_mv.as_mv.row = br + hex[i].row;
901 this_mv.as_mv.col = bc + hex[i].col;
902 this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
903 thissad=vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
// Initial hex ring — clipped variant (CHECK_POINT in missing lines).
908 for (i = 0; i < 6; i++)
910 this_mv.as_mv.row = br + hex[i].row;
911 this_mv.as_mv.col = bc + hex[i].col;
913 this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
914 thissad=vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
// Re-centre on the winning hex point.
923 br += hex[best_site].row;
924 bc += hex[best_site].col;
928 for (j = 1; j < 127; j++)
935 for (i = 0; i < 3; i++)
937 this_mv.as_mv.row = br + next_chkpts[k][i].row;
938 this_mv.as_mv.col = bc + next_chkpts[k][i].col;
939 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
940 thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
945 for (i = 0; i < 3; i++)
947 this_mv.as_mv.row = br + next_chkpts[k][i].row;
948 this_mv.as_mv.col = bc + next_chkpts[k][i].col;
950 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
951 thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
960 br += next_chkpts[k][best_site].row;
961 bc += next_chkpts[k][best_site].col;
// Keep direction index k within [0,6) after the offset update.
963 if (k >= 12) k -= 12;
964 else if (k >= 6) k -= 6;
968 // check 4 1-away neighbors
970 for (j = 0; j < 32; j++)
977 for (i = 0; i < 4; i++)
979 this_mv.as_mv.row = br + neighbors[i].row;
980 this_mv.as_mv.col = bc + neighbors[i].col;
981 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
982 thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
987 for (i = 0; i < 4; i++)
989 this_mv.as_mv.row = br + neighbors[i].row;
990 this_mv.as_mv.col = bc + neighbors[i].col;
992 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
993 thissad = vfp->sdf( what, what_stride, this_offset, in_what_stride, bestsad);
1002 br += neighbors[best_site].row;
1003 bc += neighbors[best_site].col;
// Return the winning full-pel MV.
1007 best_mv->as_mv.row = br;
1008 best_mv->as_mv.col = bc;
// Diamond-pattern integer-pel motion search using the precomputed
// search-site table x->ss[] (built by vp8_init_dsmotion_compensation /
// vp8_init3smotion_compensation).  'search_param' skips the largest steps;
// each step checks searches_per_step sites and re-centres on the best.
// Returns variance + MV rate cost for the winner.  NOTE(review): braces,
// best_site bookkeeping, the i/step counters' declarations, and the
// end-of-function handling when bestsad == INT_MAX are among the missing lines.
1016 int vp8_diamond_search_sad
1026 vp8_variance_fn_ptr_t *fn_ptr,
1033 unsigned char *what = (*(b->base_src) + b->src);
1034 int what_stride = b->src_stride;
1035 unsigned char *in_what;
1036 int in_what_stride = d->pre_stride;
1037 unsigned char *best_address;
1042 int bestsad = INT_MAX;
1046 int ref_row = ref_mv->as_mv.row;
1047 int ref_col = ref_mv->as_mv.col;
1048 int this_row_offset;
1049 int this_col_offset;
1052 unsigned char *check_here;
1055 int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
// Centre MV arrives in 1/8-pel units; search runs at full-pel.
1057 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1058 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1062 best_mv->as_mv.row = ref_row;
1063 best_mv->as_mv.col = ref_col;
1065 // Work out the start point for the search
1066 in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
1067 best_address = in_what;
1069 // Check the starting position
1070 bestsad = fn_ptr->sdf(what, what_stride, in_what,
1071 in_what_stride, 0x7fffffff)
1072 + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1074 // search_param determines the length of the initial step and hence the number of iterations
1075 // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
1076 ss = &x->ss[search_param * x->searches_per_step];
1077 tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1081 for (step = 0; step < tot_steps ; step++)
1083 for (j = 0 ; j < x->searches_per_step ; j++)
1085 // Trap illegal vectors
1086 this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
1087 this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
1089 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
1090 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
1093 check_here = ss[i].offset + best_address;
1094 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
// Two-stage compare: add the MV rate cost only when the raw SAD already beats
// the best, then compare again.
1096 if (thissad < bestsad)
1098 this_mv.as_mv.row = this_row_offset;
1099 this_mv.as_mv.col = this_col_offset;
1100 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1101 mvsadcost, sad_per_bit);
1103 if (thissad < bestsad)
1114 if (best_site != last_site)
1116 best_mv->as_mv.row += ss[best_site].mv.row;
1117 best_mv->as_mv.col += ss[best_site].mv.col;
1118 best_address += ss[best_site].offset;
1119 last_site = best_site;
1121 else if (best_address == in_what)
// Final cost uses errorperbit at 1/8-pel MV resolution.
1125 this_mv.as_mv.row = best_mv->as_mv.row << 3;
1126 this_mv.as_mv.col = best_mv->as_mv.col << 3;
1128 if (bestsad == INT_MAX)
1131 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
1132 + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
// SIMD-friendly variant of vp8_diamond_search_sad: when all 4 candidates of
// a group are in bounds (the all_in test below), it computes 4 SADs at once
// via fn_ptr->sdx4df; otherwise it falls back to the one-at-a-time path.
// NOTE(review): braces, the all_in reset, i/t/step declarations, and
// best_site bookkeeping are among the missing lines in this extract.
1135 int vp8_diamond_search_sadx4
1145 vp8_variance_fn_ptr_t *fn_ptr,
1152 unsigned char *what = (*(b->base_src) + b->src);
1153 int what_stride = b->src_stride;
1154 unsigned char *in_what;
1155 int in_what_stride = d->pre_stride;
1156 unsigned char *best_address;
1161 int bestsad = INT_MAX;
1165 int ref_row = ref_mv->as_mv.row;
1166 int ref_col = ref_mv->as_mv.col;
1167 int this_row_offset;
1168 int this_col_offset;
1171 unsigned char *check_here;
1172 unsigned int thissad;
1174 int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
// Centre MV arrives in 1/8-pel units; search runs at full-pel.
1176 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1177 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1180 best_mv->as_mv.row = ref_row;
1181 best_mv->as_mv.col = ref_col;
1183 // Work out the start point for the search
1184 in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
1185 best_address = in_what;
1187 // Check the starting position
1188 bestsad = fn_ptr->sdf(what, what_stride,
1189 in_what, in_what_stride, 0x7fffffff)
1190 + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1192 // search_param determines the length of the initial step and hence the number of iterations
1193 // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
1194 ss = &x->ss[search_param * x->searches_per_step];
1195 tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1199 for (step = 0; step < tot_steps ; step++)
1203 // To know if all neighbor points are within the bounds, 4 bounds checking are enough instead of
1204 // checking 4 bounds for each points.
1205 all_in &= ((best_mv->as_mv.row + ss[i].mv.row)> x->mv_row_min);
1206 all_in &= ((best_mv->as_mv.row + ss[i+1].mv.row) < x->mv_row_max);
1207 all_in &= ((best_mv->as_mv.col + ss[i+2].mv.col) > x->mv_col_min);
1208 all_in &= ((best_mv->as_mv.col + ss[i+3].mv.col) < x->mv_col_max);
// Fast path: batch 4 SADs per sdx4df call.
1212 unsigned int sad_array[4];
1214 for (j = 0 ; j < x->searches_per_step ; j += 4)
1216 unsigned char *block_offset[4];
1218 for (t = 0; t < 4; t++)
1219 block_offset[t] = ss[i+t].offset + best_address;
1221 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
1223 for (t = 0; t < 4; t++, i++)
1225 if (sad_array[t] < bestsad)
1227 this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
1228 this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
1229 sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
1230 mvsadcost, sad_per_bit);
1232 if (sad_array[t] < bestsad)
1234 bestsad = sad_array[t];
// Fallback path: per-point bounds check and scalar SAD.
1243 for (j = 0 ; j < x->searches_per_step ; j++)
1245 // Trap illegal vectors
1246 this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
1247 this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
1249 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
1250 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
1252 check_here = ss[i].offset + best_address;
1253 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
1255 if (thissad < bestsad)
1257 this_mv.as_mv.row = this_row_offset;
1258 this_mv.as_mv.col = this_col_offset;
1259 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1260 mvsadcost, sad_per_bit);
1262 if (thissad < bestsad)
1273 if (best_site != last_site)
1275 best_mv->as_mv.row += ss[best_site].mv.row;
1276 best_mv->as_mv.col += ss[best_site].mv.col;
1277 best_address += ss[best_site].offset;
1278 last_site = best_site;
1280 else if (best_address == in_what)
// Final cost uses errorperbit at 1/8-pel MV resolution.
1284 this_mv.as_mv.row = best_mv->as_mv.row << 3;
1285 this_mv.as_mv.col = best_mv->as_mv.col << 3;
1287 if (bestsad == INT_MAX)
1290 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
1291 + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
// Exhaustive full-pel search: every position within +/-'distance' of
// ref_mv (clamped to the frame's UMV-border MV limits) is scored with
// SAD + MV rate cost; the winner's variance + rate is returned.
// NOTE(review): braces, the INT_MAX fallback return, and the per-row loop
// bookkeeping are among the lines missing from this extract.  Note the loops
// use r < row_max / c < col_max (the max row/col themselves are not visited)
// — presumably intentional; confirm against the full file.
1294 int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1295 int sad_per_bit, int distance,
1296 vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1299 unsigned char *what = (*(b->base_src) + b->src);
1300 int what_stride = b->src_stride;
1301 unsigned char *in_what;
1302 int in_what_stride = d->pre_stride;
1303 int mv_stride = d->pre_stride;
1304 unsigned char *bestaddress;
1305 int_mv *best_mv = &d->bmi.mv;
1307 int bestsad = INT_MAX;
1310 unsigned char *check_here;
1313 int ref_row = ref_mv->as_mv.row;
1314 int ref_col = ref_mv->as_mv.col;
1316 int row_min = ref_row - distance;
1317 int row_max = ref_row + distance;
1318 int col_min = ref_col - distance;
1319 int col_max = ref_col + distance;
1321 int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
// Centre MV arrives in 1/8-pel units; search runs at full-pel.
1323 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1324 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1326 // Work out the mid point for the search
1327 in_what = *(d->base_pre) + d->pre;
1328 bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
1330 best_mv->as_mv.row = ref_row;
1331 best_mv->as_mv.col = ref_col;
1333 // Baseline value at the centre
1334 bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
1335 in_what_stride, 0x7fffffff)
1336 + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1338 // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
1339 if (col_min < x->mv_col_min)
1340 col_min = x->mv_col_min;
1342 if (col_max > x->mv_col_max)
1343 col_max = x->mv_col_max;
1345 if (row_min < x->mv_row_min)
1346 row_min = x->mv_row_min;
1348 if (row_max > x->mv_row_max)
1349 row_max = x->mv_row_max;
1351 for (r = row_min; r < row_max ; r++)
1353 this_mv.as_mv.row = r;
1354 check_here = r * mv_stride + in_what + col_min;
1356 for (c = col_min; c < col_max; c++)
1358 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
1360 this_mv.as_mv.col = c;
1361 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1362 mvsadcost, sad_per_bit);
1364 if (thissad < bestsad)
1367 best_mv->as_mv.row = r;
1368 best_mv->as_mv.col = c;
1369 bestaddress = check_here;
// Final cost uses errorperbit at 1/8-pel MV resolution.
1376 this_mv.as_mv.row = best_mv->as_mv.row << 3;
1377 this_mv.as_mv.col = best_mv->as_mv.col << 3;
1379 if (bestsad < INT_MAX)
1380 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
1381 + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
// Exhaustive full-pel motion search of a +/- "distance" square window around
// ref_mv, accelerated with the 3-candidates-at-a-time SAD routine
// (fn_ptr->sdx3f).  The winning motion vector is stored in d->bmi.mv and the
// function returns the variance of the best match plus its MV rate cost.
// NOTE(review): lines are elided from this extract (the embedded original
// line numbers skip): braces, the trailing parameter (presumably
// "int_mv *center_mv"), and declarations such as r, c, i, this_mv and
// fcenter_mv are not visible here -- verify against the complete file.
1386 int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1387 int sad_per_bit, int distance,
1388 vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
// Source-block pointer/stride and reference-frame pointer/strides.
1391 unsigned char *what = (*(b->base_src) + b->src);
1392 int what_stride = b->src_stride;
1393 unsigned char *in_what;
1394 int in_what_stride = d->pre_stride;
1395 int mv_stride = d->pre_stride;
1396 unsigned char *bestaddress;
1397 int_mv *best_mv = &d->bmi.mv;
1399 int bestsad = INT_MAX;
1402 unsigned char *check_here;
1403 unsigned int thissad;
// Search window bounds, centred on the full-pel reference vector.
1405 int ref_row = ref_mv->as_mv.row;
1406 int ref_col = ref_mv->as_mv.col;
1408 int row_min = ref_row - distance;
1409 int row_max = ref_row + distance;
1410 int col_min = ref_col - distance;
1411 int col_max = ref_col + distance;
1413 unsigned int sad_array[3];
1415 int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
// Full-pel version of the centre vector (sub-pel components >> 3).
1417 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1418 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1420 // Work out the mid point for the search
1421 in_what = *(d->base_pre) + d->pre;
1422 bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
1424 best_mv->as_mv.row = ref_row;
1425 best_mv->as_mv.col = ref_col;
1427 // Baseline value at the centre
1428 bestsad = fn_ptr->sdf(what, what_stride,
1429 bestaddress, in_what_stride, 0x7fffffff)
1430 + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1432 // Apply further limits to prevent us looking using vectors that stretch beyond the UMV border
1433 if (col_min < x->mv_col_min)
1434 col_min = x->mv_col_min;
1436 if (col_max > x->mv_col_max)
1437 col_max = x->mv_col_max;
1439 if (row_min < x->mv_row_min)
1440 row_min = x->mv_row_min;
1442 if (row_max > x->mv_row_max)
1443 row_max = x->mv_row_max;
1445 for (r = row_min; r < row_max ; r++)
1447 this_mv.as_mv.row = r;
1448 check_here = r * mv_stride + in_what + col_min;
// Fast path: evaluate three adjacent column candidates per sdx3f call.
// NOTE(review): the "c = col_min" initialisation is in an elided line.
1451 while ((c + 2) < col_max)
1455 fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
1457 for (i = 0; i < 3; i++)
1459 thissad = sad_array[i];
// Cheap raw-SAD test first; add the MV rate cost only if it could win.
1461 if (thissad < bestsad)
1463 this_mv.as_mv.col = c;
1464 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1465 mvsadcost, sad_per_bit);
1467 if (thissad < bestsad)
1470 best_mv->as_mv.row = r;
1471 best_mv->as_mv.col = c;
1472 bestaddress = check_here;
// Remainder path: the last (col_max - c) columns, one sdf call each.
1483 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
1485 if (thissad < bestsad)
1487 this_mv.as_mv.col = c;
1488 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1489 mvsadcost, sad_per_bit);
1491 if (thissad < bestsad)
1494 best_mv->as_mv.row = r;
1495 best_mv->as_mv.col = c;
1496 bestaddress = check_here;
// Convert the winning full-pel vector back to sub-pel units (<< 3).
1506 this_mv.as_mv.row = best_mv->as_mv.row << 3;
1507 this_mv.as_mv.col = best_mv->as_mv.col << 3;
1509 if (bestsad < INT_MAX)
1510 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
1511 + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
// Exhaustive full-pel motion search like vp8_full_search_sadx3, but rows are
// first scanned 8 candidates at a time with fn_ptr->sdx8f (results in the
// 16-byte-aligned sad_array8), then 3 at a time with sdx3f, then singly.
// The winning motion vector is stored in d->bmi.mv; returns the variance of
// the best match plus its MV rate cost.
// NOTE(review): lines are elided from this extract (the embedded original
// line numbers skip): braces, the trailing parameter (presumably
// "int_mv *center_mv"), and declarations such as r, c, i, this_mv and
// fcenter_mv are not visible here -- verify against the complete file.
1516 int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1517 int sad_per_bit, int distance,
1518 vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
// Source-block pointer/stride and reference-frame pointer/strides.
1521 unsigned char *what = (*(b->base_src) + b->src);
1522 int what_stride = b->src_stride;
1523 unsigned char *in_what;
1524 int in_what_stride = d->pre_stride;
1525 int mv_stride = d->pre_stride;
1526 unsigned char *bestaddress;
1527 int_mv *best_mv = &d->bmi.mv;
1529 int bestsad = INT_MAX;
1532 unsigned char *check_here;
1533 unsigned int thissad;
// Search window bounds, centred on the full-pel reference vector.
1535 int ref_row = ref_mv->as_mv.row;
1536 int ref_col = ref_mv->as_mv.col;
1538 int row_min = ref_row - distance;
1539 int row_max = ref_row + distance;
1540 int col_min = ref_col - distance;
1541 int col_max = ref_col + distance;
// sdx8f writes 8 16-bit SADs; alignment is required by the SIMD variants.
1543 DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
1544 unsigned int sad_array[3];
1546 int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
// Full-pel version of the centre vector (sub-pel components >> 3).
1548 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1549 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1551 // Work out the mid point for the search
1552 in_what = *(d->base_pre) + d->pre;
1553 bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
1555 best_mv->as_mv.row = ref_row;
1556 best_mv->as_mv.col = ref_col;
1558 // Baseline value at the centre
1559 bestsad = fn_ptr->sdf(what, what_stride,
1560 bestaddress, in_what_stride, 0x7fffffff)
1561 + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1563 // Apply further limits to prevent us looking using vectors that stretch beyond the UMV border
1564 if (col_min < x->mv_col_min)
1565 col_min = x->mv_col_min;
1567 if (col_max > x->mv_col_max)
1568 col_max = x->mv_col_max;
1570 if (row_min < x->mv_row_min)
1571 row_min = x->mv_row_min;
1573 if (row_max > x->mv_row_max)
1574 row_max = x->mv_row_max;
1576 for (r = row_min; r < row_max ; r++)
1578 this_mv.as_mv.row = r;
1579 check_here = r * mv_stride + in_what + col_min;
// Fastest path: 8 adjacent column candidates per sdx8f call.
// NOTE(review): the "c = col_min" initialisation is in an elided line.
1582 while ((c + 7) < col_max)
1586 fn_ptr->sdx8f(what, what_stride, check_here , in_what_stride, sad_array8);
1588 for (i = 0; i < 8; i++)
1590 thissad = (unsigned int)sad_array8[i];
// Cheap raw-SAD test first; add the MV rate cost only if it could win.
1592 if (thissad < bestsad)
1594 this_mv.as_mv.col = c;
1595 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1596 mvsadcost, sad_per_bit);
1598 if (thissad < bestsad)
1601 best_mv->as_mv.row = r;
1602 best_mv->as_mv.col = c;
1603 bestaddress = check_here;
// Middle path: 3 candidates at a time for the next columns.
1612 while ((c + 2) < col_max)
1616 fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
1618 for (i = 0; i < 3; i++)
1620 thissad = sad_array[i];
1622 if (thissad < bestsad)
1624 this_mv.as_mv.col = c;
1625 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1626 mvsadcost, sad_per_bit);
1628 if (thissad < bestsad)
1631 best_mv->as_mv.row = r;
1632 best_mv->as_mv.col = c;
1633 bestaddress = check_here;
// Remainder path: the final columns, one sdf call each.
1644 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
1646 if (thissad < bestsad)
1648 this_mv.as_mv.col = c;
1649 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1650 mvsadcost, sad_per_bit);
1652 if (thissad < bestsad)
1655 best_mv->as_mv.row = r;
1656 best_mv->as_mv.col = c;
1657 bestaddress = check_here;
// Convert the winning full-pel vector back to sub-pel units (<< 3).
1666 this_mv.as_mv.row = best_mv->as_mv.row << 3;
1667 this_mv.as_mv.col = best_mv->as_mv.col << 3;
1669 if (bestsad < INT_MAX)
1670 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
1671 + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
// Iterative local refinement: starting from ref_mv, repeatedly test the four
// immediate full-pel neighbours (up, left, right, down) and move to whichever
// improves the SAD + MV cost, for at most search_range iterations or until no
// neighbour improves.  ref_mv is updated in place with the refined vector;
// returns the variance of the final position plus its MV rate cost.
// NOTE(review): lines are elided from this extract: braces, the trailing
// parameter (presumably "int_mv *center_mv"), the declarations of i, j,
// best_site, this_mv and fcenter_mv, the per-iteration "best_site = -1"
// reset, the "bestsad = thissad; best_site = j;" winner bookkeeping, and the
// break when best_site stays -1 are not visible -- verify against the file.
1676 int vp8_refining_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1677 int error_per_bit, int search_range,
1678 vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
// Candidate offsets: above, left, right, below the current position.
1681 MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
1683 short this_row_offset, this_col_offset;
1685 int what_stride = b->src_stride;
1686 int in_what_stride = d->pre_stride;
1687 unsigned char *what = (*(b->base_src) + b->src);
1688 unsigned char *best_address = (unsigned char *)(*(d->base_pre) + d->pre +
1689 (ref_mv->as_mv.row * (d->pre_stride)) + ref_mv->as_mv.col);
1690 unsigned char *check_here;
1691 unsigned int thissad;
1693 unsigned int bestsad = INT_MAX;
1695 int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
// Full-pel version of the centre vector (sub-pel components >> 3).
1698 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1699 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
// Baseline cost at the starting position.
1701 bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
1703 for (i=0; i<search_range; i++)
1707 for (j = 0 ; j < 4 ; j++)
1709 this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
1710 this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
// Only consider neighbours strictly inside the allowed MV range.
1712 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
1713 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
1715 check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
1716 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
// Cheap raw-SAD test first; add the MV rate cost only if it could win.
1718 if (thissad < bestsad)
1720 this_mv.as_mv.row = this_row_offset;
1721 this_mv.as_mv.col = this_col_offset;
1722 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
1724 if (thissad < bestsad)
// No neighbour improved -> converged (break is in an elided line);
// otherwise step the search centre to the winning neighbour.
1733 if (best_site == -1)
1737 ref_mv->as_mv.row += neighbors[best_site].row;
1738 ref_mv->as_mv.col += neighbors[best_site].col;
1739 best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
// Convert the refined full-pel vector back to sub-pel units (<< 3).
1743 this_mv.as_mv.row = ref_mv->as_mv.row << 3;
1744 this_mv.as_mv.col = ref_mv->as_mv.col << 3;
1746 if (bestsad < INT_MAX)
1747 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
1748 + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
// Iterative local refinement like vp8_refining_search_sad, but when all four
// neighbours are inside the allowed MV range it computes their SADs in one
// call with fn_ptr->sdx4df; otherwise it falls back to per-neighbour sdf
// calls with range checks.  ref_mv is refined in place; returns the variance
// of the final position plus its MV rate cost.
// NOTE(review): lines are elided from this extract: braces, declarations of
// i, j, best_site, all_in, this_mv and fcenter_mv, the per-iteration
// "best_site = -1" / "all_in = 1" resets, the winner bookkeeping
// ("bestsad = ...; best_site = j;"), the else joining the two paths, and the
// break on convergence are not visible -- verify against the complete file.
1753 int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
1754 int_mv *ref_mv, int error_per_bit,
1755 int search_range, vp8_variance_fn_ptr_t *fn_ptr,
1756 int *mvcost[2], int_mv *center_mv)
// Candidate offsets: above, left, right, below the current position.
1758 MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
1760 short this_row_offset, this_col_offset;
1762 int what_stride = b->src_stride;
1763 int in_what_stride = d->pre_stride;
1764 unsigned char *what = (*(b->base_src) + b->src);
1765 unsigned char *best_address = (unsigned char *)(*(d->base_pre) + d->pre +
1766 (ref_mv->as_mv.row * (d->pre_stride)) + ref_mv->as_mv.col);
1767 unsigned char *check_here;
1768 unsigned int thissad;
1770 unsigned int bestsad = INT_MAX;
1772 int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
// Full-pel version of the centre vector (sub-pel components >> 3).
1775 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1776 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
// Baseline cost at the starting position.
1778 bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
1780 for (i=0; i<search_range; i++)
// all_in is true only when every one of the four neighbours lies strictly
// inside the MV range, so the bounds checks can be skipped in the fast path.
1785 all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
1786 all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
1787 all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
1788 all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);
// Fast path: one sdx4df call covers all four neighbour positions.
1792 unsigned int sad_array[4];
1793 unsigned char *block_offset[4];
1794 block_offset[0] = best_address - in_what_stride;
1795 block_offset[1] = best_address - 1;
1796 block_offset[2] = best_address + 1;
1797 block_offset[3] = best_address + in_what_stride;
1799 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
1801 for (j = 0; j < 4; j++)
// Cheap raw-SAD test first; add the MV rate cost only if it could win.
1803 if (sad_array[j] < bestsad)
1805 this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
1806 this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
1807 sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
1809 if (sad_array[j] < bestsad)
1811 bestsad = sad_array[j];
// Slow path: per-neighbour sdf calls with explicit range checks.
1819 for (j = 0 ; j < 4 ; j++)
1821 this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
1822 this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
1824 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
1825 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
1827 check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
1828 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
1830 if (thissad < bestsad)
1832 this_mv.as_mv.row = this_row_offset;
1833 this_mv.as_mv.col = this_col_offset;
1834 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
1836 if (thissad < bestsad)
// No neighbour improved -> converged (break is in an elided line);
// otherwise step the search centre to the winning neighbour.
1846 if (best_site == -1)
1850 ref_mv->as_mv.row += neighbors[best_site].row;
1851 ref_mv->as_mv.col += neighbors[best_site].col;
1852 best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
// Convert the refined full-pel vector back to sub-pel units (<< 3).
1856 this_mv.as_mv.row = ref_mv->as_mv.row << 3;
1857 this_mv.as_mv.col = ref_mv->as_mv.col << 3;
1859 if (bestsad < INT_MAX)
1860 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
1861 + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1866 #ifdef ENTROPY_STATS
// Stats-build-only helper: dumps the accumulated mv_ref_ct / mv_mode_cts
// counters (file-scope tables) as a C table "vp8_mode_contexts[6][4]" into
// a generated source file "modecont.c".
// NOTE(review): lines are elided from this extract: braces, the i/j/
// this_prob/overal_prob declarations, the zero-count and zero-probability
// guards around the divisions, and the trailing fclose(f) are not visible.
// Also note the fopen() result is used without a NULL check -- confirm the
// full file, and consider guarding it there.
1867 void print_mode_context(void)
1869     FILE *f = fopen("modecont.c", "w");
1872     fprintf(f, "#include \"entropy.h\"\n");
1873     fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
1876     for (j = 0; j < 6; j++)
1878         fprintf(f, " { // %d \n", j);
1881         for (i = 0; i < 4; i++)
1885             int count; // = mv_ref_ct[j][i][0]+mv_ref_ct[j][i][1];
// Overall probability (out of 256) of the mode over all contexts.
1888             count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
1891             overal_prob = 256 * mv_mode_cts[i][0] / count;
1895             if (overal_prob == 0)
// Per-context probability (out of 256) from the mv_ref_ct table.
1899             count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
1902             this_prob = 256 * mv_ref_ct[j][i][0] / count;
1909             fprintf(f, "%5d, ", this_prob);
1910             //fprintf(f,"%5d, %5d, %8d,", this_prob, overal_prob, (this_prob << 10)/overal_prob);
1911             //fprintf(f,"%8d, ", (this_prob << 10)/overal_prob);
1914         fprintf(f, " },\n");
1921 /* MV ref count ENTROPY_STATS stats code */
1922 #ifdef ENTROPY_STATS
// Reset the stats accumulation tables (the file-scope mv_ref_ct and
// mv_mode_cts arrays) to all zeros before a stats-gathering run.
// NOTE(review): the surrounding braces are in lines elided from this
// extract; the prototype would also conventionally be "(void)" in C.
1923 void init_mv_ref_counts()
1925     vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
1926     vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
// Accumulate MV-mode statistics for one macroblock: bumps entries in the
// file-scope mv_ref_ct[context][mode][hit] and mv_mode_cts[mode][hit]
// tables, indexed by the per-mode contexts in ct[0..3].
// NOTE(review): the conditional structure that selects which of the paired
// [..][0] / [..][1] counters to increment (presumably tests on the
// prediction mode m) lies in lines elided from this extract, as do the
// braces -- the increments below are only the then/else bodies; verify
// against the complete file before relying on this reading.
1929 void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
// Mode slot 0: hit vs. miss counters.
1933     ++mv_ref_ct [ct[0]] [0] [0];
1934     ++mv_mode_cts[0][0];
1938     ++mv_ref_ct [ct[0]] [0] [1];
1939     ++mv_mode_cts[0][1];
// Mode slot 1.
1943     ++mv_ref_ct [ct[1]] [1] [0];
1944     ++mv_mode_cts[1][0];
1948     ++mv_ref_ct [ct[1]] [1] [1];
1949     ++mv_mode_cts[1][1];
// Mode slot 2.
1953     ++mv_ref_ct [ct[2]] [2] [0];
1954     ++mv_mode_cts[2][0];
1958     ++mv_ref_ct [ct[2]] [2] [1];
1959     ++mv_mode_cts[2][1];
// Mode slot 3.
1963     ++mv_ref_ct [ct[3]] [3] [0];
1964     ++mv_mode_cts[3][0];
1968     ++mv_ref_ct [ct[3]] [3] [1];
1969     ++mv_mode_cts[3][1];
1976 #endif/* END MV ref count ENTROPY_STATS stats code */