/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
11 #include "./vp8_rtcd.h"
12 #include "./vpx_dsp_rtcd.h"
15 #include "vpx_mem/vpx_mem.h"
16 #include "vpx_config.h"
20 #include "vp8/common/findnearmv.h"
21 #include "vp8/common/common.h"
22 #include "vpx_dsp/vpx_dsp_common.h"
24 int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) {
25 /* MV costing is based on the distribution of vectors in the previous
26 * frame and as such will tend to over state the cost of vectors. In
27 * addition coding a new vector can have a knock on effect on the cost
28 * of subsequent vectors and the quality of prediction from NEAR and
29 * NEAREST for subsequent blocks. The "Weight" parameter allows, to a
30 * limited extent, for some account to be taken of these factors.
32 const int mv_idx_row =
33 clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals);
34 const int mv_idx_col =
35 clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals);
36 return ((mvcost[0][mv_idx_row] + mvcost[1][mv_idx_col]) * Weight) >> 7;
39 static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2],
41 /* Ignore mv costing if mvcost is NULL */
43 const int mv_idx_row =
44 clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals);
45 const int mv_idx_col =
46 clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals);
47 return ((mvcost[0][mv_idx_row] + mvcost[1][mv_idx_col]) * error_per_bit +
54 static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2],
56 /* Calculate sad error cost on full pixel basis. */
57 /* Ignore mv costing if mvsadcost is NULL */
59 return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
60 mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) *
68 void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
70 int search_site_count = 0;
72 /* Generate offsets for 4 search sites per step. */
74 x->ss[search_site_count].mv.col = 0;
75 x->ss[search_site_count].mv.row = 0;
76 x->ss[search_site_count].offset = 0;
80 /* Compute offsets for search sites. */
81 x->ss[search_site_count].mv.col = 0;
82 x->ss[search_site_count].mv.row = -Len;
83 x->ss[search_site_count].offset = -Len * stride;
86 /* Compute offsets for search sites. */
87 x->ss[search_site_count].mv.col = 0;
88 x->ss[search_site_count].mv.row = Len;
89 x->ss[search_site_count].offset = Len * stride;
92 /* Compute offsets for search sites. */
93 x->ss[search_site_count].mv.col = -Len;
94 x->ss[search_site_count].mv.row = 0;
95 x->ss[search_site_count].offset = -Len;
98 /* Compute offsets for search sites. */
99 x->ss[search_site_count].mv.col = Len;
100 x->ss[search_site_count].mv.row = 0;
101 x->ss[search_site_count].offset = Len;
108 x->ss_count = search_site_count;
109 x->searches_per_step = 4;
112 void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) {
114 int search_site_count = 0;
116 /* Generate offsets for 8 search sites per step. */
117 Len = MAX_FIRST_STEP;
118 x->ss[search_site_count].mv.col = 0;
119 x->ss[search_site_count].mv.row = 0;
120 x->ss[search_site_count].offset = 0;
124 /* Compute offsets for search sites. */
125 x->ss[search_site_count].mv.col = 0;
126 x->ss[search_site_count].mv.row = -Len;
127 x->ss[search_site_count].offset = -Len * stride;
130 /* Compute offsets for search sites. */
131 x->ss[search_site_count].mv.col = 0;
132 x->ss[search_site_count].mv.row = Len;
133 x->ss[search_site_count].offset = Len * stride;
136 /* Compute offsets for search sites. */
137 x->ss[search_site_count].mv.col = -Len;
138 x->ss[search_site_count].mv.row = 0;
139 x->ss[search_site_count].offset = -Len;
142 /* Compute offsets for search sites. */
143 x->ss[search_site_count].mv.col = Len;
144 x->ss[search_site_count].mv.row = 0;
145 x->ss[search_site_count].offset = Len;
148 /* Compute offsets for search sites. */
149 x->ss[search_site_count].mv.col = -Len;
150 x->ss[search_site_count].mv.row = -Len;
151 x->ss[search_site_count].offset = -Len * stride - Len;
154 /* Compute offsets for search sites. */
155 x->ss[search_site_count].mv.col = Len;
156 x->ss[search_site_count].mv.row = -Len;
157 x->ss[search_site_count].offset = -Len * stride + Len;
160 /* Compute offsets for search sites. */
161 x->ss[search_site_count].mv.col = -Len;
162 x->ss[search_site_count].mv.row = Len;
163 x->ss[search_site_count].offset = Len * stride - Len;
166 /* Compute offsets for search sites. */
167 x->ss[search_site_count].mv.col = Len;
168 x->ss[search_site_count].mv.row = Len;
169 x->ss[search_site_count].offset = Len * stride + Len;
176 x->ss_count = search_site_count;
177 x->searches_per_step = 8;
/* To avoid the penalty for crossing cache-line read, preload the reference
 * area in a small buffer, which is aligned to make sure there won't be crossing
 * cache-line read while reading from this buffer. This reduced the cpu
 * cycles spent on reading ref data in sub-pixel filter functions.
 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
 * could reduce the area.
 */
/* Helper macros for the iterative sub-pixel search below.  These expand
 * inside vp8_find_best_sub_pixel_step_iteratively and rely on its locals
 * (mvcost, error_per_bit, rr, rc, y, y_stride, offset, z, b, vfp, sse,
 * thismse, besterr, br, bc, distortion, sse1, minc/maxc/minr/maxr).
 * Fixes in this extract: the macro continuation lines were missing, leaving
 * every multi-line macro truncated. */
/* estimated cost of a motion vector (r,c) */
#define MVC(r, c)                                                             \
  (mvcost                                                                     \
       ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128) >> 8 \
       : 0)
/* pointer to predictor base of a motionvector */
#define PRE(r, c) (y + (((r) >> 2) * y_stride + ((c) >> 2) - (offset)))
/* convert motion vector component to offset for svf calc */
#define SP(x) (((x)&3) << 1)
/* returns subpixel variance error function. */
#define DIST(r, c) \
  vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)
#define IFMVCV(r, c, s, e) \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
/* returns distortion + motion vector cost */
#define ERR(r, c) (MVC(r, c) + DIST(r, c))
/* checks if (r,c) has better score than previous best */
#define CHECK_BETTER(v, r, c)                           \
  IFMVCV(r, c,                                          \
         {                                              \
           thismse = DIST(r, c);                        \
           if ((v = (MVC(r, c) + thismse)) < besterr) { \
             besterr = v;                               \
             br = r;                                    \
             bc = c;                                    \
             *distortion = thismse;                     \
             *sse1 = sse;                               \
           }                                            \
         },                                             \
         v = UINT_MAX;)
/*
 * Iterative sub-pixel motion-vector refinement.  Starting from the best
 * full-pel vector in *bestmv, runs up to 4 half-pel passes (step 2 in
 * quarter-pel units) and then up to 4 quarter-pel passes (step 1), each
 * probing left/right/up/down plus the best diagonal via CHECK_BETTER and
 * stopping early once the centre stays best.  *bestmv is converted to
 * 1/8-pel units; *distortion and *sse1 receive the winner's variance/SSE.
 * On x86 the search window is first copied into the aligned xd->y_buf
 * scratch buffer (see the cache-line comment above).
 * NOTE(review): the embedded original line numbers jump (e.g. 227 -> 229),
 * so interior lines (declarations, braces, returns, #else/#endif arms) are
 * missing from this extract — it is not compilable as shown and must be
 * restored from the upstream file before any behavioural change.
 */
221 int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
222 int_mv *bestmv, int_mv *ref_mv,
224 const vp8_variance_fn_ptr_t *vfp,
225 int *mvcost[2], int *distortion,
226 unsigned int *sse1) {
227 unsigned char *z = (*(b->base_src) + b->src);
/* rr/rc: reference mv in half-pel; br/bc: current best in quarter-pel. */
229 int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
230 int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
231 int tr = br, tc = bc;
232 unsigned int besterr;
233 unsigned int left, right, up, down, diag;
235 unsigned int whichdir;
236 unsigned int halfiters = 4;
237 unsigned int quarteriters = 4;
/* Search bounds in quarter-pel units, limited by both the frame's UMV
 * border and the codable long-mv range around the reference vector. */
240 int minc = VPXMAX(x->mv_col_min * 4,
241 (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
242 int maxc = VPXMIN(x->mv_col_max * 4,
243 (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
244 int minr = VPXMAX(x->mv_row_min * 4,
245 (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
246 int maxr = VPXMIN(x->mv_row_max * 4,
247 (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
251 int pre_stride = x->e_mbd.pre.y_stride;
252 unsigned char *base_pre = x->e_mbd.pre.y_buffer;
/* x86 path: clamp the copy window to the legal mv range, then stage the
 * reference area in the aligned scratch buffer to avoid cache-line splits. */
254 #if ARCH_X86 || ARCH_X86_64
255 MACROBLOCKD *xd = &x->e_mbd;
256 unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
259 int buf_r1, buf_r2, buf_c1;
261 /* Clamping to avoid out-of-range data access */
262 buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)
263 ? (bestmv->as_mv.row - x->mv_row_min)
265 buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)
266 ? (x->mv_row_max - bestmv->as_mv.row)
268 buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)
269 ? (bestmv->as_mv.col - x->mv_col_min)
273 /* Copy to intermediate buffer before searching. */
274 vfp->copymem(y_0 - buf_c1 - pre_stride * buf_r1, pre_stride, xd->y_buf,
275 y_stride, 16 + buf_r1 + buf_r2);
276 y = xd->y_buf + y_stride * buf_r1 + buf_c1;
278 unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
280 y_stride = pre_stride;
283 offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
/* Switch bestmv to 1/8-pel units for the cost/refinement stage. */
286 bestmv->as_mv.row *= 8;
287 bestmv->as_mv.col *= 8;
289 /* calculate central point error */
290 besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
291 *distortion = besterr;
292 besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
294 /* TODO: Each subsequent iteration checks at least one point in common
295 * with the last iteration could be 2 ( if diag selected)
297 while (--halfiters) {
299 CHECK_BETTER(left, tr, tc - 2);
300 CHECK_BETTER(right, tr, tc + 2);
301 CHECK_BETTER(up, tr - 2, tc);
302 CHECK_BETTER(down, tr + 2, tc);
304 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
307 case 0: CHECK_BETTER(diag, tr - 2, tc - 2); break;
308 case 1: CHECK_BETTER(diag, tr - 2, tc + 2); break;
309 case 2: CHECK_BETTER(diag, tr + 2, tc - 2); break;
310 case 3: CHECK_BETTER(diag, tr + 2, tc + 2); break;
313 /* no reason to check the same one again. */
314 if (tr == br && tc == bc) break;
320 /* TODO: Each subsequent iteration checks at least one point in common
321 * with the last iteration could be 2 ( if diag selected)
325 while (--quarteriters) {
326 CHECK_BETTER(left, tr, tc - 1);
327 CHECK_BETTER(right, tr, tc + 1);
328 CHECK_BETTER(up, tr - 1, tc);
329 CHECK_BETTER(down, tr + 1, tc);
331 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
334 case 0: CHECK_BETTER(diag, tr - 1, tc - 1); break;
335 case 1: CHECK_BETTER(diag, tr - 1, tc + 1); break;
336 case 2: CHECK_BETTER(diag, tr + 1, tc - 1); break;
337 case 3: CHECK_BETTER(diag, tr + 1, tc + 1); break;
340 /* no reason to check the same one again. */
341 if (tr == br && tc == bc) break;
/* br/bc are in quarter-pel; *2 yields 1/8-pel output units. */
347 bestmv->as_mv.row = br * 2;
348 bestmv->as_mv.col = bc * 2;
/* Range sanity check on the final vector; the handler body (presumably the
 * INT_MAX bail-out) is among the elided lines — confirm against upstream. */
350 if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
351 (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) {
/*
 * One-step sub-pixel search: starting from the full-pel *bestmv, explicitly
 * evaluates the four half-pel neighbours plus the best half-pel diagonal,
 * then repeats the pattern at quarter-pel around the half-pel winner.  Each
 * candidate's score is svf() variance plus mv_err_cost(); *bestmv ends in
 * 1/8-pel units and *distortion/*sse1 track the best candidate.
 * NOTE(review): the embedded original line numbers jump (e.g. 371 -> 374),
 * so declarations (startmv, this_mv, thismse, sse, whichdir, y_stride),
 * many braces, the switch statements and the return are missing from this
 * extract — not compilable as shown; restore from upstream before editing.
 */
365 int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
366 int_mv *bestmv, int_mv *ref_mv,
368 const vp8_variance_fn_ptr_t *vfp,
369 int *mvcost[2], int *distortion,
370 unsigned int *sse1) {
371 int bestmse = INT_MAX;
374 unsigned char *z = (*(b->base_src) + b->src);
375 int left, right, up, down, diag;
380 int pre_stride = x->e_mbd.pre.y_stride;
381 unsigned char *base_pre = x->e_mbd.pre.y_buffer;
/* x86 path: stage an 18x32 window in the aligned scratch buffer. */
383 #if ARCH_X86 || ARCH_X86_64
384 MACROBLOCKD *xd = &x->e_mbd;
385 unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
390 /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
391 vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
392 y = xd->y_buf + y_stride + 1;
394 unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
396 y_stride = pre_stride;
/* Work in 1/8-pel units from here on. */
400 bestmv->as_mv.row *= 8;
401 bestmv->as_mv.col *= 8;
404 /* calculate central point error */
405 bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
406 *distortion = bestmse;
407 bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
409 /* go left then right and check error */
410 this_mv.as_mv.row = startmv.as_mv.row;
411 this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
412 /* "halfpix" horizontal variance */
413 thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
414 left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
416 if (left < bestmse) {
419 *distortion = thismse;
423 this_mv.as_mv.col += 8;
424 /* "halfpix" horizontal variance */
425 thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
426 right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
428 if (right < bestmse) {
431 *distortion = thismse;
435 /* go up then down and check error */
436 this_mv.as_mv.col = startmv.as_mv.col;
437 this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
438 /* "halfpix" vertical variance */
439 thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
440 up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
445 *distortion = thismse;
449 this_mv.as_mv.row += 8;
450 /* "halfpix" vertical variance */
451 thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
452 down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
454 if (down < bestmse) {
457 *distortion = thismse;
/* Pick the diagonal in the quadrant of the best horizontal/vertical pair;
 * the enclosing switch (whichdir) lines are elided in this extract. */
461 /* now check 1 more diagonal */
462 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
467 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
468 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
469 /* "halfpix" horizontal/vertical variance */
471 vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
474 this_mv.as_mv.col += 4;
475 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
476 /* "halfpix" horizontal/vertical variance */
477 thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
480 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
481 this_mv.as_mv.row += 4;
482 /* "halfpix" horizontal/vertical variance */
483 thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
487 this_mv.as_mv.col += 4;
488 this_mv.as_mv.row += 4;
489 /* "halfpix" horizontal/vertical variance */
490 thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
494 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
496 if (diag < bestmse) {
499 *distortion = thismse;
/* Re-anchor the predictor pointer at the half-pel winner before the
 * quarter-pel pass. */
503 /* time to check quarter pels. */
504 if (bestmv->as_mv.row < startmv.as_mv.row) y -= y_stride;
506 if (bestmv->as_mv.col < startmv.as_mv.col) y--;
510 /* go left then right and check error */
511 this_mv.as_mv.row = startmv.as_mv.row;
513 if (startmv.as_mv.col & 7) {
514 this_mv.as_mv.col = startmv.as_mv.col - 2;
515 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
516 this_mv.as_mv.row & 7, z, b->src_stride, &sse);
518 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
519 thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
520 b->src_stride, &sse);
523 left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
525 if (left < bestmse) {
528 *distortion = thismse;
532 this_mv.as_mv.col += 4;
533 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
534 z, b->src_stride, &sse);
535 right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
537 if (right < bestmse) {
540 *distortion = thismse;
544 /* go up then down and check error */
545 this_mv.as_mv.col = startmv.as_mv.col;
547 if (startmv.as_mv.row & 7) {
548 this_mv.as_mv.row = startmv.as_mv.row - 2;
549 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
550 this_mv.as_mv.row & 7, z, b->src_stride, &sse);
552 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
553 thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
554 b->src_stride, &sse);
557 up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
562 *distortion = thismse;
566 this_mv.as_mv.row += 4;
567 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
568 z, b->src_stride, &sse);
569 down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
571 if (down < bestmse) {
574 *distortion = thismse;
/* Quarter-pel diagonal: the (col & 7)/(row & 7) tests distinguish whether
 * the current component sits on a pixel boundary, which changes both the
 * candidate coordinate and which predictor row/col svf() must read. */
578 /* now check 1 more diagonal */
579 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
586 if (startmv.as_mv.row & 7) {
587 this_mv.as_mv.row -= 2;
589 if (startmv.as_mv.col & 7) {
590 this_mv.as_mv.col -= 2;
591 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
592 this_mv.as_mv.row & 7, z, b->src_stride, &sse);
594 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
595 thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
596 b->src_stride, &sse);
599 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
601 if (startmv.as_mv.col & 7) {
602 this_mv.as_mv.col -= 2;
603 thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6,
604 z, b->src_stride, &sse);
606 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
607 thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride,
614 this_mv.as_mv.col += 2;
616 if (startmv.as_mv.row & 7) {
617 this_mv.as_mv.row -= 2;
618 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
619 this_mv.as_mv.row & 7, z, b->src_stride, &sse);
621 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
622 thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
623 b->src_stride, &sse);
628 this_mv.as_mv.row += 2;
630 if (startmv.as_mv.col & 7) {
631 this_mv.as_mv.col -= 2;
632 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
633 this_mv.as_mv.row & 7, z, b->src_stride, &sse);
635 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
636 thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
637 b->src_stride, &sse);
642 this_mv.as_mv.col += 2;
643 this_mv.as_mv.row += 2;
644 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
645 this_mv.as_mv.row & 7, z, b->src_stride, &sse);
649 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
651 if (diag < bestmse) {
654 *distortion = thismse;
/*
 * Half-pel-only variant of the sub-pixel search: evaluates the four
 * half-pel neighbours of the full-pel *bestmv plus the best diagonal, and
 * stops there (no quarter-pel pass).  Scores are svf() variance plus
 * mv_err_cost(); *bestmv ends in 1/8-pel units and *distortion/*sse1 track
 * the winner.
 * NOTE(review): the embedded original line numbers jump (e.g. 667 -> 670),
 * so declarations (startmv, this_mv, thismse, sse, whichdir, y_stride),
 * braces, the diagonal switch and the return are missing from this
 * extract — not compilable as shown; restore from upstream before editing.
 */
661 int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
662 int_mv *bestmv, int_mv *ref_mv,
664 const vp8_variance_fn_ptr_t *vfp,
665 int *mvcost[2], int *distortion,
666 unsigned int *sse1) {
667 int bestmse = INT_MAX;
670 unsigned char *z = (*(b->base_src) + b->src);
671 int left, right, up, down, diag;
676 int pre_stride = x->e_mbd.pre.y_stride;
677 unsigned char *base_pre = x->e_mbd.pre.y_buffer;
/* x86 path: stage an 18x32 window in the aligned scratch buffer. */
679 #if ARCH_X86 || ARCH_X86_64
680 MACROBLOCKD *xd = &x->e_mbd;
681 unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
686 /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
687 vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
688 y = xd->y_buf + y_stride + 1;
690 unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
692 y_stride = pre_stride;
/* Work in 1/8-pel units from here on. */
696 bestmv->as_mv.row *= 8;
697 bestmv->as_mv.col *= 8;
700 /* calculate central point error */
701 bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
702 *distortion = bestmse;
703 bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
705 /* go left then right and check error */
706 this_mv.as_mv.row = startmv.as_mv.row;
707 this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
708 /* "halfpix" horizontal variance */
709 thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
710 left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
712 if (left < bestmse) {
715 *distortion = thismse;
719 this_mv.as_mv.col += 8;
720 /* "halfpix" horizontal variance */
721 thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
722 right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
724 if (right < bestmse) {
727 *distortion = thismse;
731 /* go up then down and check error */
732 this_mv.as_mv.col = startmv.as_mv.col;
733 this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
734 /* "halfpix" vertical variance */
735 thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
736 up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
741 *distortion = thismse;
745 this_mv.as_mv.row += 8;
746 /* "halfpix" vertical variance */
747 thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
748 down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
750 if (down < bestmse) {
753 *distortion = thismse;
/* Diagonal in the quadrant of the best horizontal/vertical pair; the
 * enclosing switch (whichdir) lines are elided in this extract. */
757 /* now check 1 more diagonal - */
758 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
763 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
764 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
765 /* "halfpix" horizontal/vertical variance */
767 vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
770 this_mv.as_mv.col += 4;
771 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
772 /* "halfpix" horizontal/vertical variance */
773 thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
776 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
777 this_mv.as_mv.row += 4;
778 /* "halfpix" horizontal/vertical variance */
779 thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
783 this_mv.as_mv.col += 4;
784 this_mv.as_mv.row += 4;
785 /* "halfpix" horizontal/vertical variance */
786 thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
790 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
792 if (diag < bestmse) {
795 *distortion = thismse;
/* Helper macros for the SAD-based searches below (hex/diamond).  They rely
 * on locals of the enclosing function: all_in, br, bc, this_mv, thissad,
 * bestsad, best_site, i, fcenter_mv, mvsadcost, sad_per_bit, x.
 * Fixes in this extract: the macro continuation lines were missing, leaving
 * each multi-line macro truncated. */
/* CHECK_BOUNDS: set all_in iff every point within |range| of (br,bc) is a
 * legal motion vector, so per-point clipping can be skipped. */
#define CHECK_BOUNDS(range)                    \
  {                                            \
    all_in = 1;                                \
    all_in &= ((br - range) >= x->mv_row_min); \
    all_in &= ((br + range) <= x->mv_row_max); \
    all_in &= ((bc - range) >= x->mv_col_min); \
    all_in &= ((bc + range) <= x->mv_col_max); \
  }

/* CHECK_POINT: skip (continue) the current candidate if this_mv is outside
 * the legal mv range. */
#define CHECK_POINT                                  \
  {                                                  \
    if (this_mv.as_mv.col < x->mv_col_min) continue; \
    if (this_mv.as_mv.col > x->mv_col_max) continue; \
    if (this_mv.as_mv.row < x->mv_row_min) continue; \
    if (this_mv.as_mv.row > x->mv_row_max) continue; \
  }

/* CHECK_BETTER: accept the candidate if its raw SAD beats bestsad both
 * before and after adding the mv rate cost (cheap early reject first). */
#define CHECK_BETTER                                                     \
  {                                                                      \
    if (thissad < bestsad) {                                             \
      thissad +=                                                         \
          mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); \
      if (thissad < bestsad) {                                           \
        bestsad = thissad;                                               \
        best_site = i;                                                   \
      }                                                                  \
    }                                                                    \
  }
831 static const MV next_chkpts[6][3] = {
832 { { -2, 0 }, { -1, -2 }, { 1, -2 } }, { { -1, -2 }, { 1, -2 }, { 2, 0 } },
833 { { 1, -2 }, { 2, 0 }, { 1, 2 } }, { { 2, 0 }, { 1, 2 }, { -1, 2 } },
834 { { 1, 2 }, { -1, 2 }, { -2, 0 } }, { { -1, 2 }, { -2, 0 }, { -1, -2 } }
/*
 * Hexagon full-pel motion search: probes the 6-point hexagon around the
 * clamped ref_mv, walks the winning vertex outward via next_chkpts (only 3
 * new points per step), then polishes with the 4-neighbour small diamond.
 * Candidate score is sdf() SAD plus mvsad_err_cost() against center_mv
 * (converted to full-pel in fcenter_mv).  The winner is written to best_mv
 * in full-pel units.
 * NOTE(review): the embedded original line numbers jump (e.g. 858 -> 866),
 * so declarations (hex[], this_mv, fcenter_mv, i/j/k, br/bc, best_site,
 * hex_range/dia_range), CHECK_BOUNDS/CHECK_POINT/CHECK_BETTER call sites,
 * the range assignments under CONFIG_MULTI_RES_ENCODING, braces and the
 * return are missing from this extract — not compilable as shown.
 */
837 int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
838 int_mv *best_mv, int search_param, int sad_per_bit,
839 const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2],
840 int *mvcost[2], int_mv *center_mv) {
842 { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 }
844 MV neighbors[4] = { { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } };
847 unsigned char *what = (*(b->base_src) + b->src);
848 int what_stride = b->src_stride;
849 int pre_stride = x->e_mbd.pre.y_stride;
850 unsigned char *base_pre = x->e_mbd.pre.y_buffer;
852 int in_what_stride = pre_stride;
855 unsigned int bestsad;
856 unsigned int thissad;
857 unsigned char *base_offset;
858 unsigned char *this_offset;
/* center_mv is in 1/8-pel units; >> 3 converts to full-pel for SAD costing. */
866 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
867 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
871 /* adjust ref_mv to make sure it is within MV range */
872 vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
874 br = ref_mv->as_mv.row;
875 bc = ref_mv->as_mv.col;
877 /* Work out the start point for the search */
878 base_offset = (unsigned char *)(base_pre + d->offset);
879 this_offset = base_offset + (br * (pre_stride)) + bc;
880 this_mv.as_mv.row = br;
881 this_mv.as_mv.col = bc;
882 bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride) +
883 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
885 #if CONFIG_MULTI_RES_ENCODING
886 /* Lower search range based on prediction info */
887 if (search_param >= 6)
889 else if (search_param >= 5)
891 else if (search_param >= 4)
893 else if (search_param >= 3)
895 else if (search_param >= 2)
897 else if (search_param >= 1)
/* First hexagon pass: two loop variants exist — the first appears to be
 * the all-in (no per-point clipping) path, the second the clipped path;
 * the branch that selects between them is elided. */
909 for (i = 0; i < 6; ++i) {
910 this_mv.as_mv.row = br + hex[i].row;
911 this_mv.as_mv.col = bc + hex[i].col;
912 this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
914 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
918 for (i = 0; i < 6; ++i) {
919 this_mv.as_mv.row = br + hex[i].row;
920 this_mv.as_mv.col = bc + hex[i].col;
922 this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
924 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
929 if (best_site == -1) {
932 br += hex[best_site].row;
933 bc += hex[best_site].col;
/* Refinement: walk the winning vertex, testing only the 3 fresh points. */
937 for (j = 1; j < hex_range; ++j) {
942 for (i = 0; i < 3; ++i) {
943 this_mv.as_mv.row = br + next_chkpts[k][i].row;
944 this_mv.as_mv.col = bc + next_chkpts[k][i].col;
945 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
947 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
951 for (i = 0; i < 3; ++i) {
952 this_mv.as_mv.row = br + next_chkpts[k][i].row;
953 this_mv.as_mv.col = bc + next_chkpts[k][i].col;
955 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
957 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
962 if (best_site == -1) {
965 br += next_chkpts[k][best_site].row;
966 bc += next_chkpts[k][best_site].col;
976 /* check 4 1-away neighbors */
978 for (j = 0; j < dia_range; ++j) {
983 for (i = 0; i < 4; ++i) {
984 this_mv.as_mv.row = br + neighbors[i].row;
985 this_mv.as_mv.col = bc + neighbors[i].col;
986 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
988 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
992 for (i = 0; i < 4; ++i) {
993 this_mv.as_mv.row = br + neighbors[i].row;
994 this_mv.as_mv.col = bc + neighbors[i].col;
996 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
998 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
1003 if (best_site == -1) {
1006 br += neighbors[best_site].row;
1007 bc += neighbors[best_site].col;
1011 best_mv->as_mv.row = br;
1012 best_mv->as_mv.col = bc;
/*
 * C reference diamond search: walks the precomputed search-site table x->ss
 * (built by vp8_init_dsmotion/init3smotion above), starting at a step size
 * chosen by search_param, moving the centre to the best site each step.
 * Candidates are scored by sdf() SAD plus mvsad_err_cost(); *num00
 * (elided here) presumably counts steps where the centre stayed best.
 * Returns vf() variance of the winner plus mv_err_cost() in 1/8-pel units.
 * NOTE(review): the embedded original line numbers jump (e.g. 1032 -> 1037),
 * so declarations (i, j, step, ss, tot_steps, ref_row/ref_col, this_mv,
 * fcenter_mv, best_site, last_site, mvsadcost), several braces and the
 * per-site bookkeeping are missing from this extract — not compilable as
 * shown.
 */
1020 int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1021 int_mv *best_mv, int search_param, int sad_per_bit,
1022 int *num00, vp8_variance_fn_ptr_t *fn_ptr,
1023 int *mvcost[2], int_mv *center_mv) {
1026 unsigned char *what = (*(b->base_src) + b->src);
1027 int what_stride = b->src_stride;
1028 unsigned char *in_what;
1029 int pre_stride = x->e_mbd.pre.y_stride;
1030 unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1031 int in_what_stride = pre_stride;
1032 unsigned char *best_address;
1037 unsigned int bestsad;
1038 unsigned int thissad;
1044 int this_row_offset;
1045 int this_col_offset;
1048 unsigned char *check_here;
1053 mvsadcost[0] = x->mvsadcost[0];
1054 mvsadcost[1] = x->mvsadcost[1];
/* center_mv is 1/8-pel; >> 3 gives the full-pel anchor for SAD costing. */
1055 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1056 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1058 vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
1060 ref_row = ref_mv->as_mv.row;
1061 ref_col = ref_mv->as_mv.col;
1063 best_mv->as_mv.row = ref_row;
1064 best_mv->as_mv.col = ref_col;
1066 /* Work out the start point for the search */
1067 in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
1069 best_address = in_what;
1071 /* Check the starting position */
1072 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
1073 mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1075 /* search_param determines the length of the initial step and hence
1076 * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
1077 * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
1079 ss = &x->ss[search_param * x->searches_per_step];
1080 tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1084 for (step = 0; step < tot_steps; ++step) {
1085 for (j = 0; j < x->searches_per_step; ++j) {
1086 /* Trap illegal vectors */
1087 this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
1088 this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
1090 if ((this_col_offset > x->mv_col_min) &&
1091 (this_col_offset < x->mv_col_max) &&
1092 (this_row_offset > x->mv_row_min) &&
1093 (this_row_offset < x->mv_row_max))
/* ss[i].offset is the precomputed buffer delta for this mv delta. */
1096 check_here = ss[i].offset + best_address;
1097 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1099 if (thissad < bestsad) {
1100 this_mv.as_mv.row = this_row_offset;
1101 this_mv.as_mv.col = this_col_offset;
1103 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1105 if (thissad < bestsad) {
1115 if (best_site != last_site) {
1116 best_mv->as_mv.row += ss[best_site].mv.row;
1117 best_mv->as_mv.col += ss[best_site].mv.col;
1118 best_address += ss[best_site].offset;
1119 last_site = best_site;
1120 } else if (best_address == in_what) {
/* Convert the full-pel winner to 1/8-pel for the variance + rate return. */
1125 this_mv.as_mv.row = best_mv->as_mv.row << 3;
1126 this_mv.as_mv.col = best_mv->as_mv.col << 3;
1128 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
1129 mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
/*
 * SIMD-assisted diamond search: same algorithm and result contract as
 * vp8_diamond_search_sad_c, but when all four candidate sites of a step are
 * known in-bounds (the CHECK_BOUNDS-style all_in test) it scores them in
 * one call to fn_ptr->sdx4df; otherwise it falls back to the scalar
 * per-point path.  Returns vf() variance of the winner plus mv_err_cost()
 * in 1/8-pel units.
 * NOTE(review): the embedded original line numbers jump (e.g. 1144 -> 1149),
 * so declarations (i, j, t, step, ss, tot_steps, ref_row/ref_col, this_mv,
 * fcenter_mv, best_site, last_site, all_in, mvsadcost), the if/else that
 * selects the sdx4df vs scalar path, and several braces are missing from
 * this extract — not compilable as shown.
 */
1132 int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1133 int_mv *best_mv, int search_param, int sad_per_bit,
1134 int *num00, vp8_variance_fn_ptr_t *fn_ptr,
1135 int *mvcost[2], int_mv *center_mv) {
1138 unsigned char *what = (*(b->base_src) + b->src);
1139 int what_stride = b->src_stride;
1140 unsigned char *in_what;
1141 int pre_stride = x->e_mbd.pre.y_stride;
1142 unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1143 int in_what_stride = pre_stride;
1144 unsigned char *best_address;
1149 unsigned int bestsad;
1150 unsigned int thissad;
1156 int this_row_offset;
1157 int this_col_offset;
1160 unsigned char *check_here;
1165 mvsadcost[0] = x->mvsadcost[0];
1166 mvsadcost[1] = x->mvsadcost[1];
/* center_mv is 1/8-pel; >> 3 gives the full-pel anchor for SAD costing. */
1167 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1168 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1170 vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
1172 ref_row = ref_mv->as_mv.row;
1173 ref_col = ref_mv->as_mv.col;
1175 best_mv->as_mv.row = ref_row;
1176 best_mv->as_mv.col = ref_col;
1178 /* Work out the start point for the search */
1179 in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
1181 best_address = in_what;
1183 /* Check the starting position */
1184 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
1185 mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1187 /* search_param determines the length of the initial step and hence the
1188 * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 =
1189 * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
1191 ss = &x->ss[search_param * x->searches_per_step];
1192 tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1196 for (step = 0; step < tot_steps; ++step) {
1199 /* To know if all neighbor points are within the bounds, 4 bounds
1200 * checking are enough instead of checking 4 bounds for each
/* The 4 checks use the extreme site of each direction in this step's
 * group of 4, so one pass bounds all 4 candidates. */
1203 all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min);
1204 all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max);
1205 all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min);
1206 all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max);
/* Fast path: batch 4 SADs per sdx4df call. */
1209 unsigned int sad_array[4];
1211 for (j = 0; j < x->searches_per_step; j += 4) {
1212 const unsigned char *block_offset[4];
1214 for (t = 0; t < 4; ++t) {
1215 block_offset[t] = ss[i + t].offset + best_address;
1218 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
1221 for (t = 0; t < 4; t++, i++) {
1222 if (sad_array[t] < bestsad) {
1223 this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
1224 this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
1226 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1228 if (sad_array[t] < bestsad) {
1229 bestsad = sad_array[t];
/* Scalar fallback path: identical to vp8_diamond_search_sad_c's loop. */
1236 for (j = 0; j < x->searches_per_step; ++j) {
1237 /* Trap illegal vectors */
1238 this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
1239 this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
1241 if ((this_col_offset > x->mv_col_min) &&
1242 (this_col_offset < x->mv_col_max) &&
1243 (this_row_offset > x->mv_row_min) &&
1244 (this_row_offset < x->mv_row_max)) {
1245 check_here = ss[i].offset + best_address;
1246 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1248 if (thissad < bestsad) {
1249 this_mv.as_mv.row = this_row_offset;
1250 this_mv.as_mv.col = this_col_offset;
1252 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1254 if (thissad < bestsad) {
1264 if (best_site != last_site) {
1265 best_mv->as_mv.row += ss[best_site].mv.row;
1266 best_mv->as_mv.col += ss[best_site].mv.col;
1267 best_address += ss[best_site].offset;
1268 last_site = best_site;
1269 } else if (best_address == in_what) {
/* Convert the full-pel winner to 1/8-pel for the variance + rate return. */
1274 this_mv.as_mv.row = best_mv->as_mv.row * 8;
1275 this_mv.as_mv.col = best_mv->as_mv.col * 8;
1277 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
1278 mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1281 int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1282 int sad_per_bit, int distance,
1283 vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1284 int_mv *center_mv) {
1285 unsigned char *what = (*(b->base_src) + b->src);
1286 int what_stride = b->src_stride;
1287 unsigned char *in_what;
1288 int pre_stride = x->e_mbd.pre.y_stride;
1289 unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1290 int in_what_stride = pre_stride;
1291 int mv_stride = pre_stride;
1292 unsigned char *bestaddress;
1293 int_mv *best_mv = &d->bmi.mv;
1295 unsigned int bestsad;
1296 unsigned int thissad;
1299 unsigned char *check_here;
1301 int ref_row = ref_mv->as_mv.row;
1302 int ref_col = ref_mv->as_mv.col;
1304 int row_min = ref_row - distance;
1305 int row_max = ref_row + distance;
1306 int col_min = ref_col - distance;
1307 int col_max = ref_col + distance;
1312 mvsadcost[0] = x->mvsadcost[0];
1313 mvsadcost[1] = x->mvsadcost[1];
1314 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1315 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1317 /* Work out the mid point for the search */
1318 in_what = base_pre + d->offset;
1319 bestaddress = in_what + (ref_row * pre_stride) + ref_col;
1321 best_mv->as_mv.row = ref_row;
1322 best_mv->as_mv.col = ref_col;
1324 /* Baseline value at the centre */
1325 bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
1326 mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1328 /* Apply further limits to prevent us looking using vectors that
1329 * stretch beyiond the UMV border
1331 if (col_min < x->mv_col_min) col_min = x->mv_col_min;
1333 if (col_max > x->mv_col_max) col_max = x->mv_col_max;
1335 if (row_min < x->mv_row_min) row_min = x->mv_row_min;
1337 if (row_max > x->mv_row_max) row_max = x->mv_row_max;
1339 for (r = row_min; r < row_max; ++r) {
1340 this_mv.as_mv.row = r;
1341 check_here = r * mv_stride + in_what + col_min;
1343 for (c = col_min; c < col_max; ++c) {
1344 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1346 this_mv.as_mv.col = c;
1347 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1349 if (thissad < bestsad) {
1351 best_mv->as_mv.row = r;
1352 best_mv->as_mv.col = c;
1353 bestaddress = check_here;
1360 this_mv.as_mv.row = best_mv->as_mv.row << 3;
1361 this_mv.as_mv.col = best_mv->as_mv.col << 3;
1363 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
1364 mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1367 int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1368 int sad_per_bit, int distance,
1369 vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1370 int_mv *center_mv) {
1371 unsigned char *what = (*(b->base_src) + b->src);
1372 int what_stride = b->src_stride;
1373 unsigned char *in_what;
1374 int pre_stride = x->e_mbd.pre.y_stride;
1375 unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1376 int in_what_stride = pre_stride;
1377 int mv_stride = pre_stride;
1378 unsigned char *bestaddress;
1379 int_mv *best_mv = &d->bmi.mv;
1381 unsigned int bestsad;
1382 unsigned int thissad;
1385 unsigned char *check_here;
1387 int ref_row = ref_mv->as_mv.row;
1388 int ref_col = ref_mv->as_mv.col;
1390 int row_min = ref_row - distance;
1391 int row_max = ref_row + distance;
1392 int col_min = ref_col - distance;
1393 int col_max = ref_col + distance;
1395 unsigned int sad_array[3];
1400 mvsadcost[0] = x->mvsadcost[0];
1401 mvsadcost[1] = x->mvsadcost[1];
1402 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1403 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1405 /* Work out the mid point for the search */
1406 in_what = base_pre + d->offset;
1407 bestaddress = in_what + (ref_row * pre_stride) + ref_col;
1409 best_mv->as_mv.row = ref_row;
1410 best_mv->as_mv.col = ref_col;
1412 /* Baseline value at the centre */
1413 bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
1414 mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1416 /* Apply further limits to prevent us looking using vectors that stretch
1417 * beyond the UMV border
1419 if (col_min < x->mv_col_min) col_min = x->mv_col_min;
1421 if (col_max > x->mv_col_max) col_max = x->mv_col_max;
1423 if (row_min < x->mv_row_min) row_min = x->mv_row_min;
1425 if (row_max > x->mv_row_max) row_max = x->mv_row_max;
1427 for (r = row_min; r < row_max; ++r) {
1428 this_mv.as_mv.row = r;
1429 check_here = r * mv_stride + in_what + col_min;
1432 while ((c + 2) < col_max) {
1435 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
1437 for (i = 0; i < 3; ++i) {
1438 thissad = sad_array[i];
1440 if (thissad < bestsad) {
1441 this_mv.as_mv.col = c;
1443 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1445 if (thissad < bestsad) {
1447 best_mv->as_mv.row = r;
1448 best_mv->as_mv.col = c;
1449 bestaddress = check_here;
1458 while (c < col_max) {
1459 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1461 if (thissad < bestsad) {
1462 this_mv.as_mv.col = c;
1464 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1466 if (thissad < bestsad) {
1468 best_mv->as_mv.row = r;
1469 best_mv->as_mv.col = c;
1470 bestaddress = check_here;
1479 this_mv.as_mv.row = best_mv->as_mv.row << 3;
1480 this_mv.as_mv.col = best_mv->as_mv.col << 3;
1482 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
1483 mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1486 int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1487 int sad_per_bit, int distance,
1488 vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1489 int_mv *center_mv) {
1490 unsigned char *what = (*(b->base_src) + b->src);
1491 int what_stride = b->src_stride;
1492 int pre_stride = x->e_mbd.pre.y_stride;
1493 unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1494 unsigned char *in_what;
1495 int in_what_stride = pre_stride;
1496 int mv_stride = pre_stride;
1497 unsigned char *bestaddress;
1498 int_mv *best_mv = &d->bmi.mv;
1500 unsigned int bestsad;
1501 unsigned int thissad;
1504 unsigned char *check_here;
1506 int ref_row = ref_mv->as_mv.row;
1507 int ref_col = ref_mv->as_mv.col;
1509 int row_min = ref_row - distance;
1510 int row_max = ref_row + distance;
1511 int col_min = ref_col - distance;
1512 int col_max = ref_col + distance;
1514 DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
1515 unsigned int sad_array[3];
1520 mvsadcost[0] = x->mvsadcost[0];
1521 mvsadcost[1] = x->mvsadcost[1];
1522 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1523 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1525 /* Work out the mid point for the search */
1526 in_what = base_pre + d->offset;
1527 bestaddress = in_what + (ref_row * pre_stride) + ref_col;
1529 best_mv->as_mv.row = ref_row;
1530 best_mv->as_mv.col = ref_col;
1532 /* Baseline value at the centre */
1533 bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
1534 mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1536 /* Apply further limits to prevent us looking using vectors that stretch
1537 * beyond the UMV border
1539 if (col_min < x->mv_col_min) col_min = x->mv_col_min;
1541 if (col_max > x->mv_col_max) col_max = x->mv_col_max;
1543 if (row_min < x->mv_row_min) row_min = x->mv_row_min;
1545 if (row_max > x->mv_row_max) row_max = x->mv_row_max;
1547 for (r = row_min; r < row_max; ++r) {
1548 this_mv.as_mv.row = r;
1549 check_here = r * mv_stride + in_what + col_min;
1552 while ((c + 7) < col_max) {
1555 fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
1557 for (i = 0; i < 8; ++i) {
1558 thissad = sad_array8[i];
1560 if (thissad < bestsad) {
1561 this_mv.as_mv.col = c;
1563 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1565 if (thissad < bestsad) {
1567 best_mv->as_mv.row = r;
1568 best_mv->as_mv.col = c;
1569 bestaddress = check_here;
1578 while ((c + 2) < col_max) {
1581 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
1583 for (i = 0; i < 3; ++i) {
1584 thissad = sad_array[i];
1586 if (thissad < bestsad) {
1587 this_mv.as_mv.col = c;
1589 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1591 if (thissad < bestsad) {
1593 best_mv->as_mv.row = r;
1594 best_mv->as_mv.col = c;
1595 bestaddress = check_here;
1604 while (c < col_max) {
1605 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1607 if (thissad < bestsad) {
1608 this_mv.as_mv.col = c;
1610 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1612 if (thissad < bestsad) {
1614 best_mv->as_mv.row = r;
1615 best_mv->as_mv.col = c;
1616 bestaddress = check_here;
1625 this_mv.as_mv.row = best_mv->as_mv.row * 8;
1626 this_mv.as_mv.col = best_mv->as_mv.col * 8;
1628 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
1629 mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1632 int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
1633 int_mv *ref_mv, int error_per_bit,
1634 int search_range, vp8_variance_fn_ptr_t *fn_ptr,
1635 int *mvcost[2], int_mv *center_mv) {
1636 MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
1638 short this_row_offset, this_col_offset;
1640 int what_stride = b->src_stride;
1641 int pre_stride = x->e_mbd.pre.y_stride;
1642 unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1643 int in_what_stride = pre_stride;
1644 unsigned char *what = (*(b->base_src) + b->src);
1645 unsigned char *best_address =
1646 (unsigned char *)(base_pre + d->offset +
1647 (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
1648 unsigned char *check_here;
1650 unsigned int bestsad;
1651 unsigned int thissad;
1656 mvsadcost[0] = x->mvsadcost[0];
1657 mvsadcost[1] = x->mvsadcost[1];
1658 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1659 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1661 bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
1662 mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
1664 for (i = 0; i < search_range; ++i) {
1667 for (j = 0; j < 4; ++j) {
1668 this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
1669 this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
1671 if ((this_col_offset > x->mv_col_min) &&
1672 (this_col_offset < x->mv_col_max) &&
1673 (this_row_offset > x->mv_row_min) &&
1674 (this_row_offset < x->mv_row_max)) {
1675 check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
1677 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1679 if (thissad < bestsad) {
1680 this_mv.as_mv.row = this_row_offset;
1681 this_mv.as_mv.col = this_col_offset;
1683 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
1685 if (thissad < bestsad) {
1693 if (best_site == -1) {
1696 ref_mv->as_mv.row += neighbors[best_site].row;
1697 ref_mv->as_mv.col += neighbors[best_site].col;
1698 best_address += (neighbors[best_site].row) * in_what_stride +
1699 neighbors[best_site].col;
1703 this_mv.as_mv.row = ref_mv->as_mv.row << 3;
1704 this_mv.as_mv.col = ref_mv->as_mv.col << 3;
1706 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
1707 mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1710 int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
1711 int_mv *ref_mv, int error_per_bit,
1712 int search_range, vp8_variance_fn_ptr_t *fn_ptr,
1713 int *mvcost[2], int_mv *center_mv) {
1714 MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
1716 short this_row_offset, this_col_offset;
1718 int what_stride = b->src_stride;
1719 int pre_stride = x->e_mbd.pre.y_stride;
1720 unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1721 int in_what_stride = pre_stride;
1722 unsigned char *what = (*(b->base_src) + b->src);
1723 unsigned char *best_address =
1724 (unsigned char *)(base_pre + d->offset +
1725 (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
1726 unsigned char *check_here;
1728 unsigned int bestsad;
1729 unsigned int thissad;
1734 mvsadcost[0] = x->mvsadcost[0];
1735 mvsadcost[1] = x->mvsadcost[1];
1736 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1737 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1739 bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
1740 mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
1742 for (i = 0; i < search_range; ++i) {
1746 all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
1747 all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
1748 all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
1749 all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);
1752 unsigned int sad_array[4];
1753 const unsigned char *block_offset[4];
1754 block_offset[0] = best_address - in_what_stride;
1755 block_offset[1] = best_address - 1;
1756 block_offset[2] = best_address + 1;
1757 block_offset[3] = best_address + in_what_stride;
1759 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
1762 for (j = 0; j < 4; ++j) {
1763 if (sad_array[j] < bestsad) {
1764 this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
1765 this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
1767 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
1769 if (sad_array[j] < bestsad) {
1770 bestsad = sad_array[j];
1776 for (j = 0; j < 4; ++j) {
1777 this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
1778 this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
1780 if ((this_col_offset > x->mv_col_min) &&
1781 (this_col_offset < x->mv_col_max) &&
1782 (this_row_offset > x->mv_row_min) &&
1783 (this_row_offset < x->mv_row_max)) {
1784 check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
1786 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1788 if (thissad < bestsad) {
1789 this_mv.as_mv.row = this_row_offset;
1790 this_mv.as_mv.col = this_col_offset;
1792 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
1794 if (thissad < bestsad) {
1803 if (best_site == -1) {
1806 ref_mv->as_mv.row += neighbors[best_site].row;
1807 ref_mv->as_mv.col += neighbors[best_site].col;
1808 best_address += (neighbors[best_site].row) * in_what_stride +
1809 neighbors[best_site].col;
1813 this_mv.as_mv.row = ref_mv->as_mv.row * 8;
1814 this_mv.as_mv.col = ref_mv->as_mv.col * 8;
1816 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
1817 mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);