/*
 *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "vpx_ports/config.h"
#include "recon.h"
#include "subpixel.h"
#include "blockd.h"
#include "reconinter.h"
#if CONFIG_RUNTIME_CPU_DETECT
#include "onyxc_int.h"
#endif

// use this define on systems where unaligned int reads and writes are
// not allowed, e.g. ARM architectures
//#define MUST_BE_ALIGNED

static const int bbb[4] = {0, 2, 8, 10};
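
/* Illustrative note (added for clarity, not in the original source): the 16
 * luma 4x4 blocks of a macroblock are numbered in raster order,
 *
 *      0  1  2  3
 *      4  5  6  7
 *      8  9 10 11
 *     12 13 14 15
 *
 * so bbb[] picks blocks 0, 2, 8 and 10 -- the top-left 4x4 of each 8x8
 * quadrant -- letting the SPLITMV paths below predict a whole 8x8 quadrant
 * with a single call. */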

void vp8_copy_mem16x16_c(
    unsigned char *src,
    int src_stride,
    unsigned char *dst,
    int dst_stride)
{
    int r;

    for (r = 0; r < 16; r++)
    {
#ifdef MUST_BE_ALIGNED
        int c;

        // byte-wise copy for targets that fault on unaligned int access
        for (c = 0; c < 16; c++)
            dst[c] = src[c];

#else
        // copy the 16-byte row as four int loads/stores
        ((int *)dst)[0] = ((int *)src)[0];
        ((int *)dst)[1] = ((int *)src)[1];
        ((int *)dst)[2] = ((int *)src)[2];
        ((int *)dst)[3] = ((int *)src)[3];

#endif
        src += src_stride;
        dst += dst_stride;
    }
}

void vp8_copy_mem8x8_c(
    unsigned char *src,
    int src_stride,
    unsigned char *dst,
    int dst_stride)
{
    int r;

    for (r = 0; r < 8; r++)
    {
#ifdef MUST_BE_ALIGNED
        int c;

        for (c = 0; c < 8; c++)
            dst[c] = src[c];

#else
        ((int *)dst)[0] = ((int *)src)[0];
        ((int *)dst)[1] = ((int *)src)[1];

#endif
        src += src_stride;
        dst += dst_stride;
    }
}

void vp8_copy_mem8x4_c(
    unsigned char *src,
    int src_stride,
    unsigned char *dst,
    int dst_stride)
{
    int r;

    for (r = 0; r < 4; r++)
    {
#ifdef MUST_BE_ALIGNED
        int c;

        for (c = 0; c < 8; c++)
            dst[c] = src[c];

#else
        ((int *)dst)[0] = ((int *)src)[0];
        ((int *)dst)[1] = ((int *)src)[1];

#endif
        src += src_stride;
        dst += dst_stride;
    }
}
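
/* A minimal usage sketch (illustrative only, compiled out): copying an 8x8
 * block out of a padded reference plane into a tightly packed predictor.
 * The buffer sizes and strides here are assumptions for the demo. */
#if 0
static void copy_demo(void)
{
    unsigned char ref[32 * 32];     /* padded reference plane, stride 32 */
    unsigned char pred[8 * 8];      /* packed 8x8 destination, stride 8  */

    vp8_copy_mem8x8_c(ref, 32, pred, 8);
}
#endif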

void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf)
{
    int r;
    unsigned char *ptr_base;
    unsigned char *ptr;
    unsigned char *pred_ptr = d->predictor;

    ptr_base = *(d->base_pre);

    if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
    {
        // sub-pixel motion: run the sub-pixel interpolation filter
        ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
        sppf(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
    }
    else
    {
        // whole-pixel motion: plain 4x4 copy from the reference frame
        ptr_base += d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
        ptr = ptr_base;

        for (r = 0; r < 4; r++)
        {
#ifdef MUST_BE_ALIGNED
            pred_ptr[0] = ptr[0];
            pred_ptr[1] = ptr[1];
            pred_ptr[2] = ptr[2];
            pred_ptr[3] = ptr[3];
#else
            *(int *)pred_ptr = *(int *)ptr;
#endif
            pred_ptr += pitch;
            ptr += d->pre_stride;
        }
    }
}
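
/* Worked example (added for clarity, compiled out): motion vectors are
 * stored in eighth-pel units, so mv >> 3 gives the whole-pixel offset into
 * the reference and mv & 7 the eighth-pel fraction handed to the sub-pixel
 * filter. E.g. row = 13 means 1 full pixel down plus a 5/8-pel fraction. */
#if 0
static void mv_split_demo(void)
{
    int row = 13;
    int whole = row >> 3;   /* == 1 */
    int frac  = row & 7;    /* == 5 */
    (void)whole;
    (void)frac;
}
#endif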

void vp8_build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, int pitch)
{
    unsigned char *ptr_base;
    unsigned char *ptr;
    unsigned char *pred_ptr = d->predictor;

    ptr_base = *(d->base_pre);
    ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);

    if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
    {
        x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
    }
    else
    {
        RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, pred_ptr, pitch);
    }
}

void vp8_build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, int pitch)
{
    unsigned char *ptr_base;
    unsigned char *ptr;
    unsigned char *pred_ptr = d->predictor;

    ptr_base = *(d->base_pre);
    ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);

    if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
    {
        x->subpixel_predict8x4(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
    }
    else
    {
        RECON_INVOKE(&x->rtcd->recon, copy8x4)(ptr, d->pre_stride, pred_ptr, pitch);
    }
}
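
/* Note (added for clarity): block motion vectors live in a union, so
 * bmi.mv.as_int aliases the packed {row, col} pair. Comparing as_int, as
 * the callers below do, therefore tests both components with one integer
 * compare; equal MVs let two neighbouring 4x4 blocks be predicted together
 * by vp8_build_inter_predictors2b(). */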

// Build the inter predictor for the two 8x8 chroma planes.
void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
{
    int i;

    if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV)
    {
        unsigned char *uptr, *vptr;
        unsigned char *upred_ptr = &x->predictor[256];
        unsigned char *vpred_ptr = &x->predictor[320];

        int mv_row = x->block[16].bmi.mv.as_mv.row;
        int mv_col = x->block[16].bmi.mv.as_mv.col;
        int offset;
        int pre_stride = x->block[16].pre_stride;

        offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
        uptr = x->pre.u_buffer + offset;
        vptr = x->pre.v_buffer + offset;

        if ((mv_row | mv_col) & 7)
        {
            x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8);
            x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8);
        }
        else
        {
            RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, upred_ptr, 8);
            RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vpred_ptr, 8);
        }
    }
    else
    {
        for (i = 16; i < 24; i += 2)
        {
            BLOCKD *d0 = &x->block[i];
            BLOCKD *d1 = &x->block[i+1];

            if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
                vp8_build_inter_predictors2b(x, d0, 8);
            else
            {
                vp8_build_inter_predictors_b(d0, 8, x->subpixel_predict);
                vp8_build_inter_predictors_b(d1, 8, x->subpixel_predict);
            }
        }
    }
}
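
/* Note (added for clarity): x->predictor is laid out as a 16x16 Y block
 * followed by 8x8 U and 8x8 V, so the U plane starts at 16 * 16 = 256 and
 * the V plane at 256 + 8 * 8 = 320 -- hence the &x->predictor[256] and
 * &x->predictor[320] offsets above. */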

// Build the inter predictor for the 16x16 luma plane.
void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
{
    if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV)
    {
        unsigned char *ptr_base;
        unsigned char *ptr;
        unsigned char *pred_ptr = x->predictor;
        int mv_row = x->mbmi.mv.as_mv.row;
        int mv_col = x->mbmi.mv.as_mv.col;
        int pre_stride = x->block[0].pre_stride;

        ptr_base = x->pre.y_buffer;
        ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);

        if ((mv_row | mv_col) & 7)
        {
            x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16);
        }
        else
        {
            RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16);
        }
    }
    else
    {
        int i;

        if (x->mbmi.partitioning < 3)
        {
            for (i = 0; i < 4; i++)
            {
                BLOCKD *d = &x->block[bbb[i]];
                vp8_build_inter_predictors4b(x, d, 16);
            }
        }
        else
        {
            for (i = 0; i < 16; i += 2)
            {
                BLOCKD *d0 = &x->block[i];
                BLOCKD *d1 = &x->block[i+1];

                if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
                    vp8_build_inter_predictors2b(x, d0, 16);
                else
                {
                    vp8_build_inter_predictors_b(d0, 16, x->subpixel_predict);
                    vp8_build_inter_predictors_b(d1, 16, x->subpixel_predict);
                }
            }
        }
    }
}
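
/* Note (added for clarity): SPLITMV partitioning values 0..2 (two 16x8s,
 * two 8x16s, four 8x8s) all tile the macroblock with whole 8x8 quadrants
 * that each carry a single MV, so the partitioning < 3 path predicts each
 * quadrant in one 8x8 call via bbb[]. Only partitioning == 3 (sixteen 4x4s)
 * needs the per-pair 4x4 loop. */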

// Build the full inter predictor: 16x16 luma plus both 8x8 chroma planes.
void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
{
    if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV)
    {
        int offset;
        unsigned char *ptr_base;
        unsigned char *ptr;
        unsigned char *uptr, *vptr;
        unsigned char *pred_ptr = x->predictor;
        unsigned char *upred_ptr = &x->predictor[256];
        unsigned char *vpred_ptr = &x->predictor[320];

        int mv_row = x->mbmi.mv.as_mv.row;
        int mv_col = x->mbmi.mv.as_mv.col;
        int pre_stride = x->block[0].pre_stride;

        ptr_base = x->pre.y_buffer;
        ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);

        if ((mv_row | mv_col) & 7)
        {
            x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16);
        }
        else
        {
            RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16);
        }

        mv_row = x->block[16].bmi.mv.as_mv.row;
        mv_col = x->block[16].bmi.mv.as_mv.col;
        pre_stride >>= 1;   // chroma planes have half the luma stride
        offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
        uptr = x->pre.u_buffer + offset;
        vptr = x->pre.v_buffer + offset;

        if ((mv_row | mv_col) & 7)
        {
            x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8);
            x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8);
        }
        else
        {
            RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, upred_ptr, 8);
            RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vpred_ptr, 8);
        }
    }
    else
    {
        int i;

        if (x->mbmi.partitioning < 3)
        {
            for (i = 0; i < 4; i++)
            {
                BLOCKD *d = &x->block[bbb[i]];
                vp8_build_inter_predictors4b(x, d, 16);
            }
        }
        else
        {
            for (i = 0; i < 16; i += 2)
            {
                BLOCKD *d0 = &x->block[i];
                BLOCKD *d1 = &x->block[i+1];

                if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
                    vp8_build_inter_predictors2b(x, d0, 16);
                else
                {
                    vp8_build_inter_predictors_b(d0, 16, x->subpixel_predict);
                    vp8_build_inter_predictors_b(d1, 16, x->subpixel_predict);
                }
            }
        }

        for (i = 16; i < 24; i += 2)
        {
            BLOCKD *d0 = &x->block[i];
            BLOCKD *d1 = &x->block[i+1];

            if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
                vp8_build_inter_predictors2b(x, d0, 8);
            else
            {
                vp8_build_inter_predictors_b(d0, 8, x->subpixel_predict);
                vp8_build_inter_predictors_b(d1, 8, x->subpixel_predict);
            }
        }
    }
}

// Derive the chroma (U and V) block MVs from the luma MVs.
void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel)
{
    int i, j;

    if (x->mbmi.mode == SPLITMV)
    {
        for (i = 0; i < 2; i++)
        {
            for (j = 0; j < 2; j++)
            {
                int yoffset = i * 8 + j * 2;
                int uoffset = 16 + i * 2 + j;
                int voffset = 20 + i * 2 + j;

                int temp;

                // average the four luma MVs covering this chroma block,
                // rounding away from zero
                temp = x->block[yoffset  ].bmi.mv.as_mv.row
                       + x->block[yoffset+1].bmi.mv.as_mv.row
                       + x->block[yoffset+4].bmi.mv.as_mv.row
                       + x->block[yoffset+5].bmi.mv.as_mv.row;

                if (temp < 0) temp -= 4;
                else temp += 4;

                x->block[uoffset].bmi.mv.as_mv.row = temp / 8;

                if (fullpixel)
                    x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & 0xfffffff8;

                temp = x->block[yoffset  ].bmi.mv.as_mv.col
                       + x->block[yoffset+1].bmi.mv.as_mv.col
                       + x->block[yoffset+4].bmi.mv.as_mv.col
                       + x->block[yoffset+5].bmi.mv.as_mv.col;

                if (temp < 0) temp -= 4;
                else temp += 4;

                x->block[uoffset].bmi.mv.as_mv.col = temp / 8;

                if (fullpixel)
                    x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & 0xfffffff8;

                x->block[voffset].bmi.mv.as_mv.row = x->block[uoffset].bmi.mv.as_mv.row;
                x->block[voffset].bmi.mv.as_mv.col = x->block[uoffset].bmi.mv.as_mv.col;
            }
        }
    }
    else
    {
        int mvrow = x->mbmi.mv.as_mv.row;
        int mvcol = x->mbmi.mv.as_mv.col;

        // halve the luma MV for the subsampled chroma planes,
        // rounding away from zero
        if (mvrow < 0) mvrow -= 1;
        else mvrow += 1;

        if (mvcol < 0) mvcol -= 1;
        else mvcol += 1;

        mvrow /= 2;
        mvcol /= 2;

        for (i = 0; i < 8; i++)
        {
            x->block[ 16 + i].bmi.mv.as_mv.row = mvrow;
            x->block[ 16 + i].bmi.mv.as_mv.col = mvcol;

            if (fullpixel)
            {
                x->block[ 16 + i].bmi.mv.as_mv.row = mvrow & 0xfffffff8;
                x->block[ 16 + i].bmi.mv.as_mv.col = mvcol & 0xfffffff8;
            }
        }
    }
}
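
/* Worked example (added for clarity, compiled out): with luma rows 5, 6, 7
 * and 8 the sum is 26; rounding away from zero adds 4, giving 30, and
 * 30 / 8 = 3. The chroma row MV is thus the rounded average of the four
 * luma MVs divided by two, matching the 2:1 chroma subsampling. */
#if 0
static void chroma_mv_demo(void)
{
    int temp = 5 + 6 + 7 + 8;                               /* 26     */
    int mv = (temp < 0) ? (temp - 4) / 8 : (temp + 4) / 8;  /* == 3   */
    (void)mv;
}
#endif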

// The following functions are written for skip_recon_mb() to call. Since there is no recon in this
// situation, we can write the result directly to the dst buffer instead of writing it to the predictor
// buffer and then copying it to the dst buffer.
static void vp8_build_inter_predictors_b_s(BLOCKD *d, unsigned char *dst_ptr, vp8_subpix_fn_t sppf)
{
    int r;
    unsigned char *ptr_base;
    unsigned char *ptr;
    //unsigned char *pred_ptr = d->predictor;
    int dst_stride = d->dst_stride;
    int pre_stride = d->pre_stride;

    ptr_base = *(d->base_pre);

    if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
    {
        ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
        sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, dst_stride);
    }
    else
    {
        ptr_base += d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
        ptr = ptr_base;

        for (r = 0; r < 4; r++)
        {
#ifdef MUST_BE_ALIGNED
            dst_ptr[0] = ptr[0];
            dst_ptr[1] = ptr[1];
            dst_ptr[2] = ptr[2];
            dst_ptr[3] = ptr[3];
#else
            *(int *)dst_ptr = *(int *)ptr;
#endif
            dst_ptr += dst_stride;
            ptr += pre_stride;
        }
    }
}

// As vp8_build_inter_predictors_mb(), but writes straight to the dst frame.
void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
{
    //unsigned char *pred_ptr = x->block[0].predictor;
    //unsigned char *dst_ptr = *(x->block[0].base_dst) + x->block[0].dst;
    unsigned char *pred_ptr = x->predictor;
    unsigned char *dst_ptr = x->dst.y_buffer;

    if (x->mbmi.mode != SPLITMV)
    {
        int offset;
        unsigned char *ptr_base;
        unsigned char *ptr;
        unsigned char *uptr, *vptr;
        //unsigned char *pred_ptr = x->predictor;
        //unsigned char *upred_ptr = &x->predictor[256];
        //unsigned char *vpred_ptr = &x->predictor[320];
        unsigned char *udst_ptr = x->dst.u_buffer;
        unsigned char *vdst_ptr = x->dst.v_buffer;

        int mv_row = x->mbmi.mv.as_mv.row;
        int mv_col = x->mbmi.mv.as_mv.col;
        int pre_stride = x->dst.y_stride; //x->block[0].pre_stride;

        ptr_base = x->pre.y_buffer;
        ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);

        if ((mv_row | mv_col) & 7)
        {
            x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride);
        }
        else
        {
            RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride);
        }

        mv_row = x->block[16].bmi.mv.as_mv.row;
        mv_col = x->block[16].bmi.mv.as_mv.col;
        pre_stride >>= 1;   // chroma planes have half the luma stride
        offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
        uptr = x->pre.u_buffer + offset;
        vptr = x->pre.v_buffer + offset;

        if ((mv_row | mv_col) & 7)
        {
            x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, udst_ptr, x->dst.uv_stride);
            x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vdst_ptr, x->dst.uv_stride);
        }
        else
        {
            RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, udst_ptr, x->dst.uv_stride);
            RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vdst_ptr, x->dst.uv_stride);
        }
    }
    else
    {
        //note: this whole ELSE part is not executed at all. So, no way to test the correctness of my modification. Later,
        //if something is wrong, go back to what it is in build_inter_predictors_mb.
        int i;

        if (x->mbmi.partitioning < 3)
        {
            for (i = 0; i < 4; i++)
            {
                BLOCKD *d = &x->block[bbb[i]];
                //vp8_build_inter_predictors4b(x, d, 16);
                unsigned char *ptr_base;
                unsigned char *ptr;
                unsigned char *pred_ptr = d->predictor;

                ptr_base = *(d->base_pre);
                ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);

                if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
                {
                    x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride);
                }
                else
                {
                    RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride);
                }
            }
        }
        else
        {
            for (i = 0; i < 16; i += 2)
            {
                BLOCKD *d0 = &x->block[i];
                BLOCKD *d1 = &x->block[i+1];

                if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
                {
                    //vp8_build_inter_predictors2b(x, d0, 16);
                    unsigned char *ptr_base;
                    unsigned char *ptr;
                    unsigned char *pred_ptr = d0->predictor;

                    ptr_base = *(d0->base_pre);
                    ptr = ptr_base + d0->pre + (d0->bmi.mv.as_mv.row >> 3) * d0->pre_stride + (d0->bmi.mv.as_mv.col >> 3);

                    if (d0->bmi.mv.as_mv.row & 7 || d0->bmi.mv.as_mv.col & 7)
                    {
                        x->subpixel_predict8x4(ptr, d0->pre_stride, d0->bmi.mv.as_mv.col & 7, d0->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride);
                    }
                    else
                    {
                        RECON_INVOKE(&x->rtcd->recon, copy8x4)(ptr, d0->pre_stride, dst_ptr, x->dst.y_stride);
                    }
                }
                else
                {
                    vp8_build_inter_predictors_b_s(d0, dst_ptr, x->subpixel_predict);
                    vp8_build_inter_predictors_b_s(d1, dst_ptr, x->subpixel_predict);
                }
            }
        }

        for (i = 16; i < 24; i += 2)
        {
            BLOCKD *d0 = &x->block[i];
            BLOCKD *d1 = &x->block[i+1];

            if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
            {
                //vp8_build_inter_predictors2b(x, d0, 8);
                unsigned char *ptr_base;
                unsigned char *ptr;
                unsigned char *pred_ptr = d0->predictor;

                ptr_base = *(d0->base_pre);
                ptr = ptr_base + d0->pre + (d0->bmi.mv.as_mv.row >> 3) * d0->pre_stride + (d0->bmi.mv.as_mv.col >> 3);

                if (d0->bmi.mv.as_mv.row & 7 || d0->bmi.mv.as_mv.col & 7)
                {
                    x->subpixel_predict8x4(ptr, d0->pre_stride, d0->bmi.mv.as_mv.col & 7, d0->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride);
                }
                else
                {
                    RECON_INVOKE(&x->rtcd->recon, copy8x4)(ptr, d0->pre_stride, dst_ptr, x->dst.y_stride);
                }
            }
            else
            {
                vp8_build_inter_predictors_b_s(d0, dst_ptr, x->subpixel_predict);
                vp8_build_inter_predictors_b_s(d1, dst_ptr, x->subpixel_predict);
            }
        }
    }
}
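
/* A minimal usage sketch (illustrative, compiled out) of how a skipped
 * macroblock might be reconstructed with the helpers above: derive the
 * chroma MVs, then predict straight into the destination frame. The
 * MACROBLOCKD setup is assumed to have been done by the caller. */
#if 0
static void skip_mb_demo(MACROBLOCKD *x, int fullpixel)
{
    vp8_build_uvmvs(x, fullpixel);
    vp8_build_inter_predictors_mb_s(x);   /* no residual to add back */
}
#endif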