From: Kostya Shishkov Date: Wed, 10 Aug 2011 09:26:39 +0000 (+0200) Subject: Add weighted motion compensation for RV40 B-frames X-Git-Tag: v0.8b1~1823 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b86ab38137be34376c90d45d08d49dbd28f2a72f;p=platform%2Fupstream%2Flibav.git Add weighted motion compensation for RV40 B-frames Signed-off-by: Ronald S. Bultje --- diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c index 58e4552..cdc559f 100644 --- a/libavcodec/rv34.c +++ b/libavcodec/rv34.c @@ -717,7 +717,7 @@ static const int chroma_coeffs[3] = { 0, 3, 5 }; static inline void rv34_mc(RV34DecContext *r, const int block_type, const int xoff, const int yoff, int mv_off, const int width, const int height, int dir, - const int thirdpel, + const int thirdpel, int weighted, qpel_mc_func (*qpel_mc)[16], h264_chroma_mc_func (*chroma_mc)) { @@ -781,9 +781,15 @@ static inline void rv34_mc(RV34DecContext *r, const int block_type, srcU = uvbuf; srcV = uvbuf + 16; } - Y = s->dest[0] + xoff + yoff *s->linesize; - U = s->dest[1] + (xoff>>1) + (yoff>>1)*s->uvlinesize; - V = s->dest[2] + (xoff>>1) + (yoff>>1)*s->uvlinesize; + if(!weighted){ + Y = s->dest[0] + xoff + yoff *s->linesize; + U = s->dest[1] + (xoff>>1) + (yoff>>1)*s->uvlinesize; + V = s->dest[2] + (xoff>>1) + (yoff>>1)*s->uvlinesize; + }else{ + Y = r->tmp_b_block_y [dir] + xoff + yoff *s->linesize; + U = r->tmp_b_block_uv[dir*2] + (xoff>>1) + (yoff>>1)*s->uvlinesize; + V = r->tmp_b_block_uv[dir*2+1] + (xoff>>1) + (yoff>>1)*s->uvlinesize; + } if(block_type == RV34_MB_P_16x8){ qpel_mc[1][dxy](Y, srcY, s->linesize); @@ -804,33 +810,70 @@ static void rv34_mc_1mv(RV34DecContext *r, const int block_type, const int xoff, const int yoff, int mv_off, const int width, const int height, int dir) { - rv34_mc(r, block_type, xoff, yoff, mv_off, width, height, dir, r->rv30, + rv34_mc(r, block_type, xoff, yoff, mv_off, width, height, dir, r->rv30, 0, r->rdsp.put_pixels_tab, r->rdsp.put_chroma_pixels_tab); } +static void rv4_weight(RV34DecContext *r) +{ + r->rdsp.rv40_weight_pixels_tab[0](r->s.dest[0], + r->tmp_b_block_y[0], + r->tmp_b_block_y[1], + r->weight1, + r->weight2, + r->s.linesize); + r->rdsp.rv40_weight_pixels_tab[1](r->s.dest[1], + r->tmp_b_block_uv[0], + r->tmp_b_block_uv[2], + r->weight1, + r->weight2, + r->s.uvlinesize); + r->rdsp.rv40_weight_pixels_tab[1](r->s.dest[2], + r->tmp_b_block_uv[1], + r->tmp_b_block_uv[3], + r->weight1, + r->weight2, + r->s.uvlinesize); +} + static void rv34_mc_2mv(RV34DecContext *r, const int block_type) { - rv34_mc(r, block_type, 0, 0, 0, 2, 2, 0, r->rv30, + int weighted = !r->rv30 && block_type != RV34_MB_B_BIDIR && r->weight1 != 8192; + + rv34_mc(r, block_type, 0, 0, 0, 2, 2, 0, r->rv30, weighted, r->rdsp.put_pixels_tab, r->rdsp.put_chroma_pixels_tab); - rv34_mc(r, block_type, 0, 0, 0, 2, 2, 1, r->rv30, - r->rdsp.avg_pixels_tab, - r->rdsp.avg_chroma_pixels_tab); + if(!weighted){ + rv34_mc(r, block_type, 0, 0, 0, 2, 2, 1, r->rv30, 0, + r->rdsp.avg_pixels_tab, + r->rdsp.avg_chroma_pixels_tab); + }else{ + rv34_mc(r, block_type, 0, 0, 0, 2, 2, 1, r->rv30, 1, + r->rdsp.put_pixels_tab, + r->rdsp.put_chroma_pixels_tab); + rv4_weight(r); + } } static void rv34_mc_2mv_skip(RV34DecContext *r) { int i, j; + int weighted = !r->rv30 && r->weight1 != 8192; + for(j = 0; j < 2; j++) for(i = 0; i < 2; i++){ rv34_mc(r, RV34_MB_P_8x8, i*8, j*8, i+j*r->s.b8_stride, 1, 1, 0, r->rv30, + weighted, r->rdsp.put_pixels_tab, r->rdsp.put_chroma_pixels_tab); rv34_mc(r, RV34_MB_P_8x8, i*8, j*8, i+j*r->s.b8_stride, 1, 1, 1, r->rv30, - r->rdsp.avg_pixels_tab, - r->rdsp.avg_chroma_pixels_tab); + weighted, + weighted ? r->rdsp.put_pixels_tab : r->rdsp.avg_pixels_tab, + weighted ? r->rdsp.put_chroma_pixels_tab : r->rdsp.avg_chroma_pixels_tab); } + if(weighted) + rv4_weight(r); } /** number of motion vectors in each macroblock type */ @@ -1265,6 +1308,16 @@ static int rv34_decode_slice(RV34DecContext *r, int end, const uint8_t* buf, int if(MPV_frame_start(s, s->avctx) < 0) return -1; ff_er_frame_start(s); + if (!r->tmp_b_block_base || s->width != r->si.width || s->height != r->si.height) { + int i; + + r->tmp_b_block_base = av_realloc(r->tmp_b_block_base, s->linesize * 48); + for (i = 0; i < 2; i++) + r->tmp_b_block_y[i] = r->tmp_b_block_base + i * 16 * s->linesize; + for (i = 0; i < 4; i++) + r->tmp_b_block_uv[i] = r->tmp_b_block_base + 32 * s->linesize + + (i >> 1) * 8 * s->uvlinesize + (i & 1) * 16; + } r->cur_pts = r->si.pts; if(s->pict_type != AV_PICTURE_TYPE_B){ r->last_pts = r->next_pts; @@ -1500,6 +1553,7 @@ av_cold int ff_rv34_decode_end(AVCodecContext *avctx) av_freep(&r->intra_types_hist); r->intra_types = NULL; + av_freep(&r->tmp_b_block_base); av_freep(&r->mb_type); av_freep(&r->cbp_luma); av_freep(&r->cbp_chroma); diff --git a/libavcodec/rv34.h b/libavcodec/rv34.h index ef19813..12607fb 100644 --- a/libavcodec/rv34.h +++ b/libavcodec/rv34.h @@ -116,6 +116,11 @@ typedef struct RV34DecContext{ /** 8x8 block available flags (for MV prediction) */ DECLARE_ALIGNED(8, uint32_t, avail_cache)[3*4]; + /** temporary blocks for RV4 weighted MC */ + uint8_t *tmp_b_block_y[2]; + uint8_t *tmp_b_block_uv[4]; + uint8_t *tmp_b_block_base; + int (*parse_slice_header)(struct RV34DecContext *r, GetBitContext *gb, SliceInfo *si); int (*decode_mb_info)(struct RV34DecContext *r); int (*decode_intra_types)(struct RV34DecContext *r, GetBitContext *gb, int8_t *dst); diff --git a/libavcodec/rv34dsp.h b/libavcodec/rv34dsp.h index 771a6c0..e1def7d 100644 --- a/libavcodec/rv34dsp.h +++ b/libavcodec/rv34dsp.h @@ -29,11 +29,17 @@ #include "dsputil.h" +typedef void (*rv40_weight_func)(uint8_t *dst/*align width (8 or 16)*/, + uint8_t *src1/*align width (8 or 16)*/, + uint8_t *src2/*align width (8 or 16)*/, + int w1, int w2, int stride); + typedef struct RV34DSPContext { qpel_mc_func put_pixels_tab[4][16]; qpel_mc_func avg_pixels_tab[4][16]; h264_chroma_mc_func put_chroma_pixels_tab[3]; h264_chroma_mc_func avg_chroma_pixels_tab[3]; + rv40_weight_func rv40_weight_pixels_tab[2]; } RV34DSPContext; void ff_rv30dsp_init(RV34DSPContext *c, DSPContext* dsp); diff --git a/libavcodec/rv40dsp.c b/libavcodec/rv40dsp.c index 132f063..ca620ab 100644 --- a/libavcodec/rv40dsp.c +++ b/libavcodec/rv40dsp.c @@ -285,6 +285,23 @@ static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*a RV40_CHROMA_MC(put_, op_put) RV40_CHROMA_MC(avg_, op_avg) +#define RV40_WEIGHT_FUNC(size) \ +static void rv40_weight_func_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, int stride)\ +{\ + int i, j;\ +\ + for (j = 0; j < size; j++) {\ + for (i = 0; i < size; i++)\ + dst[i] = (((w2 * src1[i]) >> 9) + ((w1 * src2[i]) >> 9) + 0x10) >> 5;\ + src1 += stride;\ + src2 += stride;\ + dst += stride;\ + }\ +} + +RV40_WEIGHT_FUNC(16) +RV40_WEIGHT_FUNC(8) + av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) { c->put_pixels_tab[0][ 0] = dsp->put_h264_qpel_pixels_tab[0][0]; c->put_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_c; @@ -356,6 +373,9 @@ av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) { c->avg_chroma_pixels_tab[0] = avg_rv40_chroma_mc8_c; c->avg_chroma_pixels_tab[1] = avg_rv40_chroma_mc4_c; + c->rv40_weight_pixels_tab[0] = rv40_weight_func_16; + c->rv40_weight_pixels_tab[1] = rv40_weight_func_8; + if (HAVE_MMX) ff_rv40dsp_init_x86(c, dsp); } diff --git a/tests/ref/fate/real-rv40 b/tests/ref/fate/real-rv40 index 2a445d0..990a530 100644 --- a/tests/ref/fate/real-rv40 +++ b/tests/ref/fate/real-rv40 @@ -16,106 +16,106 @@ 0, 112500, 276480, 0x5f7a0d4f 0, 120000, 276480, 0x5f7a0d4f 0, 127500, 276480, 0x5f7a0d4f -0, 135000, 276480, 0x2d722f8a -0, 142500, 276480, 0xebbb3c8f -0, 150000, 276480, 0x8574c868 +0, 135000, 276480, 0x75641594 +0, 142500, 276480, 0x32ee3526 +0, 150000, 276480, 0x5ce39368 0, 157500, 276480, 0x4ec1e418 -0, 165000, 276480, 0x95f22651 -0, 172500, 276480, 0x071d897e -0, 180000, 276480, 0x9f7623f9 -0, 187500, 276480, 0x86d4dedf -0, 195000, 276480, 0xc0a0be22 -0, 202500, 276480, 0xc5902aec -0, 210000, 276480, 0xe000f066 -0, 217500, 276480, 0x0b2a48d5 -0, 225000, 276480, 0xa1565256 -0, 232500, 276480, 0x8de3ceb3 -0, 240000, 276480, 0x654b564a +0, 165000, 276480, 0x85cbc3b5 +0, 172500, 276480, 0x377c7b46 +0, 180000, 276480, 0x756a4a2e +0, 187500, 276480, 0xcb379547 +0, 195000, 276480, 0x99c085be +0, 202500, 276480, 0xe479ffed +0, 210000, 276480, 0x1e4fae19 +0, 217500, 276480, 0x776412ef +0, 225000, 276480, 0x58ce0f38 +0, 232500, 276480, 0x5ab69b27 +0, 240000, 276480, 0xc3db9706 0, 247500, 276480, 0xc9c57884 -0, 255000, 276480, 0x89cdcdd4 -0, 262500, 276480, 0x3594fe61 -0, 270000, 276480, 0x9d082a81 -0, 277500, 276480, 0x4e6cd0c3 -0, 285000, 276480, 0xc129765f -0, 292500, 276480, 0x92a04c99 -0, 300000, 276480, 0x5ca62953 -0, 307500, 276480, 0xb7e478aa -0, 315000, 276480, 0x932735d5 -0, 322500, 276480, 0xaaa2d7aa -0, 330000, 276480, 0xd1329996 +0, 255000, 276480, 0x000b5269 +0, 262500, 276480, 0x27ff7a5d +0, 270000, 276480, 0x70647530 +0, 277500, 276480, 0x97612c4b +0, 285000, 276480, 0xdf4e04d7 +0, 292500, 276480, 0xbd98f57c +0, 300000, 276480, 0x5163b29b +0, 307500, 276480, 0x99170e64 +0, 315000, 276480, 0x8a4e991f +0, 322500, 276480, 0x6a45425f +0, 330000, 276480, 0x7bf6b1ef 0, 337500, 276480, 0x6de1e34b -0, 345000, 276480, 0x8c963c9b -0, 352500, 276480, 0xce6eff29 -0, 360000, 276480, 0x25412f7e -0, 367500, 276480, 0x11a5ad85 -0, 375000, 276480, 0x26ea3248 -0, 382500, 276480, 0x86c35fa4 -0, 390000, 276480, 0xa98a2d38 -0, 397500, 276480, 0xed827333 -0, 405000, 276480, 0x5d44a824 -0, 412500, 276480, 0x46d54d04 -0, 420000, 276480, 0x413fd26a +0, 345000, 276480, 0xdcaaa99a +0, 352500, 276480, 0xd1e98808 +0, 360000, 276480, 0x6e2d524e +0, 367500, 276480, 0x22c50a3d +0, 375000, 276480, 0x62b76407 +0, 382500, 276480, 0x51e9b3eb +0, 390000, 276480, 0x441f7afd +0, 397500, 276480, 0xfb01efc6 +0, 405000, 276480, 0x294bb441 +0, 412500, 276480, 0xe04ac45e +0, 420000, 276480, 0x58f275ea 0, 427500, 276480, 0xf0b3b71b -0, 435000, 276480, 0x459bc06d -0, 442500, 276480, 0x4199cd45 -0, 450000, 276480, 0xa8d35683 -0, 457500, 276480, 0x9a3e7de0 -0, 465000, 276480, 0x5a30f666 -0, 472500, 276480, 0x40152668 -0, 480000, 276480, 0x90c4d22c -0, 487500, 276480, 0x5cbaacc9 -0, 495000, 276480, 0x72b658f1 -0, 502500, 276480, 0x0ba3dcc9 -0, 510000, 276480, 0x259ed5c1 +0, 435000, 276480, 0x674e34e4 +0, 442500, 276480, 0x41dda2d9 +0, 450000, 276480, 0xf46ba7fb +0, 457500, 276480, 0x28b54815 +0, 465000, 276480, 0xaf2b5d89 +0, 472500, 276480, 0x8facba58 +0, 480000, 276480, 0x28a63236 +0, 487500, 276480, 0x1ad43fd7 +0, 495000, 276480, 0x71507bd2 +0, 502500, 276480, 0x35626022 +0, 510000, 276480, 0x7c1139b3 0, 517500, 276480, 0x7fd73a99 -0, 525000, 276480, 0x488980c5 -0, 532500, 276480, 0x1d4c96a5 -0, 540000, 276480, 0x41ced7f2 -0, 547500, 276480, 0xd62d1837 -0, 555000, 276480, 0xf5fd9d20 -0, 562500, 276480, 0x2af91fda -0, 570000, 276480, 0x38ce229d -0, 577500, 276480, 0xf3a712c0 -0, 585000, 276480, 0x57b111d2 -0, 592500, 276480, 0x8556b792 -0, 600000, 276480, 0xb32d0896 +0, 525000, 276480, 0xb52e1aa2 +0, 532500, 276480, 0xd6f82cae +0, 540000, 276480, 0xf88f75d4 +0, 547500, 276480, 0x04a8e3ee +0, 555000, 276480, 0xa29f5b01 +0, 562500, 276480, 0x754ceaf5 +0, 570000, 276480, 0x5a38b4af +0, 577500, 276480, 0xfcebc261 +0, 585000, 276480, 0x3d3ca985 +0, 592500, 276480, 0x94a03c75 +0, 600000, 276480, 0x2f98911c 0, 607500, 276480, 0x923b9937 -0, 615000, 276480, 0x0da1e7e3 -0, 622500, 276480, 0x7f172382 -0, 630000, 276480, 0x93622b88 -0, 637500, 276480, 0x2599d540 -0, 645000, 276480, 0xed20c105 -0, 652500, 276480, 0x62ce256e -0, 660000, 276480, 0x286a04bb -0, 667500, 276480, 0x423f7e7c -0, 675000, 276480, 0x21fc252a -0, 682500, 276480, 0xf8a8e8ee -0, 690000, 276480, 0x770d4a8d +0, 615000, 276480, 0xefab7ffd +0, 622500, 276480, 0x6b9fbc80 +0, 630000, 276480, 0xe4bdbd1e +0, 637500, 276480, 0x225a56c0 +0, 645000, 276480, 0xf58b1b7c +0, 652500, 276480, 0xbaffcdcc +0, 660000, 276480, 0xeb6eb88f +0, 667500, 276480, 0xdb753d35 +0, 675000, 276480, 0xea80a82e +0, 682500, 276480, 0x2aae902a +0, 690000, 276480, 0x9b9ee961 0, 697500, 276480, 0xaa12b6fd -0, 705000, 276480, 0xdc7221a8 -0, 712500, 276480, 0x487eeb30 -0, 720000, 276480, 0x1e74f2db -0, 727500, 276480, 0x40ae2bc3 -0, 735000, 276480, 0x9ca9b930 -0, 742500, 276480, 0x9fb19b0f -0, 750000, 276480, 0x7bdf836c -0, 757500, 276480, 0x1e607ba7 -0, 765000, 276480, 0xbd96578b -0, 772500, 276480, 0x2124bf07 -0, 780000, 276480, 0x4895e27a +0, 705000, 276480, 0x50c31e73 +0, 712500, 276480, 0xdd9fb89f +0, 720000, 276480, 0xaf82399a +0, 727500, 276480, 0x7ce5f23c +0, 735000, 276480, 0x5aaa7519 +0, 742500, 276480, 0xe45a5599 +0, 750000, 276480, 0x704411fb +0, 757500, 276480, 0x9d7430a1 +0, 765000, 276480, 0x2c230702 +0, 772500, 276480, 0x4a4f76cd +0, 780000, 276480, 0x27f54854 0, 787500, 276480, 0x694d76e3 -0, 795000, 276480, 0xe70df513 -0, 802500, 276480, 0xcacafe6b -0, 810000, 276480, 0x64087748 -0, 817500, 276480, 0x571fda23 -0, 825000, 276480, 0x8c86cbe9 -0, 832500, 276480, 0xc8ea4671 -0, 840000, 276480, 0xbfb74300 -0, 847500, 276480, 0xbe1e3770 -0, 855000, 276480, 0x757a0232 -0, 862500, 276480, 0xa5f50c84 -0, 870000, 276480, 0x6d95f808 +0, 795000, 276480, 0x525463e2 +0, 802500, 276480, 0x819898f9 +0, 810000, 276480, 0xeeed00fc +0, 817500, 276480, 0xb6f99ee3 +0, 825000, 276480, 0xefc83107 +0, 832500, 276480, 0xbb22e024 +0, 840000, 276480, 0x300f922a +0, 847500, 276480, 0x826fc3bd +0, 855000, 276480, 0x679a53f8 +0, 862500, 276480, 0x976c9e93 +0, 870000, 276480, 0xb194656e 0, 877500, 276480, 0xf002c5ca -0, 885000, 276480, 0x1a2abb26 -0, 892500, 276480, 0x6cf69bf2 +0, 885000, 276480, 0xb243dda5 +0, 892500, 276480, 0x1700efbb 0, 900000, 276480, 0x8f316c66