From 6cecd63005b29a1dc3a5104e6ac85fd112705122 Mon Sep 17 00:00:00 2001 From: David Conrad Date: Tue, 14 Apr 2009 19:26:33 +0000 Subject: [PATCH] VC1: Do qpel when needed for both MVs in a B frame Originally committed as revision 18511 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/dsputil.c | 3 ++ libavcodec/dsputil.h | 1 + libavcodec/vc1.c | 18 +++++-- libavcodec/vc1dsp.c | 136 +++++++++++++++++++++++++++++++-------------------- 4 files changed, 100 insertions(+), 58 deletions(-) diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 3e7bfc3..e2e1371 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -2737,6 +2737,9 @@ void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx); void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) { put_pixels8_c(dst, src, stride, 8); } +void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) { + avg_pixels8_c(dst, src, stride, 8); +} #endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */ void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx); diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 345dca9..7ef6e6a 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -485,6 +485,7 @@ typedef struct DSPContext { * last argument is actually round value instead of height */ op_pixels_func put_vc1_mspel_pixels_tab[16]; + op_pixels_func avg_vc1_mspel_pixels_tab[16]; /* intrax8 functions */ void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize); diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c index ef6f8b2..7d0387e 100644 --- a/libavcodec/vc1.c +++ b/libavcodec/vc1.c @@ -1904,11 +1904,21 @@ static void vc1_interp_mc(VC1Context *v) srcY += s->mspel * (1 + s->linesize); } - mx >>= 1; - my >>= 1; - dxy = ((my & 1) << 1) | (mx & 1); + if(s->mspel) { + dxy = ((my & 3) << 2) | (mx & 3); + dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] , srcY , s->linesize, v->rnd); + dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd); + srcY += s->linesize * 8; + dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize , srcY , s->linesize, v->rnd); + dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd); + } else { // hpel mc + dxy = (my & 2) | ((mx & 2) >> 1); - dsp->avg_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16); + if(!v->rnd) + dsp->avg_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16); + else + dsp->avg_no_rnd_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16); + } if(s->flags & CODEC_FLAG_GRAY) return; /* Chroma MC always uses qpel blilinear */ diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c index ab7a9a6..3effbb7 100644 --- a/libavcodec/vc1dsp.c +++ b/libavcodec/vc1dsp.c @@ -348,69 +348,80 @@ static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int /** Function used to do motion compensation with bicubic interpolation */ -static void vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int hmode, int vmode, int rnd) -{ - int i, j; - - if (vmode) { /* Horizontal filter to apply */ - int r; - - if (hmode) { /* Vertical filter to apply, output to tmp */ - static const int shift_value[] = { 0, 5, 1, 5 }; - int shift = (shift_value[hmode]+shift_value[vmode])>>1; - int16_t tmp[11*8], *tptr = tmp; - - r = (1<<(shift-1)) + rnd-1; - - src -= 1; - for(j = 0; j < 8; j++) { - for(i = 0; i < 11; i++) - tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode)+r)>>shift; - src += stride; - tptr += 11; - } - - r = 64-rnd; - tptr = tmp+1; - for(j = 0; j < 8; j++) { - for(i = 0; i < 8; i++) - dst[i] = av_clip_uint8((vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode)+r)>>7); - dst += stride; - tptr += 11; - } - - return; - } - else { /* No horizontal filter, output 8 lines to dst */ - r = 1-rnd; - - for(j = 0; j < 8; j++) { - for(i = 0; i < 8; i++) - dst[i] = av_clip_uint8(vc1_mspel_filter(src + i, stride, vmode, r)); - src += stride; - dst += stride; - } - return; - } - } - - /* Horizontal mode with no vertical mode */ - for(j = 0; j < 8; j++) { - for(i = 0; i < 8; i++) - dst[i] = av_clip_uint8(vc1_mspel_filter(src + i, 1, hmode, rnd)); - dst += stride; - src += stride; - } +#define VC1_MSPEL_MC(OP, OPNAME)\ +static void OPNAME ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int hmode, int vmode, int rnd)\ +{\ + int i, j;\ +\ + if (vmode) { /* Horizontal filter to apply */\ + int r;\ +\ + if (hmode) { /* Vertical filter to apply, output to tmp */\ + static const int shift_value[] = { 0, 5, 1, 5 };\ + int shift = (shift_value[hmode]+shift_value[vmode])>>1;\ + int16_t tmp[11*8], *tptr = tmp;\ +\ + r = (1<<(shift-1)) + rnd-1;\ +\ + src -= 1;\ + for(j = 0; j < 8; j++) {\ + for(i = 0; i < 11; i++)\ + tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode)+r)>>shift;\ + src += stride;\ + tptr += 11;\ + }\ +\ + r = 64-rnd;\ + tptr = tmp+1;\ + for(j = 0; j < 8; j++) {\ + for(i = 0; i < 8; i++)\ + OP(dst[i], (vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode)+r)>>7);\ + dst += stride;\ + tptr += 11;\ + }\ +\ + return;\ + }\ + else { /* No horizontal filter, output 8 lines to dst */\ + r = 1-rnd;\ +\ + for(j = 0; j < 8; j++) {\ + for(i = 0; i < 8; i++)\ + OP(dst[i], vc1_mspel_filter(src + i, stride, vmode, r));\ + src += stride;\ + dst += stride;\ + }\ + return;\ + }\ + }\ +\ + /* Horizontal mode with no vertical mode */\ + for(j = 0; j < 8; j++) {\ + for(i = 0; i < 8; i++)\ + OP(dst[i], vc1_mspel_filter(src + i, 1, hmode, rnd));\ + dst += stride;\ + src += stride;\ + }\ } +#define op_put(a, b) a = av_clip_uint8(b) +#define op_avg(a, b) a = (a + av_clip_uint8(b) + 1) >> 1 + +VC1_MSPEL_MC(op_put, put_) +VC1_MSPEL_MC(op_avg, avg_) + /* pixel functions - really are entry points to vc1_mspel_mc */ /* this one is defined in dsputil.c */ void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd); +void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd); #define PUT_VC1_MSPEL(a, b)\ static void put_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \ - vc1_mspel_mc(dst, src, stride, a, b, rnd); \ + put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ +}\ +static void avg_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \ + avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ } PUT_VC1_MSPEL(1, 0) @@ -456,4 +467,21 @@ void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) { dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_c; dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_c; dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_c; + + dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_c; + dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_c; + dsp->avg_vc1_mspel_pixels_tab[ 2] = avg_vc1_mspel_mc20_c; + dsp->avg_vc1_mspel_pixels_tab[ 3] = avg_vc1_mspel_mc30_c; + dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_c; + dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_c; + dsp->avg_vc1_mspel_pixels_tab[ 6] = avg_vc1_mspel_mc21_c; + dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_c; + dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_c; + dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_c; + dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_c; + dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_c; + dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_c; + dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_c; + dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_c; + dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_c; } -- 2.7.4