From 6cecd63005b29a1dc3a5104e6ac85fd112705122 Mon Sep 17 00:00:00 2001
From: David Conrad <lessen42@gmail.com>
Date: Tue, 14 Apr 2009 19:26:33 +0000
Subject: [PATCH] VC1: Do qpel when needed for both MVs in a B frame

Originally committed as revision 18511 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/dsputil.c |   3 ++
 libavcodec/dsputil.h |   1 +
 libavcodec/vc1.c     |  18 +++++--
 libavcodec/vc1dsp.c  | 136 +++++++++++++++++++++++++++++++--------------------
 4 files changed, 100 insertions(+), 58 deletions(-)

diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 3e7bfc3..e2e1371 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -2737,6 +2737,9 @@ void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);
 void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
     put_pixels8_c(dst, src, stride, 8);
 }
+void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) { /* avg twin of ff_put_vc1_mspel_mc00_c; rnd is accepted but not used. NOTE(review): vc1dsp.c declares src as const uint8_t * - confirm the prototypes should match */
+    avg_pixels8_c(dst, src, stride, 8); /* forwards to avg_pixels8_c with a fixed last argument of 8 */
+}
 #endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */
 
 void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 345dca9..7ef6e6a 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -485,6 +485,7 @@ typedef struct DSPContext {
      * last argument is actually round value instead of height
      */
     op_pixels_func put_vc1_mspel_pixels_tab[16];
+    op_pixels_func avg_vc1_mspel_pixels_tab[16];
 
     /* intrax8 functions */
     void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize);
diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c
index ef6f8b2..7d0387e 100644
--- a/libavcodec/vc1.c
+++ b/libavcodec/vc1.c
@@ -1904,11 +1904,21 @@ static void vc1_interp_mc(VC1Context *v)
         srcY += s->mspel * (1 + s->linesize);
     }
 
-    mx >>= 1;
-    my >>= 1;
-    dxy = ((my & 1) << 1) | (mx & 1);
+    if(s->mspel) {
+        dxy = ((my & 3) << 2) | (mx & 3);
+        dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0]    , srcY    , s->linesize, v->rnd);
+        dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd);
+        srcY += s->linesize * 8;
+        dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize    , srcY    , s->linesize, v->rnd);
+        dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd);
+    } else { // hpel mc
+        dxy = (my & 2) | ((mx & 2) >> 1);
 
-    dsp->avg_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
+        if(!v->rnd)
+            dsp->avg_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
+        else
+            dsp->avg_no_rnd_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
+    }
 
     if(s->flags & CODEC_FLAG_GRAY) return;
     /* Chroma MC always uses qpel blilinear */
diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c
index ab7a9a6..3effbb7 100644
--- a/libavcodec/vc1dsp.c
+++ b/libavcodec/vc1dsp.c
@@ -348,69 +348,80 @@ static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int
 
 /** Function used to do motion compensation with bicubic interpolation
  */
-static void vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int hmode, int vmode, int rnd)
-{
-    int     i, j;
-
-    if (vmode) { /* Horizontal filter to apply */
-        int r;
-
-        if (hmode) { /* Vertical filter to apply, output to tmp */
-            static const int shift_value[] = { 0, 5, 1, 5 };
-            int              shift = (shift_value[hmode]+shift_value[vmode])>>1;
-            int16_t          tmp[11*8], *tptr = tmp;
-
-            r = (1<<(shift-1)) + rnd-1;
-
-            src -= 1;
-            for(j = 0; j < 8; j++) {
-                for(i = 0; i < 11; i++)
-                    tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode)+r)>>shift;
-                src += stride;
-                tptr += 11;
-            }
-
-            r = 64-rnd;
-            tptr = tmp+1;
-            for(j = 0; j < 8; j++) {
-                for(i = 0; i < 8; i++)
-                    dst[i] = av_clip_uint8((vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode)+r)>>7);
-                dst += stride;
-                tptr += 11;
-            }
-
-            return;
-        }
-        else { /* No horizontal filter, output 8 lines to dst */
-            r = 1-rnd;
-
-            for(j = 0; j < 8; j++) {
-                for(i = 0; i < 8; i++)
-                    dst[i] = av_clip_uint8(vc1_mspel_filter(src + i, stride, vmode, r));
-                src += stride;
-                dst += stride;
-            }
-            return;
-        }
-    }
-
-    /* Horizontal mode with no vertical mode */
-    for(j = 0; j < 8; j++) {
-        for(i = 0; i < 8; i++)
-            dst[i] = av_clip_uint8(vc1_mspel_filter(src + i, 1, hmode, rnd));
-        dst += stride;
-        src += stride;
-    }
+#define VC1_MSPEL_MC(OP, OPNAME) /* emits OPNAME##vc1_mspel_mc(); OP(dst, val) decides how each of the 8x8 output samples is stored */\
+static void OPNAME ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int hmode, int vmode, int rnd)\
+{\
+    int     i, j;\
+\
+    if (vmode) { /* Vertical filter to apply */\
+        int r;\
+\
+        if (hmode) { /* Horizontal filter as well: vertical pass goes to tmp first */\
+            static const int shift_value[] = { 0, 5, 1, 5 };\
+            int              shift = (shift_value[hmode]+shift_value[vmode])>>1;\
+            int16_t          tmp[11*8], *tptr = tmp; /* 8 rows of 11 intermediate samples */\
+\
+            r = (1<<(shift-1)) + rnd-1; /* rounding term added before the first-pass shift */\
+\
+            src -= 1; /* start one column to the left; the second pass reads from tmp+1 */\
+            for(j = 0; j < 8; j++) { /* first pass: vertical filter, 11 columns per row */\
+                for(i = 0; i < 11; i++)\
+                    tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode)+r)>>shift;\
+                src += stride;\
+                tptr += 11;\
+            }\
+\
+            r = 64-rnd; /* rounding term added before the final >>7 */\
+            tptr = tmp+1;\
+            for(j = 0; j < 8; j++) { /* second pass: horizontal filter over tmp, result handed to OP */\
+                for(i = 0; i < 8; i++)\
+                    OP(dst[i], (vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode)+r)>>7);\
+                dst += stride;\
+                tptr += 11;\
+            }\
+\
+            return;\
+        }\
+        else { /* Vertical filter only, output 8 lines straight to dst */\
+            r = 1-rnd; /* rounding argument handed to vc1_mspel_filter */\
+\
+            for(j = 0; j < 8; j++) {\
+                for(i = 0; i < 8; i++)\
+                    OP(dst[i], vc1_mspel_filter(src + i, stride, vmode, r));\
+                src += stride;\
+                dst += stride;\
+            }\
+            return;\
+        }\
+    }\
+\
+    /* Horizontal filter only (vmode is 0): step of 1 between taps */\
+    for(j = 0; j < 8; j++) {\
+        for(i = 0; i < 8; i++)\
+            OP(dst[i], vc1_mspel_filter(src + i, 1, hmode, rnd));\
+        dst += stride;\
+        src += stride;\
+    }\
 }
 
+#define op_put(a, b) a = av_clip_uint8(b) /* overwrite a with b passed through av_clip_uint8() */
+#define op_avg(a, b) a = (a + av_clip_uint8(b) + 1) >> 1 /* mean of the old a and the clipped b; the +1 rounds halves up */
+
+VC1_MSPEL_MC(op_put, put_) /* expands to put_vc1_mspel_mc() */
+VC1_MSPEL_MC(op_avg, avg_) /* expands to avg_vc1_mspel_mc() */
+
 /* pixel functions - really are entry points to vc1_mspel_mc */
 
 /* this one is defined in dsputil.c */
 void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);
+void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd); /* averaging counterpart of the put prototype; body lives in dsputil.c */
 
 #define PUT_VC1_MSPEL(a, b)\
 static void put_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
-     vc1_mspel_mc(dst, src, stride, a, b, rnd);                         \
+     put_vc1_mspel_mc(dst, src, stride, a, b, rnd); /* a is passed as hmode, b as vmode */                         \
+}\
+static void avg_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { /* same wrapper, routed to the averaging variant */ \
+     avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); /* a is passed as hmode, b as vmode */                         \
 }
 
 PUT_VC1_MSPEL(1, 0)
@@ -456,4 +467,21 @@ void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) {
     dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_c;
     dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_c;
     dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_c;
+
+    dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 2] = avg_vc1_mspel_mc20_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 3] = avg_vc1_mspel_mc30_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 6] = avg_vc1_mspel_mc21_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_c;
+    dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_c;
+    dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_c;
+    dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_c;
+    dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_c;
+    dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_c;
+    dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_c;
 }
-- 
2.7.4