Use updated motion compensation routines.
author: Falk Hüffner <mellum@users.sourceforge.net>
Wed, 3 Jul 2002 01:09:44 +0000 (01:09 +0000)
committer: Falk Hüffner <mellum@users.sourceforge.net>
Wed, 3 Jul 2002 01:09:44 +0000 (01:09 +0000)
Originally committed as revision 713 to svn://svn.ffmpeg.org/ffmpeg/trunk

libavcodec/alpha/dsputil_alpha.c
libavcodec/alpha/pixops.h [deleted file]

index 942eef7..db11d52 100644 (file)
@@ -105,132 +105,137 @@ void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
 }
 #endif
 
-/* Average 8 unsigned bytes in parallel: (b1 + b2) >> 1
-   Since the immediate result could be greater than 255, we do the
-   shift first. The result is too low by one if the bytes were both
-   odd, so we need to add (l1 & l2) & BYTE_VEC(0x01).  */
-static inline UINT64 avg2_no_rnd(UINT64 l1, UINT64 l2)
+static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
 {
-    UINT64 correction = (l1 & l2) & BYTE_VEC(0x01);
-    l1 = (l1 & ~BYTE_VEC(0x01)) >> 1;
-    l2 = (l2 & ~BYTE_VEC(0x01)) >> 1;
-    return l1 + l2 + correction;
+    return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
 }
 
-/* Average 8 bytes with rounding: (b1 + b2 + 1) >> 1
-   The '1' only has an effect when one byte is even and the other odd,
-   i. e. we also need to add (l1 ^ l2) & BYTE_VEC(0x01).
-   Incidentally, that is equivalent to (l1 | l2) & BYTE_VEC(0x01).  */
-static inline UINT64 avg2(UINT64 l1, UINT64 l2)
+static inline uint64_t avg2(uint64_t a, uint64_t b)
 {
-    UINT64 correction = (l1 | l2) & BYTE_VEC(0x01);
-    l1 = (l1 & ~BYTE_VEC(0x01)) >> 1;
-    l2 = (l2 & ~BYTE_VEC(0x01)) >> 1;
-    return l1 + l2 + correction;
+    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);    
 }
 
-static inline UINT64 avg4(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4)
+static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
 {
-    UINT64 r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
-             + ((l2 & ~BYTE_VEC(0x03)) >> 2)
-             + ((l3 & ~BYTE_VEC(0x03)) >> 2)
-             + ((l4 & ~BYTE_VEC(0x03)) >> 2);
-    UINT64 r2 = ((  (l1 & BYTE_VEC(0x03))
-                 + (l2 & BYTE_VEC(0x03))
-                 + (l3 & BYTE_VEC(0x03))
-                 + (l4 & BYTE_VEC(0x03))
-                 + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
+    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
+               + ((l2 & ~BYTE_VEC(0x03)) >> 2)
+               + ((l3 & ~BYTE_VEC(0x03)) >> 2)
+               + ((l4 & ~BYTE_VEC(0x03)) >> 2);
+    uint64_t r2 = ((  (l1 & BYTE_VEC(0x03))
+                   + (l2 & BYTE_VEC(0x03))
+                   + (l3 & BYTE_VEC(0x03))
+                   + (l4 & BYTE_VEC(0x03))
+                   + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
     return r1 + r2;
 }
 
-static inline UINT64 avg4_no_rnd(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4)
+static inline uint64_t avg4_no_rnd(uint64_t l1, uint64_t l2,
+                                  uint64_t l3, uint64_t l4)
 {
-    UINT64 r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
-             + ((l2 & ~BYTE_VEC(0x03)) >> 2)
-             + ((l3 & ~BYTE_VEC(0x03)) >> 2)
-             + ((l4 & ~BYTE_VEC(0x03)) >> 2);
-    UINT64 r2 = (( (l1 & BYTE_VEC(0x03))
-                + (l2 & BYTE_VEC(0x03))
-                + (l3 & BYTE_VEC(0x03))
-                + (l4 & BYTE_VEC(0x03))
-                + BYTE_VEC(0x01)) >> 2) & BYTE_VEC(0x03);
+    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
+               + ((l2 & ~BYTE_VEC(0x03)) >> 2)
+               + ((l3 & ~BYTE_VEC(0x03)) >> 2)
+               + ((l4 & ~BYTE_VEC(0x03)) >> 2);
+    uint64_t r2 = ((  (l1 & BYTE_VEC(0x03))
+                   + (l2 & BYTE_VEC(0x03))
+                   + (l3 & BYTE_VEC(0x03))
+                   + (l4 & BYTE_VEC(0x03))
+                   + BYTE_VEC(0x01)) >> 2) & BYTE_VEC(0x03);
     return r1 + r2;
 }
 
-#define PIXOPNAME(suffix) put ## suffix
-#define BTYPE UINT8
-#define AVG2 avg2
-#define AVG4 avg4
-#define STORE(l, b) stq(l, b)
-#include "pixops.h"
-#undef PIXOPNAME
-#undef BTYPE
-#undef AVG2
-#undef AVG4
-#undef STORE
+#define OP(LOAD, STORE, INCR)                  \
+    do {                                       \
+       STORE(LOAD(pixels), block);             \
+       pixels += line_size;                    \
+       block += INCR;                          \
+    } while (--h)
 
-#define PIXOPNAME(suffix) put_no_rnd ## suffix
-#define BTYPE UINT8
-#define AVG2 avg2_no_rnd
-#define AVG4 avg4_no_rnd
-#define STORE(l, b) stq(l, b)
-#include "pixops.h"
-#undef PIXOPNAME
-#undef BTYPE
-#undef AVG2
-#undef AVG4
-#undef STORE
+#define OP_X2(LOAD, STORE, INCR)                               \
+    do {                                                       \
+       uint64_t pix1, pix2;                                    \
+                                                               \
+       pix1 = LOAD(pixels);                                    \
+       pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);        \
+       STORE(AVG2(pix1, pix2), block);                         \
+       pixels += line_size;                                    \
+       block += INCR;                                          \
+    } while (--h)
 
-/* The following functions are untested.  */
-#if 0
+#define OP_Y2(LOAD, STORE, INCR)               \
+    do {                                       \
+       uint64_t pix = LOAD(pixels);            \
+       do {                                    \
+           uint64_t next_pix;                  \
+                                               \
+           pixels += line_size;                \
+           next_pix = LOAD(pixels);            \
+           STORE(AVG2(pix, next_pix), block);  \
+           block += INCR;                      \
+           pix = next_pix;                     \
+       } while (--h);                          \
+    } while (0)
+
+#define OP_XY2(LOAD, STORE, INCR)                                      \
+    do {                                                               \
+       uint64_t pix1 = LOAD(pixels);                                   \
+       uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);       \
+                                                                       \
+       do {                                                            \
+           uint64_t next_pix1, next_pix2;                              \
+                                                                       \
+           pixels += line_size;                                        \
+           next_pix1 = LOAD(pixels);                                   \
+           next_pix2 = next_pix1 >> 8 | ((uint64_t) pixels[8] << 56);  \
+                                                                       \
+           STORE(AVG4(pix1, pix2, next_pix1, next_pix2), block);       \
+                                                                       \
+           block += INCR;                                              \
+           pix1 = next_pix1;                                           \
+           pix2 = next_pix2;                                           \
+       } while (--h);                                                  \
+    } while (0)
 
-#define PIXOPNAME(suffix) avg ## suffix
-#define BTYPE UINT8
+#define MAKE_OP(BTYPE, OPNAME, SUFF, OPKIND, STORE, INCR)              \
+static void OPNAME ## _pixels ## SUFF ## _axp(BTYPE *block,            \
+                                             const uint8_t *pixels,    \
+                                             int line_size, int h)     \
+{                                                                      \
+    if ((size_t) pixels & 0x7) {                                       \
+       OPKIND(uldq, STORE, INCR);                                      \
+    } else {                                                           \
+       OPKIND(ldq, STORE, INCR);                                       \
+    }                                                                  \
+}
+
+#define PIXOP(BTYPE, OPNAME, STORE, INCR)              \
+    MAKE_OP(BTYPE, OPNAME, ,    OP,     STORE, INCR);  \
+    MAKE_OP(BTYPE, OPNAME, _x2,         OP_X2,  STORE, INCR);  \
+    MAKE_OP(BTYPE, OPNAME, _y2,         OP_Y2,  STORE, INCR);  \
+    MAKE_OP(BTYPE, OPNAME, _xy2, OP_XY2, STORE, INCR);
+
+/* Rounding primitives.  */
 #define AVG2 avg2
 #define AVG4 avg4
+#define STORE(l, b) stq(l, b)
+PIXOP(uint8_t, put, STORE, line_size);
+
+#undef STORE
 #define STORE(l, b) stq(AVG2(l, ldq(b)), b);
-#include "pixops.h"
-#undef PIXOPNAME
-#undef BTYPE
+PIXOP(uint8_t, avg, STORE, line_size);
+
+/* Non-rounding primitives.  */
 #undef AVG2
 #undef AVG4
 #undef STORE
-
-#define PIXOPNAME(suffix) avg_no_rnd ## suffix
-#define BTYPE UINT8
 #define AVG2 avg2_no_rnd
 #define AVG4 avg4_no_rnd
-#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
-#include "pixops.h"
-#undef PIXOPNAME
-#undef BTYPE
-#undef AVG2
-#undef AVG4
-#undef STORE
+#define STORE(l, b) stq(l, b)
+PIXOP(uint8_t, put_no_rnd, STORE, line_size);
 
-#define PIXOPNAME(suffix) sub ## suffix
-#define BTYPE DCTELEM
-#define AVG2 avg2
-#define AVG4 avg4
-#define STORE(l, block) do {           \
-    UINT64 xxx = l;                    \
-    (block)[0] -= (xxx >>  0) & 0xff;  \
-    (block)[1] -= (xxx >>  8) & 0xff;  \
-    (block)[2] -= (xxx >> 16) & 0xff;  \
-    (block)[3] -= (xxx >> 24) & 0xff;  \
-    (block)[4] -= (xxx >> 32) & 0xff;  \
-    (block)[5] -= (xxx >> 40) & 0xff;  \
-    (block)[6] -= (xxx >> 48) & 0xff;  \
-    (block)[7] -= (xxx >> 56) & 0xff;  \
-} while (0)
-#include "pixops.h"
-#undef PIXOPNAME
-#undef BTYPE
-#undef AVG2
-#undef AVG4
 #undef STORE
-
-#endif
+#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
+PIXOP(uint8_t, avg_no_rnd, STORE, line_size);
 
 void dsputil_init_alpha(void)
 {
@@ -244,6 +249,16 @@ void dsputil_init_alpha(void)
     put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_axp;
     put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_axp;
 
+    avg_pixels_tab[0] = avg_pixels_axp;
+    avg_pixels_tab[1] = avg_pixels_x2_axp;
+    avg_pixels_tab[2] = avg_pixels_y2_axp;
+    avg_pixels_tab[3] = avg_pixels_xy2_axp;
+
+    avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels_axp;
+    avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels_x2_axp;
+    avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels_y2_axp;
+    avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels_xy2_axp;
+
     /* amask clears all bits that correspond to present features.  */
     if (amask(AMASK_MVI) == 0) {
         put_pixels_clamped = put_pixels_clamped_mvi_asm;
diff --git a/libavcodec/alpha/pixops.h b/libavcodec/alpha/pixops.h
deleted file mode 100644 (file)
index 118d7ae..0000000
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Alpha optimized DSP utils
- * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-/* This file is intended to be #included with proper definitions of
- * PIXOPNAME, BTYPE, AVG2, AVG4 and STORE.  */
-
-static void PIXOPNAME(_pixels_axp)(BTYPE *block, const UINT8 *pixels,
-                                  int line_size, int h)
-{
-    if ((size_t) pixels & 0x7) {
-       do {
-           STORE(uldq(pixels), block);
-           pixels += line_size;
-           block  += line_size;
-       } while (--h);
-    } else {
-       do {
-           STORE(ldq(pixels), block);
-           pixels += line_size;
-           block  += line_size;
-       } while (--h);
-    }
-}
-
-static void PIXOPNAME(_pixels_x2_axp)(BTYPE *block, const UINT8 *pixels,
-                                     int line_size, int h)
-{
-    if ((size_t) pixels & 0x7) {
-       do {
-           UINT64 pix1, pix2;
-
-           pix1 = uldq(pixels);
-           pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56);
-           STORE(AVG2(pix1, pix2), block);
-           pixels += line_size;
-           block += line_size;
-       } while (--h);
-    } else {
-       do {
-           UINT64 pix1, pix2;
-
-           pix1 = ldq(pixels);
-           pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56);
-           STORE(AVG2(pix1, pix2), block);
-           pixels += line_size;
-           block += line_size;
-       } while (--h);
-    }
-}
-
-static void PIXOPNAME(_pixels_y2_axp)(BTYPE *block, const UINT8 *pixels,
-                                     int line_size, int h)
-{
-    if ((size_t) pixels & 0x7) {
-       UINT64 pix = uldq(pixels);
-       do {
-           UINT64 next_pix;
-
-           pixels += line_size;
-           next_pix = uldq(pixels);
-           STORE(AVG2(pix, next_pix), block);
-           block += line_size;
-           pix = next_pix;
-       } while (--h);
-    } else {
-       UINT64 pix = ldq(pixels);
-       do {
-           UINT64 next_pix;
-
-           pixels += line_size;
-           next_pix = ldq(pixels);
-           STORE(AVG2(pix, next_pix), block);
-           block += line_size;
-           pix = next_pix;
-       } while (--h);
-    }
-}
-
-/* This could be further sped up by recycling AVG4 intermediate
-  results from the previous loop pass.  */
-static void PIXOPNAME(_pixels_xy2_axp)(BTYPE *block, const UINT8 *pixels,
-                                      int line_size, int h)
-{
-    if ((size_t) pixels & 0x7) {
-       UINT64 pix1 = uldq(pixels);
-       UINT64 pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56);
-
-       do {
-           UINT64 next_pix1, next_pix2;
-
-           pixels += line_size;
-           next_pix1 = uldq(pixels);
-           next_pix2 = next_pix1 >> 8 | ((UINT64) pixels[8] << 56);
-
-           STORE(AVG4(pix1, pix2, next_pix1, next_pix2), block);
-
-           block += line_size;
-           pix1 = next_pix1;
-           pix2 = next_pix2;
-       } while (--h);
-    } else {
-       UINT64 pix1 = ldq(pixels);
-       UINT64 pix2 = pix1 >> 8 | ((UINT64) pixels[8] << 56);
-
-       do {
-           UINT64 next_pix1, next_pix2;
-
-           pixels += line_size;
-           next_pix1 = ldq(pixels);
-           next_pix2 = next_pix1 >> 8 | ((UINT64) pixels[8] << 56);
-
-           STORE(AVG4(pix1, pix2, next_pix1, next_pix2), block);
-
-           block += line_size;
-           pix1 = next_pix1;
-           pix2 = next_pix2;
-       } while (--h);
-    }
-}