From c7f7bfc9e3a3150ba72bc34366c13fb2210c66ac Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Wed, 13 Nov 2013 20:39:56 +0100 Subject: [PATCH] Remove all Alpha architecture optimizations Alpha has been end-of-lifed and no more test machines are available. --- libavcodec/alpha/Makefile | 9 - libavcodec/alpha/asm.h | 186 ------------------ libavcodec/alpha/dsputil_alpha.c | 157 ---------------- libavcodec/alpha/dsputil_alpha.h | 49 ----- libavcodec/alpha/dsputil_alpha_asm.S | 167 ---------------- libavcodec/alpha/hpeldsp_alpha.c | 213 --------------------- libavcodec/alpha/hpeldsp_alpha.h | 28 --- libavcodec/alpha/hpeldsp_alpha_asm.S | 124 ------------ libavcodec/alpha/motion_est_alpha.c | 345 ---------------------------------- libavcodec/alpha/motion_est_mvi_asm.S | 179 ------------------ libavcodec/alpha/mpegvideo_alpha.c | 110 ----------- libavcodec/alpha/regdef.h | 77 -------- libavcodec/alpha/simple_idct_alpha.c | 303 ----------------------------- libavcodec/avcodec.h | 2 + libavcodec/dct-test.c | 6 - libavcodec/dsputil.c | 2 - libavcodec/dsputil.h | 1 - libavcodec/hpeldsp.c | 2 - libavcodec/hpeldsp.h | 1 - libavcodec/mpegvideo.c | 2 - libavcodec/mpegvideo.h | 1 - libavcodec/msmpeg4.c | 5 +- libavcodec/options_table.h | 2 + libavcodec/version.h | 3 + 24 files changed, 8 insertions(+), 1966 deletions(-) delete mode 100644 libavcodec/alpha/Makefile delete mode 100644 libavcodec/alpha/asm.h delete mode 100644 libavcodec/alpha/dsputil_alpha.c delete mode 100644 libavcodec/alpha/dsputil_alpha.h delete mode 100644 libavcodec/alpha/dsputil_alpha_asm.S delete mode 100644 libavcodec/alpha/hpeldsp_alpha.c delete mode 100644 libavcodec/alpha/hpeldsp_alpha.h delete mode 100644 libavcodec/alpha/hpeldsp_alpha_asm.S delete mode 100644 libavcodec/alpha/motion_est_alpha.c delete mode 100644 libavcodec/alpha/motion_est_mvi_asm.S delete mode 100644 libavcodec/alpha/mpegvideo_alpha.c delete mode 100644 libavcodec/alpha/regdef.h delete mode 100644 libavcodec/alpha/simple_idct_alpha.c diff --git a/libavcodec/alpha/Makefile b/libavcodec/alpha/Makefile deleted file mode 100644 index 6f22137..0000000 --- a/libavcodec/alpha/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -OBJS += alpha/dsputil_alpha.o \ - alpha/dsputil_alpha_asm.o \ - alpha/motion_est_alpha.o \ - alpha/motion_est_mvi_asm.o \ - alpha/simple_idct_alpha.o \ - -OBJS-$(CONFIG_HPELDSP) += alpha/hpeldsp_alpha.o \ - alpha/hpeldsp_alpha_asm.o -OBJS-$(CONFIG_MPEGVIDEO) += alpha/mpegvideo_alpha.o diff --git a/libavcodec/alpha/asm.h b/libavcodec/alpha/asm.h deleted file mode 100644 index ab4cfcc..0000000 --- a/libavcodec/alpha/asm.h +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Alpha optimized DSP utils - * Copyright (c) 2002 Falk Hueffner - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef AVCODEC_ALPHA_ASM_H -#define AVCODEC_ALPHA_ASM_H - -#include - -#include "libavutil/common.h" - -#if AV_GCC_VERSION_AT_LEAST(2,96) -# define likely(x) __builtin_expect((x) != 0, 1) -# define unlikely(x) __builtin_expect((x) != 0, 0) -#else -# define likely(x) (x) -# define unlikely(x) (x) -#endif - -#define AMASK_BWX (1 << 0) -#define AMASK_FIX (1 << 1) -#define AMASK_CIX (1 << 2) -#define AMASK_MVI (1 << 8) - -static inline uint64_t BYTE_VEC(uint64_t x) -{ - x |= x << 8; - x |= x << 16; - x |= x << 32; - return x; -} -static inline uint64_t WORD_VEC(uint64_t x) -{ - x |= x << 16; - x |= x << 32; - return x; -} - -#define sextw(x) ((int16_t) (x)) - -#ifdef __GNUC__ -#define ldq(p) \ - (((const union { \ - uint64_t __l; \ - __typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)]; \ - } *) (p))->__l) -#define ldl(p) \ - (((const union { \ - int32_t __l; \ - __typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)]; \ - } *) (p))->__l) -#define stq(l, p) \ - do { \ - (((union { \ - uint64_t __l; \ - __typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)]; \ - } *) (p))->__l) = l; \ - } while (0) -#define stl(l, p) \ - do { \ - (((union { \ - int32_t __l; \ - __typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)]; \ - } *) (p))->__l) = l; \ - } while (0) -struct unaligned_long { uint64_t l; } __attribute__((packed)); -#define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul)) -#define uldq(a) (((const struct unaligned_long *) (a))->l) - -#if AV_GCC_VERSION_AT_LEAST(3,3) -#define prefetch(p) __builtin_prefetch((p), 0, 1) -#define prefetch_en(p) __builtin_prefetch((p), 0, 0) -#define prefetch_m(p) __builtin_prefetch((p), 1, 1) -#define prefetch_men(p) __builtin_prefetch((p), 1, 0) -#define cmpbge __builtin_alpha_cmpbge -/* Avoid warnings. */ -#define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b)) -#define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b)) -#define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b)) -#define zap __builtin_alpha_zap -#define zapnot __builtin_alpha_zapnot -#define amask __builtin_alpha_amask -#define implver __builtin_alpha_implver -#define rpcc __builtin_alpha_rpcc -#else -#define prefetch(p) __asm__ volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory") -#define prefetch_en(p) __asm__ volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory") -#define prefetch_m(p) __asm__ volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory") -#define prefetch_men(p) __asm__ volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory") -#define cmpbge(a, b) ({ uint64_t __r; __asm__ ("cmpbge %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) -#define extql(a, b) ({ uint64_t __r; __asm__ ("extql %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) -#define extwl(a, b) ({ uint64_t __r; __asm__ ("extwl %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) -#define extqh(a, b) ({ uint64_t __r; __asm__ ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) -#define zap(a, b) ({ uint64_t __r; __asm__ ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) -#define zapnot(a, b) ({ uint64_t __r; __asm__ ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) -#define amask(a) ({ uint64_t __r; __asm__ ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; }) -#define implver() ({ uint64_t __r; __asm__ ("implver %0" : "=r" (__r)); __r; }) -#define rpcc() ({ uint64_t __r; __asm__ volatile ("rpcc %0" : "=r" (__r)); __r; }) -#endif -#define wh64(p) __asm__ volatile("wh64 (%0)" : : "r"(p) : "memory") - -#if AV_GCC_VERSION_AT_LEAST(3,3) && defined(__alpha_max__) -#define minub8 __builtin_alpha_minub8 -#define minsb8 __builtin_alpha_minsb8 -#define minuw4 __builtin_alpha_minuw4 -#define minsw4 __builtin_alpha_minsw4 -#define maxub8 __builtin_alpha_maxub8 -#define maxsb8 __builtin_alpha_maxsb8 -#define maxuw4 __builtin_alpha_maxuw4 -#define maxsw4 __builtin_alpha_maxsw4 -#define perr __builtin_alpha_perr -#define pklb __builtin_alpha_pklb -#define pkwb __builtin_alpha_pkwb -#define unpkbl __builtin_alpha_unpkbl -#define unpkbw __builtin_alpha_unpkbw -#else -#define minub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define minsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define minuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define minsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define maxub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define maxsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define maxuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define maxsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define perr(a, b) ({ uint64_t __r; __asm__ (".arch ev6; perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; }) -#define pklb(a) ({ uint64_t __r; __asm__ (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) -#define pkwb(a) ({ uint64_t __r; __asm__ (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) -#define unpkbl(a) ({ uint64_t __r; __asm__ (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) -#define unpkbw(a) ({ uint64_t __r; __asm__ (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) -#endif - -#elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */ - -#include -#define ldq(p) (*(const uint64_t *) (p)) -#define ldl(p) (*(const int32_t *) (p)) -#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0) -#define stl(l, p) do { *(int32_t *) (p) = (l); } while (0) -#define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a) -#define uldq(a) (*(const __unaligned uint64_t *) (a)) -#define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b) -#define extql(a, b) asm ("extql %a0,%a1,%v0", a, b) -#define extwl(a, b) asm ("extwl %a0,%a1,%v0", a, b) -#define extqh(a, b) asm ("extqh %a0,%a1,%v0", a, b) -#define zap(a, b) asm ("zap %a0,%a1,%v0", a, b) -#define zapnot(a, b) asm ("zapnot %a0,%a1,%v0", a, b) -#define amask(a) asm ("amask %a0,%v0", a) -#define implver() asm ("implver %v0") -#define rpcc() asm ("rpcc %v0") -#define minub8(a, b) asm ("minub8 %a0,%a1,%v0", a, b) -#define minsb8(a, b) asm ("minsb8 %a0,%a1,%v0", a, b) -#define minuw4(a, b) asm ("minuw4 %a0,%a1,%v0", a, b) -#define minsw4(a, b) asm ("minsw4 %a0,%a1,%v0", a, b) -#define maxub8(a, b) asm ("maxub8 %a0,%a1,%v0", a, b) -#define maxsb8(a, b) asm ("maxsb8 %a0,%a1,%v0", a, b) -#define maxuw4(a, b) asm ("maxuw4 %a0,%a1,%v0", a, b) -#define maxsw4(a, b) asm ("maxsw4 %a0,%a1,%v0", a, b) -#define perr(a, b) asm ("perr %a0,%a1,%v0", a, b) -#define pklb(a) asm ("pklb %a0,%v0", a) -#define pkwb(a) asm ("pkwb %a0,%v0", a) -#define unpkbl(a) asm ("unpkbl %a0,%v0", a) -#define unpkbw(a) asm ("unpkbw %a0,%v0", a) -#define wh64(a) asm ("wh64 %a0", a) - -#else -#error "Unknown compiler!" -#endif - -#endif /* AVCODEC_ALPHA_ASM_H */ diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c deleted file mode 100644 index 7a41cb8..0000000 --- a/libavcodec/alpha/dsputil_alpha.c +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Alpha optimized DSP utils - * Copyright (c) 2002 Falk Hueffner - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/attributes.h" -#include "libavcodec/dsputil.h" -#include "dsputil_alpha.h" -#include "asm.h" - -void (*put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels, - int line_size); -void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels, - int line_size); - -#if 0 -/* These functions were the base for the optimized assembler routines, - and remain here for documentation purposes. */ -static void put_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels, - ptrdiff_t line_size) -{ - int i = 8; - uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */ - - do { - uint64_t shorts0, shorts1; - - shorts0 = ldq(block); - shorts0 = maxsw4(shorts0, 0); - shorts0 = minsw4(shorts0, clampmask); - stl(pkwb(shorts0), pixels); - - shorts1 = ldq(block + 4); - shorts1 = maxsw4(shorts1, 0); - shorts1 = minsw4(shorts1, clampmask); - stl(pkwb(shorts1), pixels + 4); - - pixels += line_size; - block += 8; - } while (--i); -} - -void add_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels, - ptrdiff_t line_size) -{ - int h = 8; - /* Keep this function a leaf function by generating the constants - manually (mainly for the hack value ;-). */ - uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */ - uint64_t signmask = zap(-1, 0x33); - signmask ^= signmask >> 1; /* 0x8000800080008000 */ - - do { - uint64_t shorts0, pix0, signs0; - uint64_t shorts1, pix1, signs1; - - shorts0 = ldq(block); - shorts1 = ldq(block + 4); - - pix0 = unpkbw(ldl(pixels)); - /* Signed subword add (MMX paddw). */ - signs0 = shorts0 & signmask; - shorts0 &= ~signmask; - shorts0 += pix0; - shorts0 ^= signs0; - /* Clamp. */ - shorts0 = maxsw4(shorts0, 0); - shorts0 = minsw4(shorts0, clampmask); - - /* Next 4. */ - pix1 = unpkbw(ldl(pixels + 4)); - signs1 = shorts1 & signmask; - shorts1 &= ~signmask; - shorts1 += pix1; - shorts1 ^= signs1; - shorts1 = maxsw4(shorts1, 0); - shorts1 = minsw4(shorts1, clampmask); - - stl(pkwb(shorts0), pixels); - stl(pkwb(shorts1), pixels + 4); - - pixels += line_size; - block += 8; - } while (--h); -} -#endif - -static void clear_blocks_axp(int16_t *blocks) { - uint64_t *p = (uint64_t *) blocks; - int n = sizeof(int16_t) * 6 * 64; - - do { - p[0] = 0; - p[1] = 0; - p[2] = 0; - p[3] = 0; - p[4] = 0; - p[5] = 0; - p[6] = 0; - p[7] = 0; - p += 8; - n -= 8 * 8; - } while (n); -} - -av_cold void ff_dsputil_init_alpha(DSPContext *c, AVCodecContext *avctx) -{ - const int high_bit_depth = avctx->bits_per_raw_sample > 8; - - if (!high_bit_depth) { - c->clear_blocks = clear_blocks_axp; - } - - /* amask clears all bits that correspond to present features. */ - if (amask(AMASK_MVI) == 0) { - c->put_pixels_clamped = put_pixels_clamped_mvi_asm; - c->add_pixels_clamped = add_pixels_clamped_mvi_asm; - - if (!high_bit_depth) - c->get_pixels = get_pixels_mvi; - c->diff_pixels = diff_pixels_mvi; - c->sad[0] = pix_abs16x16_mvi_asm; - c->sad[1] = pix_abs8x8_mvi; - c->pix_abs[0][0] = pix_abs16x16_mvi_asm; - c->pix_abs[1][0] = pix_abs8x8_mvi; - c->pix_abs[0][1] = pix_abs16x16_x2_mvi; - c->pix_abs[0][2] = pix_abs16x16_y2_mvi; - c->pix_abs[0][3] = pix_abs16x16_xy2_mvi; - } - - put_pixels_clamped_axp_p = c->put_pixels_clamped; - add_pixels_clamped_axp_p = c->add_pixels_clamped; - - if (avctx->bits_per_raw_sample <= 8 && - (avctx->idct_algo == FF_IDCT_AUTO || - avctx->idct_algo == FF_IDCT_SIMPLEALPHA)) { - c->idct_put = ff_simple_idct_put_axp; - c->idct_add = ff_simple_idct_add_axp; - c->idct = ff_simple_idct_axp; - } -} diff --git a/libavcodec/alpha/dsputil_alpha.h b/libavcodec/alpha/dsputil_alpha.h deleted file mode 100644 index d976c18..0000000 --- a/libavcodec/alpha/dsputil_alpha.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef AVCODEC_ALPHA_DSPUTIL_ALPHA_H -#define AVCODEC_ALPHA_DSPUTIL_ALPHA_H - -#include -#include - -void ff_simple_idct_axp(int16_t *block); -void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block); -void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block); - -void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, - int line_size); -void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, - int line_size); -extern void (*put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels, - int line_size); -extern void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels, - int line_size); - -void get_pixels_mvi(int16_t *restrict block, - const uint8_t *restrict pixels, int line_size); -void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2, - int stride); -int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); -int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); -int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); -int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); -int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); - - -#endif /* AVCODEC_ALPHA_DSPUTIL_ALPHA_H */ diff --git a/libavcodec/alpha/dsputil_alpha_asm.S b/libavcodec/alpha/dsputil_alpha_asm.S deleted file mode 100644 index afe02cc..0000000 --- a/libavcodec/alpha/dsputil_alpha_asm.S +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Alpha optimized DSP utils - * Copyright (c) 2002 Falk Hueffner - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -/* - * These functions are scheduled for pca56. They should work - * reasonably on ev6, though. - */ - -#include "regdef.h" - - .set noat - .set noreorder - .arch pca56 - .text - -/************************************************************************ - * void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, - * int line_size) - */ - .align 6 - .globl put_pixels_clamped_mvi_asm - .ent put_pixels_clamped_mvi_asm -put_pixels_clamped_mvi_asm: - .frame sp, 0, ra - .prologue 0 - - lda t8, -1 - lda t9, 8 # loop counter - zap t8, 0xaa, t8 # 00ff00ff00ff00ff - - .align 4 -1: ldq t0, 0(a0) - ldq t1, 8(a0) - ldq t2, 16(a0) - ldq t3, 24(a0) - - maxsw4 t0, zero, t0 - subq t9, 2, t9 - maxsw4 t1, zero, t1 - lda a0, 32(a0) - - maxsw4 t2, zero, t2 - addq a1, a2, ta - maxsw4 t3, zero, t3 - minsw4 t0, t8, t0 - - minsw4 t1, t8, t1 - minsw4 t2, t8, t2 - minsw4 t3, t8, t3 - pkwb t0, t0 - - pkwb t1, t1 - pkwb t2, t2 - pkwb t3, t3 - stl t0, 0(a1) - - stl t1, 4(a1) - addq ta, a2, a1 - stl t2, 0(ta) - stl t3, 4(ta) - - bne t9, 1b - ret - .end put_pixels_clamped_mvi_asm - -/************************************************************************ - * void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, - * int line_size) - */ - .align 6 - .globl add_pixels_clamped_mvi_asm - .ent add_pixels_clamped_mvi_asm -add_pixels_clamped_mvi_asm: - .frame sp, 0, ra - .prologue 0 - - lda t1, -1 - lda th, 8 - zap t1, 0x33, tg - nop - - srl tg, 1, t0 - xor tg, t0, tg # 0x8000800080008000 - zap t1, 0xaa, tf # 0x00ff00ff00ff00ff - - .align 4 -1: ldl t1, 0(a1) # pix0 (try to hit cache line soon) - ldl t4, 4(a1) # pix1 - addq a1, a2, te # pixels += line_size - ldq t0, 0(a0) # shorts0 - - ldl t7, 0(te) # pix2 (try to hit cache line soon) - ldl ta, 4(te) # pix3 - ldq t3, 8(a0) # shorts1 - ldq t6, 16(a0) # shorts2 - - ldq t9, 24(a0) # shorts3 - unpkbw t1, t1 # 0 0 (quarter/op no.) - and t0, tg, t2 # 0 1 - unpkbw t4, t4 # 1 0 - - bic t0, tg, t0 # 0 2 - unpkbw t7, t7 # 2 0 - and t3, tg, t5 # 1 1 - addq t0, t1, t0 # 0 3 - - xor t0, t2, t0 # 0 4 - unpkbw ta, ta # 3 0 - and t6, tg, t8 # 2 1 - maxsw4 t0, zero, t0 # 0 5 - - bic t3, tg, t3 # 1 2 - bic t6, tg, t6 # 2 2 - minsw4 t0, tf, t0 # 0 6 - addq t3, t4, t3 # 1 3 - - pkwb t0, t0 # 0 7 - xor t3, t5, t3 # 1 4 - maxsw4 t3, zero, t3 # 1 5 - addq t6, t7, t6 # 2 3 - - xor t6, t8, t6 # 2 4 - and t9, tg, tb # 3 1 - minsw4 t3, tf, t3 # 1 6 - bic t9, tg, t9 # 3 2 - - maxsw4 t6, zero, t6 # 2 5 - addq t9, ta, t9 # 3 3 - stl t0, 0(a1) # 0 8 - minsw4 t6, tf, t6 # 2 6 - - xor t9, tb, t9 # 3 4 - maxsw4 t9, zero, t9 # 3 5 - lda a0, 32(a0) # block += 16; - pkwb t3, t3 # 1 7 - - minsw4 t9, tf, t9 # 3 6 - subq th, 2, th - pkwb t6, t6 # 2 7 - pkwb t9, t9 # 3 7 - - stl t3, 4(a1) # 1 8 - addq te, a2, a1 # pixels += line_size - stl t6, 0(te) # 2 8 - stl t9, 4(te) # 3 8 - - bne th, 1b - ret - .end add_pixels_clamped_mvi_asm diff --git a/libavcodec/alpha/hpeldsp_alpha.c b/libavcodec/alpha/hpeldsp_alpha.c deleted file mode 100644 index 144fa22..0000000 --- a/libavcodec/alpha/hpeldsp_alpha.c +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Alpha optimized DSP utils - * Copyright (c) 2002 Falk Hueffner - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/attributes.h" -#include "libavcodec/hpeldsp.h" -#include "hpeldsp_alpha.h" -#include "asm.h" - -static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b) -{ - return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1); -} - -static inline uint64_t avg2(uint64_t a, uint64_t b) -{ - return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1); -} - -#if 0 -/* The XY2 routines basically utilize this scheme, but reuse parts in - each iteration. */ -static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4) -{ - uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) - + ((l2 & ~BYTE_VEC(0x03)) >> 2) - + ((l3 & ~BYTE_VEC(0x03)) >> 2) - + ((l4 & ~BYTE_VEC(0x03)) >> 2); - uint64_t r2 = (( (l1 & BYTE_VEC(0x03)) - + (l2 & BYTE_VEC(0x03)) - + (l3 & BYTE_VEC(0x03)) - + (l4 & BYTE_VEC(0x03)) - + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03); - return r1 + r2; -} -#endif - -#define OP(LOAD, STORE) \ - do { \ - STORE(LOAD(pixels), block); \ - pixels += line_size; \ - block += line_size; \ - } while (--h) - -#define OP_X2(LOAD, STORE) \ - do { \ - uint64_t pix1, pix2; \ - \ - pix1 = LOAD(pixels); \ - pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \ - STORE(AVG2(pix1, pix2), block); \ - pixels += line_size; \ - block += line_size; \ - } while (--h) - -#define OP_Y2(LOAD, STORE) \ - do { \ - uint64_t pix = LOAD(pixels); \ - do { \ - uint64_t next_pix; \ - \ - pixels += line_size; \ - next_pix = LOAD(pixels); \ - STORE(AVG2(pix, next_pix), block); \ - block += line_size; \ - pix = next_pix; \ - } while (--h); \ - } while (0) - -#define OP_XY2(LOAD, STORE) \ - do { \ - uint64_t pix1 = LOAD(pixels); \ - uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \ - uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) \ - + (pix2 & BYTE_VEC(0x03)); \ - uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) \ - + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \ - \ - do { \ - uint64_t npix1, npix2; \ - uint64_t npix_l, npix_h; \ - uint64_t avg; \ - \ - pixels += line_size; \ - npix1 = LOAD(pixels); \ - npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \ - npix_l = (npix1 & BYTE_VEC(0x03)) \ - + (npix2 & BYTE_VEC(0x03)); \ - npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \ - + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \ - avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \ - + pix_h + npix_h; \ - STORE(avg, block); \ - \ - block += line_size; \ - pix_l = npix_l; \ - pix_h = npix_h; \ - } while (--h); \ - } while (0) - -#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \ -static void OPNAME ## _pixels ## SUFF ## _axp \ - (uint8_t *restrict block, const uint8_t *restrict pixels, \ - ptrdiff_t line_size, int h) \ -{ \ - if ((size_t) pixels & 0x7) { \ - OPKIND(uldq, STORE); \ - } else { \ - OPKIND(ldq, STORE); \ - } \ -} \ - \ -static void OPNAME ## _pixels16 ## SUFF ## _axp \ - (uint8_t *restrict block, const uint8_t *restrict pixels, \ - ptrdiff_t line_size, int h) \ -{ \ - OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \ - OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \ -} - -#define PIXOP(OPNAME, STORE) \ - MAKE_OP(OPNAME, , OP, STORE) \ - MAKE_OP(OPNAME, _x2, OP_X2, STORE) \ - MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \ - MAKE_OP(OPNAME, _xy2, OP_XY2, STORE) - -/* Rounding primitives. */ -#define AVG2 avg2 -#define AVG4 avg4 -#define AVG4_ROUNDER BYTE_VEC(0x02) -#define STORE(l, b) stq(l, b) -PIXOP(put, STORE); - -#undef STORE -#define STORE(l, b) stq(AVG2(l, ldq(b)), b); -PIXOP(avg, STORE); - -/* Not rounding primitives. */ -#undef AVG2 -#undef AVG4 -#undef AVG4_ROUNDER -#undef STORE -#define AVG2 avg2_no_rnd -#define AVG4 avg4_no_rnd -#define AVG4_ROUNDER BYTE_VEC(0x01) -#define STORE(l, b) stq(l, b) -PIXOP(put_no_rnd, STORE); - -#undef STORE -#define STORE(l, b) stq(AVG2(l, ldq(b)), b); -PIXOP(avg_no_rnd, STORE); - -static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h) -{ - put_pixels_axp_asm(block, pixels, line_size, h); - put_pixels_axp_asm(block + 8, pixels + 8, line_size, h); -} - -av_cold void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags) -{ - c->put_pixels_tab[0][0] = put_pixels16_axp_asm; - c->put_pixels_tab[0][1] = put_pixels16_x2_axp; - c->put_pixels_tab[0][2] = put_pixels16_y2_axp; - c->put_pixels_tab[0][3] = put_pixels16_xy2_axp; - - c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm; - c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp; - c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp; - c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp; - - c->avg_pixels_tab[0][0] = avg_pixels16_axp; - c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp; - c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp; - c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp; - - c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp; - c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp; - c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp; - c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp; - - c->put_pixels_tab[1][0] = put_pixels_axp_asm; - c->put_pixels_tab[1][1] = put_pixels_x2_axp; - c->put_pixels_tab[1][2] = put_pixels_y2_axp; - c->put_pixels_tab[1][3] = put_pixels_xy2_axp; - - c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm; - c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp; - c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp; - c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp; - - c->avg_pixels_tab[1][0] = avg_pixels_axp; - c->avg_pixels_tab[1][1] = avg_pixels_x2_axp; - c->avg_pixels_tab[1][2] = avg_pixels_y2_axp; - c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp; -} diff --git a/libavcodec/alpha/hpeldsp_alpha.h b/libavcodec/alpha/hpeldsp_alpha.h deleted file mode 100644 index e44ff50..0000000 --- a/libavcodec/alpha/hpeldsp_alpha.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef AVCODEC_ALPHA_HPELDSP_ALPHA_H -#define AVCODEC_ALPHA_HPELDSP_ALPHA_H - -#include -#include - -void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); - -#endif /* AVCODEC_ALPHA_HPELDSP_ALPHA_H */ diff --git a/libavcodec/alpha/hpeldsp_alpha_asm.S b/libavcodec/alpha/hpeldsp_alpha_asm.S deleted file mode 100644 index b23d24f..0000000 --- a/libavcodec/alpha/hpeldsp_alpha_asm.S +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Alpha optimized DSP utils - * Copyright (c) 2002 Falk Hueffner - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -/* - * These functions are scheduled for pca56. They should work - * reasonably on ev6, though. - */ - -#include "regdef.h" - - .set noat - .set noreorder - .arch pca56 - .text - -/************************************************************************ - * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, - * int line_size, int h) - */ - .align 6 - .globl put_pixels_axp_asm - .ent put_pixels_axp_asm -put_pixels_axp_asm: - .frame sp, 0, ra - .prologue 0 - - and a1, 7, t0 - beq t0, $aligned - - .align 4 -$unaligned: - ldq_u t0, 0(a1) - ldq_u t1, 8(a1) - addq a1, a2, a1 - nop - - ldq_u t2, 0(a1) - ldq_u t3, 8(a1) - addq a1, a2, a1 - nop - - ldq_u t4, 0(a1) - ldq_u t5, 8(a1) - addq a1, a2, a1 - nop - - ldq_u t6, 0(a1) - ldq_u t7, 8(a1) - extql t0, a1, t0 - addq a1, a2, a1 - - extqh t1, a1, t1 - addq a0, a2, t8 - extql t2, a1, t2 - addq t8, a2, t9 - - extqh t3, a1, t3 - addq t9, a2, ta - extql t4, a1, t4 - or t0, t1, t0 - - extqh t5, a1, t5 - or t2, t3, t2 - extql t6, a1, t6 - or t4, t5, t4 - - extqh t7, a1, t7 - or t6, t7, t6 - stq t0, 0(a0) - stq t2, 0(t8) - - stq t4, 0(t9) - subq a3, 4, a3 - stq t6, 0(ta) - addq ta, a2, a0 - - bne a3, $unaligned - ret - - .align 4 -$aligned: - ldq t0, 0(a1) - addq a1, a2, a1 - ldq t1, 0(a1) - addq a1, a2, a1 - - ldq t2, 0(a1) - addq a1, a2, a1 - ldq t3, 0(a1) - - addq a0, a2, t4 - addq a1, a2, a1 - addq t4, a2, t5 - subq a3, 4, a3 - - stq t0, 0(a0) - addq t5, a2, t6 - stq t1, 0(t4) - addq t6, a2, a0 - - stq t2, 0(t5) - stq t3, 0(t6) - - bne a3, $aligned - ret - .end put_pixels_axp_asm diff --git a/libavcodec/alpha/motion_est_alpha.c b/libavcodec/alpha/motion_est_alpha.c deleted file mode 100644 index e062e90..0000000 --- a/libavcodec/alpha/motion_est_alpha.c +++ /dev/null @@ -1,345 +0,0 @@ -/* - * Alpha optimized DSP utils - * Copyright (c) 2002 Falk Hueffner - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "dsputil_alpha.h" -#include "asm.h" - -void get_pixels_mvi(int16_t *restrict block, - const uint8_t *restrict pixels, int line_size) -{ - int h = 8; - - do { - uint64_t p; - - p = ldq(pixels); - stq(unpkbw(p), block); - stq(unpkbw(p >> 32), block + 4); - - pixels += line_size; - block += 8; - } while (--h); -} - -void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2, - int stride) { - int h = 8; - uint64_t mask = 0x4040; - - mask |= mask << 16; - mask |= mask << 32; - do { - uint64_t x, y, c, d, a; - uint64_t signs; - - x = ldq(s1); - y = ldq(s2); - c = cmpbge(x, y); - d = x - y; - a = zap(mask, c); /* We use 0x4040404040404040 here... */ - d += 4 * a; /* ...so we can use s4addq here. */ - signs = zap(-1, c); - - stq(unpkbw(d) | (unpkbw(signs) << 8), block); - stq(unpkbw(d >> 32) | (unpkbw(signs >> 32) << 8), block + 4); - - s1 += stride; - s2 += stride; - block += 8; - } while (--h); -} - -static inline uint64_t avg2(uint64_t a, uint64_t b) -{ - return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1); -} - -static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4) -{ - uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) - + ((l2 & ~BYTE_VEC(0x03)) >> 2) - + ((l3 & ~BYTE_VEC(0x03)) >> 2) - + ((l4 & ~BYTE_VEC(0x03)) >> 2); - uint64_t r2 = (( (l1 & BYTE_VEC(0x03)) - + (l2 & BYTE_VEC(0x03)) - + (l3 & BYTE_VEC(0x03)) - + (l4 & BYTE_VEC(0x03)) - + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03); - return r1 + r2; -} - -int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) -{ - int result = 0; - - if ((size_t) pix2 & 0x7) { - /* works only when pix2 is actually unaligned */ - do { /* do 8 pixel a time */ - uint64_t p1, p2; - - p1 = ldq(pix1); - p2 = uldq(pix2); - result += perr(p1, p2); - - pix1 += line_size; - pix2 += line_size; - } while (--h); - } else { - do { - uint64_t p1, p2; - - p1 = ldq(pix1); - p2 = ldq(pix2); - result += perr(p1, p2); - - pix1 += line_size; - pix2 += line_size; - } while (--h); - } - - return result; -} - -#if 0 /* now done in assembly */ -int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) -{ - int result = 0; - int h = 16; - - if ((size_t) pix2 & 0x7) { - /* works only when pix2 is actually unaligned */ - do { /* do 16 pixel a time */ - uint64_t p1_l, p1_r, p2_l, p2_r; - uint64_t t; - - p1_l = ldq(pix1); - p1_r = ldq(pix1 + 8); - t = ldq_u(pix2 + 8); - p2_l = extql(ldq_u(pix2), pix2) | extqh(t, pix2); - p2_r = extql(t, pix2) | extqh(ldq_u(pix2 + 16), pix2); - pix1 += line_size; - pix2 += line_size; - - result += perr(p1_l, p2_l) - + perr(p1_r, p2_r); - } while (--h); - } else { - do { - uint64_t p1_l, p1_r, p2_l, p2_r; - - p1_l = ldq(pix1); - p1_r = ldq(pix1 + 8); - p2_l = ldq(pix2); - p2_r = ldq(pix2 + 8); - pix1 += line_size; - pix2 += line_size; - - result += perr(p1_l, p2_l) - + perr(p1_r, p2_r); - } while (--h); - } - - return result; -} -#endif - -int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) -{ - int result = 0; - uint64_t disalign = (size_t) pix2 & 0x7; - - switch (disalign) { - case 0: - do { - uint64_t p1_l, p1_r, p2_l, p2_r; - uint64_t l, r; - - p1_l = ldq(pix1); - p1_r = ldq(pix1 + 8); - l = ldq(pix2); - r = ldq(pix2 + 8); - p2_l = avg2(l, (l >> 8) | ((uint64_t) r << 56)); - p2_r = avg2(r, (r >> 8) | ((uint64_t) pix2[16] << 56)); - pix1 += line_size; - pix2 += line_size; - - result += perr(p1_l, p2_l) - + perr(p1_r, p2_r); - } while (--h); - break; - case 7: - /* |.......l|lllllllr|rrrrrrr*| - This case is special because disalign1 would be 8, which - gets treated as 0 by extqh. At least it is a bit faster - that way :) */ - do { - uint64_t p1_l, p1_r, p2_l, p2_r; - uint64_t l, m, r; - - p1_l = ldq(pix1); - p1_r = ldq(pix1 + 8); - l = ldq_u(pix2); - m = ldq_u(pix2 + 8); - r = ldq_u(pix2 + 16); - p2_l = avg2(extql(l, disalign) | extqh(m, disalign), m); - p2_r = avg2(extql(m, disalign) | extqh(r, disalign), r); - pix1 += line_size; - pix2 += line_size; - - result += perr(p1_l, p2_l) - + perr(p1_r, p2_r); - } while (--h); - break; - default: - do { - uint64_t disalign1 = disalign + 1; - uint64_t p1_l, p1_r, p2_l, p2_r; - uint64_t l, m, r; - - p1_l = ldq(pix1); - p1_r = ldq(pix1 + 8); - l = ldq_u(pix2); - m = ldq_u(pix2 + 8); - r = ldq_u(pix2 + 16); - p2_l = avg2(extql(l, disalign) | extqh(m, disalign), - extql(l, disalign1) | extqh(m, disalign1)); - p2_r = avg2(extql(m, disalign) | extqh(r, disalign), - extql(m, disalign1) | extqh(r, disalign1)); - pix1 += line_size; - pix2 += line_size; - - result += perr(p1_l, p2_l) - + perr(p1_r, p2_r); - } while (--h); - break; - } - return result; -} - -int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) -{ - int result = 0; - - if ((size_t) pix2 & 0x7) { - uint64_t t, p2_l, p2_r; - t = ldq_u(pix2 + 8); - p2_l = extql(ldq_u(pix2), pix2) | extqh(t, pix2); - p2_r = extql(t, pix2) | extqh(ldq_u(pix2 + 16), pix2); - - do { - uint64_t p1_l, p1_r, np2_l, np2_r; - uint64_t t; - - p1_l = ldq(pix1); - p1_r = ldq(pix1 + 8); - pix2 += line_size; - t = ldq_u(pix2 + 8); - np2_l = extql(ldq_u(pix2), pix2) | extqh(t, pix2); - np2_r = extql(t, pix2) | extqh(ldq_u(pix2 + 16), pix2); - - result += perr(p1_l, avg2(p2_l, np2_l)) - + perr(p1_r, avg2(p2_r, np2_r)); - - pix1 += line_size; - p2_l = np2_l; - p2_r = np2_r; - - } while (--h); - } else { - uint64_t p2_l, p2_r; - p2_l = ldq(pix2); - p2_r = ldq(pix2 + 8); - do { - uint64_t p1_l, p1_r, np2_l, np2_r; - - p1_l = ldq(pix1); - p1_r = ldq(pix1 + 8); - pix2 += line_size; - np2_l = ldq(pix2); - np2_r = ldq(pix2 + 8); - - result += perr(p1_l, avg2(p2_l, np2_l)) - + perr(p1_r, avg2(p2_r, np2_r)); - - pix1 += line_size; - p2_l = np2_l; - p2_r = np2_r; - } while (--h); - } - return result; -} - -int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) -{ - int result = 0; - - uint64_t p1_l, p1_r; - uint64_t p2_l, p2_r, p2_x; - - p1_l = ldq(pix1); - p1_r = ldq(pix1 + 8); - - if ((size_t) pix2 & 0x7) { /* could be optimized a lot */ - p2_l = uldq(pix2); - p2_r = uldq(pix2 + 8); - p2_x = (uint64_t) pix2[16] << 56; - } else { - p2_l = ldq(pix2); - p2_r = ldq(pix2 + 8); - p2_x = ldq(pix2 + 16) << 56; - } - - do { - uint64_t np1_l, np1_r; - uint64_t np2_l, np2_r, np2_x; - - pix1 += line_size; - pix2 += line_size; - - np1_l = ldq(pix1); - np1_r = ldq(pix1 + 8); - - if ((size_t) pix2 & 0x7) { /* could be optimized a lot */ - np2_l = uldq(pix2); - np2_r = uldq(pix2 + 8); - np2_x = (uint64_t) pix2[16] << 56; - } else { - np2_l = ldq(pix2); - np2_r = ldq(pix2 + 8); - np2_x = ldq(pix2 + 16) << 56; - } - - result += perr(p1_l, - avg4( p2_l, ( p2_l >> 8) | ((uint64_t) p2_r << 56), - np2_l, (np2_l >> 8) | ((uint64_t) np2_r << 56))) - + perr(p1_r, - avg4( p2_r, ( p2_r >> 8) | ((uint64_t) p2_x), - np2_r, (np2_r >> 8) | ((uint64_t) np2_x))); - - p1_l = np1_l; - p1_r = np1_r; - p2_l = np2_l; - p2_r = np2_r; - p2_x = np2_x; - } while (--h); - - return result; -} diff --git a/libavcodec/alpha/motion_est_mvi_asm.S b/libavcodec/alpha/motion_est_mvi_asm.S deleted file mode 100644 index 7fe4e16..0000000 --- a/libavcodec/alpha/motion_est_mvi_asm.S +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Alpha optimized DSP utils - * Copyright (c) 2002 Falk Hueffner - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "regdef.h" - -/* Some nicer register names. */ -#define ta t10 -#define tb t11 -#define tc t12 -#define td AT -/* Danger: these overlap with the argument list and the return value */ -#define te a5 -#define tf a4 -#define tg a3 -#define th v0 - - .set noat - .set noreorder - .arch pca56 - .text - -/***************************************************************************** - * int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size) - * - * This code is written with a pca56 in mind. For ev6, one should - * really take the increased latency of 3 cycles for MVI instructions - * into account. - * - * It is important to keep the loading and first use of a register as - * far apart as possible, because if a register is accessed before it - * has been fetched from memory, the CPU will stall. - */ - .align 4 - .globl pix_abs16x16_mvi_asm - .ent pix_abs16x16_mvi_asm -pix_abs16x16_mvi_asm: - .frame sp, 0, ra, 0 - .prologue 0 - - and a2, 7, t0 - clr v0 - beq t0, $aligned - .align 4 -$unaligned: - /* Registers: - line 0: - t0: left_u -> left lo -> left - t1: mid - t2: right_u -> right hi -> right - t3: ref left - t4: ref right - line 1: - t5: left_u -> left lo -> left - t6: mid - t7: right_u -> right hi -> right - t8: ref left - t9: ref right - temp: - ta: left hi - tb: right lo - tc: error left - td: error right */ - - /* load line 0 */ - ldq_u t0, 0(a2) # left_u - ldq_u t1, 8(a2) # mid - ldq_u t2, 16(a2) # right_u - ldq t3, 0(a1) # ref left - ldq t4, 8(a1) # ref right - addq a1, a3, a1 # pix1 - addq a2, a3, a2 # pix2 - /* load line 1 */ - ldq_u t5, 0(a2) # left_u - ldq_u t6, 8(a2) # mid - ldq_u t7, 16(a2) # right_u - ldq t8, 0(a1) # ref left - ldq t9, 8(a1) # ref right - addq a1, a3, a1 # pix1 - addq a2, a3, a2 # pix2 - /* calc line 0 */ - extql t0, a2, t0 # left lo - extqh t1, a2, ta # left hi - extql t1, a2, tb # right lo - or t0, ta, t0 # left - extqh t2, a2, t2 # right hi - perr t3, t0, tc # error left - or t2, tb, t2 # right - perr t4, t2, td # error right - addq v0, tc, v0 # add error left - addq v0, td, v0 # add error left - /* calc line 1 */ - extql t5, a2, t5 # left lo - extqh t6, a2, ta # left hi - extql t6, a2, tb # right lo - or t5, ta, t5 # left - extqh t7, a2, t7 # right hi - perr t8, t5, tc # error left - or t7, tb, t7 # right - perr t9, t7, td # error right - addq v0, tc, v0 # add error left - addq v0, td, v0 # add error left - /* loop */ - subq a4, 2, a4 # h -= 2 - bne a4, $unaligned - ret - - .align 4 -$aligned: - /* load line 0 */ - ldq t0, 0(a2) # left - ldq t1, 8(a2) # right - addq a2, a3, a2 # pix2 - ldq t2, 0(a1) # ref left - ldq t3, 8(a1) # ref right - addq a1, a3, a1 # pix1 - /* load line 1 */ - ldq t4, 0(a2) # left - ldq t5, 8(a2) # right - addq a2, a3, a2 # pix2 - ldq t6, 0(a1) # ref left - ldq t7, 8(a1) # ref right - addq a1, a3, a1 # pix1 - /* load line 2 */ - ldq t8, 0(a2) # left - ldq t9, 8(a2) # right - addq a2, a3, a2 # pix2 - ldq ta, 0(a1) # ref left - ldq tb, 8(a1) # ref right - addq a1, a3, a1 # pix1 - /* load line 3 */ - ldq tc, 0(a2) # left - ldq td, 8(a2) # right - addq a2, a3, a2 # pix2 - ldq te, 0(a1) # ref left - ldq a0, 8(a1) # ref right - /* calc line 0 */ - perr t0, t2, t0 # error left - addq a1, a3, a1 # pix1 - perr t1, t3, t1 # error right - addq v0, t0, v0 # add error left - /* calc line 1 */ - perr t4, t6, t0 # error left - addq v0, t1, v0 # add error right - perr t5, t7, t1 # error right - addq v0, t0, v0 # add error left - /* calc line 2 */ - perr t8, ta, t0 # error left - addq v0, t1, v0 # add error right - perr t9, tb, t1 # error right - addq v0, t0, v0 # add error left - /* calc line 3 */ - perr tc, te, t0 # error left - addq v0, t1, v0 # add error right - perr td, a0, t1 # error right - addq v0, t0, v0 # add error left - addq v0, t1, v0 # add error right - /* loop */ - subq a4, 4, a4 # h -= 4 - bne a4, $aligned - ret - .end pix_abs16x16_mvi_asm diff --git a/libavcodec/alpha/mpegvideo_alpha.c b/libavcodec/alpha/mpegvideo_alpha.c deleted file mode 100644 index 5557159..0000000 --- a/libavcodec/alpha/mpegvideo_alpha.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Alpha optimized DSP utils - * Copyright (c) 2002 Falk Hueffner - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/attributes.h" -#include "libavcodec/mpegvideo.h" -#include "asm.h" - -static void dct_unquantize_h263_axp(int16_t *block, int n_coeffs, - uint64_t qscale, uint64_t qadd) -{ - uint64_t qmul = qscale << 1; - uint64_t correction = WORD_VEC(qmul * 255 >> 8); - int i; - - qadd = WORD_VEC(qadd); - - for(i = 0; i <= n_coeffs; block += 4, i += 4) { - uint64_t levels, negmask, zeros, add, sub; - - levels = ldq(block); - if (levels == 0) - continue; - -#ifdef __alpha_max__ - /* I don't think the speed difference justifies runtime - detection. */ - negmask = maxsw4(levels, -1); /* negative -> ffff (-1) */ - negmask = minsw4(negmask, 0); /* positive -> 0000 (0) */ -#else - negmask = cmpbge(WORD_VEC(0x7fff), levels); - negmask &= (negmask >> 1) | (1 << 7); - negmask = zap(-1, negmask); -#endif - - zeros = cmpbge(0, levels); - zeros &= zeros >> 1; - /* zeros |= zeros << 1 is not needed since qadd <= 255, so - zapping the lower byte suffices. */ - - levels *= qmul; - levels -= correction & (negmask << 16); - - add = qadd & ~negmask; - sub = qadd & negmask; - /* Set qadd to 0 for levels == 0. */ - add = zap(add, zeros); - levels += add; - levels -= sub; - - stq(levels, block); - } -} - -static void dct_unquantize_h263_intra_axp(MpegEncContext *s, int16_t *block, - int n, int qscale) -{ - int n_coeffs; - uint64_t qadd; - int16_t block0 = block[0]; - - if (!s->h263_aic) { - if (n < 4) - block0 *= s->y_dc_scale; - else - block0 *= s->c_dc_scale; - qadd = (qscale - 1) | 1; - } else { - qadd = 0; - } - - if(s->ac_pred) - n_coeffs = 63; - else - n_coeffs = s->inter_scantable.raster_end[s->block_last_index[n]]; - - dct_unquantize_h263_axp(block, n_coeffs, qscale, qadd); - - block[0] = block0; -} - -static void dct_unquantize_h263_inter_axp(MpegEncContext *s, int16_t *block, - int n, int qscale) -{ - int n_coeffs = s->inter_scantable.raster_end[s->block_last_index[n]]; - dct_unquantize_h263_axp(block, n_coeffs, qscale, (qscale - 1) | 1); -} - -av_cold void ff_MPV_common_init_axp(MpegEncContext *s) -{ - s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_axp; - s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_axp; -} diff --git a/libavcodec/alpha/regdef.h b/libavcodec/alpha/regdef.h deleted file mode 100644 index 1005943..0000000 --- a/libavcodec/alpha/regdef.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Alpha optimized DSP utils - * copyright (c) 2002 Falk Hueffner - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -/* Some BSDs don't seem to have regdef.h... sigh */ -#ifndef AVCODEC_ALPHA_REGDEF_H -#define AVCODEC_ALPHA_REGDEF_H - -#define v0 $0 /* function return value */ - -#define t0 $1 /* temporary registers (caller-saved) */ -#define t1 $2 -#define t2 $3 -#define t3 $4 -#define t4 $5 -#define t5 $6 -#define t6 $7 -#define t7 $8 - -#define s0 $9 /* saved-registers (callee-saved registers) */ -#define s1 $10 -#define s2 $11 -#define s3 $12 -#define s4 $13 -#define s5 $14 -#define s6 $15 -#define fp s6 /* frame-pointer (s6 in frame-less procedures) */ - -#define a0 $16 /* argument registers (caller-saved) */ -#define a1 $17 -#define a2 $18 -#define a3 $19 -#define a4 $20 -#define a5 $21 - -#define t8 $22 /* more temps (caller-saved) */ -#define t9 $23 -#define t10 $24 -#define t11 $25 -#define ra $26 /* return address register */ -#define t12 $27 - -#define pv t12 /* procedure-variable register */ -#define AT $at /* assembler temporary */ -#define gp $29 /* global pointer */ -#define sp $30 /* stack pointer */ -#define zero $31 /* reads as zero, writes are noops */ - -/* Some nicer register names. */ -#define ta t10 -#define tb t11 -#define tc t12 -#define td AT -/* Danger: these overlap with the argument list and the return value */ -#define te a5 -#define tf a4 -#define tg a3 -#define th v0 - -#endif /* AVCODEC_ALPHA_REGDEF_H */ diff --git a/libavcodec/alpha/simple_idct_alpha.c b/libavcodec/alpha/simple_idct_alpha.c deleted file mode 100644 index 262ad71..0000000 --- a/libavcodec/alpha/simple_idct_alpha.c +++ /dev/null @@ -1,303 +0,0 @@ -/* - * Simple IDCT (Alpha optimized) - * - * Copyright (c) 2001 Michael Niedermayer - * - * based upon some outcommented C code from mpeg2dec (idct_mmx.c - * written by Aaron Holtzman ) - * - * Alpha optimizations by Måns Rullgård - * and Falk Hueffner - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "dsputil_alpha.h" -#include "asm.h" - -// cos(i * M_PI / 16) * sqrt(2) * (1 << 14) -// W4 is actually exactly 16384, but using 16383 works around -// accumulating rounding errors for some encoders -#define W1 22725 -#define W2 21407 -#define W3 19266 -#define W4 16383 -#define W5 12873 -#define W6 8867 -#define W7 4520 -#define ROW_SHIFT 11 -#define COL_SHIFT 20 - -/* 0: all entries 0, 1: only first entry nonzero, 2: otherwise */ -static inline int idct_row(int16_t *row) -{ - int a0, a1, a2, a3, b0, b1, b2, b3, t; - uint64_t l, r, t2; - l = ldq(row); - r = ldq(row + 4); - - if (l == 0 && r == 0) - return 0; - - a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1)); - - if (((l & ~0xffffUL) | r) == 0) { - a0 >>= ROW_SHIFT; - t2 = (uint16_t) a0; - t2 |= t2 << 16; - t2 |= t2 << 32; - - stq(t2, row); - stq(t2, row + 4); - return 1; - } - - a1 = a0; - a2 = a0; - a3 = a0; - - t = extwl(l, 4); /* row[2] */ - if (t != 0) { - t = sextw(t); - a0 += W2 * t; - a1 += W6 * t; - a2 -= W6 * t; - a3 -= W2 * t; - } - - t = extwl(r, 0); /* row[4] */ - if (t != 0) { - t = sextw(t); - a0 += W4 * t; - a1 -= W4 * t; - a2 -= W4 * t; - a3 += W4 * t; - } - - t = extwl(r, 4); /* row[6] */ - if (t != 0) { - t = sextw(t); - a0 += W6 * t; - a1 -= W2 * t; - a2 += W2 * t; - a3 -= W6 * t; - } - - t = extwl(l, 2); /* row[1] */ - if (t != 0) { - t = sextw(t); - b0 = W1 * t; - b1 = W3 * t; - b2 = W5 * t; - b3 = W7 * t; - } else { - b0 = 0; - b1 = 0; - b2 = 0; - b3 = 0; - } - - t = extwl(l, 6); /* row[3] */ - if (t) { - t = sextw(t); - b0 += W3 * t; - b1 -= W7 * t; - b2 -= W1 * t; - b3 -= W5 * t; - } - - - t = extwl(r, 2); /* row[5] */ - if (t) { - t = sextw(t); - b0 += W5 * t; - b1 -= W1 * t; - b2 += W7 * t; - b3 += W3 * t; - } - - t = extwl(r, 6); /* row[7] */ - if (t) { - t = sextw(t); - b0 += W7 * t; - b1 -= W5 * t; - b2 += W3 * t; - b3 -= W1 * t; - } - - row[0] = (a0 + b0) >> ROW_SHIFT; - row[1] = (a1 + b1) >> ROW_SHIFT; - row[2] = (a2 + b2) >> ROW_SHIFT; - row[3] = (a3 + b3) >> ROW_SHIFT; - row[4] = (a3 - b3) >> ROW_SHIFT; - row[5] = (a2 - b2) >> ROW_SHIFT; - row[6] = (a1 - b1) >> ROW_SHIFT; - row[7] = (a0 - b0) >> ROW_SHIFT; - - return 2; -} - -static inline void idct_col(int16_t *col) -{ - int a0, a1, a2, a3, b0, b1, b2, b3; - - col[0] += (1 << (COL_SHIFT - 1)) / W4; - - a0 = W4 * col[8 * 0]; - a1 = W4 * col[8 * 0]; - a2 = W4 * col[8 * 0]; - a3 = W4 * col[8 * 0]; - - if (col[8 * 2]) { - a0 += W2 * col[8 * 2]; - a1 += W6 * col[8 * 2]; - a2 -= W6 * col[8 * 2]; - a3 -= W2 * col[8 * 2]; - } - - if (col[8 * 4]) { - a0 += W4 * col[8 * 4]; - a1 -= W4 * col[8 * 4]; - a2 -= W4 * col[8 * 4]; - a3 += W4 * col[8 * 4]; - } - - if (col[8 * 6]) { - a0 += W6 * col[8 * 6]; - a1 -= W2 * col[8 * 6]; - a2 += W2 * col[8 * 6]; - a3 -= W6 * col[8 * 6]; - } - - if (col[8 * 1]) { - b0 = W1 * col[8 * 1]; - b1 = W3 * col[8 * 1]; - b2 = W5 * col[8 * 1]; - b3 = W7 * col[8 * 1]; - } else { - b0 = 0; - b1 = 0; - b2 = 0; - b3 = 0; - } - - if (col[8 * 3]) { - b0 += W3 * col[8 * 3]; - b1 -= W7 * col[8 * 3]; - b2 -= W1 * col[8 * 3]; - b3 -= W5 * col[8 * 3]; - } - - if (col[8 * 5]) { - b0 += W5 * col[8 * 5]; - b1 -= W1 * col[8 * 5]; - b2 += W7 * col[8 * 5]; - b3 += W3 * col[8 * 5]; - } - - if (col[8 * 7]) { - b0 += W7 * col[8 * 7]; - b1 -= W5 * col[8 * 7]; - b2 += W3 * col[8 * 7]; - b3 -= W1 * col[8 * 7]; - } - - col[8 * 0] = (a0 + b0) >> COL_SHIFT; - col[8 * 7] = (a0 - b0) >> COL_SHIFT; - col[8 * 1] = (a1 + b1) >> COL_SHIFT; - col[8 * 6] = (a1 - b1) >> COL_SHIFT; - col[8 * 2] = (a2 + b2) >> COL_SHIFT; - col[8 * 5] = (a2 - b2) >> COL_SHIFT; - col[8 * 3] = (a3 + b3) >> COL_SHIFT; - col[8 * 4] = (a3 - b3) >> COL_SHIFT; -} - -/* If all rows but the first one are zero after row transformation, - all rows will be identical after column transformation. */ -static inline void idct_col2(int16_t *col) -{ - int i; - uint64_t l, r; - - for (i = 0; i < 8; ++i) { - int a0 = col[i] + (1 << (COL_SHIFT - 1)) / W4; - - a0 *= W4; - col[i] = a0 >> COL_SHIFT; - } - - l = ldq(col + 0 * 4); r = ldq(col + 1 * 4); - stq(l, col + 2 * 4); stq(r, col + 3 * 4); - stq(l, col + 4 * 4); stq(r, col + 5 * 4); - stq(l, col + 6 * 4); stq(r, col + 7 * 4); - stq(l, col + 8 * 4); stq(r, col + 9 * 4); - stq(l, col + 10 * 4); stq(r, col + 11 * 4); - stq(l, col + 12 * 4); stq(r, col + 13 * 4); - stq(l, col + 14 * 4); stq(r, col + 15 * 4); -} - -void ff_simple_idct_axp(int16_t *block) -{ - - int i; - int rowsZero = 1; /* all rows except row 0 zero */ - int rowsConstant = 1; /* all rows consist of a constant value */ - - for (i = 0; i < 8; i++) { - int sparseness = idct_row(block + 8 * i); - - if (i > 0 && sparseness > 0) - rowsZero = 0; - if (sparseness == 2) - rowsConstant = 0; - } - - if (rowsZero) { - idct_col2(block); - } else if (rowsConstant) { - idct_col(block); - for (i = 0; i < 8; i += 2) { - uint64_t v = (uint16_t) block[0]; - uint64_t w = (uint16_t) block[8]; - - v |= v << 16; - w |= w << 16; - v |= v << 32; - w |= w << 32; - stq(v, block + 0 * 4); - stq(v, block + 1 * 4); - stq(w, block + 2 * 4); - stq(w, block + 3 * 4); - block += 4 * 4; - } - } else { - for (i = 0; i < 8; i++) - idct_col(block + i); - } -} - -void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block) -{ - ff_simple_idct_axp(block); - put_pixels_clamped_axp_p(block, dest, line_size); -} - -void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block) -{ - ff_simple_idct_axp(block); - add_pixels_clamped_axp_p(block, dest, line_size); -} diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 8b121b7..0548f71 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -2442,7 +2442,9 @@ typedef struct AVCodecContext { #define FF_IDCT_SIMPLEVIS 18 #define FF_IDCT_FAAN 20 #define FF_IDCT_SIMPLENEON 22 +#if FF_API_ARCH_ALPHA #define FF_IDCT_SIMPLEALPHA 23 +#endif /** * bits per sample/pixel from the demuxer (needed for huffyuv). diff --git a/libavcodec/dct-test.c b/libavcodec/dct-test.c index 716af65..d71f7a3 100644 --- a/libavcodec/dct-test.c +++ b/libavcodec/dct-test.c @@ -61,8 +61,6 @@ void ff_simple_idct_armv5te(int16_t *data); void ff_simple_idct_armv6(int16_t *data); void ff_simple_idct_neon(int16_t *data); -void ff_simple_idct_axp(int16_t *data); - struct algo { const char *name; void (*func)(int16_t *block); @@ -136,10 +134,6 @@ static const struct algo idct_tab[] = { { "SIMPLE-NEON", ff_simple_idct_neon, PARTTRANS_PERM, AV_CPU_FLAG_NEON }, #endif -#if ARCH_ALPHA - { "SIMPLE-ALPHA", ff_simple_idct_axp, NO_PERM }, -#endif - { 0 } }; diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 5839eb3..fbdd5ad 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -2666,8 +2666,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) } - if (ARCH_ALPHA) - ff_dsputil_init_alpha(c, avctx); if (ARCH_ARM) ff_dsputil_init_arm(c, avctx); if (ARCH_BFIN) diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index b110f08..f571a99 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -311,7 +311,6 @@ int ff_check_alignment(void); void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type); -void ff_dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx); void ff_dsputil_init_arm(DSPContext* c, AVCodecContext *avctx); void ff_dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx); void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx); diff --git a/libavcodec/hpeldsp.c b/libavcodec/hpeldsp.c index 852cb5a..598f956 100644 --- a/libavcodec/hpeldsp.c +++ b/libavcodec/hpeldsp.c @@ -54,8 +54,6 @@ av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags) hpel_funcs(avg, [3], 2); hpel_funcs(avg_no_rnd,, 16); - if (ARCH_ALPHA) - ff_hpeldsp_init_alpha(c, flags); if (ARCH_ARM) ff_hpeldsp_init_arm(c, flags); if (ARCH_BFIN) diff --git a/libavcodec/hpeldsp.h b/libavcodec/hpeldsp.h index fc57103..8501e3d 100644 --- a/libavcodec/hpeldsp.h +++ b/libavcodec/hpeldsp.h @@ -94,7 +94,6 @@ typedef struct HpelDSPContext { void ff_hpeldsp_init(HpelDSPContext *c, int flags); -void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags); void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags); void ff_hpeldsp_init_bfin(HpelDSPContext *c, int flags); void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags); diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index ae458ab..d609b54 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -165,8 +165,6 @@ av_cold int ff_dct_common_init(MpegEncContext *s) s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact; s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c; - if (ARCH_ALPHA) - ff_MPV_common_init_axp(s); if (ARCH_ARM) ff_MPV_common_init_arm(s); if (ARCH_BFIN) diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h index e492af4..d249b4e 100644 --- a/libavcodec/mpegvideo.h +++ b/libavcodec/mpegvideo.h @@ -785,7 +785,6 @@ int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *frame, int *got_packet); void ff_MPV_encode_init_x86(MpegEncContext *s); void ff_MPV_common_init_x86(MpegEncContext *s); -void ff_MPV_common_init_axp(MpegEncContext *s); void ff_MPV_common_init_arm(MpegEncContext *s); void ff_MPV_common_init_bfin(MpegEncContext *s); void ff_MPV_common_init_ppc(MpegEncContext *s); diff --git a/libavcodec/msmpeg4.c b/libavcodec/msmpeg4.c index f844850..c76a14b 100644 --- a/libavcodec/msmpeg4.c +++ b/libavcodec/msmpeg4.c @@ -242,10 +242,7 @@ int ff_msmpeg4_pred_dc(MpegEncContext *s, int n, : "%eax", "%edx" ); #else - /* #elif ARCH_ALPHA */ - /* Divisions are extremely costly on Alpha; optimize the most - common case. But they are costly everywhere... - */ + /* Divisions are costly everywhere; optimize the most common case. */ if (scale == 8) { a = (a + (8 >> 1)) / 8; b = (b + (8 >> 1)) / 8; diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h index 853aa62..99105e3 100644 --- a/libavcodec/options_table.h +++ b/libavcodec/options_table.h @@ -193,7 +193,9 @@ static const AVOption avcodec_options[] = { {"simplearmv5te", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARMV5TE }, INT_MIN, INT_MAX, V|E|D, "idct"}, {"simplearmv6", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARMV6 }, INT_MIN, INT_MAX, V|E|D, "idct"}, {"simpleneon", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLENEON }, INT_MIN, INT_MAX, V|E|D, "idct"}, +#if FF_API_ARCH_ALPHA {"simplealpha", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEALPHA }, INT_MIN, INT_MAX, V|E|D, "idct"}, +#endif {"ipp", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_IPP }, INT_MIN, INT_MAX, V|E|D, "idct"}, {"xvidmmx", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_XVIDMMX }, INT_MIN, INT_MAX, V|E|D, "idct"}, {"faani", "floating point AAN IDCT", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_FAAN }, INT_MIN, INT_MAX, V|D|E, "idct"}, diff --git a/libavcodec/version.h b/libavcodec/version.h index b488558..c36efce 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -94,5 +94,8 @@ #ifndef FF_API_CODEC_PKT #define FF_API_CODEC_PKT (LIBAVCODEC_VERSION_MAJOR < 56) #endif +#ifndef FF_API_ARCH_ALPHA +#define FF_API_ARCH_ALPHA (LIBAVCODEC_VERSION_MAJOR < 56) +#endif #endif /* AVCODEC_VERSION_H */ -- 2.7.4