From 3611e7a3090f06ef3b9f5060334070e122e1b406 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Fri, 23 Jul 2010 21:46:30 +0000 Subject: [PATCH] Inline asm for VP56 arith coder This is a lot more reliable to get cmov rather than trying to trick gcc into generating it, useful since it's 2% faster overall. Patch by Eli Friedman Originally committed as revision 24471 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/vp56.h | 12 +++++----- libavcodec/x86/vp56_arith.h | 54 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 5 deletions(-) create mode 100644 libavcodec/x86/vp56_arith.h diff --git a/libavcodec/vp56.h b/libavcodec/vp56.h index ad07a49..1eacdc2 100644 --- a/libavcodec/vp56.h +++ b/libavcodec/vp56.h @@ -208,23 +208,25 @@ static av_always_inline unsigned int vp56_rac_renorm(VP56RangeCoder *c) return code_word; } +#if ARCH_X86 +#include "x86/vp56_arith.h" +#endif /* ARCH_X86 */ + +#ifndef vp56_rac_get_prob /* skip the generic C version below when x86/vp56_arith.h already supplied vp56_rac_get_prob */ +#define vp56_rac_get_prob vp56_rac_get_prob /* Generic C fallback: decodes one bit as code_word >= (low << 8), with low = 1 + (((c->high - 1) * prob) >> 8), then updates c->high and c->code_word from that bit and returns it. */ static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob) { - /* Don't put c->high in a local variable; if we do that, gcc gets - * the stupids and turns the code below into a branch again. */ unsigned int code_word = vp56_rac_renorm(c); unsigned int low = 1 + (((c->high - 1) * prob) >> 8); unsigned int low_shift = low << 8; int bit = code_word >= low_shift; - /* Incantation to convince GCC to turn these into conditional moves - * instead of branches -- faster, as this branch is basically - * unpredictable. */ c->high = bit ? c->high - low : low; c->code_word = bit ? 
code_word - low_shift : code_word; return bit; } +#endif /* closes #ifndef vp56_rac_get_prob */ // branchy variant, to be used where there's a branch based on the bit decoded static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob) diff --git a/libavcodec/x86/vp56_arith.h b/libavcodec/x86/vp56_arith.h new file mode 100644 index 0000000..d05dcb6 --- /dev/null +++ b/libavcodec/x86/vp56_arith.h @@ -0,0 +1,54 @@ +/** + * VP5 and VP6 compatible video decoder (arith decoder) + * + * Copyright (C) 2006 Aurelien Jacobs + * Copyright (C) 2010 Eli Friedman + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_X86_VP56_ARITH_H +#define AVCODEC_X86_VP56_ARITH_H + +#if HAVE_FAST_CMOV /* the cmov-based asm version below is compiled only when HAVE_FAST_CMOV is nonzero */ +#define vp56_rac_get_prob vp56_rac_get_prob /* self-named macro: lets the #ifndef vp56_rac_get_prob test in vp56.h skip the generic C version */ +static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob) /* Decode one bit: renormalize via vp56_rac_renorm(c), compute low = 1 + (((high - 1) * prob) >> 8), return 1 if code_word >= (low << 8) and 0 otherwise, then store the updated high and code_word back into c. */ +{ + unsigned int code_word = vp56_rac_renorm(c); + unsigned int high = c->high; + unsigned int low = 1 + (((high - 1) * prob) >> 8); + unsigned int low_shift = low << 8; + int bit = 0; /* pre-zeroed because setae below writes only the low byte of this register */ + + __asm__( + "subl %4, %1 \n\t" /* high -= low (the value kept when the decoded bit is 1) */ + "subl %3, %2 \n\t" /* code_word -= low_shift; a borrow (CF set) means code_word < low_shift */ + "leal (%2, %3), %3 \n\t" /* operand 3 = original code_word again; lea leaves the flags from the subl untouched */ + "setae %b0 \n\t" /* bit = 1 when there was no borrow, i.e. code_word >= low_shift */ + "cmovb %4, %1 \n\t" /* on borrow: high = low */ + "cmovb %3, %2 \n\t" /* on borrow: code_word = original code_word */ + : "+q"(bit), "+r"(high), "+r"(code_word), "+r"(low_shift) /* all four are read-write; q requests a register with a byte subregister for the %b0 reference */ + : "r"(low) /* NOTE(review): low is read again by the first cmovb after operand 1 was written, yet the read-write operands have no early-clobber (&) and no cc clobber is listed although subl changes the flags - confirm against the GCC Extended Asm rules before reusing this pattern */ + ); + + c->high = high; + c->code_word = code_word; + return bit; +} +#endif /* HAVE_FAST_CMOV */ + +#endif /* AVCODEC_X86_VP56_ARITH_H */ -- 2.7.4