http://infocenter.arm.com/help/topic/com.arm.doc.ddi0211j/DDI0211J_arm1136_r1p5_trm.pdf
Optimization guide for Intel XScale (used in Sharp Zaurus PDA):
http://download.intel.com/design/intelxscale/27347302.pdf
-Intel Wireless MMX2 Coprocessor: Programmers Reference Manual
+Intel Wireless MMX 2 Coprocessor: Programmers Reference Manual
http://download.intel.com/design/intelxscale/31451001.pdf
PowerPC-specific:
Horizontal scaler
There are several horizontal scalers. A special case worth mentioning is
- the fast bilinear scaler that is made of runtime-generated MMX2 code
+ the fast bilinear scaler that is made of runtime-generated MMXEXT code
using specially tuned pshufw instructions.
The remaining scalers are specially-tuned for various filter lengths.
They scale 8-bit unsigned planar data to 16-bit signed planar data.
/*
- * DSP utils : average functions are compiled twice for 3dnow/mmx2
+ * DSP utils : average functions are compiled twice for 3dnow/mmxext
* Copyright (c) 2000, 2001 Fabrice Bellard
* Copyright (c) 2002-2004 Michael Niedermayer
*
#undef OP_AVG
/***********************************/
-/* MMX2 specific */
+/* MMXEXT specific */
#define DEF(x) x ## _mmx2
-/* Introduced only in MMX2 set */
+/* Introduced only in MMXEXT set */
#define PAVGB "pavgb"
#define OP_AVG PAVGB
/*
* RGB15->RGB16 original by Strepto/Astral
* ported to gcc & bugfixed : A'rpi
- * MMX2, 3DNOW optimization by Nick Kurshev
+ * MMXEXT, 3DNOW optimization by Nick Kurshev
* 32-bit C version, and and&add trick by Michael Niedermayer
*/
/*
* original by Strepto/Astral
* ported to gcc & bugfixed: A'rpi
- * MMX2, 3DNOW optimization by Nick Kurshev
+ * MMXEXT, 3DNOW optimization by Nick Kurshev
* 32-bit C version, and and&add trick by Michael Niedermayer
*/
static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size)
int vChrFilterSize; ///< Vertical filter size for chroma pixels.
//@}
- int lumMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for luma/alpha planes.
- int chrMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for chroma planes.
- uint8_t *lumMmx2FilterCode; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code for luma/alpha planes.
- uint8_t *chrMmx2FilterCode; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code for chroma planes.
+ int lumMmx2FilterCodeSize; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for luma/alpha planes.
+ int chrMmx2FilterCodeSize; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for chroma planes.
+ uint8_t *lumMmx2FilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for luma/alpha planes.
+ uint8_t *chrMmx2FilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for chroma planes.
int canMMX2BeUsed;
int xpos, i;
// create an optimized horizontal scaling routine
- /* This scaler is made of runtime-generated MMX2 code using specially tuned
+ /* This scaler is made of runtime-generated MMXEXT code using specially tuned
* pshufw instructions. For every four output pixels, if four input pixels
* are enough for the fast bilinear scaling, then a chunk of fragmentB is
* used. If five input pixels are needed, then a chunk of fragmentA is used.
&& (flags & SWS_FAST_BILINEAR)) {
if (flags & SWS_PRINT_INFO)
av_log(c, AV_LOG_INFO,
- "output width is not a multiple of 32 -> no MMX2 scaler\n");
+ "output width is not a multiple of 32 -> no MMXEXT scaler\n");
}
if (usesHFilter)
c->canMMX2BeUsed = 0;
sws_format_name(dstFormat));
if (INLINE_MMXEXT(cpu_flags))
- av_log(c, AV_LOG_INFO, "using MMX2\n");
+ av_log(c, AV_LOG_INFO, "using MMXEXT\n");
else if (INLINE_AMD3DNOW(cpu_flags))
av_log(c, AV_LOG_INFO, "using 3DNOW\n");
else if (INLINE_MMX(cpu_flags))
%else ; %1 == 9/10
%if cpuflag(sse4)
packusdw m2, m1
-%else ; mmx2/sse2
+%else ; mmxext/sse2
packssdw m2, m1
pmaxsw m2, m6
-%endif ; mmx2/sse2/sse4/avx
+%endif ; mmxext/sse2/sse4/avx
pminsw m2, [yuv2yuvX_%1_upper]
%endif ; %1 == 9/10/16
mova [dstq+r5*2], m2
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
-//Note: We have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW + MMX2 one.
+// Note: We have C, MMX, MMXEXT, 3DNOW versions; there is no 3DNOW + MMXEXT one.
#define COMPILE_TEMPLATE_MMXEXT 0
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define RENAME(a) a ## _MMX
#include "rgb2rgb_template.c"
-//MMX2 versions
+// MMXEXT versions
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#define COMPILE_TEMPLATE_MMXEXT 1
/*
RGB15->RGB16 original by Strepto/Astral
ported to gcc & bugfixed : A'rpi
- MMX2, 3DNOW optimization by Nick Kurshev
+ MMXEXT, 3DNOW optimization by Nick Kurshev
32-bit C version, and and&add trick by Michael Niedermayer
*/
/*
original by Strepto/Astral
ported to gcc & bugfixed: A'rpi
- MMX2, 3DNOW optimization by Nick Kurshev
+ MMXEXT, 3DNOW optimization by Nick Kurshev
32-bit C version, and and&add trick by Michael Niedermayer
*/
static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size)
#include "swscale_template.c"
#endif
-//MMX2 versions
+// MMXEXT versions
#if HAVE_MMXEXT_INLINE
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
}
if (c->srcBpc == 8 && c->dstBpc <= 10) {
- // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
+ // Use the new MMX scaler if the MMXEXT one can't be used (it is faster than the x86 ASM one).
#if COMPILE_TEMPLATE_MMXEXT
if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed)
{
*
* Copyright (C) 2009 Konstantin Shishkov
*
- * MMX/MMX2 template stuff (needed for fast movntq support),
+ * MMX/MMXEXT template stuff (needed for fast movntq support),
* 1,4,8bpp support and context / deglobalize stuff
* by Michael Niedermayer (michaelni@gmx.at)
*
#include "yuv2rgb_template.c"
#endif /* HAVE_MMX_INLINE */
-//MMX2 versions
+// MMXEXT versions
#if HAVE_MMXEXT_INLINE
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT