void ff_mmxext_idct(DCTELEM *block);
/* pixel operations */
// 8-byte-aligned 64-bit constants used as memory operands by the asm below:
//   mm_bone: 0x01 in every byte
//   mm_wone: 0x0001 in every 16-bit word
//   mm_wtwo: 0x0002 in every 16-bit word
// This change switches the declarations from `unsigned long long int` to
// uint64_t and makes the literals unsigned (ULL instead of LL).
-static const unsigned long long int mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101LL;
-static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001LL;
-static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002LL;
+static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL;
+static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
+static const uint64_t mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002ULL;
//static const unsigned short mm_wone[4] __attribute__ ((aligned(8))) = { 0x1, 0x1, 0x1, 0x1 };
//static const unsigned short mm_wtwo[4] __attribute__ ((aligned(8))) = { 0x2, 0x2, 0x2, 0x2 };
#ifndef PIC
// Non-PIC build: the constants are read directly from memory.
// MOVQ_WONE / MOVQ_WTWO each expand to a complete asm statement that loads
// mm_wone / mm_wtwo into the named register; the macro itself supplies the
// "%%" prefix, so callers pass the bare register name.
#define MOVQ_WONE(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_wone))
#define MOVQ_WTWO(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_wtwo))
// MOVQ_BONE is different: it expands to a string fragment only, meant to be
// placed inside an enclosing asm template. It does not add "%%", so the
// caller passes the register already prefixed, e.g. MOVQ_BONE(%%mm7).
+#define MOVQ_BONE(regd) "movq "MANGLE(mm_bone)", "#regd" \n\t"
#else
// For shared libraries it is better to obtain the constants this way (built in-register) rather than loading them from memory
// pcmpeqd -> -1
"pcmpeqd %%" #regd ", %%" #regd " \n\t" \
"psrlw $15, %%" #regd " \n\t" \
"psllw $1, %%" #regd ::)
+
// PIC variant of MOVQ_BONE: builds the constant inside the register itself,
// so no memory operand is referenced:
//   pcmpeqd  regd, regd -> all bits set
//   psrlw    $15, regd  -> 0x0001 in every 16-bit word
//   packuswb regd, regd -> 0x01 in every byte (the same value as mm_bone)
// Like the non-PIC form, this is a string fragment for use inside an asm
// template, and regd must already carry its "%%" prefix.
+#define MOVQ_BONE(regd) \
+ "pcmpeqd " #regd ", " #regd " \n\t" \
+ "psrlw $15, " #regd " \n\t"\
+ "packuswb " #regd ", " #regd " \n\t"
#endif
+
/***********************************/
/* 3Dnow specific */
* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
* mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
*/
-
+
static void DEF(put_pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
__asm __volatile(
{
__asm __volatile(
"xorl %%eax, %%eax \n\t"
- "movq "MANGLE(mm_bone)", %%mm7 \n\t"
+ MOVQ_BONE(%%mm7)
".balign 16 \n\t"
"1: \n\t"
"movq (%1, %%eax), %%mm0 \n\t"
static void DEF(put_no_rnd_pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
__asm __volatile(
- "movq "MANGLE(mm_bone)", %%mm7 \n\t"
+ MOVQ_BONE(%%mm7)
"xorl %%eax, %%eax \n\t"
"movq (%1), %%mm0 \n\t"
".balign 16 \n\t"
static void DEF(avg_pixels_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
__asm __volatile(
- "movq "MANGLE(mm_bone)", %%mm7 \n\t"
+ MOVQ_BONE(%%mm7)
"xorl %%eax, %%eax \n\t"
"movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm1 \n\t"