#warning Everything in this header is deprecated, use plain __asm__()! New code using this header will be rejected.
-/*
- * The type of an value that fits in an MMX register (note that long
- * long constant values MUST be suffixed by LL and unsigned long long
- * values by ULL, lest they be truncated by the compiler)
- */
-
-typedef union {
- long long q; /* Quadword (64-bit) value */
- unsigned long long uq; /* Unsigned Quadword */
- int d[2]; /* 2 Doubleword (32-bit) values */
- unsigned int ud[2]; /* 2 Unsigned Doubleword */
- short w[4]; /* 4 Word (16-bit) values */
- unsigned short uw[4]; /* 4 Unsigned Word */
- char b[8]; /* 8 Byte (8-bit) values */
- unsigned char ub[8]; /* 8 Unsigned Byte */
- float s[2]; /* Single-precision (32-bit) value */
-} mmx_t; /* On an 8-byte (64-bit) boundary */
-
#define mmx_i2r(op,imm,reg) \
__asm__ volatile (#op " %0, %%" #reg \
#ifdef HAVE_MMX
#include "i386/mmx.h"
+#include "i386/dsputil_mmx.h"
#endif
#define xglue(x, y) x ## y
#else
{
- mmx_t rounder;
- rounder.uw[0]=4;
- rounder.uw[1]=4;
- rounder.uw[2]=4;
- rounder.uw[3]=4;
pxor_r2r(mm7,mm7);
- movq_m2r(rounder,mm6);
+ movq_m2r(ff_pw_4,mm6);
}
for (;size > 3; size-=4) {
DEINT_LINE_LUM
#else
{
- mmx_t rounder;
- rounder.uw[0]=4;
- rounder.uw[1]=4;
- rounder.uw[2]=4;
- rounder.uw[3]=4;
pxor_r2r(mm7,mm7);
- movq_m2r(rounder,mm6);
+ movq_m2r(ff_pw_4,mm6);
}
for (;size > 3; size-=4) {
DEINT_INPLACE_LINE_LUM
int src_pos, phase;
const uint8_t *s;
int16_t *filter;
- mmx_t tmp;
+ uint64_t tmp;
src_pos = src_start;
pxor_r2r(mm7, mm7);
packuswb_r2r(mm7, mm3);
packuswb_r2r(mm7, mm2);
movq_r2m(mm0, tmp);
- dst[0] = tmp.ub[0];
+ dst[0] = tmp & 0xFF;
movq_r2m(mm1, tmp);
- dst[1] = tmp.ub[0];
+ dst[1] = tmp & 0xFF;
movq_r2m(mm2, tmp);
- dst[2] = tmp.ub[0];
+ dst[2] = tmp & 0xFF;
movq_r2m(mm3, tmp);
- dst[3] = tmp.ub[0];
+ dst[3] = tmp & 0xFF;
dst += 4;
dst_width -= 4;
}
FILTER4(mm0);
packuswb_r2r(mm7, mm0);
movq_r2m(mm0, tmp);
- dst[0] = tmp.ub[0];
+ dst[0] = tmp & 0xFF;
dst++;
dst_width--;
}
static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
int wrap, int16_t *filter)
{
- int sum, i, v;
+ int sum, i;
const uint8_t *s;
- mmx_t tmp;
- mmx_t coefs[4];
+ uint64_t tmp;
+ uint64_t coefs[4];
for(i=0;i<4;i++) {
- v = filter[i];
- coefs[i].uw[0] = v;
- coefs[i].uw[1] = v;
- coefs[i].uw[2] = v;
- coefs[i].uw[3] = v;
+ /* Replicate the 16-bit coefficient into all four word lanes of the
+ * 64-bit value. filter[] is int16_t, so truncate to uint16_t first:
+ * a plain conversion to uint64_t would sign-extend negative
+ * coefficients and the additions below would then carry into the
+ * neighbouring lanes (e.g. -1 would yield 0xFFFEFFFEFFFEFFFF). */
+ tmp = (uint16_t)filter[i];
+ coefs[i] = (tmp<<48) + (tmp<<32) + (tmp<<16) + tmp;
}
pxor_r2r(mm7, mm7);
packuswb_r2r(mm7, mm0);
movq_r2m(mm0, tmp);
- *(uint32_t *)dst = tmp.ud[0];
+ *(uint32_t *)dst = tmp & 0xFFFFFFFF;
dst += 4;
s += 4;
dst_width -= 4;