"pmullw %3, %%mm6 \n\t"\
"add %2, %0 \n\t"\
"punpcklbw %%mm7, "#F" \n\t"\
+ "paddw %4, "#A" \n\t"\
"paddw "#F", "#A" \n\t"\
"paddw "#A", %%mm6 \n\t"\
"movq %%mm6, "#OF"(%1) \n\t"
QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*3)\
\
: "+a"(src)\
- : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\
+ : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
: "memory"\
);\
tmp += 4;\
}\
tmp -= 3*4;\
asm volatile(\
- "movq %4, %%mm6 \n\t"\
"1: \n\t"\
"movq (%0), %%mm0 \n\t"\
"paddw 10(%0), %%mm0 \n\t"\
"psubw %%mm1, %%mm0 \n\t"/*(a-b)/4-b */\
"paddsw %%mm2, %%mm0 \n\t"\
"psraw $2, %%mm0 \n\t"/*((a-b)/4-b+c)/4 */\
- "paddw %%mm6, %%mm2 \n\t"\
- "paddw %%mm2, %%mm0 \n\t"/*(a-5*b+20*c)/16 +32 */\
+ "paddw %%mm2, %%mm0 \n\t"/*(a-5*b+20*c)/16 */\
"psraw $6, %%mm0 \n\t"\
"packuswb %%mm0, %%mm0 \n\t"\
OP(%%mm0, (%1),%%mm7, d)\
"decl %2 \n\t"\
" jnz 1b \n\t"\
: "+a"(tmp), "+c"(dst), "+m"(h)\
- : "S"((long)dstStride), "m"(ff_pw_32)\
+ : "S"((long)dstStride)\
: "memory"\
);\
}\
QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 6*48)\
QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*48)\
: "+a"(src)\
- : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\
+ : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
: "memory"\
);\
if(size==16){\
QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 14*48)\
QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 15*48)\
: "+a"(src)\
- : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\
+ : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
: "memory"\
);\
}\
do{\
h = size;\
asm volatile(\
- "movq %4, %%mm6 \n\t"\
"1: \n\t"\
"movq (%0), %%mm0 \n\t"\
"movq 8(%0), %%mm3 \n\t"\
"paddsw %%mm5, %%mm3 \n\t"\
"psraw $2, %%mm0 \n\t"\
"psraw $2, %%mm3 \n\t"\
- "paddw %%mm6, %%mm2 \n\t"\
- "paddw %%mm6, %%mm5 \n\t"\
"paddw %%mm2, %%mm0 \n\t"\
"paddw %%mm5, %%mm3 \n\t"\
"psraw $6, %%mm0 \n\t"\
"decl %2 \n\t"\
" jnz 1b \n\t"\
: "+a"(tmp), "+c"(dst), "+m"(h)\
- : "S"((long)dstStride), "m"(ff_pw_32)\
+ : "S"((long)dstStride)\
: "memory"\
);\
tmp += 8 - size*24;\
static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
{\
asm volatile(\
- "movq %5, %%mm6 \n\t"\
"movq (%1), %%mm0 \n\t"\
"movq 24(%1), %%mm1 \n\t"\
- "paddw %%mm6, %%mm0 \n\t"\
- "paddw %%mm6, %%mm1 \n\t"\
"psraw $5, %%mm0 \n\t"\
"psraw $5, %%mm1 \n\t"\
"packuswb %%mm0, %%mm0 \n\t"\
"lea (%2,%4,2), %2 \n\t"\
"movq 48(%1), %%mm0 \n\t"\
"movq 72(%1), %%mm1 \n\t"\
- "paddw %%mm6, %%mm0 \n\t"\
- "paddw %%mm6, %%mm1 \n\t"\
"psraw $5, %%mm0 \n\t"\
"psraw $5, %%mm1 \n\t"\
"packuswb %%mm0, %%mm0 \n\t"\
OP(%%mm0, (%2), %%mm4, d)\
OP(%%mm1, (%2,%4), %%mm5, d)\
:"+a"(src8), "+c"(src16), "+d"(dst)\
- :"S"((long)src8Stride), "D"((long)dstStride), "m"(ff_pw_16)\
+ :"S"((long)src8Stride), "D"((long)dstStride)\
:"memory");\
}\
static av_noinline void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
{\
- asm volatile(\
- "movq %0, %%mm6 \n\t"\
- ::"m"(ff_pw_16)\
- );\
while(h--){\
asm volatile(\
"movq (%1), %%mm0 \n\t"\
"movq 8(%1), %%mm1 \n\t"\
- "paddw %%mm6, %%mm0 \n\t"\
- "paddw %%mm6, %%mm1 \n\t"\
"psraw $5, %%mm0 \n\t"\
"psraw $5, %%mm1 \n\t"\
"packuswb %%mm1, %%mm0 \n\t"\