and does slow the mc2 chroma put down; avg, interestingly, seems unaffected speed-wise on Duron.
This should, of course, rather be done in a way which doesn't slow it down, but it's better to be a few %
slower but correct than incorrect.
Originally committed as revision 8093 to svn://svn.ffmpeg.org/ffmpeg/trunk
/* writes garbage to the right of dst.
* ok because partitions are processed from left to right. */
H264_CHROMA_OP4((%0), %%mm1, %%mm3)
- "movd %%mm1, (%0)\n\t"
+ "movd %%mm1, %%esi\n\t"
+ "movw %%si, (%0)\n\t"
"add %4, %0\n\t"
"sub $1, %2\n\t"
"jnz 1b\n\t"
- : "+r" (dst), "+r"(src), "+r"(h) : "m" (ff_pw_32), "r"(stride));
+ : "+r" (dst), "+r"(src), "+r"(h)
+ : "m" (ff_pw_32), "r"(stride)
+ : "%esi");
}
#endif