hadamard8_16_wrapper %1, 3
%elif cpuflag(mmx)
ALIGN 16
-; int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2,
-; int stride, int h)
+; int ff_hadamard8_diff_ ## cpu(MpegEncContext *s, uint8_t *src1,
+; uint8_t *src2, int stride, int h)
; r0 = void *s = unused, int h = unused (always 8)
; note how r1, r2 and r3 are not clobbered in this function, so 16x16
; can simply call this 2x2x (and that's why we access rsp+gprsize
HADAMARD8_DIFF 9
INIT_XMM sse2
-; sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
+; int ff_sse16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+; int line_size, int h);
; x86inc declaration: 5 arguments loaded into r0-r4, 5 GPRs used in total,
; 8 vector registers used. Argument order follows the prototype above:
; r0 = v, r1 = pix1, r2 = pix2, r3 = line_size, r4 = h.
; NOTE(review): only the opening instructions and the return are visible
; here; whatever sits between them is not documented by these comments.
cglobal sse16, 5, 5, 8
shr r4d, 1 ; r4d = h / 2 -- presumably two rows are handled per iteration; confirm against the full body
pxor m0, m0 ; m0 = 0 (m0 is an XMM register here, since INIT_XMM sse2 is in effect)
RET
INIT_MMX mmx
-; get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size)
+; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size)
; x86inc declaration: 3 arguments loaded into r0-r2, 4 GPRs used in total.
; Argument order follows the prototype above:
; r0 = block, r1 = pixels, r2 = line_size.
; NOTE(review): only the opening instructions and the return are visible
; here; whatever sits between them is not documented by these comments.
cglobal get_pixels, 3,4
movsxdifnidn r2, r2d ; widen the int line_size to full register width (no-op where r2 and r2d are the same register)
add r0, 128 ; move block forward by 128 bytes = 64 int16_t entries -- presumably so it can be indexed with a negative offset; confirm
RET
INIT_MMX mmx
-; diff_pixels_mmx(int16_t *block, const uint8_t *s1, const unint8_t *s2, stride)
+; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
+; int stride);
; x86inc declaration: 4 arguments loaded into r0-r3, 5 GPRs used in total.
; Argument order follows the prototype above:
; r0 = block, r1 = s1, r2 = s2, r3 = stride.
; NOTE(review): only the opening instructions and the return are visible
; here; whatever sits between them is not documented by these comments.
cglobal diff_pixels, 4,5
movsxdifnidn r3, r3d ; widen the int stride to full register width (no-op where r3 and r3d are the same register)
pxor m7, m7 ; m7 = 0 -- presumably the zero operand for byte->word unpacking; confirm against the full body
REP_RET
INIT_MMX mmx
-; pix_sum16_mmx(uint8_t * pix, int line_size)
+; int ff_pix_sum16_mmx(uint8_t *pix, int line_size)
; x86inc declaration: 2 arguments loaded into r0-r1, 3 GPRs used in total.
; Argument order follows the prototype above: r0 = pix, r1 = line_size.
; NOTE(review): only the opening instructions and the return are visible
; here; whatever sits between them is not documented by these comments.
cglobal pix_sum16, 2, 3
movsxdifnidn r1, r1d ; widen the int line_size to full register width (no-op where r1 and r1d are the same register)
mov r2, r1 ; r2 = copy of line_size; its later use is not visible here
RET
INIT_MMX mmx
-; pix_norm1_mmx(uint8_t *pix, int line_size)
+; int ff_pix_norm1_mmx(uint8_t *pix, int line_size)
cglobal pix_norm1, 2, 4
movsxdifnidn r1, r1d
mov r2, 16