const pix_type *s = &(src[(src_stride * (w - 1)) + y]); \
pix_type *d = &(dst[dst_stride * y]); \
const pix_type *ptr1 = s; \
- const pix_type *ptr2 = ptr1 + src_stride; \
- const pix_type *ptr3 = ptr2 + src_stride; \
- const pix_type *ptr4 = ptr3 + src_stride; \
- for(x = 0; x < w; x+=4) { \
+ const pix_type *ptr2 = ptr1 - src_stride; \
+ const pix_type *ptr3 = ptr2 - src_stride; \
+ const pix_type *ptr4 = ptr3 - src_stride; \
+ for(x = 0; x < w; x += 4) { \
pix_type s_array[4] = { *ptr1, *ptr2, *ptr3, *ptr4 }; \
vst1q_u32(d, vld1q_u32(s_array)); \
d += 4; \
- ptr1 += klght; \
- ptr2 += klght; \
- ptr3 += klght; \
- ptr4 += klght; \
+ ptr1 -= klght; \
+ ptr2 -= klght; \
+ ptr3 -= klght; \
+ ptr4 -= klght; \
} \
} \
} \
pix_type *d = &(dst[dst_stride * y]); \
for (x = 0; x < w; x++) { \
*d++ = *s; \
- s += src_stride; \
+ s -= src_stride; \
} \
} \
} \