1 #include "evas_common.h"
3 #if defined BUILD_MMX || defined BUILD_SSE
9 static void evas_common_copy_pixels_c (DATA32 *src, DATA32 *dst, int len);
10 static void evas_common_copy_pixels_mmx (DATA32 *src, DATA32 *dst, int len);
11 static void evas_common_copy_pixels_mmx2 (DATA32 *src, DATA32 *dst, int len);
12 static void evas_common_copy_pixels_sse/*NB*/ (DATA32 *src, DATA32 *dst, int len);
15 static void evas_common_copy_pixels_neon (DATA32 *src, DATA32 *dst, int len);
16 static void evas_common_copy_pixels_rev_neon (DATA32 *src, DATA32 *dst, int len);
19 static void evas_common_copy_pixels_rev_c (DATA32 *src, DATA32 *dst, int len);
20 static void evas_common_copy_pixels_rev_mmx (DATA32 *src, DATA32 *dst, int len);
21 static void evas_common_copy_pixels_rev_sse/*NB*/ (DATA32 *src, DATA32 *dst, int len);
23 static void evas_common_copy_rev_pixels_c (DATA32 *src, DATA32 *dst, int len);
27 evas_common_blit_init(void)
32 evas_common_blit_rectangle(const RGBA_Image *src, RGBA_Image *dst, int src_x, int src_y, int w, int h, int dst_x, int dst_y)
36 DATA32 *src_ptr, *dst_ptr;
40 if (src_x + w > (int)src->cache_entry.w) w = src->cache_entry.w - src_x;
51 if (src_y + h > (int)src->cache_entry.h) h = src->cache_entry.h - src_y;
62 if (dst_x + w > (int)dst->cache_entry.w) w = dst->cache_entry.w - dst_x;
73 if (dst_y + h > (int)dst->cache_entry.h) h = dst->cache_entry.h - dst_y;
85 /* src after dst - go forward */
86 if (((src_y * src->cache_entry.w) + src_x) > ((dst_y * dst->cache_entry.w) + dst_x))
88 func = evas_common_draw_func_copy_get(w, 0);
89 for (y = 0; y < h; y++)
91 src_ptr = src->image.data + ((y + src_y) * src->cache_entry.w) + src_x;
92 dst_ptr = dst->image.data + ((y + dst_y) * dst->cache_entry.w) + dst_x;
93 func(src_ptr, dst_ptr, w);
99 func = evas_common_draw_func_copy_get(w, 1);
100 for (y = h - 1; y >= 0; y--)
102 src_ptr = src->image.data + ((y + src_y) * src->cache_entry.w) + src_x;
103 dst_ptr = dst->image.data + ((y + dst_y) * dst->cache_entry.w) + dst_x;
104 func(src_ptr, dst_ptr, w);
110 func = evas_common_draw_func_copy_get(w, 0);
111 for (y = 0; y < h; y++)
113 src_ptr = src->image.data + ((y + src_y) * src->cache_entry.w) + src_x;
114 dst_ptr = dst->image.data + ((y + dst_y) * dst->cache_entry.w) + dst_x;
115 func(src_ptr, dst_ptr, w);
120 /****************************************************************************/
123 evas_common_copy_rev_pixels_c(DATA32 *src, DATA32 *dst, int len)
125 DATA32 *dst_end = dst + len;
128 while (dst < dst_end) *dst++ = *src--;
134 evas_common_copy_pixels_rev_neon(DATA32 *src, DATA32 *dst, int len)
136 uint32_t *tmp = (void *)37;
137 #define AP "evas_common_copy_rev_pixels_neon_"
139 // Can we do 32 byte?
140 "andS %[tmp], %[d], $0x1f \n\t"
141 "beq "AP"quadstart \n\t"
143 // Can we do at least 16 byte?
144 "andS %[tmp], %[d], $0x4 \n\t"
145 "beq "AP"dualstart \n\t"
150 "vld1.32 d0[0], [%[s]] \n\t"
151 "vst1.32 d0[0], [%[d]]! \n\t"
155 "sub %[tmp], %[e], %[d] \n\t"
156 "cmp %[tmp], #31 \n\t"
157 "blt "AP"loopout \n\t"
159 "andS %[tmp], %[d], $0x1f \n\t"
160 "beq "AP"quadstart \n\t"
164 "vldm %[s], {d0} \n\t"
165 "vrev64.32 d1, d0 \n\t"
166 "vstm %[d]!, {d1} \n\t"
168 "andS %[tmp], %[d], $0x1f \n\t"
169 "bne "AP"dualloop \n\t"
173 "sub %[tmp], %[e], %[d] \n\t"
174 "cmp %[tmp], #32 \n\t"
175 "blt "AP"loopout \n\t"
177 "sub %[tmp],%[e],#32 \n\t"
181 "vldm %[s], {d0,d1,d2,d3} \n\t"
183 "vrev64.32 d7,d0 \n\t"
184 "vrev64.32 d6,d1 \n\t"
185 "vrev64.32 d5,d2 \n\t"
186 "vrev64.32 d4,d3 \n\t"
188 "vstm %[d]!, {d4,d5,d6,d7} \n\t"
190 "cmp %[tmp], %[d] \n\t"
191 "bhi "AP"quadloop \n\t"
195 "cmp %[d], %[e] \n\t"
197 "sub %[tmp],%[e], %[d] \n\t"
198 "cmp %[tmp],$0x04 \n\t"
199 "beq "AP"singleloop2 \n\t"
202 "sub %[tmp],%[e],$0x7 \n\t"
203 AP "dualloop2int: \n\t"
205 "vldm %[s], {d0} \n\t"
206 "vrev64.32 d1,d0 \n\t"
207 "vstm %[d]!, {d1} \n\t"
209 "cmp %[tmp], %[d] \n\t"
210 "bhi "AP"dualloop2int \n\t"
213 "cmp %[e], %[d] \n\t"
216 AP "singleloop2: \n\t"
218 "vld1.32 d0[0], [%[s]] \n\t"
219 "vst1.32 d0[0], [%[d]] \n\t"
225 : [s] "r" (src + len), [e] "r" (dst + len), [d] "r" (dst),[tmp] "r" (tmp)
227 : "q0","q1","q2","q3","0","1","memory"
237 evas_common_copy_pixels_c(DATA32 *src, DATA32 *dst, int len)
239 DATA32 *dst_end = dst + len;
241 while (dst < dst_end) *dst++ = *src++;
247 evas_common_copy_pixels_mmx(DATA32 *src, DATA32 *dst, int len)
248 { // XXX cppcheck: [./src/lib/engines/common/evas_blit_main.c:248]: (error) Invalid number of character ({). Can't process file.
249 // so... wtf? what's wrong with this { ? or anything surrounding it?
250 DATA32 *dst_end, *dst_end_pre;
255 src_align = (intptr_t)src & 0x3f; /* 64 byte alignment */
256 dst_align = (intptr_t)dst & 0x3f; /* 64 byte alignment */
258 if ((src_align != dst_align) ||
259 ((src_align & 0x3) != 0))
262 evas_common_copy_pixels_c(src, dst, len);
267 while ((src_align > 0) && (len > 0))
271 src_align -= sizeof(DATA32);
273 #endif /* ALIGN_FIX */
276 dst_end_pre = dst + ((len / 16) * 16);
278 while (dst < dst_end_pre)
280 MOVE_16DWORDS_MMX(src, dst);
284 while (dst < dst_end) *dst++ = *src++;
290 evas_common_copy_pixels_mmx2(DATA32 *src, DATA32 *dst, int len)
292 DATA32 *dst_end, *dst_end_pre;
297 src_align = (intptr_t)src & 0x3f; /* 64 byte alignment */
298 dst_align = (intptr_t)dst & 0x3f; /* 64 byte alignment */
300 if ((src_align != dst_align) ||
301 ((src_align & 0x3) != 0))
304 evas_common_copy_pixels_c(src, dst, len);
309 while ((src_align > 0) && (len > 0))
313 src_align -= sizeof(DATA32);
318 dst_end_pre = dst + ((len / 16) * 16);
320 while (dst < dst_end_pre)
322 MOVE_16DWORDS_MMX(src, dst);
326 while (dst < dst_end) *dst++ = *src++;
332 evas_common_copy_pixels_neon(DATA32 *src, DATA32 *dst, int len){
333 uint32_t *e,*tmp = (void *)37;
335 #define AP "evas_common_copy_pixels_neon_"
337 // Can we do 32 byte?
338 "andS %[tmp], %[d], $0x1f \n\t"
339 "beq "AP"quadstart \n\t"
341 // Can we do at least 16 byte?
342 "andS %[tmp], %[d], $0x4 \n\t"
343 "beq "AP"dualstart \n\t"
347 "vld1.32 d0[0], [%[s]]! \n\t"
348 "vst1.32 d0[0], [%[d]]! \n\t"
352 "sub %[tmp], %[e], %[d] \n\t"
353 "cmp %[tmp], #31 \n\t"
354 "blt "AP"loopout \n\t"
356 "andS %[tmp], %[d], $0x1f \n\t"
357 "beq "AP"quadstart \n\t"
360 "vldm %[s]!, {d0} \n\t"
361 "vstm %[d]!, {d0} \n\t"
363 "andS %[tmp], %[d], $0x1f \n\t"
364 "bne "AP"dualloop \n\t"
368 "sub %[tmp], %[e], %[d] \n\t"
369 "cmp %[tmp], #64 \n\t"
370 "blt "AP"loopout \n\t"
372 "sub %[tmp],%[e],#63 \n\t"
375 "vldm %[s]!, {d0,d1,d2,d3} \n\t"
376 "vldm %[s]!, {d4,d5,d6,d7} \n\t"
377 "vstm %[d]!, {d0,d1,d2,d3} \n\t"
378 "vstm %[d]!, {d4,d5,d6,d7} \n\t"
380 "cmp %[tmp], %[d] \n\t"
381 "bhi "AP"quadloop \n\t"
385 "cmp %[d], %[e] \n\t"
387 "sub %[tmp],%[e], %[d] \n\t"
388 "cmp %[tmp],$0x04 \n\t"
389 "beq "AP"singleloop2 \n\t"
392 "sub %[tmp],%[e],$0x7 \n\t"
393 AP "dualloop2int: \n\t"
394 "vldm %[s]!, {d0} \n\t"
395 "vstm %[d]!, {d0} \n\t"
397 "cmp %[tmp], %[d] \n\t"
398 "bhi "AP"dualloop2int \n\t"
401 "cmp %[e], %[d] \n\t"
404 AP "singleloop2: \n\t"
405 "vld1.32 d0[0], [%[s]] \n\t"
406 "vst1.32 d0[0], [%[d]] \n\t"
412 : [s] "r" (src), [e] "r" (e), [d] "r" (dst),[tmp] "r" (tmp)
414 : "q0","q1","q2","q3","memory"
419 #endif /* BUILD_NEON */
423 evas_common_copy_pixels_sse(DATA32 *src, DATA32 *dst, int len)
425 DATA32 *src_ptr, *dst_ptr, *dst_end_ptr;
427 dst_end_ptr = dst + len;
431 while (dst_ptr < dst_end_ptr)
433 MOVE_16DWORDS_MMX2(src_ptr, dst_ptr);
437 dst_end_ptr = dst + len;
438 while (dst_ptr < dst_end_ptr)
449 src_align = (int)src & 0x3f; /* 64 byte alignment */
450 dst_align = (int)dst & 0x3f; /* 64 byte alignment */
452 if ((src_align != dst_align) ||
453 ((src_align & 0x3) != 0))
456 evas_common_copy_pixels_c(src, dst, len);
461 while ((src_align > 0) && (len > 0))
467 src_align -= sizeof(DATA32);
469 #endif /* ALIGN_FIX */
473 dst_end_ptr = dst + len;
474 dst_end_ptr_pre = dst + ((len / 16) * 16);
476 while (dst_ptr < dst_end_ptr_pre)
478 prefetch(&src_ptr[16]);
479 MOVE_16DWORDS_MMX(src_ptr, dst_ptr);
483 while (dst_ptr < dst_end_ptr)
493 /****************************************************************************/
497 evas_common_copy_pixels_rev_c(DATA32 *src, DATA32 *dst, int len)
505 while (dst > dst_end) *dst-- = *src--;
511 evas_common_copy_pixels_rev_mmx(DATA32 *src, DATA32 *dst, int len)
513 DATA32 *dst_end, *dst_end_pre;
517 src = src + len - 16;
519 dst_end_pre = dst + len - ((len / 16) * 16);
520 dst = dst + len - 16;
522 while (dst >= dst_end_pre)
524 MOVE_16DWORDS_MMX(src, dst);
530 while (dst >= dst_end)
538 while (dst > dst_end)
546 evas_common_copy_pixels_rev_sse(DATA32 *src, DATA32 *dst, int len)
548 DATA32 *src_ptr, *dst_ptr, *dst_end_ptr, *dst_end_ptr_pre;
550 src_ptr = src + len - 16;
551 dst_ptr = dst + len - 16;
553 dst_end_ptr_pre = dst + len - ((len / 16) * 16);
557 while (dst_ptr >= dst_end_ptr_pre)
559 prefetch(&src_ptr[-16]);
560 MOVE_10DWORDS_MMX(src_ptr, dst_ptr);
566 while (dst_ptr >= dst_end_ptr)
575 src_ptr = src + len - 1;
576 dst_ptr = dst + len - 1;
577 while (dst_ptr >= dst_end_ptr)
589 evas_common_draw_func_copy_get(int pixels, int reverse)
592 return evas_common_copy_rev_pixels_c;
596 if (evas_common_cpu_has_feature(CPU_FEATURE_SSE) && (pixels > 256 * 256))
597 return evas_common_copy_pixels_rev_sse;
603 if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
604 return evas_common_copy_pixels_rev_mmx;
607 # if defined(BUILD_SSE) || defined(BUILD_MMX)
610 if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
611 return evas_common_copy_pixels_rev_neon;
615 # if defined(BUILD_MMX) || defined(BUILD_NEON)
618 return evas_common_copy_pixels_rev_c;
627 if (evas_common_cpu_has_feature(CPU_FEATURE_MMX2))
629 return evas_common_copy_pixels_mmx2;
636 if (evas_common_cpu_has_feature(CPU_FEATURE_SSE) && (pixels > 64 * 64))
638 return evas_common_copy_pixels_sse;
645 if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
647 return evas_common_copy_pixels_neon;
654 if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
656 return evas_common_copy_pixels_mmx;
662 return evas_common_copy_pixels_c;
668 if (evas_common_cpu_has_feature(CPU_FEATURE_SSE) && (pixels > 256 * 256))
669 return evas_common_copy_pixels_sse;
676 if (evas_common_cpu_has_feature(CPU_FEATURE_MMX2))
678 return evas_common_copy_pixels_mmx2;
680 else if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
682 return evas_common_copy_pixels_mmx;
688 return evas_common_copy_pixels_c;
694 return evas_common_copy_pixels_c;