1 #include "evas_common.h"
3 #if defined BUILD_MMX || defined BUILD_SSE
/*
 * Forward declarations of the file-local pixel copy routines.
 * All of them share the (src, dst, len) signature; in the bodies visible
 * further down, len is used as a count of DATA32 elements (dst + len).
 * The preprocessor guards that decide which prototypes are compiled are
 * not all visible in this excerpt.
 */
9 static void evas_common_copy_pixels_c (DATA32 *src, DATA32 *dst, int len);
11 static void evas_common_copy_pixels_mmx (DATA32 *src, DATA32 *dst, int len);
12 static void evas_common_copy_pixels_mmx2 (DATA32 *src, DATA32 *dst, int len);
15 static void evas_common_copy_pixels_sse/*NB*/ (DATA32 *src, DATA32 *dst, int len);
19 static void evas_common_copy_pixels_neon (DATA32 *src, DATA32 *dst, int len);
20 static void evas_common_copy_pixels_rev_neon (DATA32 *src, DATA32 *dst, int len);
23 static void evas_common_copy_pixels_rev_c (DATA32 *src, DATA32 *dst, int len);
25 static void evas_common_copy_pixels_rev_mmx (DATA32 *src, DATA32 *dst, int len);
28 static void evas_common_copy_pixels_rev_sse/*NB*/ (DATA32 *src, DATA32 *dst, int len);
/* Note the different word order in this name: copy_rev_pixels vs. copy_pixels_rev. */
31 static void evas_common_copy_rev_pixels_c (DATA32 *src, DATA32 *dst, int len);
/* Blit module initialisation entry point; takes no arguments.
 * Its return type and body are not visible in this excerpt. */
35 evas_common_blit_init(void)
/*
 * Copy a w x h rectangle of pixels from src at (src_x, src_y) to dst at
 * (dst_x, dst_y), one row at a time, using a copy routine obtained from
 * evas_common_draw_func_copy_get().
 *
 * The visible code clips w and h against the right/bottom edges of both
 * images.  Any clipping of negative coordinates, and the conditions that
 * choose between the three copy loops below, are in lines not visible in
 * this excerpt.
 */
40 evas_common_blit_rectangle(const RGBA_Image *src, RGBA_Image *dst, int src_x, int src_y, int w, int h, int dst_x, int dst_y)
44 DATA32 *src_ptr, *dst_ptr;
/* Shrink the rectangle so it does not run past the source image... */
48 if (src_x + w > (int)src->cache_entry.w) w = src->cache_entry.w - src_x;
59 if (src_y + h > (int)src->cache_entry.h) h = src->cache_entry.h - src_y;
/* ...nor past the destination image. */
70 if (dst_x + w > (int)dst->cache_entry.w) w = dst->cache_entry.w - dst_x;
81 if (dst_y + h > (int)dst->cache_entry.h) h = dst->cache_entry.h - dst_y;
93 /* src after dst - go forward */
/* Compares the linear pixel offset of the source origin with that of the
 * destination origin. */
94 if (((src_y * src->cache_entry.w) + src_x) > ((dst_y * dst->cache_entry.w) + dst_x))
/* Forward copy routine (reverse = 0), rows processed top to bottom. */
96 func = evas_common_draw_func_copy_get(w, 0);
97 for (y = 0; y < h; y++)
99 src_ptr = src->image.data + ((y + src_y) * src->cache_entry.w) + src_x;
100 dst_ptr = dst->image.data + ((y + dst_y) * dst->cache_entry.w) + dst_x;
101 func(src_ptr, dst_ptr, w);
/* Reverse copy routine (reverse = 1), rows processed bottom to top.
 * Presumably the "src before dst" branch of the test above - confirm. */
107 func = evas_common_draw_func_copy_get(w, 1);
108 for (y = h - 1; y >= 0; y--)
110 src_ptr = src->image.data + ((y + src_y) * src->cache_entry.w) + src_x;
111 dst_ptr = dst->image.data + ((y + dst_y) * dst->cache_entry.w) + dst_x;
112 func(src_ptr, dst_ptr, w);
/* Second forward, top-to-bottom copy.  The condition guarding it is not
 * visible; presumably the case where src and dst are different images -
 * TODO confirm. */
118 func = evas_common_draw_func_copy_get(w, 0);
119 for (y = 0; y < h; y++)
121 src_ptr = src->image.data + ((y + src_y) * src->cache_entry.w) + src_x;
122 dst_ptr = dst->image.data + ((y + dst_y) * dst->cache_entry.w) + dst_x;
123 func(src_ptr, dst_ptr, w);
128 /****************************************************************************/
/*
 * Plain C copy that reverses element order: dst is written in ascending
 * order for len elements while src is read in descending order.
 * Any repositioning of src before the loop is in lines not visible here.
 */
131 evas_common_copy_rev_pixels_c(DATA32 *src, DATA32 *dst, int len)
133 DATA32 *dst_end = dst + len;
/* dst walks forward, src walks backward. */
136 while (dst < dst_end) *dst++ = *src--;
/*
 * ARM NEON inline-assembly copy that writes dst in ascending order and uses
 * vrev64.32 to swap the 32-bit words of what it loads from the source.
 * Operands: s = src + len, e = dst + len (end of destination), d = dst,
 * tmp = scratch.  Several labels and branches of the asm are in lines not
 * visible in this excerpt.
 *
 * NOTE(review): none of the visible loads from [s] use writeback and no
 * visible instruction updates s; the source pointer update is presumably in
 * hidden lines - confirm.
 */
142 evas_common_copy_pixels_rev_neon(DATA32 *src, DATA32 *dst, int len)
/* tmp is only a scratch operand for the asm; 37 is a dummy initial value. */
144 uint32_t *tmp = (void *)37;
/* Prefix that keeps the asm labels of this function unique. */
145 #define AP "evas_common_copy_rev_pixels_neon_"
147 // Can we do 32 byte?
/* d already 32-byte aligned: go straight to the quad (32-byte) path. */
148 "andS %[tmp], %[d], $0x1f \n\t"
149 "beq "AP"quadstart \n\t"
151 // Can we do at least 16 byte?
/* NOTE(review): the mask 0x4 tests bit 2 of d, i.e. 8-byte alignment, not
 * 16 bytes as the comment above says - confirm intent. */
152 "andS %[tmp], %[d], $0x4 \n\t"
153 "beq "AP"dualstart \n\t"
/* Copy a single 32-bit element; d is post-incremented. */
158 "vld1.32 d0[0], [%[s]] \n\t"
159 "vst1.32 d0[0], [%[d]]! \n\t"
/* Fewer than 31 bytes left in the destination: skip to the tail code. */
163 "sub %[tmp], %[e], %[d] \n\t"
164 "cmp %[tmp], #31 \n\t"
165 "blt "AP"loopout \n\t"
167 "andS %[tmp], %[d], $0x1f \n\t"
168 "beq "AP"quadstart \n\t"
/* Copy 8 bytes (two elements, word-swapped) until d is 32-byte aligned. */
172 "vldm %[s], {d0} \n\t"
173 "vrev64.32 d1, d0 \n\t"
174 "vstm %[d]!, {d1} \n\t"
176 "andS %[tmp], %[d], $0x1f \n\t"
177 "bne "AP"dualloop \n\t"
/* Fewer than 32 bytes left: skip the quad loop. */
181 "sub %[tmp], %[e], %[d] \n\t"
182 "cmp %[tmp], #32 \n\t"
183 "blt "AP"loopout \n\t"
/* tmp = last destination address at which a full 32-byte store still fits. */
185 "sub %[tmp],%[e],#32 \n\t"
/* Quad loop body: load 8 elements, swap words inside each d register and
 * store the registers in the opposite order (d3..d0 -> d4..d7), which
 * reverses all 8 elements. */
189 "vldm %[s], {d0,d1,d2,d3} \n\t"
191 "vrev64.32 d7,d0 \n\t"
192 "vrev64.32 d6,d1 \n\t"
193 "vrev64.32 d5,d2 \n\t"
194 "vrev64.32 d4,d3 \n\t"
196 "vstm %[d]!, {d4,d5,d6,d7} \n\t"
198 "cmp %[tmp], %[d] \n\t"
199 "bhi "AP"quadloop \n\t"
/* Tail handling: the branch that consumes this comparison is not visible. */
203 "cmp %[d], %[e] \n\t"
/* Exactly 4 bytes (one element) left: go to the single-element copy. */
205 "sub %[tmp],%[e], %[d] \n\t"
206 "cmp %[tmp],$0x04 \n\t"
207 "beq "AP"singleloop2 \n\t"
/* Copy remaining pairs of elements while at least 8 bytes are left. */
210 "sub %[tmp],%[e],$0x7 \n\t"
211 AP "dualloop2int: \n\t"
213 "vldm %[s], {d0} \n\t"
214 "vrev64.32 d1,d0 \n\t"
215 "vstm %[d]!, {d1} \n\t"
217 "cmp %[tmp], %[d] \n\t"
218 "bhi "AP"dualloop2int \n\t"
221 "cmp %[e], %[d] \n\t"
/* Final single element. */
224 AP "singleloop2: \n\t"
226 "vld1.32 d0[0], [%[s]] \n\t"
227 "vst1.32 d0[0], [%[d]] \n\t"
/* NOTE(review): every operand uses a plain "r" constraint (no "=" or "+"),
 * yet the asm writes %[tmp] and post-increments %[d]; GCC documents that
 * operands declared this way must not be modified - confirm and consider
 * "+r". */
233 : [s] "r" (src + len), [e] "r" (dst + len), [d] "r" (dst),[tmp] "r" (tmp)
/* NOTE(review): d0-d7 are covered by q0-q3; the "0" and "1" clobber entries
 * look unusual - verify what they are meant to name. */
235 : "q0","q1","q2","q3","0","1","memory"
/*
 * Portable C copy: copies len DATA32 elements from src to dst in ascending
 * address order.  Also used as the fallback by the MMX/SSE variants.
 */
245 evas_common_copy_pixels_c(DATA32 *src, DATA32 *dst, int len)
247 DATA32 *dst_end = dst + len;
249 while (dst < dst_end) *dst++ = *src++;
/*
 * MMX forward copy: moves 16 DATA32 elements per iteration with
 * MOVE_16DWORDS_MMX and finishes the remainder with a scalar loop.
 * An alignment pre-pass (closed by the ALIGN_FIX #endif below) falls back to
 * evas_common_copy_pixels_c() when src and dst cannot be aligned together.
 * The assignment of dst_end and the loop bodies are in lines not visible in
 * this excerpt.
 */
255 evas_common_copy_pixels_mmx(DATA32 *src, DATA32 *dst, int len)
256 { // XXX cppcheck: [./src/lib/engines/common/evas_blit_main.c:248]: (error) Invalid number of character ({). Can't process file.
257 // so... wtf? what's wrong with this { ? or anything surrounding it?
258 DATA32 *dst_end, *dst_end_pre;
/* Offset of each pointer within a 64-byte line. */
263 src_align = (intptr_t)src & 0x3f; /* 64 byte alignment */
264 dst_align = (intptr_t)dst & 0x3f; /* 64 byte alignment */
/* Different offsets, or src not on a 4-byte boundary: use the C copy. */
266 if ((src_align != dst_align) ||
267 ((src_align & 0x3) != 0))
270 evas_common_copy_pixels_c(src, dst, len);
/* NOTE(review): this loop counts src_align down to zero in DATA32-sized
 * steps; whether that lands on a 64-byte boundary depends on the hidden
 * loop body - confirm. */
275 while ((src_align > 0) && (len > 0))
279 src_align -= sizeof(DATA32);
281 #endif /* ALIGN_FIX */
/* End of the part that can be copied in whole 16-element blocks. */
284 dst_end_pre = dst + ((len / 16) * 16);
286 while (dst < dst_end_pre)
288 MOVE_16DWORDS_MMX(src, dst);
/* Scalar copy of the remaining (len % 16) elements. */
292 while (dst < dst_end) *dst++ = *src++;
/*
 * MMX2 forward copy.  The visible lines mirror evas_common_copy_pixels_mmx():
 * alignment check with fallback to the C copy, 16-element blocks, then a
 * scalar tail.  The visible block move uses MOVE_16DWORDS_MMX, the same macro
 * as the MMX variant.  The assignment of dst_end and the loop bodies are in
 * lines not visible in this excerpt.
 */
298 evas_common_copy_pixels_mmx2(DATA32 *src, DATA32 *dst, int len)
300 DATA32 *dst_end, *dst_end_pre;
/* Offset of each pointer within a 64-byte line. */
305 src_align = (intptr_t)src & 0x3f; /* 64 byte alignment */
306 dst_align = (intptr_t)dst & 0x3f; /* 64 byte alignment */
/* Different offsets, or src not on a 4-byte boundary: use the C copy. */
308 if ((src_align != dst_align) ||
309 ((src_align & 0x3) != 0))
312 evas_common_copy_pixels_c(src, dst, len);
/* Alignment pre-pass; loop body not visible. */
317 while ((src_align > 0) && (len > 0))
321 src_align -= sizeof(DATA32);
/* End of the part that can be copied in whole 16-element blocks. */
326 dst_end_pre = dst + ((len / 16) * 16);
328 while (dst < dst_end_pre)
330 MOVE_16DWORDS_MMX(src, dst);
/* Scalar copy of the remaining elements. */
334 while (dst < dst_end) *dst++ = *src++;
/*
 * ARM NEON inline-assembly forward copy.  Operands: s = src, d = dst,
 * e = end of destination, tmp = scratch.  Loads and stores use writeback so
 * both pointers advance together.  The assignment of e (presumably
 * dst + len - confirm) and several labels/branches are in lines not visible
 * in this excerpt.
 */
340 evas_common_copy_pixels_neon(DATA32 *src, DATA32 *dst, int len){
/* tmp is only a scratch operand for the asm; 37 is a dummy initial value. */
341 uint32_t *e,*tmp = (void *)37;
/* Prefix that keeps the asm labels of this function unique. */
343 #define AP "evas_common_copy_pixels_neon_"
345 // Can we do 32 byte?
/* d already 32-byte aligned: go straight to the quad path. */
346 "andS %[tmp], %[d], $0x1f \n\t"
347 "beq "AP"quadstart \n\t"
349 // Can we do at least 16 byte?
/* NOTE(review): the mask 0x4 tests bit 2 of d, i.e. 8-byte alignment, not
 * 16 bytes as the comment above says - confirm intent. */
350 "andS %[tmp], %[d], $0x4 \n\t"
351 "beq "AP"dualstart \n\t"
/* Copy a single 32-bit element, advancing both pointers. */
355 "vld1.32 d0[0], [%[s]]! \n\t"
356 "vst1.32 d0[0], [%[d]]! \n\t"
/* Fewer than 31 bytes left: skip to the tail code. */
360 "sub %[tmp], %[e], %[d] \n\t"
361 "cmp %[tmp], #31 \n\t"
362 "blt "AP"loopout \n\t"
364 "andS %[tmp], %[d], $0x1f \n\t"
365 "beq "AP"quadstart \n\t"
/* Copy 8 bytes at a time until d is 32-byte aligned. */
368 "vldm %[s]!, {d0} \n\t"
369 "vstm %[d]!, {d0} \n\t"
371 "andS %[tmp], %[d], $0x1f \n\t"
372 "bne "AP"dualloop \n\t"
/* Fewer than 64 bytes left: skip the quad loop. */
376 "sub %[tmp], %[e], %[d] \n\t"
377 "cmp %[tmp], #64 \n\t"
378 "blt "AP"loopout \n\t"
/* tmp = loop bound so the quad loop runs only while 64 bytes still fit. */
380 "sub %[tmp],%[e],#63 \n\t"
/* Quad loop body: 8 d registers = 64 bytes per iteration. */
383 "vldm %[s]!, {d0,d1,d2,d3} \n\t"
384 "vldm %[s]!, {d4,d5,d6,d7} \n\t"
385 "vstm %[d]!, {d0,d1,d2,d3} \n\t"
386 "vstm %[d]!, {d4,d5,d6,d7} \n\t"
388 "cmp %[tmp], %[d] \n\t"
389 "bhi "AP"quadloop \n\t"
/* Tail handling: the branch that consumes this comparison is not visible. */
393 "cmp %[d], %[e] \n\t"
/* Exactly 4 bytes (one element) left: go to the single-element copy. */
395 "sub %[tmp],%[e], %[d] \n\t"
396 "cmp %[tmp],$0x04 \n\t"
397 "beq "AP"singleloop2 \n\t"
/* Copy remaining pairs of elements while at least 8 bytes are left. */
400 "sub %[tmp],%[e],$0x7 \n\t"
401 AP "dualloop2int: \n\t"
402 "vldm %[s]!, {d0} \n\t"
403 "vstm %[d]!, {d0} \n\t"
405 "cmp %[tmp], %[d] \n\t"
406 "bhi "AP"dualloop2int \n\t"
409 "cmp %[e], %[d] \n\t"
/* Final single element. */
412 AP "singleloop2: \n\t"
413 "vld1.32 d0[0], [%[s]] \n\t"
414 "vst1.32 d0[0], [%[d]] \n\t"
/* NOTE(review): every operand uses a plain "r" constraint (no "=" or "+"),
 * yet the asm writes %[tmp] and post-increments %[s] and %[d]; GCC documents
 * that operands declared this way must not be modified - confirm and
 * consider "+r". */
420 : [s] "r" (src), [e] "r" (e), [d] "r" (dst),[tmp] "r" (tmp)
/* d0-d7 used above are aliases of q0-q3. */
422 : "q0","q1","q2","q3","memory"
427 #endif /* BUILD_NEON */
/*
 * SSE forward copy.  The visible lines appear to belong to several
 * alternative implementations (presumably selected by preprocessor
 * conditionals that are not visible - confirm): a MOVE_16DWORDS_MMX2 block
 * loop, a plain loop, and an aligned variant with prefetch that falls back
 * to evas_common_copy_pixels_c().
 */
431 evas_common_copy_pixels_sse(DATA32 *src, DATA32 *dst, int len)
433 DATA32 *src_ptr, *dst_ptr, *dst_end_ptr;
435 dst_end_ptr = dst + len;
/* Block copy using the MMX2 move macro; pointer stepping not visible. */
439 while (dst_ptr < dst_end_ptr)
441 MOVE_16DWORDS_MMX2(src_ptr, dst_ptr);
445 dst_end_ptr = dst + len;
446 while (dst_ptr < dst_end_ptr)
/* NOTE(review): pointers are cast to int here, whereas the MMX variants use
 * intptr_t; on targets where pointers are wider than int this cast
 * truncates - confirm whether this code is still compiled. */
457 src_align = (int)src & 0x3f; /* 64 byte alignment */
458 dst_align = (int)dst & 0x3f; /* 64 byte alignment */
/* Different offsets, or src not on a 4-byte boundary: use the C copy. */
460 if ((src_align != dst_align) ||
461 ((src_align & 0x3) != 0))
464 evas_common_copy_pixels_c(src, dst, len);
/* Alignment pre-pass; loop body not visible. */
469 while ((src_align > 0) && (len > 0))
475 src_align -= sizeof(DATA32);
477 #endif /* ALIGN_FIX */
481 dst_end_ptr = dst + len;
/* End of the part that can be copied in whole 16-element blocks. */
482 dst_end_ptr_pre = dst + ((len / 16) * 16);
484 while (dst_ptr < dst_end_ptr_pre)
/* Prefetch the next 16-element source block before moving the current one. */
486 prefetch(&src_ptr[16]);
487 MOVE_16DWORDS_MMX(src_ptr, dst_ptr);
/* Tail loop for the remaining elements; body not visible. */
491 while (dst_ptr < dst_end_ptr)
501 /****************************************************************************/
/*
 * Plain C copy that walks both src and dst in descending address order
 * until dst reaches dst_end.  The positioning of src/dst and the value of
 * dst_end are set in lines not visible in this excerpt.
 */
505 evas_common_copy_pixels_rev_c(DATA32 *src, DATA32 *dst, int len)
513 while (dst > dst_end) *dst-- = *src--;
/*
 * MMX copy that proceeds from the end of the buffers towards the start in
 * 16-element blocks, followed by tail loops.  Pointer stepping, the
 * assignment of dst_end and the tail loop bodies are in lines not visible in
 * this excerpt.
 */
519 evas_common_copy_pixels_rev_mmx(DATA32 *src, DATA32 *dst, int len)
521 DATA32 *dst_end, *dst_end_pre;
/* Point src at the last full 16-element block. */
525 src = src + len - 16;
/* Computed from the original dst (before it is moved below):
 * dst + (len % 16), the lowest address handled by the block loop. */
527 dst_end_pre = dst + len - ((len / 16) * 16);
528 dst = dst + len - 16;
530 while (dst >= dst_end_pre)
532 MOVE_16DWORDS_MMX(src, dst);
/* Tail loops; bodies and the conditions selecting them are not visible. */
538 while (dst >= dst_end)
546 while (dst > dst_end)
/*
 * SSE copy that proceeds from the end of the buffers towards the start,
 * prefetching the preceding source block, followed by tail loops.  Pointer
 * stepping, the assignment of dst_end_ptr and the tail loop bodies are in
 * lines not visible in this excerpt.
 */
554 evas_common_copy_pixels_rev_sse(DATA32 *src, DATA32 *dst, int len)
556 DATA32 *src_ptr, *dst_ptr, *dst_end_ptr, *dst_end_ptr_pre;
/* Start at the last full 16-element block. */
558 src_ptr = src + len - 16;
559 dst_ptr = dst + len - 16;
/* dst + (len % 16): lowest address handled by the block loop. */
561 dst_end_ptr_pre = dst + len - ((len / 16) * 16);
565 while (dst_ptr >= dst_end_ptr_pre)
567 prefetch(&src_ptr[-16]);
/* NOTE(review): pointers are positioned in 16-element blocks and the
 * prefetch looks 16 elements back, but the macro here is MOVE_10DWORDS_MMX,
 * while every other block loop in this file uses a MOVE_16DWORDS_* macro -
 * likely a typo; confirm against the macro definitions and the hidden
 * pointer step. */
568 MOVE_10DWORDS_MMX(src_ptr, dst_ptr);
/* Tail loop; body not visible. */
574 while (dst_ptr >= dst_end_ptr)
/* Alternative path starting from the last element; body not visible. */
583 src_ptr = src + len - 1;
584 dst_ptr = dst + len - 1;
585 while (dst_ptr >= dst_end_ptr)
/*
 * Select the pixel copy routine to use.
 *
 * pixels  - size hint compared against thresholds (256 * 256, 64 * 64)
 *           before the SSE routines are chosen.
 * reverse - selects between the forward and the reverse routines; the exact
 *           tests on it are in lines not visible in this excerpt.
 *
 * Returns one of the file-local copy functions, chosen from the CPU features
 * reported by evas_common_cpu_has_feature(), falling back to the plain C
 * implementations.  Several of the sections below are presumably alternatives
 * selected by preprocessor conditionals that are only partly visible.
 */
597 evas_common_draw_func_copy_get(int pixels, int reverse)
/* Element-reversing C copy; the condition guarding this return is not
 * visible. */
600 return evas_common_copy_rev_pixels_c;
/* Reverse-direction candidates. */
604 if (evas_common_cpu_has_feature(CPU_FEATURE_SSE) && (pixels > 256 * 256))
605 return evas_common_copy_pixels_rev_sse;
611 if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
612 return evas_common_copy_pixels_rev_mmx;
615 # if defined(BUILD_SSE) || defined(BUILD_MMX)
618 if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
619 return evas_common_copy_pixels_rev_neon;
623 # if defined(BUILD_MMX) || defined(BUILD_NEON)
/* Reverse fallback. */
626 return evas_common_copy_pixels_rev_c;
/* Forward-direction candidates. */
635 if (evas_common_cpu_has_feature(CPU_FEATURE_MMX2))
637 return evas_common_copy_pixels_mmx2;
/* In this section SSE is used above 64 * 64 pixels. */
644 if (evas_common_cpu_has_feature(CPU_FEATURE_SSE) && (pixels > 64 * 64))
646 return evas_common_copy_pixels_sse;
653 if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
655 return evas_common_copy_pixels_neon;
662 if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
664 return evas_common_copy_pixels_mmx;
/* Forward fallback. */
670 return evas_common_copy_pixels_c;
/* Another forward section: here SSE is used only above 256 * 256 pixels. */
676 if (evas_common_cpu_has_feature(CPU_FEATURE_SSE) && (pixels > 256 * 256))
677 return evas_common_copy_pixels_sse;
684 if (evas_common_cpu_has_feature(CPU_FEATURE_MMX2))
686 return evas_common_copy_pixels_mmx2;
688 else if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
690 return evas_common_copy_pixels_mmx;
696 return evas_common_copy_pixels_c;
/* Final fallback when no accelerated routine was selected. */
702 return evas_common_copy_pixels_c;