This means you should not need to set any special compiler flags, which should
mean gcc will generate non-NEON-specific asm (unless you tell it otherwise). This
means it is possible to build an ARMv6 binary with NEON support (as we always
meant to do).
git-svn-id: svn+ssh://svn.enlightenment.org/var/svn/e/trunk/evas@55307 7cbeb6ba-43b4-40fd-8cce-4c39aea84d33
//#if defined(__ARM_ARCH__) && (__ARM_ARCH__ >= 70)
#ifdef BUILD_NEON
asm volatile (
+ ".fpu neon \n\t"
"vqadd.u8 d0, d1, d0\n"
);
#endif
DATA32 *e, *tmp = 0;
#define AP "B_C_DP"
asm volatile (
+ ".fpu neon \n\t"
"vdup.u32 q6, %[c] \n\t"
"vmov.i8 q5, #1 \n\t"
"vmvn.u8 q7,q6 \n\t"
#define AP "blend_mas_c_dp_"
asm volatile (
+ ".fpu neon \n\t"
" vdup.i32 q15, %[c] \n\t"
" vmov.i8 q14, #1 \n\t"
#define AP "_blend_mas_can_dp_neon_"
asm volatile (
+ ".fpu neon \n\t"
"vdup.u32 q9, %[c] \n\t"
"vmov.i8 q15, #1 \n\t"
"vmov.i8 q14, #0 \n\t"
_op_blend_p_c_dp_neon(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
#define AP "blend_p_c_dp_"
asm volatile (
+ ".fpu neon \n\t"
// Load 'c'
"vdup.u32 q7, %[c] \n\t"
"vmov.i8 q6, #1 \n\t"
#define AP "_op_blend_pan_caa_dp_"
DATA32 *e = d + l, *tmp = (void*)73;
asm volatile (
+ ".fpu neon \n\t"
/* Set up 'c' */
"vdup.u8 d14, %[c] \n\t"
"vmov.i8 d15, #1 \n\t"
_op_blend_p_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
#define AP "blend_p_dp_"
asm volatile (
+ ".fpu neon \n\t"
//** init
"vmov.i8 q8, $0x1 \n\t"
#define AP "blend_pas_dp_"
DATA32 *e = d + l,*tmp = e + 32,*pl=(void*)912;
asm volatile (
+ ".fpu neon \n\t"
"vmov.i8 q8, #1 \n\t"
AP"loopchoose: \n\t"
// If aligned - go as fast as we can
#define AP "COPY_C_DP_"
uint32_t *e = d + l,*tmp;
asm volatile (
+ ".fpu neon \n\t"
"vdup.i32 q0, %[c] \n\t"
s3 = s + 8;
s4 = s + 12;
asm volatile (
+ ".fpu neon \n\t"
"asmloop2:\n\t"
"cmp %[e], %[d]\n\t"
"vld1.32 {d16-d17}, [%[s]]!\n\t"