From aa08176745dfc5afda1463db81f9ac0e74eca5f3 Mon Sep 17 00:00:00 2001 From: Daniel Juyung Seo Date: Tue, 27 Jul 2010 16:27:28 +0900 Subject: [PATCH] [Neon&WordCache] Added wordcache and neon features. --- debian/rules | 2 +- src/lib/engines/common/evas_font_draw.c | 54 +++++++++++---- .../evas_op_blend/op_blend_mask_color_neon.c | 78 +++++++++++++++++++--- 3 files changed, 110 insertions(+), 24 deletions(-) diff --git a/debian/rules b/debian/rules index 0d89f3c..f916400 100755 --- a/debian/rules +++ b/debian/rules @@ -32,7 +32,7 @@ else arch_flags += --enable-winkcodec=no endif -#arch_flags += --enable-word-cache --enable-metric-cache +arch_flags += --enable-word-cache --enable-metric-cache DEB_CONFIGURE_EXTRA_FLAGS := --disable-image-loader-svg \ --enable-simple-x11 \ diff --git a/src/lib/engines/common/evas_font_draw.c b/src/lib/engines/common/evas_font_draw.c index 02a422f..e177cac 100644 --- a/src/lib/engines/common/evas_font_draw.c +++ b/src/lib/engines/common/evas_font_draw.c @@ -11,7 +11,8 @@ #define WORD_CACHE_MAXLEN 50 /* How many to cache */ -#define WORD_CACHE_NWORDS 20 +#define WORD_CACHE_NWORDS 40 +static int max_cached_words = WORD_CACHE_NWORDS; struct prword { EINA_INLIST; @@ -43,19 +44,30 @@ struct cinfo { }; - - +LK(lock_words); // for word cache call static Eina_Inlist *words = NULL; static struct prword *evas_font_word_prerender(RGBA_Draw_Context *dc, const char *text, int len, RGBA_Font *fn, RGBA_Font_Int *fi,int use_kerning); -#ifdef EVAS_FRAME_QUEUING EAPI void evas_common_font_draw_init(void) { + char *p; + int tmp; +#ifdef EVAS_FRAME_QUEUING LKI(lock_font_draw); LKI(lock_fribidi); +#endif + if ((p = getenv("EVAS_WORD_CACHE_MAX_WORDS"))) + { + tmp = strtol(p,NULL,10); + /* 0 to disable of course */ + if (tmp > -1 && tmp < 500){ + max_cached_words = tmp; + } + } } +#ifdef EVAS_FRAME_QUEUING EAPI void evas_common_font_draw_finish(void) { @@ -393,13 +405,16 @@ evas_common_font_draw_internal(RGBA_Image *dst, RGBA_Draw_Context *dc, RGBA_Font FT_UInt prev_index; DATA32 *im; int c; + char *p; int char_index = 0; /* the index of the current char */ #if defined(METRIC_CACHE) || defined(WORD_CACHE) - /* A fast strNlen would be nice (there is a wcsnlen strangely) */ - for (len = 0 ; text[len] && len < WORD_CACHE_MAXLEN ; len ++) - ; + /* A fast (portable) strNlen would be nice (there is a wcsnlen strangely) */ + if ((p = memchr(text, 0, WORD_CACHE_MAXLEN))) + len = p - text; + else + len = WORD_CACHE_MAXLEN; if (len > 2 && len < WORD_CACHE_MAXLEN){ struct prword *word = evas_font_word_prerender(dc, text, len, fn, fi, @@ -432,6 +447,7 @@ evas_common_font_draw_internal(RGBA_Image *dst, RGBA_Draw_Context *dc, RGBA_Font // y = ext_y; } + if (xrun < 1) return; #ifdef WORD_CACHE for (j = rowstart ; j < rowend ; j ++){ func(NULL, word->im + (word->roww * j) + xstart, dc->col.col, @@ -496,7 +512,9 @@ evas_common_font_draw_internal(RGBA_Image *dst, RGBA_Draw_Context *dc, RGBA_Font LKL(fi->ft_mutex); if (fi->src->current_size != fi->size) { + FTLOCK(); FT_Activate_Size(fi->ft.size); + FTUNLOCK(); fi->src->current_size = fi->size; } /* hmmm kerning means i can't sanely do my own cached metric tables! */ @@ -777,13 +795,17 @@ evas_font_word_prerender(RGBA_Draw_Context *dc, const char *in_text, int len, RG struct prword *w; int gl; + + LKL(lock_words); EINA_INLIST_FOREACH(words,w){ if (w->len == len && w->font == fn && fi->size == w->size && - (w->str == in_text || strcmp(w->str, in_text) == 0)){ + (w->str == in_text || memcmp(w->str,in_text,len) == 0)){ words = eina_inlist_promote(words, EINA_INLIST_GET(w)); + LKU(lock_words); return w; } } + LKU(lock_words); #ifdef INTERNATIONAL_SUPPORT /*FIXME: should get the direction by parmater */ @@ -795,7 +817,8 @@ evas_font_word_prerender(RGBA_Draw_Context *dc, const char *in_text, int len, RG * holder, will change in the future.*/ char *visual_text = evas_intl_utf8_to_visual(in_text, &len, &direction, NULL, NULL, &level_list); text = (visual_text) ? visual_text : in_text; - +#else + text = in_text; #endif gl = dc->font_ext.func.gl_new ? 1: 0; @@ -810,9 +833,12 @@ evas_font_word_prerender(RGBA_Draw_Context *dc, const char *in_text, int len, RG ci->gl = evas_common_font_utf8_get_next((unsigned char *)text, &chr); if (ci->gl == 0) break; ci->index = evas_common_font_glyph_search(fn, &fi, ci->gl); + LKL(fi->ft_mutex); if (fi->src->current_size != fi->size) { + FTLOCK(); FT_Activate_Size(fi->ft.size); + FTUNLOCK(); fi->src->current_size = fi->size; } if (use_kerning && char_index && (pface == fi->src->ft.face)) @@ -823,6 +849,7 @@ evas_font_word_prerender(RGBA_Draw_Context *dc, const char *in_text, int len, RG } pface = fi->src->ft.face; ci->fg = evas_common_font_int_cache_glyph_get(fi, ci->index); + LKU(fi->ft_mutex); if (!ci->fg) continue; if (gl){ ci->fg->ext_dat =dc->font_ext.func.gl_new(dc->font_ext.data,ci->fg); @@ -866,7 +893,7 @@ evas_font_word_prerender(RGBA_Draw_Context *dc, const char *in_text, int len, RG save = malloc(sizeof(struct prword)); save->cinfo = metrics; - save->str = eina_stringshare_add(text); + save->str = eina_stringshare_add(in_text); save->font = fn; save->size = fi->size; save->len = len; @@ -875,10 +902,11 @@ evas_font_word_prerender(RGBA_Draw_Context *dc, const char *in_text, int len, RG save->roww = width; save->height = height; save->baseline = baseline; + LKL(lock_words); words = eina_inlist_prepend(words, EINA_INLIST_GET(save)); /* Clean up if too long */ - if (eina_inlist_count(words) > 20){ + if (eina_inlist_count(words) > max_cached_words){ struct prword *last = (struct prword *)(words->last); if (last->im) free(last->im); if (last->cinfo) free(last->cinfo); @@ -886,14 +914,14 @@ evas_font_word_prerender(RGBA_Draw_Context *dc, const char *in_text, int len, RG words = eina_inlist_remove(words,EINA_INLIST_GET(last)); free(last); } - - return save; + LKU(lock_words); #ifdef INTERNATIONAL_SUPPORT if (level_list) free(level_list); if (visual_text) free(visual_text); #endif + return save; } diff --git a/src/lib/engines/common/evas_op_blend/op_blend_mask_color_neon.c b/src/lib/engines/common/evas_op_blend/op_blend_mask_color_neon.c index 4fc2f5d..6c8bef5 100644 --- a/src/lib/engines/common/evas_op_blend/op_blend_mask_color_neon.c +++ b/src/lib/engines/common/evas_op_blend/op_blend_mask_color_neon.c @@ -1,11 +1,29 @@ +#define NEONDEBUG 0 + + +#if NEONDEBUG +#define DEBUG_FNCOUNT(x) \ + do { \ + static int _foo = 0; \ + if (_foo++%10000 ==0) \ + printf("%s %+d %s: %d (%s)\n",__FILE__,__LINE__,__FUNCTION__,\ + _foo, x " optimised");\ + } while (0) +#else +#define DEBUG_FNCOUNT(x) ((void)x) +#endif + + /* blend mask x color -> dst */ #ifdef BUILD_NEON static void _op_blend_mas_c_dp_neon(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) { DATA32 *e; - int alpha = 256 - (c >> 24); + + DEBUG_FNCOUNT(""); + #define AP "blend_mas_c_dp_" asm volatile ( " vdup.i32 q15, %[c] \n\t" @@ -60,6 +78,8 @@ _op_blend_mas_c_dp_neon(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int " sub %[tmp], %[e], %[d] \n\t" " cmp %[tmp], #16 \n\t" " blt "AP"loopout \n\t" + + " sub %[tmp], %[e], #15 \n\t" " sub %[d], #16 \n\t" @@ -109,6 +129,15 @@ _op_blend_mas_c_dp_neon(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int " bhi "AP"quadloopint \n\t" AP"loopout: \n\t" +#if NEONDEBUG + "cmp %[d], %[e] \n\t" + "ble "AP"foo \n\t" + "cmp %[tmp], %[m] \n\t" + "sub %[x], %[x] \n\t" + "vst1.32 d0[0], [%[x]] \n\t" + AP"foo: \n\t" +#endif + " cmp %[d], %[e] \n\t" " beq "AP"done \n\t" " sub %[tmp],%[e], %[d] \n\t" @@ -116,7 +145,7 @@ _op_blend_mas_c_dp_neon(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int " beq "AP"singleout \n\t" AP "dualloop2: \n\t" - "sub %[tmp],%[e],$0x7 \n\t" + "sub %[tmp],%[e],$0x8 \n\t" " vld1.16 d0[0], [%[m]]! \n\t" " vldm %[d], {d4} \n\t" " vmovl.u8 q0, d0 \n\t" @@ -150,7 +179,13 @@ _op_blend_mas_c_dp_neon(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int " vst1.32 d0[0], [%[d]]! \n\t" AP"done: \n\t" - +#if NEONDEBUG + "cmp %[d], %[e] \n\t" + "beq "AP"reallydone \n\t" + "sub %[tmp], %[tmp] \n\t" + "vst1.32 d0[0], [%[tmp]] \n\t" + AP"reallydone:" +#endif : // Out : [e] "r" (d + l), [d] "r" (d), [c] "r" (c), [tmp] "r" (7), [m] "r" (m), [x] "r" (0) @@ -166,6 +201,9 @@ static void _op_blend_mas_can_dp_neon(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) { DATA32 *e,*tmp; int alpha; + + DEBUG_FNCOUNT(""); + #define AP "_blend_mas_can_dp_neon_" asm volatile ( "vdup.u32 q9, %[c] \n\t" @@ -204,7 +242,7 @@ _op_blend_mas_can_dp_neon(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, i AP"dualstart: \n\t" " sub %[tmp], %[e], %[d] \n\t" " cmp %[tmp], #16 \n\t" - " ble "AP"loopout \n\t" + " blt "AP"loopout \n\t" AP"dualloop: \n\t" " vld1.16 d0[0], [%[m]]! \n\t" @@ -232,21 +270,23 @@ _op_blend_mas_can_dp_neon(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, i AP"fastloop: \n\t" " add %[d], #16 \n\t" " cmp %[tmp], %[d] \n\t" - " ble "AP"loopout \n\t" + " blt "AP"loopout \n\t" AP"quadloopint: \n\t" // Load the mask: 4 bytes: It has d0/d1 " ldr %[x], [%[m]] \n\t" " add %[m], #4 \n\t" + + // Check for shortcuts " cmp %[x], #0 \n\t" " beq "AP"fastloop \n\t" - " vmov.32 d0[0], %[x] \n\t" - // Load d into d8/d9 q4 - " vldm %[d], {d8,d9} \n\t" " cmp %[x], $0xffffffff \n\t" " beq "AP"quadstore \n\t" + " vmov.32 d0[0], %[x] \n\t" + // Load d into d8/d9 q4 + " vldm %[d], {d8,d9} \n\t" // Get the alpha channel ready (m) " vmovl.u8 q0, d0 \n\t" @@ -293,8 +333,14 @@ _op_blend_mas_can_dp_neon(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, i " cmp %[tmp], %[d] \n\t" " bhi "AP"quadloopint \n\t" - AP"loopout: \n\t" +#if NEONDEBUG + "cmp %[d], %[e] \n\t" + "ble "AP"foo \n\t" + "sub %[tmp], %[tmp] \n\t" + "vst1.32 d0[0], [%[tmp]] \n\t" + AP"foo: \n\t" +#endif " cmp %[e], %[d] \n\t" " beq "AP"done \n\t" @@ -356,13 +402,22 @@ _op_blend_mas_can_dp_neon(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, i "vqmovun.s16 d2, q6 \n\t" "vst1.32 d2[0], [%[d]]! \n\t" + AP"done: \n\t" +#if NEONDEBUG + "cmp %[d], %[e] \n\t" + "beq "AP"reallydone \n\t" + "sub %[m], %[m] \n\t" + "vst1.32 d0[0], [%[m]] \n\t" + AP"reallydone:" +#endif + : // output regs // Input : [e] "r" (e = d + l), [d] "r" (d), [c] "r" (c), [m] "r" (m), [tmp] "r" (7), [x] "r" (33) - : "q0", "q1", "q2","q3", "q4","q5","q6", "q7","q14","q15", + : "q0", "q1", "q2","q3", "q4","q5","q6", "q7","q9","q14","q15", "memory" // clobbered ); @@ -436,6 +491,9 @@ static void _op_blend_rel_mas_c_dp_neon(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) { DATA32 *e; int alpha; + + DEBUG_FNCOUNT("not"); + UNROLL8_PLD_WHILE(d, l, e, { DATA32 mc = MUL_SYM(*m, c); -- 2.7.4