From: Karl Williamson Date: Mon, 6 Jan 2014 19:22:02 +0000 (-0700) Subject: isWORDCHAR_uni(), isDIGIT_utf8() etc no longer go out to disk X-Git-Tag: upstream/5.20.0~818^2~2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f25ce84407dda38dcbb46145067fe57d29d1ef7c;p=platform%2Fupstream%2Fperl.git isWORDCHAR_uni(), isDIGIT_utf8() etc no longer go out to disk Previous commits in this series have caused all the POSIX classes to be completely specified at C compile time. This allows us to revise the base function used by all these macros to use these definitions, avoiding reading them in from disk. --- diff --git a/embed.fnc b/embed.fnc index c3ace70..6acb8b6 100644 --- a/embed.fnc +++ b/embed.fnc @@ -2357,7 +2357,7 @@ sn |NV|mulexp10 |NV value|I32 exponent #if defined(PERL_IN_UTF8_C) iRn |STRLEN |is_utf8_char_slow|NN const U8 *s|const STRLEN len sRM |UV |check_locale_boundary_crossing|NN const U8* const p|const UV result|NN U8* const ustrp|NN STRLEN *lenp -iR |bool |is_utf8_common |NN const U8 *const p|NN SV **swash|NN const char * const swashname +iR |bool |is_utf8_common |NN const U8 *const p|NN SV **swash|NN const char * const swashname|NULLOK SV* const invlist sR |SV* |swatch_get |NN SV* swash|UV start|UV span #endif diff --git a/embed.h b/embed.h index d5b85db..e48b241 100644 --- a/embed.h +++ b/embed.h @@ -1677,7 +1677,7 @@ # if defined(PERL_IN_UTF8_C) #define check_locale_boundary_crossing(a,b,c,d) S_check_locale_boundary_crossing(aTHX_ a,b,c,d) #define is_utf8_char_slow S_is_utf8_char_slow -#define is_utf8_common(a,b,c) S_is_utf8_common(aTHX_ a,b,c) +#define is_utf8_common(a,b,c,d) S_is_utf8_common(aTHX_ a,b,c,d) #define swatch_get(a,b,c) S_swatch_get(aTHX_ a,b,c) #define to_lower_latin1(a,b,c) S_to_lower_latin1(aTHX_ a,b,c) # endif diff --git a/proto.h b/proto.h index a0a1a23..19f58a5 100644 --- a/proto.h +++ b/proto.h @@ -7642,7 +7642,7 @@ PERL_STATIC_INLINE STRLEN S_is_utf8_char_slow(const U8 *s, const STRLEN len) #define PERL_ARGS_ASSERT_IS_UTF8_CHAR_SLOW \ assert(s) -PERL_STATIC_INLINE bool S_is_utf8_common(pTHX_ const U8 *const p, SV **swash, const char * const swashname) +PERL_STATIC_INLINE bool S_is_utf8_common(pTHX_ const U8 *const p, SV **swash, const char * const swashname, SV* const invlist) __attribute__warn_unused_result__ __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) diff --git a/utf8.c b/utf8.c index 161fb21..fa34f34 100644 --- a/utf8.c +++ b/utf8.c @@ -1546,7 +1546,7 @@ S_is_utf8_idfirst(pTHX_ const U8 *p) if (*p == '_') return TRUE; /* is_utf8_idstart would be more logical. */ - return is_utf8_common(p, &PL_utf8_idstart, "IdStart"); + return is_utf8_common(p, &PL_utf8_idstart, "IdStart", NULL); } bool @@ -1810,13 +1810,15 @@ Perl__to_uni_fold_flags(pTHX_ UV c, U8* p, STRLEN *lenp, const U8 flags) PERL_STATIC_INLINE bool S_is_utf8_common(pTHX_ const U8 *const p, SV **swash, - const char *const swashname) + const char *const swashname, SV* const invlist) { /* returns a boolean giving whether or not the UTF8-encoded character that * starts at

is in the swash indicated by . * contains a pointer to where the swash indicated by * is to be stored; which this routine will do, so that future calls will - * look at <*swash> and only generate a swash if it is not null + * look at <*swash> and only generate a swash if it is not null. + * is NULL or an inversion list that defines the swash. If not null, it + * saves time during initialization of the swash. * * Note that it is assumed that the buffer length of

is enough to * contain all the bytes that comprise the character. Thus, <*p> should @@ -1845,7 +1847,13 @@ S_is_utf8_common(pTHX_ const U8 *const p, SV **swash, } if (!*swash) { U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; - *swash = _core_swash_init("utf8", swashname, &PL_sv_undef, 1, 0, NULL, &flags); + *swash = _core_swash_init("utf8", + + /* Only use the name if there is no inversion + * list; otherwise will go out to disk */ + (invlist) ? "" : swashname, + + &PL_sv_undef, 1, 0, invlist, &flags); } return swash_fetch(*swash, p, TRUE) != 0; @@ -1860,7 +1868,10 @@ Perl__is_utf8_FOO(pTHX_ const U8 classnum, const U8 *p) assert(classnum < _FIRST_NON_SWASH_CC); - return is_utf8_common(p, &PL_utf8_swash_ptrs[classnum], swash_property_names[classnum]); + return is_utf8_common(p, + &PL_utf8_swash_ptrs[classnum], + swash_property_names[classnum], + PL_XPosix_ptrs[classnum]); } bool @@ -1883,7 +1894,7 @@ Perl_is_utf8_xidfirst(pTHX_ const U8 *p) /* The naming is historical. */ if (*p == '_') return TRUE; /* is_utf8_idstart would be more logical. */ - return is_utf8_common(p, &PL_utf8_xidstart, "XIdStart"); + return is_utf8_common(p, &PL_utf8_xidstart, "XIdStart", NULL); } bool @@ -1893,7 +1904,7 @@ Perl__is_utf8_perl_idstart(pTHX_ const U8 *p) PERL_ARGS_ASSERT__IS_UTF8_PERL_IDSTART; - return is_utf8_common(p, &PL_utf8_perl_idstart, "_Perl_IDStart"); + return is_utf8_common(p, &PL_utf8_perl_idstart, "_Perl_IDStart", NULL); } bool @@ -1903,7 +1914,7 @@ Perl__is_utf8_perl_idcont(pTHX_ const U8 *p) PERL_ARGS_ASSERT__IS_UTF8_PERL_IDCONT; - return is_utf8_common(p, &PL_utf8_perl_idcont, "_Perl_IDCont"); + return is_utf8_common(p, &PL_utf8_perl_idcont, "_Perl_IDCont", NULL); } @@ -1914,7 +1925,7 @@ Perl_is_utf8_idcont(pTHX_ const U8 *p) PERL_ARGS_ASSERT_IS_UTF8_IDCONT; - return is_utf8_common(p, &PL_utf8_idcont, "IdContinue"); + return is_utf8_common(p, &PL_utf8_idcont, "IdContinue", NULL); } bool @@ -1924,7 +1935,7 @@ Perl_is_utf8_xidcont(pTHX_ const U8 *p) PERL_ARGS_ASSERT_IS_UTF8_XIDCONT; - return is_utf8_common(p, &PL_utf8_idcont, "XIdContinue"); + return is_utf8_common(p, &PL_utf8_idcont, "XIdContinue", NULL); } bool @@ -1934,7 +1945,7 @@ Perl__is_utf8_mark(pTHX_ const U8 *p) PERL_ARGS_ASSERT__IS_UTF8_MARK; - return is_utf8_common(p, &PL_utf8_mark, "IsM"); + return is_utf8_common(p, &PL_utf8_mark, "IsM", NULL); } /*