Refactor is(SPACE|PSXSPC)_(uni|utf8) macros and utf8.c
author Karl Williamson <public@khwilliamson.com>
Mon, 19 Nov 2012 21:36:12 +0000 (14:36 -0700)
committer Karl Williamson <public@khwilliamson.com>
Tue, 20 Nov 2012 00:13:02 +0000 (17:13 -0700)
This refactors the isSPACE_uni, isSPACE_utf8, isPSXSPC_uni,
and isPSXSPC_utf8 macros in handy.h, so that no function call need be
done to handle above Latin1 input.  These macros are quite small, and
unlikely to grow over time, as Unicode has mostly finished adding white
space equivalents to the Standard.  The functions that implement these
in utf8.c are also changed to use the macros instead of generating a
swash.  This should speed things up slightly and use less memory, since
no swash is created that would otherwise grow over time as it fills.

handy.h
utf8.c

diff --git a/handy.h b/handy.h
index 80792e3..178d975 100644 (file)
--- a/handy.h
+++ b/handy.h
@@ -929,7 +929,7 @@ EXTCONST U32 PL_charclass[];
 #define isBLANK_uni(c)          _generic_uni(_CC_BLANK, is_HORIZWS_cp_high, c)
 #define isIDFIRST_uni(c)        _generic_uni(_CC_IDFIRST, is_uni_idfirst, c)
 #define isALPHA_uni(c)          _generic_uni(_CC_ALPHA, is_uni_alpha, c)
-#define isSPACE_uni(c)          _generic_uni(_CC_SPACE, is_uni_space, c)
+#define isSPACE_uni(c)          _generic_uni(_CC_SPACE, is_XPERLSPACE_cp_high, c)
 #define isVERTWS_uni(c)         _generic_uni(_CC_VERTSPACE, is_VERTWS_cp_high, c)
 #define isDIGIT_uni(c)          _generic_uni(_CC_DIGIT, is_uni_digit, c)
 #define isUPPER_uni(c)          _generic_uni(_CC_UPPER, is_uni_upper, c)
@@ -945,7 +945,8 @@ EXTCONST U32 PL_charclass[];
 #define isXDIGIT_uni(c)         _generic_uni(_CC_XDIGIT, is_XDIGIT_cp_high, c)
 
 /* Posix and regular space differ only in U+000B, which is in Latin1 */
-#define isPSXSPC_uni(c)         _generic_uni(_CC_PSXSPC, is_uni_space, c)
+#define isPSXSPC_uni(c)         _generic_uni(_CC_PSXSPC,                \
+                                             is_XPERLSPACE_cp_high, c)
 
 #define toUPPER_uni(c,s,l)     to_uni_upper(c,s,l)
 #define toTITLE_uni(c,s,l)     to_uni_title(c,s,l)
@@ -1001,7 +1002,7 @@ EXTCONST U32 PL_charclass[];
 #define isIDCONT_utf8(p)        _generic_utf8(_CC_WORDCHAR, is_utf8_xidcont, p)
 #define isALPHA_utf8(p)         _generic_utf8(_CC_ALPHA, is_utf8_alpha, p)
 #define isBLANK_utf8(p)         _generic_utf8(_CC_BLANK, is_HORIZWS_high, p)
-#define isSPACE_utf8(p)         _generic_utf8(_CC_SPACE, is_utf8_space, p)
+#define isSPACE_utf8(p)         _generic_utf8(_CC_SPACE, is_XPERLSPACE_high, p)
 #define isVERTWS_utf8(p)        _generic_utf8(_CC_VERTSPACE, is_VERTWS_high, p)
 #define isDIGIT_utf8(p)         _generic_utf8(_CC_DIGIT, is_utf8_digit, p)
 #define isUPPER_utf8(p)         _generic_utf8(_CC_UPPER, is_utf8_upper, p)
@@ -1021,7 +1022,7 @@ EXTCONST U32 PL_charclass[];
 
 /* Posix and regular space differ only in U+000B, which is in ASCII (and hence
  * Latin1 */
-#define isPSXSPC_utf8(p)        _generic_utf8(_CC_PSXSPC, is_utf8_space, p)
+#define isPSXSPC_utf8(p)        _generic_utf8(_CC_PSXSPC, is_XPERLSPACE_high, p)
 
 #define isALNUM_LC_utf8(p)     isALNUM_LC_uvchr(valid_utf8_to_uvchr(p,  0))
 #define isIDFIRST_LC_utf8(p)   isIDFIRST_LC_uvchr(valid_utf8_to_uvchr(p,  0))
diff --git a/utf8.c b/utf8.c
index 7092d06..5621317 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -1521,9 +1521,7 @@ Perl_is_uni_blank(pTHX_ UV c)
 bool
 Perl_is_uni_space(pTHX_ UV c)
 {
-    U8 tmpbuf[UTF8_MAXBYTES+1];
-    uvchr_to_utf8(tmpbuf, c);
-    return is_utf8_space(tmpbuf);
+    return isSPACE_uni(c);
 }
 
 bool
@@ -2067,7 +2065,7 @@ Perl_is_utf8_space(pTHX_ const U8 *p)
 
     PERL_ARGS_ASSERT_IS_UTF8_SPACE;
 
-    return is_utf8_common(p, &PL_utf8_space, "IsXPerlSpace");
+    return isSPACE_utf8(p);
 }
 
 bool