+From 384879be07418fc6224b6603a2e8ca6f11e178fc Mon Sep 17 00:00:00 2001
+From: Christian Persch <chpe@gnome.org>
+Date: Sun, 12 Feb 2012 21:20:33 +0100
+Subject: [PATCH] regex: Use glib for unicode data
+
+Use g_unichar_type() and g_unichar_get_script() instead of pcre tables.
+---
+ glib/pcre/Makefile.am | 1 -
+ glib/pcre/pcre_compile.c | 26 +++---
+ glib/pcre/pcre_dfa_exec.c | 96 ++++++++--------
+ glib/pcre/pcre_exec.c | 26 +++---
+ glib/pcre/pcre_internal.h | 11 +--
+ glib/pcre/pcre_tables.c | 16 +++
+ glib/pcre/pcre_xclass.c | 24 ++--
+ glib/pcre/ucp.h | 265 +++++++++++++++++++++++----------------------
+ 8 files changed, 239 insertions(+), 226 deletions(-)
+
+diff --git a/glib/pcre/Makefile.am b/glib/pcre/Makefile.am
+index 21da5c5..1981953 100644
+--- a/glib/pcre/Makefile.am
++++ b/glib/pcre/Makefile.am
+@@ -51,7 +51,6 @@ libpcre_la_SOURCES = \
+ pcre_string_utils.c \
+ pcre_study.c \
+ pcre_tables.c \
+- pcre_ucd.c \
+ pcre_valid_utf8.c \
+ pcre_version.c \
+ pcre_xclass.c \
+diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c
+index eb985df..b44055a 100644
+--- a/glib/pcre/pcre_compile.c
++++ b/glib/pcre/pcre_compile.c
+@@ -2890,43 +2890,43 @@ Returns: TRUE if auto-possessifying is OK
+ static BOOL
+ check_char_prop(int c, int ptype, int pdata, BOOL negated)
+ {
+-const ucd_record *prop = GET_UCD(c);
++const pcre_uint8 chartype = UCD_CHARTYPE(c);
+ switch(ptype)
+ {
+ case PT_LAMP:
+- return (prop->chartype == ucp_Lu ||
+- prop->chartype == ucp_Ll ||
+- prop->chartype == ucp_Lt) == negated;
++ return (chartype == ucp_Lu ||
++ chartype == ucp_Ll ||
++ chartype == ucp_Lt) == negated;
+
+ case PT_GC:
+- return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
++ return (pdata == PRIV(ucp_gentype)[chartype]) == negated;
+
+ case PT_PC:
+- return (pdata == prop->chartype) == negated;
++ return (pdata == chartype) == negated;
+
+ case PT_SC:
+- return (pdata == prop->script) == negated;
++ return (pdata == UCD_SCRIPT(c)) == negated;
+
+ /* These are specials */
+
+ case PT_ALNUM:
+- return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
++ return (PRIV(ucp_gentype)[chartype] == ucp_L ||
++ PRIV(ucp_gentype)[chartype] == ucp_N) == negated;
+
+ case PT_SPACE: /* Perl space */
+- return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++ return (PRIV(ucp_gentype)[chartype] == ucp_Z ||
+ c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
+ == negated;
+
+ case PT_PXSPACE: /* POSIX space */
+- return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++ return (PRIV(ucp_gentype)[chartype] == ucp_Z ||
+ c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
+ c == CHAR_FF || c == CHAR_CR)
+ == negated;
+
+ case PT_WORD:
+- return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
++ return (PRIV(ucp_gentype)[chartype] == ucp_L ||
++ PRIV(ucp_gentype)[chartype] == ucp_N ||
+ c == CHAR_UNDERSCORE) == negated;
+ }
+ return FALSE;
+diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c
+index 21d7be6..41ff65b 100644
+--- a/glib/pcre/pcre_dfa_exec.c
++++ b/glib/pcre/pcre_dfa_exec.c
+@@ -1015,7 +1015,7 @@ for (;;)
+ if (clen > 0)
+ {
+ BOOL OK;
+- const ucd_record * prop = GET_UCD(c);
++ const pcre_uint8 chartype = UCD_CHARTYPE(c);
+ switch(code[1])
+ {
+ case PT_ANY:
+@@ -1023,43 +1023,43 @@ for (;;)
+ break;
+
+ case PT_LAMP:
+- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
+- prop->chartype == ucp_Lt;
++ OK = chartype == ucp_Lu || chartype == ucp_Ll ||
++ chartype == ucp_Lt;
+ break;
+
+ case PT_GC:
+- OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
++ OK = PRIV(ucp_gentype)[chartype] == code[2];
+ break;
+
+ case PT_PC:
+- OK = prop->chartype == code[2];
++ OK = chartype == code[2];
+ break;
+
+ case PT_SC:
+- OK = prop->script == code[2];
++ OK = UCD_SCRIPT(c) == code[2];
+ break;
+
+ /* These are specials for combination cases. */
+
+ case PT_ALNUM:
+- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
++ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
++ PRIV(ucp_gentype)[chartype] == ucp_N;
+ break;
+
+ case PT_SPACE: /* Perl space */
+- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+ c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
+ break;
+
+ case PT_PXSPACE: /* POSIX space */
+- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+ c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
+ c == CHAR_FF || c == CHAR_CR;
+ break;
+
+ case PT_WORD:
+- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
++ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
++ PRIV(ucp_gentype)[chartype] == ucp_N ||
+ c == CHAR_UNDERSCORE;
+ break;
+
+@@ -1209,7 +1209,7 @@ for (;;)
+ if (clen > 0)
+ {
+ BOOL OK;
+- const ucd_record * prop = GET_UCD(c);
++ const pcre_uint8 chartype = UCD_CHARTYPE(c);
+ switch(code[2])
+ {
+ case PT_ANY:
+@@ -1217,43 +1217,43 @@ for (;;)
+ break;
+
+ case PT_LAMP:
+- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
+- prop->chartype == ucp_Lt;
++ OK = chartype == ucp_Lu || chartype == ucp_Ll ||
++ chartype == ucp_Lt;
+ break;
+
+ case PT_GC:
+- OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
++ OK = PRIV(ucp_gentype)[chartype] == code[3];
+ break;
+
+ case PT_PC:
+- OK = prop->chartype == code[3];
++ OK = chartype == code[3];
+ break;
+
+ case PT_SC:
+- OK = prop->script == code[3];
++ OK = UCD_SCRIPT(c) == code[3];
+ break;
+
+ /* These are specials for combination cases. */
+
+ case PT_ALNUM:
+- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
++ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
++ PRIV(ucp_gentype)[chartype] == ucp_N;
+ break;
+
+ case PT_SPACE: /* Perl space */
+- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+ c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
+ break;
+
+ case PT_PXSPACE: /* POSIX space */
+- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+ c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
+ c == CHAR_FF || c == CHAR_CR;
+ break;
+
+ case PT_WORD:
+- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
++ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
++ PRIV(ucp_gentype)[chartype] == ucp_N ||
+ c == CHAR_UNDERSCORE;
+ break;
+
+@@ -1456,7 +1456,7 @@ for (;;)
+ if (clen > 0)
+ {
+ BOOL OK;
+- const ucd_record * prop = GET_UCD(c);
++ const pcre_uint8 chartype = UCD_CHARTYPE(c);
+ switch(code[2])
+ {
+ case PT_ANY:
+@@ -1464,43 +1464,43 @@ for (;;)
+ break;
+
+ case PT_LAMP:
+- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
+- prop->chartype == ucp_Lt;
++ OK = chartype == ucp_Lu || chartype == ucp_Ll ||
++ chartype == ucp_Lt;
+ break;
+
+ case PT_GC:
+- OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
++ OK = PRIV(ucp_gentype)[chartype] == code[3];
+ break;
+
+ case PT_PC:
+- OK = prop->chartype == code[3];
++ OK = chartype == code[3];
+ break;
+
+ case PT_SC:
+- OK = prop->script == code[3];
++ OK = UCD_SCRIPT(c) == code[3];
+ break;
+
+ /* These are specials for combination cases. */
+
+ case PT_ALNUM:
+- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
++ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
++ PRIV(ucp_gentype)[chartype] == ucp_N;
+ break;
+
+ case PT_SPACE: /* Perl space */
+- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+ c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
+ break;
+
+ case PT_PXSPACE: /* POSIX space */
+- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+ c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
+ c == CHAR_FF || c == CHAR_CR;
+ break;
+
+ case PT_WORD:
+- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
++ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
++ PRIV(ucp_gentype)[chartype] == ucp_N ||
+ c == CHAR_UNDERSCORE;
+ break;
+
+@@ -1728,7 +1728,7 @@ for (;;)
+ if (clen > 0)
+ {
+ BOOL OK;
+- const ucd_record * prop = GET_UCD(c);
++ const pcre_uint8 chartype = UCD_CHARTYPE(c);
+ switch(code[1 + IMM2_SIZE + 1])
+ {
+ case PT_ANY:
+@@ -1736,43 +1736,43 @@ for (;;)
+ break;
+
+ case PT_LAMP:
+- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
+- prop->chartype == ucp_Lt;
++ OK = chartype == ucp_Lu || chartype == ucp_Ll ||
++ chartype == ucp_Lt;
+ break;
+
+ case PT_GC:
+- OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
++ OK = PRIV(ucp_gentype)[chartype] == code[1 + IMM2_SIZE + 2];
+ break;
+
+ case PT_PC:
+- OK = prop->chartype == code[1 + IMM2_SIZE + 2];
++ OK = chartype == code[1 + IMM2_SIZE + 2];
+ break;
+
+ case PT_SC:
+- OK = prop->script == code[1 + IMM2_SIZE + 2];
++ OK = UCD_SCRIPT(c) == code[1 + IMM2_SIZE + 2];
+ break;
+
+ /* These are specials for combination cases. */
+
+ case PT_ALNUM:
+- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
++ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
++ PRIV(ucp_gentype)[chartype] == ucp_N;
+ break;
+
+ case PT_SPACE: /* Perl space */
+- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+ c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
+ break;
+
+ case PT_PXSPACE: /* POSIX space */
+- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++ OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+ c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
+ c == CHAR_FF || c == CHAR_CR;
+ break;
+
+ case PT_WORD:
+- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
++ OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
++ PRIV(ucp_gentype)[chartype] == ucp_N ||
+ c == CHAR_UNDERSCORE;
+ break;
+
+diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c
+index b715353..8eb3162 100644
+--- a/glib/pcre/pcre_exec.c
++++ b/glib/pcre/pcre_exec.c
+@@ -2507,7 +2507,7 @@ for (;;)
+ }
+ GETCHARINCTEST(c, eptr);
+ {
+- const ucd_record *prop = GET_UCD(c);
++ const pcre_uint8 chartype = UCD_CHARTYPE(c);
+
+ switch(ecode[1])
+ {
+@@ -2516,44 +2516,44 @@ for (;;)
+ break;
+
+ case PT_LAMP:
+- if ((prop->chartype == ucp_Lu ||
+- prop->chartype == ucp_Ll ||
+- prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
++ if ((chartype == ucp_Lu ||
++ chartype == ucp_Ll ||
++ chartype == ucp_Lt) == (op == OP_NOTPROP))
+ RRETURN(MATCH_NOMATCH);
+ break;
+
+ case PT_GC:
+- if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
++ if ((ecode[2] != PRIV(ucp_gentype)[chartype]) == (op == OP_PROP))
+ RRETURN(MATCH_NOMATCH);
+ break;
+
+ case PT_PC:
+- if ((ecode[2] != prop->chartype) == (op == OP_PROP))
++ if ((ecode[2] != chartype) == (op == OP_PROP))
+ RRETURN(MATCH_NOMATCH);
+ break;
+
+ case PT_SC:
+- if ((ecode[2] != prop->script) == (op == OP_PROP))
++ if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP))
+ RRETURN(MATCH_NOMATCH);
+ break;
+
+ /* These are specials */
+
+ case PT_ALNUM:
+- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
++ if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
++ PRIV(ucp_gentype)[chartype] == ucp_N) == (op == OP_NOTPROP))
+ RRETURN(MATCH_NOMATCH);
+ break;
+
+ case PT_SPACE: /* Perl space */
+- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++ if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
+ c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
+ == (op == OP_NOTPROP))
+ RRETURN(MATCH_NOMATCH);
+ break;
+
+ case PT_PXSPACE: /* POSIX space */
+- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++ if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
+ c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
+ c == CHAR_FF || c == CHAR_CR)
+ == (op == OP_NOTPROP))
+@@ -2561,8 +2561,8 @@ for (;;)
+ break;
+
+ case PT_WORD:
+- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
++ if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
++ PRIV(ucp_gentype)[chartype] == ucp_N ||
+ c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
+ RRETURN(MATCH_NOMATCH);
+ break;
+diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h
+index e5a4b6a..41c7ee3 100644
+--- a/glib/pcre/pcre_internal.h
++++ b/glib/pcre/pcre_internal.h
+@@ -2315,15 +2315,12 @@ extern const int PRIV(ucp_typerange)[];
+ #ifdef SUPPORT_UCP
+ /* UCD access macros */
+
+-#define UCD_BLOCK_SIZE 128
+-#define GET_UCD(ch) (PRIV(ucd_records) + \
+- PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \
+- UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE])
++unsigned int _pcre_ucp_othercase(const unsigned int c);
+
+-#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
+-#define UCD_SCRIPT(ch) GET_UCD(ch)->script
++#define UCD_CHARTYPE(ch) (pcre_uint8)g_unichar_type((gunichar)(ch))
++#define UCD_SCRIPT(ch) (pcre_uint8)g_unichar_get_script((gunichar)(ch))
+ #define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
+-#define UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case)
++#define UCD_OTHERCASE(ch) (_pcre_ucp_othercase(ch))
+
+ #endif /* SUPPORT_UCP */
+
+diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c
+index c8134ec..47becc7 100644
+--- a/glib/pcre/pcre_tables.c
++++ b/glib/pcre/pcre_tables.c
+@@ -563,6 +563,22 @@ const ucp_type_table PRIV(utt)[] = {
+
+ const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
+
++unsigned int
++_pcre_ucp_othercase(const unsigned int c)
++{
++ int other_case = NOTACHAR;
++
++ if (g_unichar_islower(c))
++ other_case = g_unichar_toupper(c);
++ else if (g_unichar_isupper(c))
++ other_case = g_unichar_tolower(c);
++
++ if (other_case == c)
++ other_case = NOTACHAR;
++
++ return other_case;
++}
++
+ #endif /* SUPPORT_UTF */
+
+ /* End of pcre_tables.c */
+diff --git a/glib/pcre/pcre_xclass.c b/glib/pcre/pcre_xclass.c
+index dca7a39..e5a55d7 100644
+--- a/glib/pcre/pcre_xclass.c
++++ b/glib/pcre/pcre_xclass.c
+@@ -127,7 +127,7 @@ while ((t = *data++) != XCL_END)
+ #ifdef SUPPORT_UCP
+ else /* XCL_PROP & XCL_NOTPROP */
+ {
+- const ucd_record *prop = GET_UCD(c);
++ const pcre_uint8 chartype = UCD_CHARTYPE(c);
+
+ switch(*data)
+ {
+@@ -136,46 +136,46 @@ while ((t = *data++) != XCL_END)
+ break;
+
+ case PT_LAMP:
+- if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
+- prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
++ if ((chartype == ucp_Lu || chartype == ucp_Ll ||
++ chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
+ break;
+
+ case PT_GC:
+- if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP))
++ if ((data[1] == PRIV(ucp_gentype)[chartype]) == (t == XCL_PROP))
+ return !negated;
+ break;
+
+ case PT_PC:
+- if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
++ if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
+ break;
+
+ case PT_SC:
+- if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
++ if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated;
+ break;
+
+ case PT_ALNUM:
+- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP))
++ if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
++ PRIV(ucp_gentype)[chartype] == ucp_N) == (t == XCL_PROP))
+ return !negated;
+ break;
+
+ case PT_SPACE: /* Perl space */
+- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++ if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
+ c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
+ == (t == XCL_PROP))
+ return !negated;
+ break;
+
+ case PT_PXSPACE: /* POSIX space */
+- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++ if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
+ c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
+ c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
+ return !negated;
+ break;
+
+ case PT_WORD:
+- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+- PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
++ if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
++ PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE)
+ == (t == XCL_PROP))
+ return !negated;
+ break;
+diff --git a/glib/pcre/ucp.h b/glib/pcre/ucp.h
+index 59c3bec..53a48c9 100644
+--- a/glib/pcre/ucp.h
++++ b/glib/pcre/ucp.h
+@@ -10,6 +10,7 @@ the UCD access macros. New values that are added for new releases of Unicode
+ should always be at the end of each enum, for backwards compatibility. */
+
+ /* These are the general character categories. */
++#include "gunicode.h"
+
+ enum {
+ ucp_C, /* Other */
+@@ -24,148 +25,148 @@ enum {
+ /* These are the particular character types. */
+
+ enum {
+- ucp_Cc, /* Control */
+- ucp_Cf, /* Format */
+- ucp_Cn, /* Unassigned */
+- ucp_Co, /* Private use */
+- ucp_Cs, /* Surrogate */
+- ucp_Ll, /* Lower case letter */
+- ucp_Lm, /* Modifier letter */
+- ucp_Lo, /* Other letter */
+- ucp_Lt, /* Title case letter */
+- ucp_Lu, /* Upper case letter */
+- ucp_Mc, /* Spacing mark */
+- ucp_Me, /* Enclosing mark */
+- ucp_Mn, /* Non-spacing mark */
+- ucp_Nd, /* Decimal number */
+- ucp_Nl, /* Letter number */
+- ucp_No, /* Other number */
+- ucp_Pc, /* Connector punctuation */
+- ucp_Pd, /* Dash punctuation */
+- ucp_Pe, /* Close punctuation */
+- ucp_Pf, /* Final punctuation */
+- ucp_Pi, /* Initial punctuation */
+- ucp_Po, /* Other punctuation */
+- ucp_Ps, /* Open punctuation */
+- ucp_Sc, /* Currency symbol */
+- ucp_Sk, /* Modifier symbol */
+- ucp_Sm, /* Mathematical symbol */
+- ucp_So, /* Other symbol */
+- ucp_Zl, /* Line separator */
+- ucp_Zp, /* Paragraph separator */
+- ucp_Zs /* Space separator */
++ ucp_Cc = G_UNICODE_CONTROL, /* Control */
++ ucp_Cf = G_UNICODE_FORMAT, /* Format */
++ ucp_Cn = G_UNICODE_UNASSIGNED, /* Unassigned */
++ ucp_Co = G_UNICODE_PRIVATE_USE, /* Private use */
++ ucp_Cs = G_UNICODE_SURROGATE, /* Surrogate */
++ ucp_Ll = G_UNICODE_LOWERCASE_LETTER, /* Lower case letter */
++ ucp_Lm = G_UNICODE_MODIFIER_LETTER, /* Modifier letter */
++ ucp_Lo = G_UNICODE_OTHER_LETTER, /* Other letter */
++ ucp_Lt = G_UNICODE_TITLECASE_LETTER, /* Title case letter */
++ ucp_Lu = G_UNICODE_UPPERCASE_LETTER, /* Upper case letter */
++ ucp_Mc = G_UNICODE_SPACING_MARK, /* Spacing mark */
++ ucp_Me = G_UNICODE_ENCLOSING_MARK, /* Enclosing mark */
++ ucp_Mn = G_UNICODE_NON_SPACING_MARK, /* Non-spacing mark */
++ ucp_Nd = G_UNICODE_DECIMAL_NUMBER, /* Decimal number */
++ ucp_Nl = G_UNICODE_LETTER_NUMBER, /* Letter number */
++ ucp_No = G_UNICODE_OTHER_NUMBER, /* Other number */
++ ucp_Pc = G_UNICODE_CONNECT_PUNCTUATION, /* Connector punctuation */
++ ucp_Pd = G_UNICODE_DASH_PUNCTUATION, /* Dash punctuation */
++ ucp_Pe = G_UNICODE_CLOSE_PUNCTUATION, /* Close punctuation */
++ ucp_Pf = G_UNICODE_FINAL_PUNCTUATION, /* Final punctuation */
++ ucp_Pi = G_UNICODE_INITIAL_PUNCTUATION, /* Initial punctuation */
++ ucp_Po = G_UNICODE_OTHER_PUNCTUATION, /* Other punctuation */
++ ucp_Ps = G_UNICODE_OPEN_PUNCTUATION, /* Open punctuation */
++ ucp_Sc = G_UNICODE_CURRENCY_SYMBOL, /* Currency symbol */
++ ucp_Sk = G_UNICODE_MODIFIER_SYMBOL, /* Modifier symbol */
++ ucp_Sm = G_UNICODE_MATH_SYMBOL, /* Mathematical symbol */
++ ucp_So = G_UNICODE_OTHER_SYMBOL, /* Other symbol */
++ ucp_Zl = G_UNICODE_LINE_SEPARATOR, /* Line separator */
++ ucp_Zp = G_UNICODE_PARAGRAPH_SEPARATOR, /* Paragraph separator */
++ ucp_Zs = G_UNICODE_SPACE_SEPARATOR /* Space separator */
+ };
+