regex: Use glib for unicode data

author Christian Persch <chpe@gnome.org>

Sun, 12 Feb 2012 20:20:33 +0000 (21:20 +0100)

committer Matthias Clasen <mclasen@redhat.com>

Mon, 27 Feb 2012 03:09:05 +0000 (22:09 -0500)
author Christian Persch <chpe@gnome.org>
Sun, 12 Feb 2012 20:20:33 +0000 (21:20 +0100)
committer Matthias Clasen <mclasen@redhat.com>
Mon, 27 Feb 2012 03:09:05 +0000 (22:09 -0500)
diff --git a/glib/pcre/Makefile.am b/glib/pcre/Makefile.am

index 21da5c5..1981953 100644 (file)
--- a/glib/pcre/Makefile.am
+++ b/glib/pcre/Makefile.am
@@ -51,7 +51,6 @@ libpcre_la_SOURCES = \
         pcre_string_utils.c \
         pcre_study.c \
         pcre_tables.c \
         pcre_string_utils.c \
         pcre_study.c \
         pcre_tables.c \
-       pcre_ucd.c \
         pcre_valid_utf8.c \
         pcre_version.c \
         pcre_xclass.c \
         pcre_valid_utf8.c \
         pcre_version.c \
         pcre_xclass.c \
diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c

index eb985df..b44055a 100644 (file)
--- a/glib/pcre/pcre_compile.c
+++ b/glib/pcre/pcre_compile.c
@@ -2890,43 +2890,43 @@ Returns:       TRUE if auto-possessifying is OK
  static BOOL
  check_char_prop(int c, int ptype, int pdata, BOOL negated)
  {
  static BOOL
  check_char_prop(int c, int ptype, int pdata, BOOL negated)
  {
-const ucd_record *prop = GET_UCD(c);
+const pcre_uint8 chartype = UCD_CHARTYPE(c);
  switch(ptype)
    {
    case PT_LAMP:
  switch(ptype)
    {
    case PT_LAMP:
-  return (prop->chartype == ucp_Lu ||
-          prop->chartype == ucp_Ll ||
-          prop->chartype == ucp_Lt) == negated;
+  return (chartype == ucp_Lu ||
+          chartype == ucp_Ll ||
+          chartype == ucp_Lt) == negated;
  
    case PT_GC:
  
    case PT_GC:
-  return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
+  return (pdata == PRIV(ucp_gentype)[chartype]) == negated;
  
    case PT_PC:
  
    case PT_PC:
-  return (pdata == prop->chartype) == negated;
+  return (pdata == chartype) == negated;
  
    case PT_SC:
  
    case PT_SC:
-  return (pdata == prop->script) == negated;
+  return (pdata == UCD_SCRIPT(c)) == negated;
  
    /* These are specials */
  
    case PT_ALNUM:
  
    /* These are specials */
  
    case PT_ALNUM:
-  return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-          PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
+  return (PRIV(ucp_gentype)[chartype] == ucp_L ||
+          PRIV(ucp_gentype)[chartype] == ucp_N) == negated;
  
    case PT_SPACE:    /* Perl space */
  
    case PT_SPACE:    /* Perl space */
-  return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+  return (PRIV(ucp_gentype)[chartype] == ucp_Z ||
            c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
            == negated;
  
    case PT_PXSPACE:  /* POSIX space */
            c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
            == negated;
  
    case PT_PXSPACE:  /* POSIX space */
-  return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+  return (PRIV(ucp_gentype)[chartype] == ucp_Z ||
            c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
            c == CHAR_FF || c == CHAR_CR)
            == negated;
  
    case PT_WORD:
            c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
            c == CHAR_FF || c == CHAR_CR)
            == negated;
  
    case PT_WORD:
-  return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-          PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
+  return (PRIV(ucp_gentype)[chartype] == ucp_L ||
+          PRIV(ucp_gentype)[chartype] == ucp_N ||
            c == CHAR_UNDERSCORE) == negated;
    }
  return FALSE;
            c == CHAR_UNDERSCORE) == negated;
    }
  return FALSE;
diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c

index 21d7be6..41ff65b 100644 (file)
--- a/glib/pcre/pcre_dfa_exec.c
+++ b/glib/pcre/pcre_dfa_exec.c
@@ -1015,7 +1015,7 @@ for (;;)
        if (clen > 0)
          {
          BOOL OK;
        if (clen > 0)
          {
          BOOL OK;
-        const ucd_record * prop = GET_UCD(c);
+        const pcre_uint8 chartype = UCD_CHARTYPE(c);
          switch(code[1])
            {
            case PT_ANY:
          switch(code[1])
            {
            case PT_ANY:
@@ -1023,43 +1023,43 @@ for (;;)
            break;
  
            case PT_LAMP:
            break;
  
            case PT_LAMP:
-          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
-               prop->chartype == ucp_Lt;
+          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
+               chartype == ucp_Lt;
            break;
  
            case PT_GC:
            break;
  
            case PT_GC:
-          OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
+          OK = PRIV(ucp_gentype)[chartype] == code[2];
            break;
  
            case PT_PC:
            break;
  
            case PT_PC:
-          OK = prop->chartype == code[2];
+          OK = chartype == code[2];
            break;
  
            case PT_SC:
            break;
  
            case PT_SC:
-          OK = prop->script == code[2];
+          OK = UCD_SCRIPT(c) == code[2];
            break;
  
            /* These are specials for combination cases. */
  
            case PT_ALNUM:
            break;
  
            /* These are specials for combination cases. */
  
            case PT_ALNUM:
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-               PRIV(ucp_gentype)[prop->chartype] == ucp_N;
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N;
            break;
  
            case PT_SPACE:    /* Perl space */
            break;
  
            case PT_SPACE:    /* Perl space */
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
            break;
  
            case PT_PXSPACE:  /* POSIX space */
                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
            break;
  
            case PT_PXSPACE:  /* POSIX space */
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                 c == CHAR_FF || c == CHAR_CR;
            break;
  
            case PT_WORD:
                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                 c == CHAR_FF || c == CHAR_CR;
            break;
  
            case PT_WORD:
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N ||
                 c == CHAR_UNDERSCORE;
            break;
  
                 c == CHAR_UNDERSCORE;
            break;
  
@@ -1209,7 +1209,7 @@ for (;;)
        if (clen > 0)
          {
          BOOL OK;
        if (clen > 0)
          {
          BOOL OK;
-        const ucd_record * prop = GET_UCD(c);
+        const pcre_uint8 chartype = UCD_CHARTYPE(c);
          switch(code[2])
            {
            case PT_ANY:
          switch(code[2])
            {
            case PT_ANY:
@@ -1217,43 +1217,43 @@ for (;;)
            break;
  
            case PT_LAMP:
            break;
  
            case PT_LAMP:
-          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
-            prop->chartype == ucp_Lt;
+          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
+            chartype == ucp_Lt;
            break;
  
            case PT_GC:
            break;
  
            case PT_GC:
-          OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
+          OK = PRIV(ucp_gentype)[chartype] == code[3];
            break;
  
            case PT_PC:
            break;
  
            case PT_PC:
-          OK = prop->chartype == code[3];
+          OK = chartype == code[3];
            break;
  
            case PT_SC:
            break;
  
            case PT_SC:
-          OK = prop->script == code[3];
+          OK = UCD_SCRIPT(c) == code[3];
            break;
  
            /* These are specials for combination cases. */
  
            case PT_ALNUM:
            break;
  
            /* These are specials for combination cases. */
  
            case PT_ALNUM:
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-               PRIV(ucp_gentype)[prop->chartype] == ucp_N;
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N;
            break;
  
            case PT_SPACE:    /* Perl space */
            break;
  
            case PT_SPACE:    /* Perl space */
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
            break;
  
            case PT_PXSPACE:  /* POSIX space */
                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
            break;
  
            case PT_PXSPACE:  /* POSIX space */
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                 c == CHAR_FF || c == CHAR_CR;
            break;
  
            case PT_WORD:
                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                 c == CHAR_FF || c == CHAR_CR;
            break;
  
            case PT_WORD:
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N ||
                 c == CHAR_UNDERSCORE;
            break;
  
                 c == CHAR_UNDERSCORE;
            break;
  
@@ -1456,7 +1456,7 @@ for (;;)
        if (clen > 0)
          {
          BOOL OK;
        if (clen > 0)
          {
          BOOL OK;
-        const ucd_record * prop = GET_UCD(c);
+        const pcre_uint8 chartype = UCD_CHARTYPE(c);
          switch(code[2])
            {
            case PT_ANY:
          switch(code[2])
            {
            case PT_ANY:
@@ -1464,43 +1464,43 @@ for (;;)
            break;
  
            case PT_LAMP:
            break;
  
            case PT_LAMP:
-          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
-            prop->chartype == ucp_Lt;
+          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
+            chartype == ucp_Lt;
            break;
  
            case PT_GC:
            break;
  
            case PT_GC:
-          OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
+          OK = PRIV(ucp_gentype)[chartype] == code[3];
            break;
  
            case PT_PC:
            break;
  
            case PT_PC:
-          OK = prop->chartype == code[3];
+          OK = chartype == code[3];
            break;
  
            case PT_SC:
            break;
  
            case PT_SC:
-          OK = prop->script == code[3];
+          OK = UCD_SCRIPT(c) == code[3];
            break;
  
            /* These are specials for combination cases. */
  
            case PT_ALNUM:
            break;
  
            /* These are specials for combination cases. */
  
            case PT_ALNUM:
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-               PRIV(ucp_gentype)[prop->chartype] == ucp_N;
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N;
            break;
  
            case PT_SPACE:    /* Perl space */
            break;
  
            case PT_SPACE:    /* Perl space */
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
            break;
  
            case PT_PXSPACE:  /* POSIX space */
                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
            break;
  
            case PT_PXSPACE:  /* POSIX space */
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                 c == CHAR_FF || c == CHAR_CR;
            break;
  
            case PT_WORD:
                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                 c == CHAR_FF || c == CHAR_CR;
            break;
  
            case PT_WORD:
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N ||
                 c == CHAR_UNDERSCORE;
            break;
  
                 c == CHAR_UNDERSCORE;
            break;
  
@@ -1728,7 +1728,7 @@ for (;;)
        if (clen > 0)
          {
          BOOL OK;
        if (clen > 0)
          {
          BOOL OK;
-        const ucd_record * prop = GET_UCD(c);
+        const pcre_uint8 chartype = UCD_CHARTYPE(c);
          switch(code[1 + IMM2_SIZE + 1])
            {
            case PT_ANY:
          switch(code[1 + IMM2_SIZE + 1])
            {
            case PT_ANY:
@@ -1736,43 +1736,43 @@ for (;;)
            break;
  
            case PT_LAMP:
            break;
  
            case PT_LAMP:
-          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
-            prop->chartype == ucp_Lt;
+          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
+            chartype == ucp_Lt;
            break;
  
            case PT_GC:
            break;
  
            case PT_GC:
-          OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
+          OK = PRIV(ucp_gentype)[chartype] == code[1 + IMM2_SIZE + 2];
            break;
  
            case PT_PC:
            break;
  
            case PT_PC:
-          OK = prop->chartype == code[1 + IMM2_SIZE + 2];
+          OK = chartype == code[1 + IMM2_SIZE + 2];
            break;
  
            case PT_SC:
            break;
  
            case PT_SC:
-          OK = prop->script == code[1 + IMM2_SIZE + 2];
+          OK = UCD_SCRIPT(c) == code[1 + IMM2_SIZE + 2];
            break;
  
            /* These are specials for combination cases. */
  
            case PT_ALNUM:
            break;
  
            /* These are specials for combination cases. */
  
            case PT_ALNUM:
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-               PRIV(ucp_gentype)[prop->chartype] == ucp_N;
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N;
            break;
  
            case PT_SPACE:    /* Perl space */
            break;
  
            case PT_SPACE:    /* Perl space */
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
            break;
  
            case PT_PXSPACE:  /* POSIX space */
                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
            break;
  
            case PT_PXSPACE:  /* POSIX space */
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                 c == CHAR_FF || c == CHAR_CR;
            break;
  
            case PT_WORD:
                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
                 c == CHAR_FF || c == CHAR_CR;
            break;
  
            case PT_WORD:
-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
+          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
+               PRIV(ucp_gentype)[chartype] == ucp_N ||
                 c == CHAR_UNDERSCORE;
            break;
  
                 c == CHAR_UNDERSCORE;
            break;
  
diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c

index b715353..8eb3162 100644 (file)
--- a/glib/pcre/pcre_exec.c
+++ b/glib/pcre/pcre_exec.c
@@ -2507,7 +2507,7 @@ for (;;)
        }
      GETCHARINCTEST(c, eptr);
        {
        }
      GETCHARINCTEST(c, eptr);
        {
-      const ucd_record *prop = GET_UCD(c);
+      const pcre_uint8 chartype = UCD_CHARTYPE(c);
  
        switch(ecode[1])
          {
  
        switch(ecode[1])
          {
@@ -2516,44 +2516,44 @@ for (;;)
          break;
  
          case PT_LAMP:
          break;
  
          case PT_LAMP:
-        if ((prop->chartype == ucp_Lu ||
-             prop->chartype == ucp_Ll ||
-             prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
+        if ((chartype == ucp_Lu ||
+             chartype == ucp_Ll ||
+             chartype == ucp_Lt) == (op == OP_NOTPROP))
            RRETURN(MATCH_NOMATCH);
          break;
  
          case PT_GC:
            RRETURN(MATCH_NOMATCH);
          break;
  
          case PT_GC:
-        if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
+        if ((ecode[2] != PRIV(ucp_gentype)[chartype]) == (op == OP_PROP))
            RRETURN(MATCH_NOMATCH);
          break;
  
          case PT_PC:
            RRETURN(MATCH_NOMATCH);
          break;
  
          case PT_PC:
-        if ((ecode[2] != prop->chartype) == (op == OP_PROP))
+        if ((ecode[2] != chartype) == (op == OP_PROP))
            RRETURN(MATCH_NOMATCH);
          break;
  
          case PT_SC:
            RRETURN(MATCH_NOMATCH);
          break;
  
          case PT_SC:
-        if ((ecode[2] != prop->script) == (op == OP_PROP))
+        if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP))
            RRETURN(MATCH_NOMATCH);
          break;
  
          /* These are specials */
  
          case PT_ALNUM:
            RRETURN(MATCH_NOMATCH);
          break;
  
          /* These are specials */
  
          case PT_ALNUM:
-        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-             PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
+        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
+             PRIV(ucp_gentype)[chartype] == ucp_N) == (op == OP_NOTPROP))
            RRETURN(MATCH_NOMATCH);
          break;
  
          case PT_SPACE:    /* Perl space */
            RRETURN(MATCH_NOMATCH);
          break;
  
          case PT_SPACE:    /* Perl space */
-        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+        if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
                 == (op == OP_NOTPROP))
            RRETURN(MATCH_NOMATCH);
          break;
  
          case PT_PXSPACE:  /* POSIX space */
               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
                 == (op == OP_NOTPROP))
            RRETURN(MATCH_NOMATCH);
          break;
  
          case PT_PXSPACE:  /* POSIX space */
-        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+        if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
               c == CHAR_FF || c == CHAR_CR)
                 == (op == OP_NOTPROP))
               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
               c == CHAR_FF || c == CHAR_CR)
                 == (op == OP_NOTPROP))
@@ -2561,8 +2561,8 @@ for (;;)
          break;
  
          case PT_WORD:
          break;
  
          case PT_WORD:
-        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-             PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
+        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
+             PRIV(ucp_gentype)[chartype] == ucp_N ||
               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
            RRETURN(MATCH_NOMATCH);
          break;
               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
            RRETURN(MATCH_NOMATCH);
          break;
diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h

index e5a4b6a..41c7ee3 100644 (file)
--- a/glib/pcre/pcre_internal.h
+++ b/glib/pcre/pcre_internal.h
@@ -2315,15 +2315,12 @@ extern const int         PRIV(ucp_typerange)[];
  #ifdef SUPPORT_UCP
  /* UCD access macros */
  
  #ifdef SUPPORT_UCP
  /* UCD access macros */
  
-#define UCD_BLOCK_SIZE 128
-#define GET_UCD(ch) (PRIV(ucd_records) + \
-        PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \
-        UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE])
+unsigned int _pcre_ucp_othercase(const unsigned int c);
  
  
-#define UCD_CHARTYPE(ch)  GET_UCD(ch)->chartype
-#define UCD_SCRIPT(ch)    GET_UCD(ch)->script
+#define UCD_CHARTYPE(ch)  (pcre_uint8)g_unichar_type((gunichar)(ch))
+#define UCD_SCRIPT(ch)    (pcre_uint8)g_unichar_get_script((gunichar)(ch))
  #define UCD_CATEGORY(ch)  PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
  #define UCD_CATEGORY(ch)  PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
-#define UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case)
+#define UCD_OTHERCASE(ch) (_pcre_ucp_othercase(ch))
  
  #endif /* SUPPORT_UCP */
  
  
  #endif /* SUPPORT_UCP */
  
diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c

index c8134ec..47becc7 100644 (file)
--- a/glib/pcre/pcre_tables.c
+++ b/glib/pcre/pcre_tables.c
@@ -563,6 +563,22 @@ const ucp_type_table PRIV(utt)[] = {
  
  const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
  
  
  const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
  
+unsigned int 
+_pcre_ucp_othercase(const unsigned int c)
+{
+  int other_case = NOTACHAR;
+
+  if (g_unichar_islower(c))
+    other_case = g_unichar_toupper(c);
+  else if (g_unichar_isupper(c))
+    other_case = g_unichar_tolower(c);
+
+  if (other_case == c)
+    other_case = NOTACHAR;
+
+  return other_case;
+}
+
  #endif /* SUPPORT_UTF */
  
  /* End of pcre_tables.c */
  #endif /* SUPPORT_UTF */
  
  /* End of pcre_tables.c */
diff --git a/glib/pcre/pcre_xclass.c b/glib/pcre/pcre_xclass.c

index dca7a39..e5a55d7 100644 (file)
--- a/glib/pcre/pcre_xclass.c
+++ b/glib/pcre/pcre_xclass.c
@@ -127,7 +127,7 @@ while ((t = *data++) != XCL_END)
  #ifdef SUPPORT_UCP
    else  /* XCL_PROP & XCL_NOTPROP */
      {
  #ifdef SUPPORT_UCP
    else  /* XCL_PROP & XCL_NOTPROP */
      {
-    const ucd_record *prop = GET_UCD(c);
+    const pcre_uint8 chartype = UCD_CHARTYPE(c);
  
      switch(*data)
        {
  
      switch(*data)
        {
@@ -136,46 +136,46 @@ while ((t = *data++) != XCL_END)
        break;
  
        case PT_LAMP:
        break;
  
        case PT_LAMP:
-      if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
-           prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
+      if ((chartype == ucp_Lu || chartype == ucp_Ll ||
+           chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
        break;
  
        case PT_GC:
        break;
  
        case PT_GC:
-      if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP))
+      if ((data[1] == PRIV(ucp_gentype)[chartype]) == (t == XCL_PROP))
          return !negated;
        break;
  
        case PT_PC:
          return !negated;
        break;
  
        case PT_PC:
-      if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
+      if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
        break;
  
        case PT_SC:
        break;
  
        case PT_SC:
-      if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
+      if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated;
        break;
  
        case PT_ALNUM:
        break;
  
        case PT_ALNUM:
-      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-           PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP))
+      if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
+           PRIV(ucp_gentype)[chartype] == ucp_N) == (t == XCL_PROP))
          return !negated;
        break;
  
        case PT_SPACE:    /* Perl space */
          return !negated;
        break;
  
        case PT_SPACE:    /* Perl space */
-      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+      if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
             c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
               == (t == XCL_PROP))
          return !negated;
        break;
  
        case PT_PXSPACE:  /* POSIX space */
             c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
               == (t == XCL_PROP))
          return !negated;
        break;
  
        case PT_PXSPACE:  /* POSIX space */
-      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
+      if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
             c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
             c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
          return !negated;
        break;
  
        case PT_WORD:
             c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
             c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
          return !negated;
        break;
  
        case PT_WORD:
-      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-           PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
+      if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
+           PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE)
               == (t == XCL_PROP))
          return !negated;
        break;
               == (t == XCL_PROP))
          return !negated;
        break;
diff --git a/glib/pcre/ucp.h b/glib/pcre/ucp.h

index 59c3bec..53a48c9 100644 (file)
--- a/glib/pcre/ucp.h
+++ b/glib/pcre/ucp.h
@@ -10,6 +10,7 @@ the UCD access macros. New values that are added for new releases of Unicode
  should always be at the end of each enum, for backwards compatibility. */
  
  /* These are the general character categories. */
  should always be at the end of each enum, for backwards compatibility. */
  
  /* These are the general character categories. */
+#include "gunicode.h"
  
  enum {
    ucp_C,     /* Other */
  
  enum {
    ucp_C,     /* Other */
@@ -24,148 +25,148 @@ enum {
  /* These are the particular character types. */
  
  enum {
  /* These are the particular character types. */
  
  enum {
-  ucp_Cc,    /* Control */
-  ucp_Cf,    /* Format */
-  ucp_Cn,    /* Unassigned */
-  ucp_Co,    /* Private use */
-  ucp_Cs,    /* Surrogate */
-  ucp_Ll,    /* Lower case letter */
-  ucp_Lm,    /* Modifier letter */
-  ucp_Lo,    /* Other letter */
-  ucp_Lt,    /* Title case letter */
-  ucp_Lu,    /* Upper case letter */
-  ucp_Mc,    /* Spacing mark */
-  ucp_Me,    /* Enclosing mark */
-  ucp_Mn,    /* Non-spacing mark */
-  ucp_Nd,    /* Decimal number */
-  ucp_Nl,    /* Letter number */
-  ucp_No,    /* Other number */
-  ucp_Pc,    /* Connector punctuation */
-  ucp_Pd,    /* Dash punctuation */
-  ucp_Pe,    /* Close punctuation */
-  ucp_Pf,    /* Final punctuation */
-  ucp_Pi,    /* Initial punctuation */
-  ucp_Po,    /* Other punctuation */
-  ucp_Ps,    /* Open punctuation */
-  ucp_Sc,    /* Currency symbol */
-  ucp_Sk,    /* Modifier symbol */
-  ucp_Sm,    /* Mathematical symbol */
-  ucp_So,    /* Other symbol */
-  ucp_Zl,    /* Line separator */
-  ucp_Zp,    /* Paragraph separator */
-  ucp_Zs     /* Space separator */
+  ucp_Cc = G_UNICODE_CONTROL,                   /* Control */
+  ucp_Cf = G_UNICODE_FORMAT,                    /* Format */
+  ucp_Cn = G_UNICODE_UNASSIGNED,                /* Unassigned */
+  ucp_Co = G_UNICODE_PRIVATE_USE,               /* Private use */
+  ucp_Cs = G_UNICODE_SURROGATE,                 /* Surrogate */
+  ucp_Ll = G_UNICODE_LOWERCASE_LETTER,          /* Lower case letter */
+  ucp_Lm = G_UNICODE_MODIFIER_LETTER,           /* Modifier letter */
+  ucp_Lo = G_UNICODE_OTHER_LETTER,              /* Other letter */
+  ucp_Lt = G_UNICODE_TITLECASE_LETTER,          /* Title case letter */
+  ucp_Lu = G_UNICODE_UPPERCASE_LETTER,          /* Upper case letter */
+  ucp_Mc = G_UNICODE_SPACING_MARK,              /* Spacing mark */
+  ucp_Me = G_UNICODE_ENCLOSING_MARK,            /* Enclosing mark */
+  ucp_Mn = G_UNICODE_NON_SPACING_MARK,          /* Non-spacing mark */
+  ucp_Nd = G_UNICODE_DECIMAL_NUMBER,            /* Decimal number */
+  ucp_Nl = G_UNICODE_LETTER_NUMBER,             /* Letter number */
+  ucp_No = G_UNICODE_OTHER_NUMBER,              /* Other number */
+  ucp_Pc = G_UNICODE_CONNECT_PUNCTUATION,       /* Connector punctuation */
+  ucp_Pd = G_UNICODE_DASH_PUNCTUATION,          /* Dash punctuation */
+  ucp_Pe = G_UNICODE_CLOSE_PUNCTUATION,         /* Close punctuation */
+  ucp_Pf = G_UNICODE_FINAL_PUNCTUATION,         /* Final punctuation */
+  ucp_Pi = G_UNICODE_INITIAL_PUNCTUATION,       /* Initial punctuation */
+  ucp_Po = G_UNICODE_OTHER_PUNCTUATION,         /* Other punctuation */
+  ucp_Ps = G_UNICODE_OPEN_PUNCTUATION,          /* Open punctuation */
+  ucp_Sc = G_UNICODE_CURRENCY_SYMBOL,           /* Currency symbol */
+  ucp_Sk = G_UNICODE_MODIFIER_SYMBOL,           /* Modifier symbol */
+  ucp_Sm = G_UNICODE_MATH_SYMBOL,               /* Mathematical symbol */
+  ucp_So = G_UNICODE_OTHER_SYMBOL,              /* Other symbol */
+  ucp_Zl = G_UNICODE_LINE_SEPARATOR,            /* Line separator */
+  ucp_Zp = G_UNICODE_PARAGRAPH_SEPARATOR,       /* Paragraph separator */
+  ucp_Zs = G_UNICODE_SPACE_SEPARATOR            /* Space separator */
  };
  
  /* These are the script identifications. */
  
  enum {
  };
  
  /* These are the script identifications. */
  
  enum {
-  ucp_Arabic,
-  ucp_Armenian,
-  ucp_Bengali,
-  ucp_Bopomofo,
-  ucp_Braille,
-  ucp_Buginese,
-  ucp_Buhid,
-  ucp_Canadian_Aboriginal,
-  ucp_Cherokee,
-  ucp_Common,
-  ucp_Coptic,
-  ucp_Cypriot,
-  ucp_Cyrillic,
-  ucp_Deseret,
-  ucp_Devanagari,
-  ucp_Ethiopic,
-  ucp_Georgian,
-  ucp_Glagolitic,
-  ucp_Gothic,
-  ucp_Greek,
-  ucp_Gujarati,
-  ucp_Gurmukhi,
-  ucp_Han,
-  ucp_Hangul,
-  ucp_Hanunoo,
-  ucp_Hebrew,
-  ucp_Hiragana,
-  ucp_Inherited,
-  ucp_Kannada,
-  ucp_Katakana,
-  ucp_Kharoshthi,
-  ucp_Khmer,
-  ucp_Lao,
-  ucp_Latin,
-  ucp_Limbu,
-  ucp_Linear_B,
-  ucp_Malayalam,
-  ucp_Mongolian,
-  ucp_Myanmar,
-  ucp_New_Tai_Lue,
-  ucp_Ogham,
-  ucp_Old_Italic,
-  ucp_Old_Persian,
-  ucp_Oriya,
-  ucp_Osmanya,
-  ucp_Runic,
-  ucp_Shavian,
-  ucp_Sinhala,
-  ucp_Syloti_Nagri,
-  ucp_Syriac,
-  ucp_Tagalog,
-  ucp_Tagbanwa,
-  ucp_Tai_Le,
-  ucp_Tamil,
-  ucp_Telugu,
-  ucp_Thaana,
-  ucp_Thai,
-  ucp_Tibetan,
-  ucp_Tifinagh,
-  ucp_Ugaritic,
-  ucp_Yi,
+  ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
+  ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
+  ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
+  ucp_Bopomofo = G_UNICODE_SCRIPT_BOPOMOFO,
+  ucp_Braille = G_UNICODE_SCRIPT_BRAILLE,
+  ucp_Buginese = G_UNICODE_SCRIPT_BUGINESE,
+  ucp_Buhid = G_UNICODE_SCRIPT_BUHID,
+  ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,
+  ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE,
+  ucp_Common = G_UNICODE_SCRIPT_COMMON,
+  ucp_Coptic = G_UNICODE_SCRIPT_COPTIC,
+  ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT,
+  ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC,
+  ucp_Deseret = G_UNICODE_SCRIPT_DESERET,
+  ucp_Devanagari = G_UNICODE_SCRIPT_DEVANAGARI,
+  ucp_Ethiopic = G_UNICODE_SCRIPT_ETHIOPIC,
+  ucp_Georgian = G_UNICODE_SCRIPT_GEORGIAN,
+  ucp_Glagolitic = G_UNICODE_SCRIPT_GLAGOLITIC,
+  ucp_Gothic = G_UNICODE_SCRIPT_GOTHIC,
+  ucp_Greek = G_UNICODE_SCRIPT_GREEK,
+  ucp_Gujarati = G_UNICODE_SCRIPT_GUJARATI,
+  ucp_Gurmukhi = G_UNICODE_SCRIPT_GURMUKHI,
+  ucp_Han = G_UNICODE_SCRIPT_HAN,
+  ucp_Hangul = G_UNICODE_SCRIPT_HANGUL,
+  ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO,
+  ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW,
+  ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA,
+  ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
+  ucp_Kannada = G_UNICODE_SCRIPT_KANNADA,
+  ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA,
+  ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI,
+  ucp_Khmer = G_UNICODE_SCRIPT_KHMER,
+  ucp_Lao = G_UNICODE_SCRIPT_LAO,
+  ucp_Latin = G_UNICODE_SCRIPT_LATIN,
+  ucp_Limbu = G_UNICODE_SCRIPT_LIMBU,
+  ucp_Linear_B = G_UNICODE_SCRIPT_LINEAR_B,
+  ucp_Malayalam = G_UNICODE_SCRIPT_MALAYALAM,
+  ucp_Mongolian = G_UNICODE_SCRIPT_MONGOLIAN,
+  ucp_Myanmar = G_UNICODE_SCRIPT_MYANMAR,
+  ucp_New_Tai_Lue = G_UNICODE_SCRIPT_NEW_TAI_LUE,
+  ucp_Ogham = G_UNICODE_SCRIPT_OGHAM,
+  ucp_Old_Italic = G_UNICODE_SCRIPT_OLD_ITALIC,
+  ucp_Old_Persian = G_UNICODE_SCRIPT_OLD_PERSIAN,
+  ucp_Oriya = G_UNICODE_SCRIPT_ORIYA,
+  ucp_Osmanya = G_UNICODE_SCRIPT_OSMANYA,
+  ucp_Runic = G_UNICODE_SCRIPT_RUNIC,
+  ucp_Shavian = G_UNICODE_SCRIPT_SHAVIAN,
+  ucp_Sinhala = G_UNICODE_SCRIPT_SINHALA,
+  ucp_Syloti_Nagri = G_UNICODE_SCRIPT_SYLOTI_NAGRI,
+  ucp_Syriac = G_UNICODE_SCRIPT_SYRIAC,
+  ucp_Tagalog = G_UNICODE_SCRIPT_TAGALOG,
+  ucp_Tagbanwa = G_UNICODE_SCRIPT_TAGBANWA,
+  ucp_Tai_Le = G_UNICODE_SCRIPT_TAI_LE,
+  ucp_Tamil = G_UNICODE_SCRIPT_TAMIL,
+  ucp_Telugu = G_UNICODE_SCRIPT_TELUGU,
+  ucp_Thaana = G_UNICODE_SCRIPT_THAANA,
+  ucp_Thai = G_UNICODE_SCRIPT_THAI,
+  ucp_Tibetan = G_UNICODE_SCRIPT_TIBETAN,
+  ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH,
+  ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC,
+  ucp_Yi = G_UNICODE_SCRIPT_YI,
    /* New for Unicode 5.0: */
    /* New for Unicode 5.0: */
-  ucp_Balinese,
-  ucp_Cuneiform,
-  ucp_Nko,
-  ucp_Phags_Pa,
-  ucp_Phoenician,
+  ucp_Balinese = G_UNICODE_SCRIPT_BALINESE,
+  ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM,
+  ucp_Nko = G_UNICODE_SCRIPT_NKO,
+  ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA,
+  ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN,
    /* New for Unicode 5.1: */
    /* New for Unicode 5.1: */
-  ucp_Carian,
-  ucp_Cham,
-  ucp_Kayah_Li,
-  ucp_Lepcha,
-  ucp_Lycian,
-  ucp_Lydian,
-  ucp_Ol_Chiki,
-  ucp_Rejang,
-  ucp_Saurashtra,
-  ucp_Sundanese,
-  ucp_Vai,
+  ucp_Carian = G_UNICODE_SCRIPT_CARIAN,
+  ucp_Cham = G_UNICODE_SCRIPT_CHAM,
+  ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI,
+  ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA,
+  ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN,
+  ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN,
+  ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI,
+  ucp_Rejang = G_UNICODE_SCRIPT_REJANG,
+  ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA,
+  ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE,
+  ucp_Vai = G_UNICODE_SCRIPT_VAI,
    /* New for Unicode 5.2: */
    /* New for Unicode 5.2: */
-  ucp_Avestan,
-  ucp_Bamum,
-  ucp_Egyptian_Hieroglyphs,
-  ucp_Imperial_Aramaic,
-  ucp_Inscriptional_Pahlavi,
-  ucp_Inscriptional_Parthian,
-  ucp_Javanese,
-  ucp_Kaithi,
-  ucp_Lisu,
-  ucp_Meetei_Mayek,
-  ucp_Old_South_Arabian,
-  ucp_Old_Turkic,
-  ucp_Samaritan,
-  ucp_Tai_Tham,
-  ucp_Tai_Viet,
+  ucp_Avestan = G_UNICODE_SCRIPT_AVESTAN,
+  ucp_Bamum = G_UNICODE_SCRIPT_BAMUM,
+  ucp_Egyptian_Hieroglyphs = G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS,
+  ucp_Imperial_Aramaic = G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC,
+  ucp_Inscriptional_Pahlavi = G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI,
+  ucp_Inscriptional_Parthian = G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN,
+  ucp_Javanese = G_UNICODE_SCRIPT_JAVANESE,
+  ucp_Kaithi = G_UNICODE_SCRIPT_KAITHI,
+  ucp_Lisu = G_UNICODE_SCRIPT_LISU,
+  ucp_Meetei_Mayek = G_UNICODE_SCRIPT_MEETEI_MAYEK,
+  ucp_Old_South_Arabian = G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN,
+  ucp_Old_Turkic = G_UNICODE_SCRIPT_OLD_TURKIC,
+  ucp_Samaritan = G_UNICODE_SCRIPT_SAMARITAN,
+  ucp_Tai_Tham = G_UNICODE_SCRIPT_TAI_THAM,
+  ucp_Tai_Viet = G_UNICODE_SCRIPT_TAI_VIET,
    /* New for Unicode 6.0.0: */
    /* New for Unicode 6.0.0: */
-  ucp_Batak,
-  ucp_Brahmi,
-  ucp_Mandaic,
+  ucp_Batak = G_UNICODE_SCRIPT_BATAK,
+  ucp_Brahmi = G_UNICODE_SCRIPT_BRAHMI,
+  ucp_Mandaic = G_UNICODE_SCRIPT_MANDAIC,
    /* New for Unicode 6.1.0: */
    /* New for Unicode 6.1.0: */
-  ucp_Chakma,
-  ucp_Meroitic_Cursive,
-  ucp_Meroitic_Hieroglyphs,
-  ucp_Miao,
-  ucp_Sharada,
-  ucp_Sora_Sompeng,
-  ucp_Takri
+  ucp_Chakma = G_UNICODE_SCRIPT_CHAKMA,
+  ucp_Meroitic_Cursive = G_UNICODE_SCRIPT_MEROITIC_CURSIVE,
+  ucp_Meroitic_Hieroglyphs = G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS,
+  ucp_Miao = G_UNICODE_SCRIPT_MIAO,
+  ucp_Sharada = G_UNICODE_SCRIPT_SHARADA,
+  ucp_Sora_Sompeng = G_UNICODE_SCRIPT_SORA_SOMPENG,
+  ucp_Takri = G_UNICODE_SCRIPT_TAKRI,
  };
  
  #endif
  };
  
  #endif
diff --git a/glib/update-pcre/ucp.patch b/glib/update-pcre/ucp.patch

index 8abd812..1b82f27 100644 (file)
--- a/glib/update-pcre/ucp.patch
+++ b/glib/update-pcre/ucp.patch
@@ -1,6 +1,632 @@
---- pcre/ucp.h 2006-07-05 13:28:01.000000000 +0200
-+++ pcre/ucp.h 2006-10-09 16:27:19.000000000 +0200
-@@ -60,72 +60,72 @@ enum {
+From 384879be07418fc6224b6603a2e8ca6f11e178fc Mon Sep 17 00:00:00 2001
+From: Christian Persch <chpe@gnome.org>
+Date: Sun, 12 Feb 2012 21:20:33 +0100
+Subject: [PATCH] regex: Use glib for unicode data
+
+Use g_unichar_type() and g_unichar_get_script() instead of pcre tables.
+---
+ glib/pcre/Makefile.am     |    1 -
+ glib/pcre/pcre_compile.c  |   26 +++---
+ glib/pcre/pcre_dfa_exec.c |   96 ++++++++--------
+ glib/pcre/pcre_exec.c     |   26 +++---
+ glib/pcre/pcre_internal.h |   11 +--
+ glib/pcre/pcre_tables.c   |   16 +++
+ glib/pcre/pcre_xclass.c   |   24 ++--
+ glib/pcre/ucp.h           |  265 +++++++++++++++++++++++----------------------
+ 8 files changed, 239 insertions(+), 226 deletions(-)
+
+diff --git a/glib/pcre/Makefile.am b/glib/pcre/Makefile.am
+index 21da5c5..1981953 100644
+--- a/glib/pcre/Makefile.am
++++ b/glib/pcre/Makefile.am
+@@ -51,7 +51,6 @@ libpcre_la_SOURCES = \
+       pcre_string_utils.c \
+       pcre_study.c \
+       pcre_tables.c \
+-      pcre_ucd.c \
+       pcre_valid_utf8.c \
+       pcre_version.c \
+       pcre_xclass.c \
+diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c
+index eb985df..b44055a 100644
+--- a/glib/pcre/pcre_compile.c
++++ b/glib/pcre/pcre_compile.c
+@@ -2890,43 +2890,43 @@ Returns:       TRUE if auto-possessifying is OK
+ static BOOL
+ check_char_prop(int c, int ptype, int pdata, BOOL negated)
+ {
+-const ucd_record *prop = GET_UCD(c);
++const pcre_uint8 chartype = UCD_CHARTYPE(c);
+ switch(ptype)
+   {
+   case PT_LAMP:
+-  return (prop->chartype == ucp_Lu ||
+-          prop->chartype == ucp_Ll ||
+-          prop->chartype == ucp_Lt) == negated;
++  return (chartype == ucp_Lu ||
++          chartype == ucp_Ll ||
++          chartype == ucp_Lt) == negated;
+ 
+   case PT_GC:
+-  return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
++  return (pdata == PRIV(ucp_gentype)[chartype]) == negated;
+ 
+   case PT_PC:
+-  return (pdata == prop->chartype) == negated;
++  return (pdata == chartype) == negated;
+ 
+   case PT_SC:
+-  return (pdata == prop->script) == negated;
++  return (pdata == UCD_SCRIPT(c)) == negated;
+ 
+   /* These are specials */
+ 
+   case PT_ALNUM:
+-  return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+-          PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
++  return (PRIV(ucp_gentype)[chartype] == ucp_L ||
++          PRIV(ucp_gentype)[chartype] == ucp_N) == negated;
+ 
+   case PT_SPACE:    /* Perl space */
+-  return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++  return (PRIV(ucp_gentype)[chartype] == ucp_Z ||
+           c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
+           == negated;
+ 
+   case PT_PXSPACE:  /* POSIX space */
+-  return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++  return (PRIV(ucp_gentype)[chartype] == ucp_Z ||
+           c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
+           c == CHAR_FF || c == CHAR_CR)
+           == negated;
+ 
+   case PT_WORD:
+-  return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+-          PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
++  return (PRIV(ucp_gentype)[chartype] == ucp_L ||
++          PRIV(ucp_gentype)[chartype] == ucp_N ||
+           c == CHAR_UNDERSCORE) == negated;
+   }
+ return FALSE;
+diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c
+index 21d7be6..41ff65b 100644
+--- a/glib/pcre/pcre_dfa_exec.c
++++ b/glib/pcre/pcre_dfa_exec.c
+@@ -1015,7 +1015,7 @@ for (;;)
+       if (clen > 0)
+         {
+         BOOL OK;
+-        const ucd_record * prop = GET_UCD(c);
++        const pcre_uint8 chartype = UCD_CHARTYPE(c);
+         switch(code[1])
+           {
+           case PT_ANY:
+@@ -1023,43 +1023,43 @@ for (;;)
+           break;
+ 
+           case PT_LAMP:
+-          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
+-               prop->chartype == ucp_Lt;
++          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
++               chartype == ucp_Lt;
+           break;
+ 
+           case PT_GC:
+-          OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
++          OK = PRIV(ucp_gentype)[chartype] == code[2];
+           break;
+ 
+           case PT_PC:
+-          OK = prop->chartype == code[2];
++          OK = chartype == code[2];
+           break;
+ 
+           case PT_SC:
+-          OK = prop->script == code[2];
++          OK = UCD_SCRIPT(c) == code[2];
+           break;
+ 
+           /* These are specials for combination cases. */
+ 
+           case PT_ALNUM:
+-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+-               PRIV(ucp_gentype)[prop->chartype] == ucp_N;
++          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
++               PRIV(ucp_gentype)[chartype] == ucp_N;
+           break;
+ 
+           case PT_SPACE:    /* Perl space */
+-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
+           break;
+ 
+           case PT_PXSPACE:  /* POSIX space */
+-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+                c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
+                c == CHAR_FF || c == CHAR_CR;
+           break;
+ 
+           case PT_WORD:
+-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+-               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
++          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
++               PRIV(ucp_gentype)[chartype] == ucp_N ||
+                c == CHAR_UNDERSCORE;
+           break;
+ 
+@@ -1209,7 +1209,7 @@ for (;;)
+       if (clen > 0)
+         {
+         BOOL OK;
+-        const ucd_record * prop = GET_UCD(c);
++        const pcre_uint8 chartype = UCD_CHARTYPE(c);
+         switch(code[2])
+           {
+           case PT_ANY:
+@@ -1217,43 +1217,43 @@ for (;;)
+           break;
+ 
+           case PT_LAMP:
+-          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
+-            prop->chartype == ucp_Lt;
++          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
++            chartype == ucp_Lt;
+           break;
+ 
+           case PT_GC:
+-          OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
++          OK = PRIV(ucp_gentype)[chartype] == code[3];
+           break;
+ 
+           case PT_PC:
+-          OK = prop->chartype == code[3];
++          OK = chartype == code[3];
+           break;
+ 
+           case PT_SC:
+-          OK = prop->script == code[3];
++          OK = UCD_SCRIPT(c) == code[3];
+           break;
+ 
+           /* These are specials for combination cases. */
+ 
+           case PT_ALNUM:
+-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+-               PRIV(ucp_gentype)[prop->chartype] == ucp_N;
++          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
++               PRIV(ucp_gentype)[chartype] == ucp_N;
+           break;
+ 
+           case PT_SPACE:    /* Perl space */
+-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
+           break;
+ 
+           case PT_PXSPACE:  /* POSIX space */
+-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+                c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
+                c == CHAR_FF || c == CHAR_CR;
+           break;
+ 
+           case PT_WORD:
+-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+-               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
++          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
++               PRIV(ucp_gentype)[chartype] == ucp_N ||
+                c == CHAR_UNDERSCORE;
+           break;
+ 
+@@ -1456,7 +1456,7 @@ for (;;)
+       if (clen > 0)
+         {
+         BOOL OK;
+-        const ucd_record * prop = GET_UCD(c);
++        const pcre_uint8 chartype = UCD_CHARTYPE(c);
+         switch(code[2])
+           {
+           case PT_ANY:
+@@ -1464,43 +1464,43 @@ for (;;)
+           break;
+ 
+           case PT_LAMP:
+-          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
+-            prop->chartype == ucp_Lt;
++          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
++            chartype == ucp_Lt;
+           break;
+ 
+           case PT_GC:
+-          OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
++          OK = PRIV(ucp_gentype)[chartype] == code[3];
+           break;
+ 
+           case PT_PC:
+-          OK = prop->chartype == code[3];
++          OK = chartype == code[3];
+           break;
+ 
+           case PT_SC:
+-          OK = prop->script == code[3];
++          OK = UCD_SCRIPT(c) == code[3];
+           break;
+ 
+           /* These are specials for combination cases. */
+ 
+           case PT_ALNUM:
+-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+-               PRIV(ucp_gentype)[prop->chartype] == ucp_N;
++          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
++               PRIV(ucp_gentype)[chartype] == ucp_N;
+           break;
+ 
+           case PT_SPACE:    /* Perl space */
+-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
+           break;
+ 
+           case PT_PXSPACE:  /* POSIX space */
+-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+                c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
+                c == CHAR_FF || c == CHAR_CR;
+           break;
+ 
+           case PT_WORD:
+-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+-               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
++          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
++               PRIV(ucp_gentype)[chartype] == ucp_N ||
+                c == CHAR_UNDERSCORE;
+           break;
+ 
+@@ -1728,7 +1728,7 @@ for (;;)
+       if (clen > 0)
+         {
+         BOOL OK;
+-        const ucd_record * prop = GET_UCD(c);
++        const pcre_uint8 chartype = UCD_CHARTYPE(c);
+         switch(code[1 + IMM2_SIZE + 1])
+           {
+           case PT_ANY:
+@@ -1736,43 +1736,43 @@ for (;;)
+           break;
+ 
+           case PT_LAMP:
+-          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
+-            prop->chartype == ucp_Lt;
++          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
++            chartype == ucp_Lt;
+           break;
+ 
+           case PT_GC:
+-          OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
++          OK = PRIV(ucp_gentype)[chartype] == code[1 + IMM2_SIZE + 2];
+           break;
+ 
+           case PT_PC:
+-          OK = prop->chartype == code[1 + IMM2_SIZE + 2];
++          OK = chartype == code[1 + IMM2_SIZE + 2];
+           break;
+ 
+           case PT_SC:
+-          OK = prop->script == code[1 + IMM2_SIZE + 2];
++          OK = UCD_SCRIPT(c) == code[1 + IMM2_SIZE + 2];
+           break;
+ 
+           /* These are specials for combination cases. */
+ 
+           case PT_ALNUM:
+-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+-               PRIV(ucp_gentype)[prop->chartype] == ucp_N;
++          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
++               PRIV(ucp_gentype)[chartype] == ucp_N;
+           break;
+ 
+           case PT_SPACE:    /* Perl space */
+-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+                c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
+           break;
+ 
+           case PT_PXSPACE:  /* POSIX space */
+-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
+                c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
+                c == CHAR_FF || c == CHAR_CR;
+           break;
+ 
+           case PT_WORD:
+-          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+-               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
++          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
++               PRIV(ucp_gentype)[chartype] == ucp_N ||
+                c == CHAR_UNDERSCORE;
+           break;
+ 
+diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c
+index b715353..8eb3162 100644
+--- a/glib/pcre/pcre_exec.c
++++ b/glib/pcre/pcre_exec.c
+@@ -2507,7 +2507,7 @@ for (;;)
+       }
+     GETCHARINCTEST(c, eptr);
+       {
+-      const ucd_record *prop = GET_UCD(c);
++      const pcre_uint8 chartype = UCD_CHARTYPE(c);
+ 
+       switch(ecode[1])
+         {
+@@ -2516,44 +2516,44 @@ for (;;)
+         break;
+ 
+         case PT_LAMP:
+-        if ((prop->chartype == ucp_Lu ||
+-             prop->chartype == ucp_Ll ||
+-             prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
++        if ((chartype == ucp_Lu ||
++             chartype == ucp_Ll ||
++             chartype == ucp_Lt) == (op == OP_NOTPROP))
+           RRETURN(MATCH_NOMATCH);
+         break;
+ 
+         case PT_GC:
+-        if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
++        if ((ecode[2] != PRIV(ucp_gentype)[chartype]) == (op == OP_PROP))
+           RRETURN(MATCH_NOMATCH);
+         break;
+ 
+         case PT_PC:
+-        if ((ecode[2] != prop->chartype) == (op == OP_PROP))
++        if ((ecode[2] != chartype) == (op == OP_PROP))
+           RRETURN(MATCH_NOMATCH);
+         break;
+ 
+         case PT_SC:
+-        if ((ecode[2] != prop->script) == (op == OP_PROP))
++        if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP))
+           RRETURN(MATCH_NOMATCH);
+         break;
+ 
+         /* These are specials */
+ 
+         case PT_ALNUM:
+-        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+-             PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
++        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
++             PRIV(ucp_gentype)[chartype] == ucp_N) == (op == OP_NOTPROP))
+           RRETURN(MATCH_NOMATCH);
+         break;
+ 
+         case PT_SPACE:    /* Perl space */
+-        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++        if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
+              c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
+                == (op == OP_NOTPROP))
+           RRETURN(MATCH_NOMATCH);
+         break;
+ 
+         case PT_PXSPACE:  /* POSIX space */
+-        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++        if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
+              c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
+              c == CHAR_FF || c == CHAR_CR)
+                == (op == OP_NOTPROP))
+@@ -2561,8 +2561,8 @@ for (;;)
+         break;
+ 
+         case PT_WORD:
+-        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+-             PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
++        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
++             PRIV(ucp_gentype)[chartype] == ucp_N ||
+              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
+           RRETURN(MATCH_NOMATCH);
+         break;
+diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h
+index e5a4b6a..41c7ee3 100644
+--- a/glib/pcre/pcre_internal.h
++++ b/glib/pcre/pcre_internal.h
+@@ -2315,15 +2315,12 @@ extern const int         PRIV(ucp_typerange)[];
+ #ifdef SUPPORT_UCP
+ /* UCD access macros */
+ 
+-#define UCD_BLOCK_SIZE 128
+-#define GET_UCD(ch) (PRIV(ucd_records) + \
+-        PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \
+-        UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE])
++unsigned int _pcre_ucp_othercase(const unsigned int c);
+ 
+-#define UCD_CHARTYPE(ch)  GET_UCD(ch)->chartype
+-#define UCD_SCRIPT(ch)    GET_UCD(ch)->script
++#define UCD_CHARTYPE(ch)  (pcre_uint8)g_unichar_type((gunichar)(ch))
++#define UCD_SCRIPT(ch)    (pcre_uint8)g_unichar_get_script((gunichar)(ch))
+ #define UCD_CATEGORY(ch)  PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
+-#define UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case)
++#define UCD_OTHERCASE(ch) (_pcre_ucp_othercase(ch))
+ 
+ #endif /* SUPPORT_UCP */
+ 
+diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c
+index c8134ec..47becc7 100644
+--- a/glib/pcre/pcre_tables.c
++++ b/glib/pcre/pcre_tables.c
+@@ -563,6 +563,22 @@ const ucp_type_table PRIV(utt)[] = {
+ 
+ const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
+ 
++unsigned int 
++_pcre_ucp_othercase(const unsigned int c)
++{
++  int other_case = NOTACHAR;
++
++  if (g_unichar_islower(c))
++    other_case = g_unichar_toupper(c);
++  else if (g_unichar_isupper(c))
++    other_case = g_unichar_tolower(c);
++
++  if (other_case == c)
++    other_case = NOTACHAR;
++
++  return other_case;
++}
++
+ #endif /* SUPPORT_UTF */
+ 
+ /* End of pcre_tables.c */
+diff --git a/glib/pcre/pcre_xclass.c b/glib/pcre/pcre_xclass.c
+index dca7a39..e5a55d7 100644
+--- a/glib/pcre/pcre_xclass.c
++++ b/glib/pcre/pcre_xclass.c
+@@ -127,7 +127,7 @@ while ((t = *data++) != XCL_END)
+ #ifdef SUPPORT_UCP
+   else  /* XCL_PROP & XCL_NOTPROP */
+     {
+-    const ucd_record *prop = GET_UCD(c);
++    const pcre_uint8 chartype = UCD_CHARTYPE(c);
+ 
+     switch(*data)
+       {
+@@ -136,46 +136,46 @@ while ((t = *data++) != XCL_END)
+       break;
+ 
+       case PT_LAMP:
+-      if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
+-           prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
++      if ((chartype == ucp_Lu || chartype == ucp_Ll ||
++           chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
+       break;
+ 
+       case PT_GC:
+-      if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP))
++      if ((data[1] == PRIV(ucp_gentype)[chartype]) == (t == XCL_PROP))
+         return !negated;
+       break;
+ 
+       case PT_PC:
+-      if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
++      if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
+       break;
+ 
+       case PT_SC:
+-      if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
++      if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated;
+       break;
+ 
+       case PT_ALNUM:
+-      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+-           PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP))
++      if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
++           PRIV(ucp_gentype)[chartype] == ucp_N) == (t == XCL_PROP))
+         return !negated;
+       break;
+ 
+       case PT_SPACE:    /* Perl space */
+-      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++      if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
+            c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
+              == (t == XCL_PROP))
+         return !negated;
+       break;
+ 
+       case PT_PXSPACE:  /* POSIX space */
+-      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
++      if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
+            c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
+            c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
+         return !negated;
+       break;
+ 
+       case PT_WORD:
+-      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+-           PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
++      if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
++           PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE)
+              == (t == XCL_PROP))
+         return !negated;
+       break;
+diff --git a/glib/pcre/ucp.h b/glib/pcre/ucp.h
+index 59c3bec..53a48c9 100644
+--- a/glib/pcre/ucp.h
++++ b/glib/pcre/ucp.h
+@@ -10,6 +10,7 @@ the UCD access macros. New values that are added for new releases of Unicode
+ should always be at the end of each enum, for backwards compatibility. */
+ 
+ /* These are the general character categories. */
++#include "gunicode.h"
+ 
+ enum {
+   ucp_C,     /* Other */
+@@ -24,148 +25,148 @@ enum {
+ /* These are the particular character types. */
+ 
+ enum {
+-  ucp_Cc,    /* Control */
+-  ucp_Cf,    /* Format */
+-  ucp_Cn,    /* Unassigned */
+-  ucp_Co,    /* Private use */
+-  ucp_Cs,    /* Surrogate */
+-  ucp_Ll,    /* Lower case letter */
+-  ucp_Lm,    /* Modifier letter */
+-  ucp_Lo,    /* Other letter */
+-  ucp_Lt,    /* Title case letter */
+-  ucp_Lu,    /* Upper case letter */
+-  ucp_Mc,    /* Spacing mark */
+-  ucp_Me,    /* Enclosing mark */
+-  ucp_Mn,    /* Non-spacing mark */
+-  ucp_Nd,    /* Decimal number */
+-  ucp_Nl,    /* Letter number */
+-  ucp_No,    /* Other number */
+-  ucp_Pc,    /* Connector punctuation */
+-  ucp_Pd,    /* Dash punctuation */
+-  ucp_Pe,    /* Close punctuation */
+-  ucp_Pf,    /* Final punctuation */
+-  ucp_Pi,    /* Initial punctuation */
+-  ucp_Po,    /* Other punctuation */
+-  ucp_Ps,    /* Open punctuation */
+-  ucp_Sc,    /* Currency symbol */
+-  ucp_Sk,    /* Modifier symbol */
+-  ucp_Sm,    /* Mathematical symbol */
+-  ucp_So,    /* Other symbol */
+-  ucp_Zl,    /* Line separator */
+-  ucp_Zp,    /* Paragraph separator */
+-  ucp_Zs     /* Space separator */
++  ucp_Cc = G_UNICODE_CONTROL,                   /* Control */
++  ucp_Cf = G_UNICODE_FORMAT,                    /* Format */
++  ucp_Cn = G_UNICODE_UNASSIGNED,                /* Unassigned */
++  ucp_Co = G_UNICODE_PRIVATE_USE,               /* Private use */
++  ucp_Cs = G_UNICODE_SURROGATE,                 /* Surrogate */
++  ucp_Ll = G_UNICODE_LOWERCASE_LETTER,          /* Lower case letter */
++  ucp_Lm = G_UNICODE_MODIFIER_LETTER,           /* Modifier letter */
++  ucp_Lo = G_UNICODE_OTHER_LETTER,              /* Other letter */
++  ucp_Lt = G_UNICODE_TITLECASE_LETTER,          /* Title case letter */
++  ucp_Lu = G_UNICODE_UPPERCASE_LETTER,          /* Upper case letter */
++  ucp_Mc = G_UNICODE_SPACING_MARK,              /* Spacing mark */
++  ucp_Me = G_UNICODE_ENCLOSING_MARK,            /* Enclosing mark */
++  ucp_Mn = G_UNICODE_NON_SPACING_MARK,          /* Non-spacing mark */
++  ucp_Nd = G_UNICODE_DECIMAL_NUMBER,            /* Decimal number */
++  ucp_Nl = G_UNICODE_LETTER_NUMBER,             /* Letter number */
++  ucp_No = G_UNICODE_OTHER_NUMBER,              /* Other number */
++  ucp_Pc = G_UNICODE_CONNECT_PUNCTUATION,       /* Connector punctuation */
++  ucp_Pd = G_UNICODE_DASH_PUNCTUATION,          /* Dash punctuation */
++  ucp_Pe = G_UNICODE_CLOSE_PUNCTUATION,         /* Close punctuation */
++  ucp_Pf = G_UNICODE_FINAL_PUNCTUATION,         /* Final punctuation */
++  ucp_Pi = G_UNICODE_INITIAL_PUNCTUATION,       /* Initial punctuation */
++  ucp_Po = G_UNICODE_OTHER_PUNCTUATION,         /* Other punctuation */
++  ucp_Ps = G_UNICODE_OPEN_PUNCTUATION,          /* Open punctuation */
++  ucp_Sc = G_UNICODE_CURRENCY_SYMBOL,           /* Currency symbol */
++  ucp_Sk = G_UNICODE_MODIFIER_SYMBOL,           /* Modifier symbol */
++  ucp_Sm = G_UNICODE_MATH_SYMBOL,               /* Mathematical symbol */
++  ucp_So = G_UNICODE_OTHER_SYMBOL,              /* Other symbol */
++  ucp_Zl = G_UNICODE_LINE_SEPARATOR,            /* Line separator */
++  ucp_Zp = G_UNICODE_PARAGRAPH_SEPARATOR,       /* Paragraph separator */
++  ucp_Zs = G_UNICODE_SPACE_SEPARATOR            /* Space separator */
+ };
+ 
   /* These are the script identifications. */
   
   enum {
   /* These are the script identifications. */
   
   enum {
@@ -65,11 +691,6 @@
  -  ucp_Tifinagh,
  -  ucp_Ugaritic,
  -  ucp_Yi,
  -  ucp_Tifinagh,
  -  ucp_Ugaritic,
  -  ucp_Yi,
--  ucp_Balinese,      /* New for Unicode 5.0.0 */
--  ucp_Cuneiform,     /* New for Unicode 5.0.0 */
--  ucp_Nko,           /* New for Unicode 5.0.0 */
--  ucp_Phags_Pa,      /* New for Unicode 5.0.0 */
--  ucp_Phoenician     /* New for Unicode 5.0.0 */
  +  ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
  +  ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
  +  ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
  +  ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
  +  ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
  +  ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
@@ -131,11 +752,96 @@
  +  ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH,
  +  ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC,
  +  ucp_Yi = G_UNICODE_SCRIPT_YI,
  +  ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH,
  +  ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC,
  +  ucp_Yi = G_UNICODE_SCRIPT_YI,
-+  ucp_Balinese = G_UNICODE_SCRIPT_BALINESE,   /* New for Unicode 5.0.0 */
-+  ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM,   /* New for Unicode 5.0.0 */
-+  ucp_Nko = G_UNICODE_SCRIPT_NKO,             /* New for Unicode 5.0.0 */
-+  ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA,     /* New for Unicode 5.0.0 */
-+  ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN  /* New for Unicode 5.0.0 */
+   /* New for Unicode 5.0: */
+-  ucp_Balinese,
+-  ucp_Cuneiform,
+-  ucp_Nko,
+-  ucp_Phags_Pa,
+-  ucp_Phoenician,
++  ucp_Balinese = G_UNICODE_SCRIPT_BALINESE,
++  ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM,
++  ucp_Nko = G_UNICODE_SCRIPT_NKO,
++  ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA,
++  ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN,
+   /* New for Unicode 5.1: */
+-  ucp_Carian,
+-  ucp_Cham,
+-  ucp_Kayah_Li,
+-  ucp_Lepcha,
+-  ucp_Lycian,
+-  ucp_Lydian,
+-  ucp_Ol_Chiki,
+-  ucp_Rejang,
+-  ucp_Saurashtra,
+-  ucp_Sundanese,
+-  ucp_Vai,
++  ucp_Carian = G_UNICODE_SCRIPT_CARIAN,
++  ucp_Cham = G_UNICODE_SCRIPT_CHAM,
++  ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI,
++  ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA,
++  ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN,
++  ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN,
++  ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI,
++  ucp_Rejang = G_UNICODE_SCRIPT_REJANG,
++  ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA,
++  ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE,
++  ucp_Vai = G_UNICODE_SCRIPT_VAI,
+   /* New for Unicode 5.2: */
+-  ucp_Avestan,
+-  ucp_Bamum,
+-  ucp_Egyptian_Hieroglyphs,
+-  ucp_Imperial_Aramaic,
+-  ucp_Inscriptional_Pahlavi,
+-  ucp_Inscriptional_Parthian,
+-  ucp_Javanese,
+-  ucp_Kaithi,
+-  ucp_Lisu,
+-  ucp_Meetei_Mayek,
+-  ucp_Old_South_Arabian,
+-  ucp_Old_Turkic,
+-  ucp_Samaritan,
+-  ucp_Tai_Tham,
+-  ucp_Tai_Viet,
++  ucp_Avestan = G_UNICODE_SCRIPT_AVESTAN,
++  ucp_Bamum = G_UNICODE_SCRIPT_BAMUM,
++  ucp_Egyptian_Hieroglyphs = G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS,
++  ucp_Imperial_Aramaic = G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC,
++  ucp_Inscriptional_Pahlavi = G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI,
++  ucp_Inscriptional_Parthian = G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN,
++  ucp_Javanese = G_UNICODE_SCRIPT_JAVANESE,
++  ucp_Kaithi = G_UNICODE_SCRIPT_KAITHI,
++  ucp_Lisu = G_UNICODE_SCRIPT_LISU,
++  ucp_Meetei_Mayek = G_UNICODE_SCRIPT_MEETEI_MAYEK,
++  ucp_Old_South_Arabian = G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN,
++  ucp_Old_Turkic = G_UNICODE_SCRIPT_OLD_TURKIC,
++  ucp_Samaritan = G_UNICODE_SCRIPT_SAMARITAN,
++  ucp_Tai_Tham = G_UNICODE_SCRIPT_TAI_THAM,
++  ucp_Tai_Viet = G_UNICODE_SCRIPT_TAI_VIET,
+   /* New for Unicode 6.0.0: */
+-  ucp_Batak,
+-  ucp_Brahmi,
+-  ucp_Mandaic,
++  ucp_Batak = G_UNICODE_SCRIPT_BATAK,
++  ucp_Brahmi = G_UNICODE_SCRIPT_BRAHMI,
++  ucp_Mandaic = G_UNICODE_SCRIPT_MANDAIC,
+   /* New for Unicode 6.1.0: */
+-  ucp_Chakma,
+-  ucp_Meroitic_Cursive,
+-  ucp_Meroitic_Hieroglyphs,
+-  ucp_Miao,
+-  ucp_Sharada,
+-  ucp_Sora_Sompeng,
+-  ucp_Takri
++  ucp_Chakma = G_UNICODE_SCRIPT_CHAKMA,
++  ucp_Meroitic_Cursive = G_UNICODE_SCRIPT_MEROITIC_CURSIVE,
++  ucp_Meroitic_Hieroglyphs = G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS,
++  ucp_Miao = G_UNICODE_SCRIPT_MIAO,
++  ucp_Sharada = G_UNICODE_SCRIPT_SHARADA,
++  ucp_Sora_Sompeng = G_UNICODE_SCRIPT_SORA_SOMPENG,
++  ucp_Takri = G_UNICODE_SCRIPT_TAKRI,
   };
   
   #endif
   };
   
   #endif
+-- 
+1.7.5.1.217.g4e3aa.dirty
+
author	Christian Persch <chpe@gnome.org>
	Sun, 12 Feb 2012 20:20:33 +0000 (21:20 +0100)
committer	Matthias Clasen <mclasen@redhat.com>
	Mon, 27 Feb 2012 03:09:05 +0000 (22:09 -0500)
glib/pcre/Makefile.am		patch \| blob \| history
glib/pcre/pcre_compile.c		patch \| blob \| history
glib/pcre/pcre_dfa_exec.c		patch \| blob \| history
glib/pcre/pcre_exec.c		patch \| blob \| history
glib/pcre/pcre_internal.h		patch \| blob \| history
glib/pcre/pcre_tables.c		patch \| blob \| history
glib/pcre/pcre_xclass.c		patch \| blob \| history
glib/pcre/ucp.h		patch \| blob \| history
glib/update-pcre/ucp.patch		patch \| blob \| history